From 4f08938043a3d708478e9e293e5985c48a7f5e06 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Thu, 4 Nov 2021 12:38:51 +0100 Subject: [PATCH] Avoid handling a deploy failure twice In some cases we handle the same exception twice in a row: in agent_base and in deployments.do_next_deploy_step. This change avoids it. Also make deploy step error messages more uniform across the board. Change-Id: Ic84c04118b1a85b10a761fc58796827583a5b086 --- ironic/conductor/deployments.py | 43 +++++++++++-------- ironic/drivers/modules/agent_base.py | 9 ++++ .../tests/unit/conductor/test_deployments.py | 33 +++++++++----- 3 files changed, 57 insertions(+), 28 deletions(-) diff --git a/ironic/conductor/deployments.py b/ironic/conductor/deployments.py index daff728266..6799f059a1 100644 --- a/ironic/conductor/deployments.py +++ b/ironic/conductor/deployments.py @@ -265,6 +265,16 @@ def do_next_deploy_step(task, step_index): {'step': step, 'node': node.uuid}) try: result = interface.execute_deploy_step(task, step) + except exception.AgentInProgress as e: + LOG.info('Conductor attempted to process deploy step for ' + 'node %(node)s. Agent indicated it is presently ' + 'executing a command. Error: %(error)s', + {'node': task.node.uuid, + 'error': e}) + driver_internal_info['skip_current_deploy_step'] = False + node.driver_internal_info = driver_internal_info + task.process_event('wait') + return except exception.IronicException as e: if isinstance(e, exception.AgentConnectionFailed): if task.node.driver_internal_info.get('deployment_reboot'): @@ -276,23 +286,17 @@ def do_next_deploy_step(task, step_index): node.driver_internal_info = driver_internal_info task.process_event('wait') return - if isinstance(e, exception.AgentInProgress): - LOG.info('Conductor attempted to process deploy step for ' - 'node %(node)s. Agent indicated it is presently ' - 'executing a command. Error: %(error)s', - {'node': task.node.uuid, - 'error': e}) - driver_internal_info['skip_current_deploy_step'] = False - node.driver_internal_info = driver_internal_info - task.process_event('wait') - return - log_msg = ('Node %(node)s failed deploy step %(step)s. Error: ' - '%(err)s' % - {'node': node.uuid, 'step': node.deploy_step, 'err': e}) - utils.deploying_error_handler( - task, log_msg, - _("Deploy step %(step)s failed: %(err)s.") - % {'step': conductor_steps.step_id(step), 'err': e}) + + # Avoid double handling of failures. For example, set_failed_state + # from deploy_utils already calls deploying_error_handler. + if task.node.provision_state != states.DEPLOYFAIL: + log_msg = ('Node %(node)s failed deploy step %(step)s. Error: ' + '%(err)s' % {'node': node.uuid, + 'step': node.deploy_step, 'err': e}) + utils.deploying_error_handler( + task, log_msg, + _("Deploy step %(step)s failed: %(err)s.") + % {'step': conductor_steps.step_id(step), 'err': e}) return except Exception as e: log_msg = ('Node %(node)s failed deploy step %(step)s with ' @@ -300,7 +304,10 @@ def do_next_deploy_step(task, step_index): {'node': node.uuid, 'step': node.deploy_step, 'err': e}) utils.deploying_error_handler( task, log_msg, - _("Failed to deploy. Exception: %s") % e, traceback=True) + _("Deploy step %(step)s failed with %(exc)s: %(err)s.") + % {'step': conductor_steps.step_id(step), 'err': e, + 'exc': e.__class__.__name__}, + traceback=True) return if task.node.provision_state == states.DEPLOYFAIL: diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index 43b2ba8539..51d96ec4d4 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -292,6 +292,15 @@ def log_and_raise_deployment_error(task, msg, collect_logs=True, exc=None): """ log_traceback = (exc is not None and not isinstance(exc, exception.IronicException)) + + # Replicate the logic in do_next_deploy_step to prepend the current step + step_id = (conductor_steps.step_id(task.node.deploy_step) + if task.node.deploy_step else None) + if step_id and step_id not in msg: + msg = _("Deploy step %(step)s failed: %(err)s") % { + 'step': step_id, 'err': msg + } + LOG.error(msg, exc_info=log_traceback) deploy_utils.set_failed_state(task, msg, collect_logs=collect_logs) raise exception.InstanceDeployFailure(msg) diff --git a/ironic/tests/unit/conductor/test_deployments.py b/ironic/tests/unit/conductor/test_deployments.py index 0207359846..65bb1eca13 100644 --- a/ironic/tests/unit/conductor/test_deployments.py +++ b/ironic/tests/unit/conductor/test_deployments.py @@ -111,9 +111,9 @@ class DoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.assertEqual(states.ACTIVE, node.target_provision_state) self.assertIsNotNone(node.last_error) if unexpected: - self.assertIn('Exception', node.last_error) + self.assertIn(exc.__class__.__name__, node.last_error) else: - self.assertNotIn('Exception', node.last_error) + self.assertNotIn(exc.__class__.__name__, node.last_error) mock_deploy.assert_called_once_with(mock.ANY, task) @@ -636,20 +636,23 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin, @mock.patch.object(conductor_utils, 'LOG', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step', autospec=True) - def _do_next_deploy_step_execute_fail(self, exc, traceback, + def _do_next_deploy_step_execute_fail(self, exc, traceback, handled, mock_execute, mock_log): # When a deploy step fails, go to DEPLOYFAIL driver_internal_info = {'deploy_step_index': None, 'deploy_steps': self.deploy_steps} self._start_service() + node = obj_utils.create_test_node( self.context, driver='fake-hardware', driver_internal_info=driver_internal_info, + provision_state=states.DEPLOYFAIL if handled else states.DEPLOYING, + target_provision_state=states.ACTIVE, + last_error='existing error' if handled else None, deploy_step={}) mock_execute.side_effect = exc task = task_manager.TaskManager(self.context, node.uuid) - task.process_event('deploy') deployments.do_next_deploy_step(task, 0) @@ -657,20 +660,30 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin, node.refresh() self.assertEqual(states.DEPLOYFAIL, node.provision_state) self.assertEqual(states.ACTIVE, node.target_provision_state) - self.assertEqual({}, node.deploy_step) - self.assertNotIn('deploy_step_index', node.driver_internal_info) - self.assertIsNotNone(node.last_error) + if handled: + self.assertEqual('existing error', node.last_error) + else: + self.assertEqual({}, node.deploy_step) + self.assertNotIn('deploy_step_index', node.driver_internal_info) + self.assertIsNotNone(node.last_error) + self.assertIn(f"Deploy step deploy.{self.deploy_steps[0]['step']}", + node.last_error) + mock_log.error.assert_called_once_with(mock.ANY, + exc_info=traceback) self.assertFalse(node.maintenance) mock_execute.assert_called_once_with(mock.ANY, mock.ANY, self.deploy_steps[0]) - mock_log.error.assert_called_once_with(mock.ANY, exc_info=traceback) def test_do_next_deploy_step_execute_ironic_exception(self): self._do_next_deploy_step_execute_fail( - exception.IronicException('foo'), False) + exception.IronicException('foo'), False, False) def test_do_next_deploy_step_execute_exception(self): - self._do_next_deploy_step_execute_fail(Exception('foo'), True) + self._do_next_deploy_step_execute_fail(Exception('foo'), True, False) + + def test_do_next_deploy_step_execute_handled_exception(self): + self._do_next_deploy_step_execute_fail( + exception.IronicException('foo'), False, True) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step', autospec=True)