diff --git a/ironic/common/exception.py b/ironic/common/exception.py index b3dcdc673c..1f19d6e6b8 100644 --- a/ironic/common/exception.py +++ b/ironic/common/exception.py @@ -830,3 +830,7 @@ class NodeHistoryNotFound(NotFound): class IncorrectConfiguration(IronicException): _msg_fmt = _("Supplied configuration is incorrect and must be fixed. " "Error: %(error)s") + + +class NodeVerifyFailure(IronicException): + _msg_fmt = _("Failed to verify node %(node)s: %(reason)s") diff --git a/ironic/conductor/steps.py b/ironic/conductor/steps.py index 2a6636aca6..05e07ced05 100644 --- a/ironic/conductor/steps.py +++ b/ironic/conductor/steps.py @@ -50,6 +50,25 @@ DEPLOYING_INTERFACE_PRIORITY = { 'raid': 1, } +VERIFYING_INTERFACE_PRIORITY = { + # When two verify steps have the same priority, their order is determined + # by which interface is implementing the verify step. The verifying step of + # the interface with the highest value here, will be executed first in + # that case. + 'power': 12, + 'management': 11, + 'boot': 8, + 'inspect': 10, + 'deploy': 9, + 'bios': 7, + 'raid': 6, + 'vendor': 5, + 'network': 4, + 'storage': 3, + 'console': 2, + 'rescue': 1, +} + def _clean_step_key(step): """Sort by priority, then interface priority in event of tie. @@ -69,6 +88,15 @@ def _deploy_step_key(step): DEPLOYING_INTERFACE_PRIORITY[step.get('interface')]) +def _verify_step_key(step): + """Sort by priority, then interface priority in event of tie. + + :param step: verify step dict to get priority for. + """ + return (step.get('priority'), + VERIFYING_INTERFACE_PRIORITY[step.get('interface')]) + + def _sorted_steps(steps, sort_step_key): """Return a sorted list of steps. @@ -194,6 +222,37 @@ def _get_deployment_steps(task, enabled=False, sort=True): enabled=enabled, sort_step_key=sort_key) +def _get_verify_steps(task, enabled=False, sort=True): + """Get verify steps for task.node. + + :param task: A TaskManager object + :param enabled: If True, returns only enabled (priority > 0) steps. If + False, returns all verify steps. + :param sort: If True, the steps are sorted from highest priority to lowest + priority. For steps having the same priority, they are sorted from + highest interface priority to lowest. + :raises: NodeVerifyFailure if there was a problem getting the + verify steps. + :returns: A list of verify step dictionaries + """ + sort_key = _verify_step_key if sort else None + if CONF.conductor.verify_step_priority_override: + vsp_override = {} + for element in CONF.conductor.verify_step_priority_override: + vsp_override.update(element) + + verify_steps = _get_steps(task, VERIFYING_INTERFACE_PRIORITY, + 'get_verify_steps', enabled=enabled, + sort_step_key=sort_key, + prio_overrides=vsp_override) + + else: + verify_steps = _get_steps(task, VERIFYING_INTERFACE_PRIORITY, + 'get_verify_steps', enabled=enabled, + sort_step_key=sort_key) + return verify_steps + + def set_node_cleaning_steps(task, disable_ramdisk=False): """Set up the node with clean step information for cleaning. diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 59b60a5da9..fa63ba094d 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -631,6 +631,32 @@ def fail_on_error(error_callback, msg, *error_args, **error_kwargs): return wrapper +def verifying_error_handler(task, logmsg, errmsg=None, traceback=False): + + """Handle errors during verification steps + + :param task: the task + :param logmsg: message to be logged + :param errmsg: message for the user + :param traceback: Boolean; True to log a traceback + """ + errmsg = errmsg or logmsg + node = task.node + LOG.error(logmsg, exc_info=traceback) + node_history_record(node, event=errmsg, event_type=states.VERIFYING, + error=True) + node.save() + + node.refresh() + if node.provision_state in ( + states.VERIFYING): + # Clear verifying step; we leave the list of verify steps + # in node.driver_internal_info for debugging purposes. + node.verify_step = {} + + node.save() + + @task_manager.require_exclusive_lock def abort_on_conductor_take_over(task): """Set node's state when a task was aborted due to conductor take over. diff --git a/ironic/conductor/verify.py b/ironic/conductor/verify.py index b611e7e4e8..ee1b387847 100644 --- a/ironic/conductor/verify.py +++ b/ironic/conductor/verify.py @@ -18,6 +18,7 @@ from ironic.common import exception from ironic.common.i18n import _ from ironic.common import states from ironic.conductor import notification_utils as notify_utils +from ironic.conductor import steps as conductor_steps from ironic.conductor import task_manager from ironic.conductor import utils @@ -49,6 +50,37 @@ def do_node_verify(task): {'node': node.uuid, 'msg': e}) log_traceback = not isinstance(e, exception.IronicException) LOG.error(error, exc_info=log_traceback) + + verify_steps = conductor_steps._get_verify_steps(task, + enabled=True, + sort=True) + for step in verify_steps: + interface = getattr(task.driver, step.get('interface')) + LOG.info('Executing %(step)s on node %(node)s', + {'step': step, 'node': node.uuid}) + try: + result = interface.execute_verify_step(task, step) + except Exception as e: + error = ('Node %(node)s failed verify step %(step)s ' + 'with unexpected error: %(err)s' % + {'node': node.uuid, 'step': node.verify_step, + 'err': e}) + utils.verifying_error_handler( + task, error, + _("Failed to verify. Exception: %s") % e, + traceback=True) + if result is not None: + # NOTE(rloo): This is an internal/dev error; shouldn't + # happen. + error = (_('While executing verify step %(step)s on ' + 'node %(node)s, step returned unexpected ' + 'state: %(val)s') + % {'step': step, 'node': node.uuid, + 'val': result}) + utils.verifying_error_handler( + task, error, + _("Failed to verify: %s") % node.deploy_step) + if error is None: # NOTE(janders) this can eventually move to driver-specific # verify steps, will leave this for a follow-up change. @@ -67,6 +99,10 @@ def do_node_verify(task): else: task.process_event('done') + + LOG.info('Successfully verified node %(node)s', + {'node': node.uuid}) + else: utils.node_history_record(task.node, event=error, event_type=states.VERIFY, diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index 61d6b32478..a2d168fd0f 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -334,6 +334,16 @@ opts = [ 'node_history_max_entries setting as users of ' 'this setting are anticipated to need to retain ' 'history by policy.')), + cfg.MultiOpt('verify_step_priority_override', + item_type=types.Dict(), + default={}, + mutable=True, + help=_('Priority to run automated verify steps ' + 'provided in interface.step_name:priority format,' + 'e.g. management.clear_job_queue:123. The option can ' + 'be specified multiple times to define priorities ' + 'for multiple steps. If set to 0, this specific step ' + 'will not run during cleaning. ')), ] diff --git a/ironic/drivers/base.py b/ironic/drivers/base.py index 2b1700e382..bdd017b91b 100644 --- a/ironic/drivers/base.py +++ b/ironic/drivers/base.py @@ -226,8 +226,9 @@ class BaseInterface(object, metaclass=abc.ABCMeta): """ def __new__(cls, *args, **kwargs): - # Get the list of clean steps and deploy steps, when the interface is - # initialized by the conductor. We use __new__ instead of __init___ + # Get the list of clean steps, deploy steps and verify steps when the + # interface is initialized by the conductor. + # We use __new__ instead of __init___ # to avoid breaking backwards compatibility with all the drivers. # We want to return all steps, regardless of priority. @@ -238,6 +239,7 @@ class BaseInterface(object, metaclass=abc.ABCMeta): instance = super_new(cls, *args, **kwargs) instance.clean_steps = [] instance.deploy_steps = [] + instance.verify_steps = [] for n, method in inspect.getmembers(instance, inspect.ismethod): if getattr(method, '_is_clean_step', False): # Create a CleanStep to represent this method @@ -256,6 +258,13 @@ class BaseInterface(object, metaclass=abc.ABCMeta): 'argsinfo': method._deploy_step_argsinfo, 'interface': instance.interface_type} instance.deploy_steps.append(step) + if getattr(method, '_is_verify_step', False): + # Create a VerifyStep to represent this method + step = {'step': method.__name__, + 'priority': method._verify_step_priority, + 'interface': instance.interface_type} + instance.verify_steps.append(step) + if instance.clean_steps: LOG.debug('Found clean steps %(steps)s for interface ' '%(interface)s', @@ -266,6 +275,12 @@ class BaseInterface(object, metaclass=abc.ABCMeta): '%(interface)s', {'steps': instance.deploy_steps, 'interface': instance.interface_type}) + if instance.verify_steps: + LOG.debug('Found verify steps %(steps)s for interface ' + '%(interface)s', + {'steps': instance.deploy_steps, + 'interface': instance.interface_type}) + return instance def _execute_step(self, task, step): @@ -360,6 +375,35 @@ class BaseInterface(object, metaclass=abc.ABCMeta): """ return self._execute_step(task, step) + def get_verify_steps(self, task): + """Get a list of (enabled and disabled) verify steps for the interface. + + This function will return all verify steps (both enabled and disabled) + for the interface, in an unordered list. + + :param task: A TaskManager object, useful for interfaces overriding + this function + :raises NodeVerifyFailure: if there is a problem getting the steps + from the driver. For example, when a node (using an agent driver) + has just been enrolled and the agent isn't alive yet to be queried + for the available verify steps. + :returns: A list of deploy step dictionaries + """ + return self.verify_steps + + def execute_verify_step(self, task, step): + """Execute the verify step on task.node. + + A verify step must take a single positional argument: a TaskManager + object. It does not take keyword variable arguments. + + :param task: A TaskManager object + :param step: The deploy step dictionary representing the step to + execute + :returns: None if this method has completed synchronously + """ + return self._execute_step(task, step) + class DeployInterface(BaseInterface): """Interface for deploy-related actions.""" @@ -1868,3 +1912,44 @@ def deploy_step(priority, argsinfo=None): func._deploy_step_argsinfo = argsinfo return func return decorator + + +def verify_step(priority): + """Decorator for verify steps. + + Only steps with priorities greater than 0 are used. These steps are + ordered by priority from highest value to lowest value. + For steps with the same priority, they are ordered by driver + interface priority (see conductor.steps.VERIFY_INTERFACE_PRIORITY). + execute_verify_step() will be called on each step. + + Decorated verify steps must take as the only positional argument, a + TaskManager object. + + Verify steps are synchronous and should return `None` when finished, + and the conductor will continue on to the next step. While the verify step + is executing, the node will be in `states.VERIFYING` provision state. + + Examples:: + + class MyInterface(base.BaseInterface): + @base.verify_step(priority=100) + def example_verifying(self, task): + # do some verifying + + :param priority: an integer (>=0) priority; used for determining the order + in which the step is run in the verification process. + + :raises InvalidParameterValue: if any of the arguments are invalid + """ + def decorator(func): + func._is_verify_step = True + if isinstance(priority, int) and priority >= 0: + func._verify_step_priority = priority + else: + raise exception.InvalidParameterValue( + _('"priority" must be an integer value >= 0, instead of "%s"') + % priority) + + return func + return decorator diff --git a/ironic/tests/unit/conductor/test_steps.py b/ironic/tests/unit/conductor/test_steps.py index 1cba2c7c05..d96670303d 100644 --- a/ironic/tests/unit/conductor/test_steps.py +++ b/ironic/tests/unit/conductor/test_steps.py @@ -19,6 +19,7 @@ from ironic.common import exception from ironic.common import states from ironic.conductor import steps as conductor_steps from ironic.conductor import task_manager +from ironic.conductor import verify as verify_steps from ironic import objects from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import utils as db_utils @@ -1138,3 +1139,163 @@ class ValidateUserDeployStepsTestCase(db_base.DbTestCase): result = conductor_steps._get_validated_user_deploy_steps(task) self.assertEqual([], result) mock_validated.assert_not_called() + + +class NodeVerifyStepsTestCase(db_base.DbTestCase): + def setUp(self): + super(NodeVerifyStepsTestCase, self).setUp() + + self.management_reset_idrac = { + 'step': 'reset_idrac', 'priority': 0, + 'interface': 'management'} + self.management_clear_job_queue = { + 'step': 'clear_job_queue', 'priority': 10, + 'interface': 'management'} + self.verify_steps = [self.management_clear_job_queue, + self.management_reset_idrac] + self.verify_steps_only_enabled = [self.management_clear_job_queue] + + @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps', + autospec=True) + def test__get_verify_steps_priority_override_ok(self, + mock_verify_steps): + # Test verify_step_priority_override + cfg.CONF.set_override('verify_step_priority_override', + ["management.clear_job_queue:9"], + 'conductor') + + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.ENROLL, + target_provision_state=states.MANAGEABLE) + + mock_verify_steps.return_value = [self.management_clear_job_queue] + + with task_manager.acquire( + self.context, node.uuid, shared=True) as task: + steps = conductor_steps._get_verify_steps(task, enabled=True) + + expected_step_priorities = [{'interface': 'management', 'priority': 9, + 'step': 'clear_job_queue'}] + + self.assertEqual(expected_step_priorities, steps) + + @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps', + autospec=True) + def test__get_verify_steps_priority_override_fail(self, + mock_verify_steps): + # Test verify_step_priority_override for failure + cfg.CONF.set_override('verify_step_priority_override', + ["management.clear_job_queue:9"], + 'conductor') + + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.ENROLL, + target_provision_state=states.MANAGEABLE) + + mock_verify_steps.return_value = [self.management_clear_job_queue] + + with task_manager.acquire( + self.context, node.uuid, shared=True) as task: + steps = conductor_steps._get_verify_steps(task, enabled=True) + + expected_step_priorities = [{'interface': 'management', 'priority': 10, + 'step': 'clear_job_queue'}] + + self.assertNotEqual(expected_step_priorities, steps) + + @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps', + autospec=True) + def test__get_verify_steps_priority_override_off(self, + mock_verify_steps): + # Test disabling steps with verify_step_priority_override + cfg.CONF.set_override('verify_step_priority_override', + ["management.clear_job_queue:0"], + 'conductor') + + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.ENROLL, + target_provision_state=states.MANAGEABLE) + + mock_verify_steps.return_value = [self.management_clear_job_queue] + + with task_manager.acquire( + self.context, node.uuid, shared=True) as task: + steps = conductor_steps._get_verify_steps(task, enabled=True) + + expected_step_priorities = [] + + self.assertEqual(expected_step_priorities, steps) + + @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps', + autospec=True) + def test__get_verify_steps(self, mock_verify_steps): + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.ENROLL, + target_provision_state=states.MANAGEABLE) + + mock_verify_steps.return_value = [self.management_clear_job_queue, + self.management_reset_idrac] + + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + steps = conductor_steps._get_verify_steps(task, enabled=False) + + self.assertEqual(self.verify_steps, steps) + + @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps', + autospec=True) + def test__get_verify_steps_unsorted(self, mock_verify_steps): + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.ENROLL, + target_provision_state=states.MANAGEABLE) + + mock_verify_steps.return_value = [self.management_clear_job_queue, + self.management_reset_idrac] + + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + steps = conductor_steps._get_verify_steps(task, enabled=False, + sort=False) + + self.assertEqual(self.verify_steps, steps) + + @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps', + autospec=True) + def test__get_verify_steps_only_enabled(self, mock_verify_steps): + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.ENROLL, + target_provision_state=states.MANAGEABLE) + + mock_verify_steps.return_value = [self.management_clear_job_queue] + + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + steps = conductor_steps._get_verify_steps(task, enabled=True) + + self.assertEqual(self.verify_steps_only_enabled, steps) + + @mock.patch('ironic.drivers.modules.fake.FakeManagement.' + 'execute_verify_step', + autospec=True) + @mock.patch.object(conductor_steps, '_get_verify_steps', autospec=True) + def test_execute_verify_step(self, mock_steps, mock_execute): + mock_steps.return_value = self.verify_steps + + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.VERIFYING, + target_provision_state=states.MANAGEABLE, + last_error=None, + clean_step=None) + + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + verify_steps.do_node_verify(task) + node.refresh() + self.assertTrue(mock_execute.called) diff --git a/releasenotes/notes/add-verify-steps-support-2b34a74e86f89cb4.yaml b/releasenotes/notes/add-verify-steps-support-2b34a74e86f89cb4.yaml new file mode 100644 index 0000000000..8f4711db96 --- /dev/null +++ b/releasenotes/notes/add-verify-steps-support-2b34a74e86f89cb4.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + Adds support for verify steps - a mechanism for running optional, + actions pre-defined in the driver while the node is in transition from + enroll to managable state, prior to inspection.