From 3ddf003ad6b7eac9cbf2cc005971ee19a6395942 Mon Sep 17 00:00:00 2001 From: Vladyslav Drok Date: Thu, 17 Sep 2015 19:51:58 +0300 Subject: [PATCH] Add retries to ssh._get_hosts_name_for_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible for ssh._get_hosts_name_for_node to return None when the driver is not able to find a VM corresponding to given MAC addresses or when incorrect information about nodes is returned by SSH call to VM manager. In the latter case _get_hosts_name_for_node should be retried. This adds two new configs to the [ssh] group: - 'get_vm_name_attempts': number of attempts to try to get VM name corresponding to a node's MAC address - 'get_vm_name_retry_interval': time (secs) to wait between   attempts to get VM name Closes-bug: #1493748 Change-Id: I0457113d57f82cccc5cb353123fb087f6ba63aa2 --- etc/ironic/ironic.conf.sample | 15 +- ironic/drivers/modules/ssh.py | 148 +++++++++++------- ironic/tests/unit/drivers/modules/test_ssh.py | 49 +++--- 3 files changed, 131 insertions(+), 81 deletions(-) diff --git a/etc/ironic/ironic.conf.sample b/etc/ironic/ironic.conf.sample index 5f67e28737..d1fe8c8bd4 100644 --- a/etc/ironic/ironic.conf.sample +++ b/etc/ironic/ironic.conf.sample @@ -987,8 +987,8 @@ # Maximum retries for iBoot operations (integer value) #max_retry=3 -# Time between retry attempts for iBoot operations (integer -# value) +# Time (in seconds) between retry attempts for iBoot +# operations (integer value) #retry_interval=1 @@ -1778,9 +1778,18 @@ # Options defined in ironic.drivers.modules.ssh # -# libvirt URI (string value) +# libvirt URI. (string value) #libvirt_uri=qemu:///system +# Number of attempts to try to get VM name used by the host +# that corresponds to a node's MAC address. (integer value) +#get_vm_name_attempts=3 + +# Number of seconds to wait between attempts to get VM name +# used by the host that corresponds to a node's MAC address. +# (integer value) +#get_vm_name_retry_interval=3 + [swift] diff --git a/ironic/drivers/modules/ssh.py b/ironic/drivers/modules/ssh.py index 39770f9206..7277c8618b 100644 --- a/ironic/drivers/modules/ssh.py +++ b/ironic/drivers/modules/ssh.py @@ -33,6 +33,7 @@ from oslo_concurrency import processutils from oslo_config import cfg from oslo_log import log as logging from oslo_utils import excutils +import retrying from ironic.common import boot_devices from ironic.common import exception @@ -48,7 +49,16 @@ from ironic.drivers import utils as driver_utils libvirt_opts = [ cfg.StrOpt('libvirt_uri', default='qemu:///system', - help=_('libvirt URI')) + help=_('libvirt URI.')), + cfg.IntOpt('get_vm_name_attempts', + default=3, + help=_("Number of attempts to try to get VM name used by the " + "host that corresponds to a node's MAC address.")), + cfg.IntOpt('get_vm_name_retry_interval', + default=3, + help=_("Number of seconds to wait between attempts to get " + "VM name used by the host that corresponds to a " + "node's MAC address.")), ] CONF = cfg.CONF @@ -236,6 +246,8 @@ def _get_boot_device(ssh_obj, driver_info): :raises: SSHCommandFailed on an error from ssh. :raises: NotImplementedError if the virt_type does not support getting the boot device. + :raises: NodeNotFound if could not find a VM corresponding to any + of the provided MACs. """ cmd_to_exec = driver_info['cmd_set'].get('get_boot_device') @@ -261,6 +273,8 @@ def _set_boot_device(ssh_obj, driver_info, device): :raises: SSHCommandFailed on an error from ssh. :raises: NotImplementedError if the virt_type does not support setting the boot device. + :raises: NodeNotFound if could not find a VM corresponding to any + of the provided MACs. """ cmd_to_exec = driver_info['cmd_set'].get('set_boot_device') @@ -363,39 +377,33 @@ def _get_power_status(ssh_obj, driver_info): :param ssh_obj: paramiko.SSHClient, an active ssh connection. :param driver_info: information for accessing the node. :returns: one of ironic.common.states POWER_OFF, POWER_ON. - :raises: NodeNotFound + :raises: NodeNotFound if could not find a VM corresponding to any + of the provided MACs. """ power_state = None node_name = _get_hosts_name_for_node(ssh_obj, driver_info) - if node_name: - # Get a list of vms running on the host. If the command supports - # it, explicitly specify the desired node." - cmd_to_exec = "%s %s" % (driver_info['cmd_set']['base_cmd'], - driver_info['cmd_set']['list_running']) - cmd_to_exec = cmd_to_exec.replace('{_NodeName_}', node_name) - running_list = _ssh_execute(ssh_obj, cmd_to_exec) + # Get a list of vms running on the host. If the command supports + # it, explicitly specify the desired node." + cmd_to_exec = "%s %s" % (driver_info['cmd_set']['base_cmd'], + driver_info['cmd_set']['list_running']) + cmd_to_exec = cmd_to_exec.replace('{_NodeName_}', node_name) + running_list = _ssh_execute(ssh_obj, cmd_to_exec) - # Command should return a list of running vms. If the current node is - # not listed then we can assume it is not powered on. - quoted_node_name = '"%s"' % node_name - for node in running_list: - if not node: - continue - # 'node' here is a formatted output from the virt cli's. The - # node name is either an exact match or quoted (optionally with - # other information, e.g. vbox returns '"NodeName" {}') - if (quoted_node_name in node) or (node_name == node): - power_state = states.POWER_ON - break - if not power_state: - power_state = states.POWER_OFF - else: - err_msg = _LE('Node "%(host)s" with MAC address %(mac)s not found.') - LOG.error(err_msg, {'host': driver_info['host'], - 'mac': driver_info['macs']}) - - raise exception.NodeNotFound(node=driver_info['host']) + # Command should return a list of running vms. If the current node is + # not listed then we can assume it is not powered on. + quoted_node_name = '"%s"' % node_name + for node in running_list: + if not node: + continue + # 'node' here is a formatted output from the virt cli's. The + # node name is either an exact match or quoted (optionally with + # other information, e.g. vbox returns '"NodeName" {}') + if (quoted_node_name in node) or (node_name == node): + power_state = states.POWER_ON + break + if not power_state: + power_state = states.POWER_OFF return power_state @@ -415,41 +423,56 @@ def _get_hosts_name_for_node(ssh_obj, driver_info): :param ssh_obj: paramiko.SSHClient, an active ssh connection. :param driver_info: information for accessing the node. - :returns: the name or None if not found. + :returns: the name of the node. + :raises: NodeNotFound if could not find a VM corresponding to any of + the provided MACs """ - matched_name = None - cmd_to_exec = "%s %s" % (driver_info['cmd_set']['base_cmd'], - driver_info['cmd_set']['list_all']) - full_node_list = _ssh_execute(ssh_obj, cmd_to_exec) - LOG.debug("Retrieved Node List: %s" % repr(full_node_list)) - # for each node check Mac Addresses - for node in full_node_list: - if not node: - continue - LOG.debug("Checking Node: %s's Mac address." % node) + + @retrying.retry( + retry_on_result=lambda v: v is None, + retry_on_exception=lambda _: False, # Do not retry on SSHCommandFailed + stop_max_attempt_number=CONF.ssh.get_vm_name_attempts, + wait_fixed=CONF.ssh.get_vm_name_retry_interval * 1000) + def _with_retries(): + matched_name = None cmd_to_exec = "%s %s" % (driver_info['cmd_set']['base_cmd'], - driver_info['cmd_set']['get_node_macs']) - cmd_to_exec = cmd_to_exec.replace('{_NodeName_}', node) - hosts_node_mac_list = _ssh_execute(ssh_obj, cmd_to_exec) - - for host_mac in hosts_node_mac_list: - if not host_mac: + driver_info['cmd_set']['list_all']) + full_node_list = _ssh_execute(ssh_obj, cmd_to_exec) + LOG.debug("Retrieved Node List: %s" % repr(full_node_list)) + # for each node check Mac Addresses + for node in full_node_list: + if not node: continue - for node_mac in driver_info['macs']: - if not node_mac: - continue - if _normalize_mac(host_mac) in _normalize_mac(node_mac): - LOG.debug("Found Mac address: %s" % node_mac) - matched_name = node - break + LOG.debug("Checking Node: %s's Mac address." % node) + cmd_to_exec = "%s %s" % (driver_info['cmd_set']['base_cmd'], + driver_info['cmd_set']['get_node_macs']) + cmd_to_exec = cmd_to_exec.replace('{_NodeName_}', node) + hosts_node_mac_list = _ssh_execute(ssh_obj, cmd_to_exec) + for host_mac in hosts_node_mac_list: + if not host_mac: + continue + for node_mac in driver_info['macs']: + if _normalize_mac(host_mac) in _normalize_mac(node_mac): + LOG.debug("Found Mac address: %s" % node_mac) + matched_name = node + break + + if matched_name: + break if matched_name: break - if matched_name: - break - return matched_name + return matched_name + + try: + return _with_retries() + except retrying.RetryError: + raise exception.NodeNotFound( + _("SSH driver was not able to find a VM with any of the " + "specified MACs: %(macs)s for node %(node)s.") % + {'macs': driver_info['macs'], 'node': driver_info['uuid']}) def _power_on(ssh_obj, driver_info): @@ -549,7 +572,8 @@ class SSHPower(base.PowerInterface): :raises: InvalidParameterValue if any connection parameters are incorrect. :raises: MissingParameterValue when a required parameter is missing - :raises: NodeNotFound. + :raises: NodeNotFound if could not find a VM corresponding to any + of the provided MACs. :raises: SSHCommandFailed on an error from ssh. :raises: SSHConnectFailed if ssh failed to connect to the node. """ @@ -570,7 +594,8 @@ class SSHPower(base.PowerInterface): :raises: InvalidParameterValue if any connection parameters are incorrect, or if the desired power state is invalid. :raises: MissingParameterValue when a required parameter is missing - :raises: NodeNotFound. + :raises: NodeNotFound if could not find a VM corresponding to any + of the provided MACs. :raises: PowerStateFailure if it failed to set power state to pstate. :raises: SSHCommandFailed on an error from ssh. :raises: SSHConnectFailed if ssh failed to connect to the node. @@ -601,7 +626,8 @@ class SSHPower(base.PowerInterface): :raises: InvalidParameterValue if any connection parameters are incorrect. :raises: MissingParameterValue when a required parameter is missing - :raises: NodeNotFound. + :raises: NodeNotFound if could not find a VM corresponding to any + of the provided MACs. :raises: PowerStateFailure if it failed to set power state to POWER_ON. :raises: SSHCommandFailed on an error from ssh. :raises: SSHConnectFailed if ssh failed to connect to the node. @@ -664,6 +690,8 @@ class SSHManagement(base.ManagementInterface): :raises: SSHCommandFailed on an error from ssh. :raises: NotImplementedError if the virt_type does not support setting the boot device. + :raises: NodeNotFound if could not find a VM corresponding to any + of the provided MACs. """ node = task.node @@ -697,6 +725,8 @@ class SSHManagement(base.ManagementInterface): :raises: MissingParameterValue if a required parameter is missing :raises: SSHConnectFailed if ssh failed to connect to the node. :raises: SSHCommandFailed on an error from ssh. + :raises: NodeNotFound if could not find a VM corresponding to any + of the provided MACs. :returns: a dictionary containing: :boot_device: the boot device, one of diff --git a/ironic/tests/unit/drivers/modules/test_ssh.py b/ironic/tests/unit/drivers/modules/test_ssh.py index a7c25667c2..4cb501ae83 100644 --- a/ironic/tests/unit/drivers/modules/test_ssh.py +++ b/ironic/tests/unit/drivers/modules/test_ssh.py @@ -287,22 +287,6 @@ class SSHPrivateMethodsTestCase(db_base.DbTestCase): exec_ssh_mock.assert_called_once_with(self.sshclient, ssh_cmd) get_hosts_name_mock.assert_called_once_with(self.sshclient, info) - @mock.patch.object(processutils, 'ssh_execute', autospec=True) - @mock.patch.object(ssh, '_get_hosts_name_for_node', autospec=True) - def test__get_power_status_error(self, get_hosts_name_mock, exec_ssh_mock): - - info = ssh._parse_driver_info(self.node) - - info['macs'] = ["11:11:11:11:11:11", "52:54:00:cf:2d:31"] - get_hosts_name_mock.return_value = None - self.assertRaises(exception.NodeNotFound, - ssh._get_power_status, - self.sshclient, - info) - - get_hosts_name_mock.assert_called_once_with(self.sshclient, info) - self.assertFalse(exec_ssh_mock.called) - @mock.patch.object(processutils, 'ssh_execute', autospec=True) def test__get_power_status_exception(self, exec_ssh_mock): info = ssh._parse_driver_info(self.node) @@ -352,10 +336,12 @@ class SSHPrivateMethodsTestCase(db_base.DbTestCase): @mock.patch.object(processutils, 'ssh_execute', autospec=True) def test__get_hosts_name_for_node_no_match(self, exec_ssh_mock): + self.config(group='ssh', get_vm_name_attempts=2) + self.config(group='ssh', get_vm_name_retry_interval=0) info = ssh._parse_driver_info(self.node) info['macs'] = ["11:11:11:11:11:11", "22:22:22:22:22:22"] exec_ssh_mock.side_effect = iter([('NodeName', ''), - ('52:54:00:cf:2d:31', '')]) + ('52:54:00:cf:2d:31', '')] * 2) ssh_cmd = "%s %s" % (info['cmd_set']['base_cmd'], info['cmd_set']['list_all']) @@ -365,11 +351,36 @@ class SSHPrivateMethodsTestCase(db_base.DbTestCase): cmd_to_exec = cmd_to_exec.replace('{_NodeName_}', 'NodeName') expected = [mock.call(self.sshclient, ssh_cmd), - mock.call(self.sshclient, cmd_to_exec)] + mock.call(self.sshclient, cmd_to_exec)] * 2 + + self.assertRaises(exception.NodeNotFound, + ssh._get_hosts_name_for_node, self.sshclient, info) + self.assertEqual(expected, exec_ssh_mock.call_args_list) + + @mock.patch.object(processutils, 'ssh_execute', autospec=True) + def test__get_hosts_name_for_node_match_after_retry(self, exec_ssh_mock): + self.config(group='ssh', get_vm_name_attempts=2) + self.config(group='ssh', get_vm_name_retry_interval=0) + info = ssh._parse_driver_info(self.node) + info['macs'] = ["11:11:11:11:11:11", "22:22:22:22:22:22"] + exec_ssh_mock.side_effect = iter([('NodeName', ''), + ('', ''), + ('NodeName', ''), + ('11:11:11:11:11:11', '')]) + + ssh_cmd = "%s %s" % (info['cmd_set']['base_cmd'], + info['cmd_set']['list_all']) + + cmd_to_exec = "%s %s" % (info['cmd_set']['base_cmd'], + info['cmd_set']['get_node_macs']) + + cmd_to_exec = cmd_to_exec.replace('{_NodeName_}', 'NodeName') + expected = [mock.call(self.sshclient, ssh_cmd), + mock.call(self.sshclient, cmd_to_exec)] * 2 found_name = ssh._get_hosts_name_for_node(self.sshclient, info) - self.assertIsNone(found_name) + self.assertEqual('NodeName', found_name) self.assertEqual(expected, exec_ssh_mock.call_args_list) @mock.patch.object(processutils, 'ssh_execute', autospec=True)