diff --git a/etc/ironic/ironic.conf.sample b/etc/ironic/ironic.conf.sample index 8e2e3eaf00..cf2fbf687e 100644 --- a/etc/ironic/ironic.conf.sample +++ b/etc/ironic/ironic.conf.sample @@ -413,6 +413,33 @@ # (string value) #agent_api_version = v1 +# Whether Ironic should collect the deployment logs on +# deployment failure (on_failure), always or never. (string +# value) +# Allowed values: always, on_failure, never +#deploy_logs_collect = on_failure + +# The name of the storage backend where the logs will be +# stored. (string value) +# Allowed values: local, swift +#deploy_logs_storage_backend = local + +# The path to the directory where the logs should be stored, +# used when the deploy_logs_storage_backend is configured to +# "local". (string value) +#deploy_logs_local_path = /var/log/ironic/deploy + +# The name of the Swift container to store the logs, used when +# the deploy_logs_storage_backend is configured to "swift". +# (string value) +#deploy_logs_swift_container = ironic_deploy_logs_container + +# Number of days before a log object is marked as expired in +# Swift. If None, the logs will be kept forever or until +# manually deleted. Used when the deploy_logs_storage_backend +# is configured to "swift". (integer value) +#deploy_logs_swift_days_to_expire = 30 + [amt] @@ -1934,10 +1961,6 @@ # value) #max_polling_attempts = 12 -# Period (in seconds) for periodic tasks to be executed. -# (integer value) -#periodic_check_interval = 300 - # Whether to enable the periodic tasks for OneView driver be # aware when OneView hardware resources are taken and released # by Ironic or OneView users and proactively manage nodes in @@ -1945,6 +1968,10 @@ # hardware resources allocation in OneView. (boolean value) #enable_periodic_tasks = true +# Period (in seconds) for periodic tasks to be executed when +# enable_periodic_tasks=True. (integer value) +#periodic_check_interval = 300 + [oslo_concurrency] diff --git a/ironic/conf/agent.py b/ironic/conf/agent.py index 899e8afb8a..81ced891e8 100644 --- a/ironic/conf/agent.py +++ b/ironic/conf/agent.py @@ -56,7 +56,34 @@ opts = [ cfg.StrOpt('agent_api_version', default='v1', help=_('API version to use for communicating with the ramdisk ' - 'agent.')) + 'agent.')), + cfg.StrOpt('deploy_logs_collect', + choices=['always', 'on_failure', 'never'], + default='on_failure', + help=_('Whether Ironic should collect the deployment logs on ' + 'deployment failure (on_failure), always or never.')), + cfg.StrOpt('deploy_logs_storage_backend', + choices=['local', 'swift'], + default='local', + help=_('The name of the storage backend where the logs ' + 'will be stored.')), + cfg.StrOpt('deploy_logs_local_path', + default='/var/log/ironic/deploy', + help=_('The path to the directory where the logs should be ' + 'stored, used when the deploy_logs_storage_backend ' + 'is configured to "local".')), + cfg.StrOpt('deploy_logs_swift_container', + default='ironic_deploy_logs_container', + help=_('The name of the Swift container to store the logs, ' + 'used when the deploy_logs_storage_backend is ' + 'configured to "swift".')), + cfg.IntOpt('deploy_logs_swift_days_to_expire', + default=30, + help=_('Number of days before a log object is marked as ' + 'expired in Swift. If None, the logs will be kept ' + 'forever or until manually deleted. Used when the ' + 'deploy_logs_storage_backend is configured to ' + '"swift".')), ] diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index d7929596cf..f09228afa5 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -42,6 +42,7 @@ from ironic.conf import CONF from ironic.drivers import base from ironic.drivers.modules import agent_client from ironic.drivers.modules import deploy_utils +from ironic.drivers import utils as driver_utils from ironic import objects LOG = log.getLogger(__name__) @@ -534,6 +535,10 @@ class AgentDeployMixin(object): return task.driver.power.get_power_state(task) node = task.node + + if CONF.agent.deploy_logs_collect == 'always': + driver_utils.collect_ramdisk_logs(node) + # Whether ironic should power off the node via out-of-band or # in-band methods oob_power_off = strutils.bool_from_string( diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index 06ffe6569f..6f3b4fba7f 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -229,3 +229,10 @@ class AgentClient(object): method='standby.sync', params={}, wait=True) + + def collect_system_logs(self, node): + """Collect and package diagnostic and support data from the ramdisk.""" + return self._command(node=node, + method='log.collect_system_logs', + params={}, + wait=True) diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index 61da5b12af..9b5dc0c716 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -474,6 +474,10 @@ def set_failed_state(task, msg): :param msg: the message to set in last_error of the node. """ node = task.node + + if CONF.agent.deploy_logs_collect in ('on_failure', 'always'): + driver_utils.collect_ramdisk_logs(node) + try: task.process_event('fail') except exception.InvalidState: diff --git a/ironic/drivers/utils.py b/ironic/drivers/utils.py index c9a447b277..60fe13fc15 100644 --- a/ironic/drivers/utils.py +++ b/ironic/drivers/utils.py @@ -12,18 +12,29 @@ # License for the specific language governing permissions and limitations # under the License. +import base64 +import os +import tempfile + +from oslo_config import cfg from oslo_log import log as logging +from oslo_utils import timeutils import six from ironic.common import exception from ironic.common.i18n import _ +from ironic.common.i18n import _LE from ironic.common.i18n import _LW +from ironic.common import swift from ironic.conductor import utils from ironic.drivers import base +from ironic.drivers.modules import agent_client LOG = logging.getLogger(__name__) +CONF = cfg.CONF + class MixinVendorInterface(base.VendorInterface): """Wrapper around multiple VendorInterfaces.""" @@ -251,3 +262,101 @@ def normalize_mac(mac): :return: Normalized MAC address string. """ return mac.replace('-', '').replace(':', '').lower() + + +def get_ramdisk_logs_file_name(node): + """Construct the log file name. + + :param node: A node object. + :returns: The log file name. + """ + timestamp = timeutils.utcnow().strftime('%Y-%m-%d-%H:%M:%S') + file_name_fields = [node.uuid] + if node.instance_uuid: + file_name_fields.append(node.instance_uuid) + + file_name_fields.append(timestamp) + return '_'.join(file_name_fields) + '.tar.gz' + + +def store_ramdisk_logs(node, logs): + """Store the ramdisk logs. + + This method stores the ramdisk logs according to the configured + storage backend. + + :param node: A node object. + :param logs: A gzipped and base64 encoded string containing the + logs archive. + :raises: OSError if the directory to save the logs cannot be created. + :raises: IOError when the logs can't be saved to the local file system. + :raises: SwiftOperationError, if any operation with Swift fails. + + """ + logs_file_name = get_ramdisk_logs_file_name(node) + data = base64.b64decode(logs) + + if CONF.agent.deploy_logs_storage_backend == 'local': + if not os.path.exists(CONF.agent.deploy_logs_local_path): + os.makedirs(CONF.agent.deploy_logs_local_path) + + log_path = os.path.join(CONF.agent.deploy_logs_local_path, + logs_file_name) + with open(log_path, 'wb') as f: + f.write(data) + + elif CONF.agent.deploy_logs_storage_backend == 'swift': + with tempfile.NamedTemporaryFile(dir=CONF.tempdir) as f: + f.write(data) + f.flush() + + # convert days to seconds + timeout = CONF.agent.deploy_logs_swift_days_to_expire * 86400 + object_headers = {'X-Delete-After': timeout} + swift_api = swift.SwiftAPI() + swift_api.create_object( + CONF.agent.deploy_logs_swift_container, logs_file_name, + f.name, object_headers=object_headers) + + +def collect_ramdisk_logs(node): + """Collect and store the system logs from the IPA ramdisk. + + Collect and store the system logs from the IPA ramdisk. This method + makes a call to the IPA ramdisk to collect the logs and store it + according to the configured storage backend. + + :param node: A node object. + + """ + client = agent_client.AgentClient() + try: + result = client.collect_system_logs(node) + except exception.IronicException as e: + LOG.error(_LE('Failed to invoke collect_system_logs agent command ' + 'for node %(node)s. Error: %(error)s'), + {'node': node.uuid, 'error': e}) + return + + error = result.get('faultstring') + if error is not None: + LOG.error(_LE('Failed to collect logs from the node %(node)s ' + 'deployment. Error: %(error)s'), + {'node': node.uuid, 'error': error}) + return + + try: + store_ramdisk_logs(node, result['command_result']['system_logs']) + except exception.SwiftOperationError as e: + LOG.error(_LE('Failed to store the logs from the node %(node)s ' + 'deployment in Swift. Error: %(error)s'), + {'node': node.uuid, 'error': e}) + except EnvironmentError as e: + LOG.exception(_LE('Failed to store the logs from the node %(node)s ' + 'deployment due a file-system related error. ' + 'Error: %(error)s'), + {'node': node.uuid, 'error': e}) + except Exception as e: + LOG.exception(_LE('Unknown error when storing logs from the node ' + '%(node)s deployment. Error: %(error)s'), + {'node': node.uuid, 'error': e}) diff --git a/ironic/tests/unit/drivers/modules/oneview/test_vendor.py b/ironic/tests/unit/drivers/modules/oneview/test_vendor.py index 8c2da93535..ace029e21c 100644 --- a/ironic/tests/unit/drivers/modules/oneview/test_vendor.py +++ b/ironic/tests/unit/drivers/modules/oneview/test_vendor.py @@ -28,6 +28,7 @@ from ironic.drivers.modules import agent_client from ironic.drivers.modules.oneview import power from ironic.drivers.modules.oneview import vendor from ironic.drivers.modules import pxe +from ironic.drivers import utils as driver_utils from ironic.tests.unit.conductor import mgr_utils from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import utils as db_utils @@ -152,6 +153,7 @@ class TestBaseAgentVendor(db_base.DbTestCase): self.assertEqual(states.ACTIVE, task.node.provision_state) self.assertEqual(states.NOSTATE, task.node.target_provision_state) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(time, 'sleep', lambda seconds: None) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(power.OneViewPower, 'get_power_state', @@ -160,7 +162,7 @@ class TestBaseAgentVendor(db_base.DbTestCase): spec=types.FunctionType) def test_reboot_and_finish_deploy_power_action_fails( self, power_off_mock, get_power_state_mock, - node_power_action_mock): + node_power_action_mock, collect_ramdisk_logs_mock): self.node.provision_state = states.DEPLOYING self.node.target_provision_state = states.ACTIVE self.node.save() @@ -179,6 +181,7 @@ class TestBaseAgentVendor(db_base.DbTestCase): mock.call(task, states.POWER_OFF)]) self.assertEqual(states.DEPLOYFAIL, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) + collect_ramdisk_logs_mock.assert_called_once_with(task.node) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(power.OneViewPower, 'get_power_state', diff --git a/ironic/tests/unit/drivers/modules/test_agent.py b/ironic/tests/unit/drivers/modules/test_agent.py index 1ad62d52bd..8b639f3b80 100644 --- a/ironic/tests/unit/drivers/modules/test_agent.py +++ b/ironic/tests/unit/drivers/modules/test_agent.py @@ -31,6 +31,7 @@ from ironic.drivers.modules import agent_client from ironic.drivers.modules import deploy_utils from ironic.drivers.modules import fake from ironic.drivers.modules import pxe +from ironic.drivers import utils as driver_utils from ironic.tests.unit.conductor import mgr_utils from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import utils as db_utils @@ -900,6 +901,7 @@ class TestAgentVendor(db_base.DbTestCase): self.assertEqual(states.NOSTATE, task.node.target_provision_state) self.assertFalse(uuid_mock.called) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(agent.AgentVendorInterface, '_get_uuid_from_result', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @@ -912,12 +914,10 @@ class TestAgentVendor(db_base.DbTestCase): @mock.patch('ironic.drivers.modules.agent.AgentVendorInterface' '.check_deploy_success', autospec=True) @mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True) - def test_reboot_to_instance_boot_error(self, clean_pxe_mock, - check_deploy_mock, - prepare_mock, power_off_mock, - get_power_state_mock, - node_power_action_mock, - uuid_mock): + def test_reboot_to_instance_boot_error( + self, clean_pxe_mock, check_deploy_mock, prepare_mock, + power_off_mock, get_power_state_mock, node_power_action_mock, + uuid_mock, collect_ramdisk_logs_mock): check_deploy_mock.return_value = "Error" uuid_mock.return_value = None self.node.provision_state = states.DEPLOYWAIT @@ -936,6 +936,7 @@ class TestAgentVendor(db_base.DbTestCase): check_deploy_mock.assert_called_once_with(mock.ANY, task.node) self.assertEqual(states.DEPLOYFAIL, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) + collect_ramdisk_logs_mock.assert_called_once_with(task.node) @mock.patch.object(agent_base_vendor.BaseAgentVendor, 'configure_local_boot', autospec=True) diff --git a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py index 7cbc02956b..2bb5efc3b0 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py @@ -32,6 +32,7 @@ from ironic.drivers.modules import agent_client from ironic.drivers.modules import deploy_utils from ironic.drivers.modules import fake from ironic.drivers.modules import pxe +from ironic.drivers import utils as driver_utils from ironic import objects from ironic.tests.unit.conductor import mgr_utils from ironic.tests.unit.db import base as db_base @@ -582,15 +583,17 @@ class TestHeartbeat(AgentDeployMixinBaseTest): mock_touch.assert_called_once_with(mock.ANY) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(time, 'sleep', lambda seconds: None) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(fake.FakePower, 'get_power_state', spec=types.FunctionType) @mock.patch.object(agent_client.AgentClient, 'power_off', spec=types.FunctionType) - def test_reboot_and_finish_deploy(self, power_off_mock, - get_power_state_mock, - node_power_action_mock): + def test_reboot_and_finish_deploy( + self, power_off_mock, get_power_state_mock, + node_power_action_mock, mock_collect): + cfg.CONF.set_override('deploy_logs_collect', 'always', 'agent') self.node.provision_state = states.DEPLOYING self.node.target_provision_state = states.ACTIVE self.node.save() @@ -605,7 +608,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest): task, states.POWER_ON) self.assertEqual(states.ACTIVE, task.node.provision_state) self.assertEqual(states.NOSTATE, task.node.target_provision_state) + mock_collect.assert_called_once_with(task.node) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(time, 'sleep', lambda seconds: None) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(fake.FakePower, 'get_power_state', @@ -619,7 +624,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest): def test_reboot_and_finish_deploy_soft_poweroff_doesnt_complete( self, configure_tenant_net_mock, remove_provisioning_net_mock, power_off_mock, get_power_state_mock, - node_power_action_mock): + node_power_action_mock, mock_collect): self.node.provision_state = states.DEPLOYING self.node.target_provision_state = states.ACTIVE self.node.save() @@ -637,7 +642,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest): configure_tenant_net_mock.assert_called_once_with(mock.ANY, task) self.assertEqual(states.ACTIVE, task.node.provision_state) self.assertEqual(states.NOSTATE, task.node.target_provision_state) + self.assertFalse(mock_collect.called) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(agent_client.AgentClient, 'power_off', spec=types.FunctionType) @@ -647,7 +654,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest): 'configure_tenant_networks', spec_set=True, autospec=True) def test_reboot_and_finish_deploy_soft_poweroff_fails( self, configure_tenant_net_mock, remove_provisioning_net_mock, - power_off_mock, node_power_action_mock): + power_off_mock, node_power_action_mock, mock_collect): power_off_mock.side_effect = RuntimeError("boom") self.node.provision_state = states.DEPLOYING self.node.target_provision_state = states.ACTIVE @@ -664,7 +671,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest): configure_tenant_net_mock.assert_called_once_with(mock.ANY, task) self.assertEqual(states.ACTIVE, task.node.provision_state) self.assertEqual(states.NOSTATE, task.node.target_provision_state) + self.assertFalse(mock_collect.called) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(time, 'sleep', lambda seconds: None) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(fake.FakePower, 'get_power_state', @@ -677,7 +686,8 @@ class TestHeartbeat(AgentDeployMixinBaseTest): 'configure_tenant_networks', spec_set=True, autospec=True) def test_reboot_and_finish_deploy_get_power_state_fails( self, configure_tenant_net_mock, remove_provisioning_net_mock, - power_off_mock, get_power_state_mock, node_power_action_mock): + power_off_mock, get_power_state_mock, node_power_action_mock, + mock_collect): self.node.provision_state = states.DEPLOYING self.node.target_provision_state = states.ACTIVE self.node.save() @@ -695,7 +705,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest): configure_tenant_net_mock.assert_called_once_with(mock.ANY, task) self.assertEqual(states.ACTIVE, task.node.provision_state) self.assertEqual(states.NOSTATE, task.node.target_provision_state) + self.assertFalse(mock_collect.called) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(time, 'sleep', lambda seconds: None) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(fake.FakePower, 'get_power_state', @@ -704,7 +716,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest): spec=types.FunctionType) def test_reboot_and_finish_deploy_power_action_fails( self, power_off_mock, get_power_state_mock, - node_power_action_mock): + node_power_action_mock, mock_collect): self.node.provision_state = states.DEPLOYING self.node.target_provision_state = states.ACTIVE self.node.save() @@ -721,12 +733,14 @@ class TestHeartbeat(AgentDeployMixinBaseTest): mock.call(task, states.POWER_OFF)]) self.assertEqual(states.DEPLOYFAIL, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) + mock_collect.assert_called_once_with(task.node) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(agent_client.AgentClient, 'sync', spec=types.FunctionType) def test_reboot_and_finish_deploy_power_action_oob_power_off( - self, sync_mock, node_power_action_mock): + self, sync_mock, node_power_action_mock, mock_collect): # Enable force power off driver_info = self.node.driver_info driver_info['deploy_forces_oob_reboot'] = True @@ -746,13 +760,15 @@ class TestHeartbeat(AgentDeployMixinBaseTest): ]) self.assertEqual(states.ACTIVE, task.node.provision_state) self.assertEqual(states.NOSTATE, task.node.target_provision_state) + self.assertFalse(mock_collect.called) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(agent_base_vendor.LOG, 'warning', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(agent_client.AgentClient, 'sync', spec=types.FunctionType) def test_reboot_and_finish_deploy_power_action_oob_power_off_failed( - self, sync_mock, node_power_action_mock, log_mock): + self, sync_mock, node_power_action_mock, log_mock, mock_collect): # Enable force power off driver_info = self.node.driver_info driver_info['deploy_forces_oob_reboot'] = True @@ -779,6 +795,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest): 'Failed to flush the file system prior to hard rebooting the ' 'node %(node)s. Error: %(error)s', {'node': task.node.uuid, 'error': log_error}) + self.assertFalse(mock_collect.called) @mock.patch.object(agent_client.AgentClient, 'install_bootloader', autospec=True) @@ -841,10 +858,12 @@ class TestHeartbeat(AgentDeployMixinBaseTest): try_set_boot_device_mock.assert_called_once_with( task, boot_devices.DISK) + @mock.patch.object(agent_client.AgentClient, 'collect_system_logs', + autospec=True) @mock.patch.object(agent_client.AgentClient, 'install_bootloader', autospec=True) def test_configure_local_boot_boot_loader_install_fail( - self, install_bootloader_mock): + self, install_bootloader_mock, collect_logs_mock): install_bootloader_mock.return_value = { 'command_status': 'FAILED', 'command_error': 'boom'} self.node.provision_state = states.DEPLOYING @@ -859,14 +878,18 @@ class TestHeartbeat(AgentDeployMixinBaseTest): install_bootloader_mock.assert_called_once_with( mock.ANY, task.node, root_uuid='some-root-uuid', efi_system_part_uuid=None) + collect_logs_mock.assert_called_once_with(mock.ANY, task.node) self.assertEqual(states.DEPLOYFAIL, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) + @mock.patch.object(agent_client.AgentClient, 'collect_system_logs', + autospec=True) @mock.patch.object(deploy_utils, 'try_set_boot_device', autospec=True) @mock.patch.object(agent_client.AgentClient, 'install_bootloader', autospec=True) def test_configure_local_boot_set_boot_device_fail( - self, install_bootloader_mock, try_set_boot_device_mock): + self, install_bootloader_mock, try_set_boot_device_mock, + collect_logs_mock): install_bootloader_mock.return_value = { 'command_status': 'SUCCESS', 'command_error': None} try_set_boot_device_mock.side_effect = RuntimeError('error') @@ -884,6 +907,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest): efi_system_part_uuid=None) try_set_boot_device_mock.assert_called_once_with( task, boot_devices.DISK) + collect_logs_mock.assert_called_once_with(mock.ANY, task.node) self.assertEqual(states.DEPLOYFAIL, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) diff --git a/ironic/tests/unit/drivers/modules/test_deploy_utils.py b/ironic/tests/unit/drivers/modules/test_deploy_utils.py index 9a26c3680e..db2f74a72a 100644 --- a/ironic/tests/unit/drivers/modules/test_deploy_utils.py +++ b/ironic/tests/unit/drivers/modules/test_deploy_utils.py @@ -39,6 +39,7 @@ from ironic.drivers.modules import agent_client from ironic.drivers.modules import deploy_utils as utils from ironic.drivers.modules import image_cache from ironic.drivers.modules import pxe +from ironic.drivers import utils as driver_utils from ironic.tests import base as tests_base from ironic.tests.unit.conductor import mgr_utils from ironic.tests.unit.db import base as db_base @@ -1275,7 +1276,8 @@ class OtherFunctionTestCase(db_base.DbTestCase): else: self.assertFalse(mock_log.called) - def test_set_failed_state(self): + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) + def test_set_failed_state(self, mock_collect): exc_state = exception.InvalidState('invalid state') exc_param = exception.InvalidParameterValue('invalid parameter') mock_call = mock.call(mock.ANY) @@ -1290,8 +1292,10 @@ class OtherFunctionTestCase(db_base.DbTestCase): self._test_set_failed_state(event_value=iter([exc_state] * len(calls)), power_value=iter([exc_param] * len(calls)), log_calls=calls) + self.assertEqual(4, mock_collect.call_count) - def test_set_failed_state_no_poweroff(self): + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) + def test_set_failed_state_no_poweroff(self, mock_collect): cfg.CONF.set_override('power_off_after_deploy_failure', False, 'deploy') exc_state = exception.InvalidState('invalid state') @@ -1308,6 +1312,21 @@ class OtherFunctionTestCase(db_base.DbTestCase): self._test_set_failed_state(event_value=iter([exc_state] * len(calls)), power_value=iter([exc_param] * len(calls)), log_calls=calls, poweroff=False) + self.assertEqual(4, mock_collect.call_count) + + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) + def test_set_failed_state_collect_deploy_logs(self, mock_collect): + for opt in ('always', 'on_failure'): + cfg.CONF.set_override('deploy_logs_collect', opt, 'agent') + self._test_set_failed_state() + mock_collect.assert_called_once_with(mock.ANY) + mock_collect.reset_mock() + + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) + def test_set_failed_state_collect_deploy_logs_never(self, mock_collect): + cfg.CONF.set_override('deploy_logs_collect', 'never', 'agent') + self._test_set_failed_state() + self.assertFalse(mock_collect.called) def test_get_boot_option(self): self.node.instance_info = {'capabilities': '{"boot_option": "local"}'} diff --git a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py index 56ca5d46b9..4defcb9ba3 100644 --- a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py +++ b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py @@ -37,6 +37,7 @@ from ironic.drivers.modules import agent_client from ironic.drivers.modules import deploy_utils from ironic.drivers.modules import iscsi_deploy from ironic.drivers.modules import pxe +from ironic.drivers import utils as driver_utils from ironic.tests.unit.conductor import mgr_utils from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import utils as db_utils @@ -157,12 +158,14 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase): mock_unlink.assert_called_once_with('/path/uuid/disk') mock_rmtree.assert_called_once_with('/path/uuid') + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(iscsi_deploy, '_save_disk_layout', autospec=True) @mock.patch.object(iscsi_deploy, 'InstanceImageCache', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(deploy_utils, 'deploy_partition_image', autospec=True) - def test_continue_deploy_fail(self, deploy_mock, power_mock, - mock_image_cache, mock_disk_layout): + def test_continue_deploy_fail( + self, deploy_mock, power_mock, mock_image_cache, mock_disk_layout, + mock_collect_logs): kwargs = {'address': '123456', 'iqn': 'aaa-bbb'} deploy_mock.side_effect = exception.InstanceDeployFailure( "test deploy error") @@ -184,13 +187,16 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase): mock_image_cache.assert_called_once_with() mock_image_cache.return_value.clean_up.assert_called_once_with() self.assertFalse(mock_disk_layout.called) + mock_collect_logs.assert_called_once_with(task.node) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(iscsi_deploy, '_save_disk_layout', autospec=True) @mock.patch.object(iscsi_deploy, 'InstanceImageCache', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(deploy_utils, 'deploy_partition_image', autospec=True) def test_continue_deploy_fail_no_root_uuid_or_disk_id( - self, deploy_mock, power_mock, mock_image_cache, mock_disk_layout): + self, deploy_mock, power_mock, mock_image_cache, mock_disk_layout, + mock_collect_logs): kwargs = {'address': '123456', 'iqn': 'aaa-bbb'} deploy_mock.return_value = {} self.node.provision_state = states.DEPLOYWAIT @@ -211,13 +217,16 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase): mock_image_cache.assert_called_once_with() mock_image_cache.return_value.clean_up.assert_called_once_with() self.assertFalse(mock_disk_layout.called) + mock_collect_logs.assert_called_once_with(task.node) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(iscsi_deploy, '_save_disk_layout', autospec=True) @mock.patch.object(iscsi_deploy, 'InstanceImageCache', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @mock.patch.object(deploy_utils, 'deploy_partition_image', autospec=True) def test_continue_deploy_fail_empty_root_uuid( - self, deploy_mock, power_mock, mock_image_cache, mock_disk_layout): + self, deploy_mock, power_mock, mock_image_cache, + mock_disk_layout, mock_collect_logs): kwargs = {'address': '123456', 'iqn': 'aaa-bbb'} deploy_mock.return_value = {'root uuid': ''} self.node.provision_state = states.DEPLOYWAIT @@ -238,6 +247,7 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase): mock_image_cache.assert_called_once_with() mock_image_cache.return_value.clean_up.assert_called_once_with() self.assertFalse(mock_disk_layout.called) + mock_collect_logs.assert_called_once_with(task.node) @mock.patch.object(iscsi_deploy, '_save_disk_layout', autospec=True) @mock.patch.object(iscsi_deploy, 'LOG', autospec=True) @@ -424,7 +434,9 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase): agent_client_mock.start_iscsi_target.assert_called_once_with( task.node, expected_iqn, 3260, wipe_disk_metadata=False) - def test_do_agent_iscsi_deploy_start_iscsi_failure(self): + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) + def test_do_agent_iscsi_deploy_start_iscsi_failure( + self, mock_collect_logs): agent_client_mock = mock.MagicMock(spec_set=agent_client.AgentClient) agent_client_mock.start_iscsi_target.return_value = { 'command_status': 'FAILED', 'command_error': 'booom'} @@ -444,6 +456,7 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase): self.assertEqual(states.DEPLOYFAIL, self.node.provision_state) self.assertEqual(states.ACTIVE, self.node.target_provision_state) self.assertIsNotNone(self.node.last_error) + mock_collect_logs.assert_called_once_with(task.node) @mock.patch('ironic.drivers.modules.deploy_utils.get_ironic_api_url') def test_validate_good_api_url(self, mock_get_url): diff --git a/ironic/tests/unit/drivers/test_utils.py b/ironic/tests/unit/drivers/test_utils.py index 9f43efe603..7cb5619ab2 100644 --- a/ironic/tests/unit/drivers/test_utils.py +++ b/ironic/tests/unit/drivers/test_utils.py @@ -13,14 +13,22 @@ # License for the specific language governing permissions and limitations # under the License. +import datetime +import os + import mock +from oslo_config import cfg +from oslo_utils import timeutils from ironic.common import driver_factory from ironic.common import exception +from ironic.common import swift from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils +from ironic.drivers.modules import agent_client from ironic.drivers.modules import fake from ironic.drivers import utils as driver_utils +from ironic.tests import base as tests_base from ironic.tests.unit.conductor import mgr_utils from ironic.tests.unit.db import base as db_base from ironic.tests.unit.objects import utils as obj_utils @@ -237,3 +245,136 @@ class UtilsTestCase(db_base.DbTestCase): mac_raw = u"0A:1B-2C-3D:4F" mac_clean = driver_utils.normalize_mac(mac_raw) self.assertEqual("0a1b2c3d4f", mac_clean) + + +class UtilsRamdiskLogsTestCase(tests_base.TestCase): + + def setUp(self): + super(UtilsRamdiskLogsTestCase, self).setUp() + self.node = obj_utils.get_test_node(self.context) + + @mock.patch.object(timeutils, 'utcnow', autospec=True) + def test_get_ramdisk_logs_file_name(self, mock_utcnow): + mock_utcnow.return_value = datetime.datetime(2000, 1, 1, 0, 0) + name = driver_utils.get_ramdisk_logs_file_name(self.node) + expected_name = ('1be26c0b-03f2-4d2e-ae87-c02d7f33c123_' + '2000-01-01-00:00:00.tar.gz') + self.assertEqual(expected_name, name) + + # with instance_info + instance_uuid = '7a5641ba-d264-424a-a9d7-e2a293ca482b' + node2 = obj_utils.get_test_node( + self.context, instance_uuid=instance_uuid) + name = driver_utils.get_ramdisk_logs_file_name(node2) + expected_name = ('1be26c0b-03f2-4d2e-ae87-c02d7f33c123_' + + instance_uuid + '_2000-01-01-00:00:00.tar.gz') + self.assertEqual(expected_name, name) + + @mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True) + @mock.patch.object(agent_client.AgentClient, + 'collect_system_logs', autospec=True) + def test_collect_ramdisk_logs(self, mock_collect, mock_store): + logs = 'Gary the Snail' + mock_collect.return_value = {'command_result': {'system_logs': logs}} + driver_utils.collect_ramdisk_logs(self.node) + mock_store.assert_called_once_with(self.node, logs) + + @mock.patch.object(driver_utils.LOG, 'error', autospec=True) + @mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True) + @mock.patch.object(agent_client.AgentClient, + 'collect_system_logs', autospec=True) + def test_collect_ramdisk_logs_IPA_command_fail( + self, mock_collect, mock_store, mock_log): + error_str = 'MR. KRABS! I WANNA GO TO BED!' + mock_collect.return_value = {'faultstring': error_str} + driver_utils.collect_ramdisk_logs(self.node) + # assert store was never invoked + self.assertFalse(mock_store.called) + mock_log.assert_called_once_with( + mock.ANY, {'node': self.node.uuid, 'error': error_str}) + + @mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True) + @mock.patch.object(agent_client.AgentClient, + 'collect_system_logs', autospec=True) + def test_collect_ramdisk_logs_storage_command_fail( + self, mock_collect, mock_store): + mock_collect.side_effect = exception.IronicException('boom') + self.assertIsNone(driver_utils.collect_ramdisk_logs(self.node)) + self.assertFalse(mock_store.called) + + @mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True) + @mock.patch.object(agent_client.AgentClient, + 'collect_system_logs', autospec=True) + def _collect_ramdisk_logs_storage_fail( + self, expected_exception, mock_collect, mock_store): + mock_store.side_effect = expected_exception + logs = 'Gary the Snail' + mock_collect.return_value = {'command_result': {'system_logs': logs}} + driver_utils.collect_ramdisk_logs(self.node) + mock_store.assert_called_once_with(self.node, logs) + + @mock.patch.object(driver_utils.LOG, 'exception', autospec=True) + def test_collect_ramdisk_logs_storage_fail_fs(self, mock_log): + error = IOError('boom') + self._collect_ramdisk_logs_storage_fail(error) + mock_log.assert_called_once_with( + mock.ANY, {'node': self.node.uuid, 'error': error}) + self.assertIn('file-system', mock_log.call_args[0][0]) + + @mock.patch.object(driver_utils.LOG, 'error', autospec=True) + def test_collect_ramdisk_logs_storage_fail_swift(self, mock_log): + error = exception.SwiftOperationError('boom') + self._collect_ramdisk_logs_storage_fail(error) + mock_log.assert_called_once_with( + mock.ANY, {'node': self.node.uuid, 'error': error}) + self.assertIn('Swift', mock_log.call_args[0][0]) + + @mock.patch.object(driver_utils.LOG, 'exception', autospec=True) + def test_collect_ramdisk_logs_storage_fail_unkown(self, mock_log): + error = Exception('boom') + self._collect_ramdisk_logs_storage_fail(error) + mock_log.assert_called_once_with( + mock.ANY, {'node': self.node.uuid, 'error': error}) + self.assertIn('Unknown error', mock_log.call_args[0][0]) + + @mock.patch.object(swift, 'SwiftAPI', autospec=True) + @mock.patch.object(driver_utils, + 'get_ramdisk_logs_file_name', autospec=True) + def test_store_ramdisk_logs_swift(self, mock_logs_name, mock_swift): + container_name = 'ironic_test_container' + file_name = 'ironic_test_file.tar.gz' + b64str = 'ZW5jb2RlZHN0cmluZw==\n' + + cfg.CONF.set_override('deploy_logs_storage_backend', 'swift', 'agent') + cfg.CONF.set_override( + 'deploy_logs_swift_container', container_name, 'agent') + cfg.CONF.set_override('deploy_logs_swift_days_to_expire', 1, 'agent') + + mock_logs_name.return_value = file_name + driver_utils.store_ramdisk_logs(self.node, b64str) + + mock_swift.return_value.create_object.assert_called_once_with( + container_name, file_name, mock.ANY, + object_headers={'X-Delete-After': 86400}) + mock_logs_name.assert_called_once_with(self.node) + + @mock.patch.object(os, 'makedirs', autospec=True) + @mock.patch.object(driver_utils, + 'get_ramdisk_logs_file_name', autospec=True) + def test_store_ramdisk_logs_local(self, mock_logs_name, mock_makedirs): + file_name = 'ironic_test_file.tar.gz' + b64str = 'ZW5jb2RlZHN0cmluZw==\n' + log_path = '/foo/bar' + + cfg.CONF.set_override('deploy_logs_local_path', log_path, 'agent') + mock_logs_name.return_value = file_name + + with mock.patch.object(driver_utils, 'open', new=mock.mock_open(), + create=True) as mock_open: + driver_utils.store_ramdisk_logs(self.node, b64str) + + expected_path = os.path.join(log_path, file_name) + mock_open.assert_called_once_with(expected_path, 'wb') + + mock_makedirs.assert_called_once_with(log_path) + mock_logs_name.assert_called_once_with(self.node) diff --git a/releasenotes/notes/collect-deployment-logs-2ec1634847c3f6a5.yaml b/releasenotes/notes/collect-deployment-logs-2ec1634847c3f6a5.yaml new file mode 100644 index 0000000000..f2795b5d64 --- /dev/null +++ b/releasenotes/notes/collect-deployment-logs-2ec1634847c3f6a5.yaml @@ -0,0 +1,12 @@ +--- +features: + - Adds support for collecting deployment logs from the IPA + ramdisk. Five new configuration options were added, [agent]/group + deploy_logs_collect, [agent]deploy_logs_storage_backend, + [agent]/deploy_logs_local_path, [agent]/deploy_logs_swift_container and + [agent]/deploy_logs_swift_days_to_expire. +upgrades: + - Collecting logs on deploy failure is enabled by default and the logs + will be saved to the local disk. Operators upgrading may want to disable + this feature, enable some form of rotation for the logs or change the + configuration to store the logs in Swift to avoid disk space problems.