Fix two severe errors in the firmware caching code
First, it tries to create components even if the current version is not
known and fails with a database constraint error (because the initial
version cannot be NULL). This can be reproduced with sushy-tools before
commit 37f118237a.
Second, unexpected exceptions are not handled in the caching code, so
any of them will cause the node to get stuck in cleaning forever.
On top of that, the caching code is missing a metrics decorator.
This change does not update any unit tests because none currently exist.
Change-Id: Iaa242ca6aa6138fcdaaf63b763708e2f1e559cb0
This commit is contained in:
parent
dcea5f5a1d
commit
23745d97fe
@ -1449,8 +1449,8 @@ def node_cache_bios_settings(task, node):
|
|||||||
except exception.UnsupportedDriverExtension:
|
except exception.UnsupportedDriverExtension:
|
||||||
LOG.warning('BIOS settings are not supported for node %s, '
|
LOG.warning('BIOS settings are not supported for node %s, '
|
||||||
'skipping', node.uuid)
|
'skipping', node.uuid)
|
||||||
# TODO(zshi) remove this check when classic drivers are removed
|
|
||||||
except Exception:
|
except Exception:
|
||||||
|
# NOTE(dtantsur): the caller expects this function to never fail
|
||||||
msg = (_('Caching of bios settings failed on node %(node)s.')
|
msg = (_('Caching of bios settings failed on node %(node)s.')
|
||||||
% {'node': node.uuid})
|
% {'node': node.uuid})
|
||||||
LOG.exception(msg)
|
LOG.exception(msg)
|
||||||
@ -1474,6 +1474,7 @@ def node_cache_vendor(task):
|
|||||||
except exception.UnsupportedDriverExtension:
|
except exception.UnsupportedDriverExtension:
|
||||||
return
|
return
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
# NOTE(dtantsur): the caller expects this function to never fail
|
||||||
LOG.warning('Unexpected exception when trying to detect vendor '
|
LOG.warning('Unexpected exception when trying to detect vendor '
|
||||||
'for node %(node)s. %(class)s: %(exc)s',
|
'for node %(node)s. %(class)s: %(exc)s',
|
||||||
{'node': task.node.uuid,
|
{'node': task.node.uuid,
|
||||||
@ -1840,6 +1841,10 @@ def node_cache_firmware_components(task):
|
|||||||
except exception.UnsupportedDriverExtension:
|
except exception.UnsupportedDriverExtension:
|
||||||
LOG.warning('Firmware Components are not supported for node %s, '
|
LOG.warning('Firmware Components are not supported for node %s, '
|
||||||
'skipping', task.node.uuid)
|
'skipping', task.node.uuid)
|
||||||
|
except Exception:
|
||||||
|
# NOTE(dtantsur): the caller expects this function to never fail
|
||||||
|
LOG.exception('Caching of firmware components failed on node %s',
|
||||||
|
task.node.uuid)
|
||||||
|
|
||||||
|
|
||||||
def run_node_action(task, call, error_msg, success_msg=None, **kwargs):
|
def run_node_action(task, call, error_msg, success_msg=None, **kwargs):
|
||||||
|
@ -71,6 +71,7 @@ class RedfishFirmware(base.FirmwareInterface):
|
|||||||
"""
|
"""
|
||||||
redfish_utils.parse_driver_info(task.node)
|
redfish_utils.parse_driver_info(task.node)
|
||||||
|
|
||||||
|
@METRICS.timer('RedfishFirmware.cache_firmware_components')
|
||||||
def cache_firmware_components(self, task):
|
def cache_firmware_components(self, task):
|
||||||
"""Store or update Firmware Components on the given node.
|
"""Store or update Firmware Components on the given node.
|
||||||
|
|
||||||
@ -90,6 +91,7 @@ class RedfishFirmware(base.FirmwareInterface):
|
|||||||
|
|
||||||
system = redfish_utils.get_system(task.node)
|
system = redfish_utils.get_system(task.node)
|
||||||
|
|
||||||
|
if system.bios_version:
|
||||||
bios_fw = {'component': 'bios',
|
bios_fw = {'component': 'bios',
|
||||||
'current_version': system.bios_version}
|
'current_version': system.bios_version}
|
||||||
settings.append(bios_fw)
|
settings.append(bios_fw)
|
||||||
@ -99,6 +101,7 @@ class RedfishFirmware(base.FirmwareInterface):
|
|||||||
# access the Manager.
|
# access the Manager.
|
||||||
try:
|
try:
|
||||||
manager = redfish_utils.get_manager(task.node, system)
|
manager = redfish_utils.get_manager(task.node, system)
|
||||||
|
if manager.firmware_version:
|
||||||
bmc_fw = {'component': 'bmc',
|
bmc_fw = {'component': 'bmc',
|
||||||
'current_version': manager.firmware_version}
|
'current_version': manager.firmware_version}
|
||||||
settings.append(bmc_fw)
|
settings.append(bmc_fw)
|
||||||
@ -107,8 +110,8 @@ class RedfishFirmware(base.FirmwareInterface):
|
|||||||
'from the bmc of node %s', task.node.uuid)
|
'from the bmc of node %s', task.node.uuid)
|
||||||
|
|
||||||
if not settings:
|
if not settings:
|
||||||
error_msg = (_('Cannot retrieve firmware for node %s.')
|
error_msg = (_('Cannot retrieve firmware for node %s: no '
|
||||||
% task.node.uuid)
|
'supported components') % task.node.uuid)
|
||||||
LOG.error(error_msg)
|
LOG.error(error_msg)
|
||||||
raise exception.UnsupportedDriverExtension(error_msg)
|
raise exception.UnsupportedDriverExtension(error_msg)
|
||||||
|
|
||||||
|
8
releasenotes/notes/firmware-fail-c6f6c70220373033.yaml
Normal file
8
releasenotes/notes/firmware-fail-c6f6c70220373033.yaml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Nodes no longer get stuck in cleaning when the firmware components caching
|
||||||
|
code raises an unexpected exception.
|
||||||
|
- |
|
||||||
|
Prevents a database constraint error on caching firmware components
|
||||||
|
when the current version of a supported component is not known.
|
Loading…
x
Reference in New Issue
Block a user