Merge "Config to stop powering off nodes on failure"
This commit is contained in:
commit
c51a1510b8
@ -919,6 +919,10 @@
|
||||
# Deprecated group/name - [agent]/agent_erase_devices_iterations
|
||||
#erase_devices_iterations=1
|
||||
|
||||
# Whether to power off a node after deploy failure. Defaults
|
||||
# to True. (boolean value)
|
||||
#power_off_after_deploy_failure=true
|
||||
|
||||
|
||||
[dhcp]
|
||||
|
||||
|
@ -71,6 +71,10 @@ deploy_opts = [
|
||||
deprecated_group='agent',
|
||||
default=1,
|
||||
help=_('Number of iterations to be run for erasing devices.')),
|
||||
cfg.BoolOpt('power_off_after_deploy_failure',
|
||||
default=True,
|
||||
help=_('Whether to power off a node after deploy failure. '
|
||||
'Defaults to True.')),
|
||||
]
|
||||
CONF = cfg.CONF
|
||||
CONF.register_opts(deploy_opts, group='deploy')
|
||||
@ -492,15 +496,15 @@ def set_failed_state(task, msg):
|
||||
% {'node': node.uuid, 'state': node.provision_state})
|
||||
LOG.exception(msg2)
|
||||
|
||||
try:
|
||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
except Exception:
|
||||
msg2 = (_LE('Node %s failed to power off while handling deploy '
|
||||
'failure. This may be a serious condition. Node '
|
||||
'should be removed from Ironic or put in maintenance '
|
||||
'mode until the problem is resolved.') % node.uuid)
|
||||
LOG.exception(msg2)
|
||||
|
||||
if CONF.deploy.power_off_after_deploy_failure:
|
||||
try:
|
||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
except Exception:
|
||||
msg2 = (_LE('Node %s failed to power off while handling deploy '
|
||||
'failure. This may be a serious condition. Node '
|
||||
'should be removed from Ironic or put in maintenance '
|
||||
'mode until the problem is resolved.') % node.uuid)
|
||||
LOG.exception(msg2)
|
||||
# NOTE(deva): node_power_action() erases node.last_error
|
||||
# so we need to set it here.
|
||||
node.last_error = msg
|
||||
|
@ -1252,7 +1252,7 @@ class OtherFunctionTestCase(db_base.DbTestCase):
|
||||
autospec=True)
|
||||
def _test_set_failed_state(self, mock_event, mock_power, mock_log,
|
||||
event_value=None, power_value=None,
|
||||
log_calls=None):
|
||||
log_calls=None, poweroff=True):
|
||||
err_msg = 'some failure'
|
||||
mock_event.side_effect = event_value
|
||||
mock_power.side_effect = power_value
|
||||
@ -1260,9 +1260,12 @@ class OtherFunctionTestCase(db_base.DbTestCase):
|
||||
shared=False) as task:
|
||||
utils.set_failed_state(task, err_msg)
|
||||
mock_event.assert_called_once_with(task, 'fail')
|
||||
mock_power.assert_called_once_with(task, states.POWER_OFF)
|
||||
if poweroff:
|
||||
mock_power.assert_called_once_with(task, states.POWER_OFF)
|
||||
else:
|
||||
self.assertFalse(mock_power.called)
|
||||
self.assertEqual(err_msg, task.node.last_error)
|
||||
if log_calls:
|
||||
if (log_calls and poweroff):
|
||||
mock_log.exception.assert_has_calls(log_calls)
|
||||
else:
|
||||
self.assertFalse(mock_log.called)
|
||||
@ -1283,6 +1286,23 @@ class OtherFunctionTestCase(db_base.DbTestCase):
|
||||
power_value=iter([exc_param] * len(calls)),
|
||||
log_calls=calls)
|
||||
|
||||
def test_set_failed_state_no_poweroff(self):
    """Check set_failed_state() with power-off-on-failure disabled.

    Runs the shared ``_test_set_failed_state`` helper with
    ``poweroff=False`` for four cases: no errors, a failing 'fail'
    event, a failing power action, and both failing.
    """
    # Use a config override instead of assigning to the option attribute
    # directly: an override is tracked by oslo.config and can be cleared,
    # so the disabled setting cannot leak into other tests.
    cfg.CONF.set_override('power_off_after_deploy_failure', False,
                          'deploy')
    self.addCleanup(cfg.CONF.clear_override,
                    'power_off_after_deploy_failure', 'deploy')

    exc_state = exception.InvalidState('invalid state')
    exc_param = exception.InvalidParameterValue('invalid parameter')
    mock_call = mock.call(mock.ANY)

    # No failures at all.
    self._test_set_failed_state(poweroff=False)

    # The 'fail' event raises.
    calls = [mock_call]
    self._test_set_failed_state(event_value=iter([exc_state] * len(calls)),
                                log_calls=calls, poweroff=False)

    # The power action is configured to raise.
    calls = [mock_call]
    self._test_set_failed_state(power_value=iter([exc_param] * len(calls)),
                                log_calls=calls, poweroff=False)

    # Both the event and the power action are configured to raise.
    calls = [mock_call, mock_call]
    self._test_set_failed_state(event_value=iter([exc_state] * len(calls)),
                                power_value=iter([exc_param] * len(calls)),
                                log_calls=calls, poweroff=False)
|
||||
|
||||
def test_get_boot_option(self):
|
||||
self.node.instance_info = {'capabilities': '{"boot_option": "local"}'}
|
||||
result = utils.get_boot_option(self.node)
|
||||
|
@ -0,0 +1,12 @@
|
||||
---
|
||||
features:
|
||||
- Operators can now set
|
||||
deploy.power_off_after_deploy_failure to leave nodes
|
||||
powered on when a deployment fails. This is useful
|
||||
for troubleshooting deployment issues. Note that
|
||||
Nova will still attempt to delete a node after a failed
|
||||
deployment, so deploy.power_off_after_deploy_failure
|
||||
may not be very effective in non-standalone
|
||||
deployments until a similar patch to ironic's driver in
|
||||
nova is proposed.
|
||||
|
Loading…
x
Reference in New Issue
Block a user