Fix fast-track with the direct deploy interface
Several small fixes:

1) Make sure the deploy.deploy step returns DEPLOYWAIT after running prepare_image; otherwise the conductor wrongly assumes that the deploy is done.
2) Handle the case where provision_state == DEPLOYWAIT when returning from an asynchronous deploy step.
3) Do not assume that prepare_image is always the last command to run; sometimes get_deploy_steps sneaks in.
4) Do not issue a deprecation warning when receiving "agent is busy" from get_deploy_steps; this is normal for fast-track.

Change-Id: I19274c48bd36fca19961a7d78467ec8c29f85905
parent df439697ae
commit f0803493de
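For illustration, here is a minimal standalone sketch of the lookup described in point 3: rather than assuming the last agent command is prepare_image, search backwards for the most recent prepare_image entry and consider the deploy done once it is no longer RUNNING. The sample command payloads are hypothetical; the real logic lives in deploy_is_done in the second hunk below.

    # Minimal sketch (not ironic's actual module) of the lookup from point 3:
    # find the most recent prepare_image command instead of assuming it is last.
    # The command dicts below are hypothetical examples of agent command results.

    def deploy_is_done(commands):
        try:
            last_prepare = next(cmd for cmd in reversed(commands)
                                if cmd['command_name'] == 'prepare_image')
        except StopIteration:
            # prepare_image has not been issued yet, so the deploy has not started
            return False
        return last_prepare['command_status'] != 'RUNNING'


    commands = [
        {'command_name': 'prepare_image', 'command_status': 'SUCCESS'},
        {'command_name': 'get_deploy_steps', 'command_status': 'SUCCESS'},
    ]
    print(deploy_is_done(commands))  # True, even though get_deploy_steps ran last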
@@ -291,7 +291,8 @@ def do_next_deploy_step(task, step_index, conductor_id):
             LOG.info('Deploy step %(step)s on node %(node)s being '
                      'executed asynchronously, waiting for driver.',
                      {'node': node.uuid, 'step': step})
-            task.process_event('wait')
+            if task.node.provision_state != states.DEPLOYWAIT:
+                task.process_event('wait')
             return
         elif result is not None:
             # NOTE(rloo): This is an internal/dev error; shouldn't happen.
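To make points 1 and 2 concrete, below is a hedged, self-contained sketch of the conductor-side contract shown in the hunk above; DEPLOYWAIT, NodeStub and TaskStub are illustrative stand-ins for ironic's states module, node object and task manager, not the real API. An asynchronous step signals DEPLOYWAIT, and the conductor fires the 'wait' event only if the step has not already moved the node there.

    # Hedged sketch of the conductor-side handling shown in the hunk above.
    # DEPLOYWAIT, NodeStub and TaskStub are illustrative stand-ins, not ironic APIs.
    DEPLOYWAIT = 'deploy wait'


    class NodeStub:
        def __init__(self, provision_state):
            self.provision_state = provision_state


    class TaskStub:
        def __init__(self, node):
            self.node = node
            self.events = []

        def process_event(self, event):
            # Record the event; a real state machine would reject a duplicate
            # 'wait' once the node is already in DEPLOYWAIT.
            self.events.append(event)
            if event == 'wait':
                self.node.provision_state = DEPLOYWAIT


    def handle_step_result(task, result):
        if result == DEPLOYWAIT:
            # Fast-track steps may have already put the node into DEPLOYWAIT
            # themselves; only fire 'wait' when that has not happened yet.
            if task.node.provision_state != DEPLOYWAIT:
                task.process_event('wait')
            return True  # stop the step loop and wait for the agent callback
        return False


    # A fast-track step already left the node in DEPLOYWAIT: no duplicate event.
    task = TaskStub(NodeStub(provision_state=DEPLOYWAIT))
    handle_step_result(task, DEPLOYWAIT)
    print(task.events)  # []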
@@ -186,17 +186,13 @@ class AgentDeployMixin(agent_base.AgentDeployMixin):
         if not commands:
             return False
 
-        last_command = commands[-1]
-
-        if last_command['command_name'] != 'prepare_image':
-            # catches race condition where prepare_image is still processing
-            # so deploy hasn't started yet
-            return False
-
-        if last_command['command_status'] != 'RUNNING':
-            return True
-
-        return False
+        try:
+            last_command = next(cmd for cmd in reversed(commands)
+                                if cmd['command_name'] == 'prepare_image')
+        except StopIteration:
+            return False
+        else:
+            return last_command['command_status'] != 'RUNNING'
 
     @METRICS.timer('AgentDeployMixin.continue_deploy')
     @task_manager.require_exclusive_lock
@@ -487,6 +483,7 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
             # the state machine state going from DEPLOYWAIT -> DEPLOYING
             task.process_event('wait')
             self.continue_deploy(task)
+            return states.DEPLOYWAIT
         elif task.driver.storage.should_write_image(task):
             # Check if the driver has already performed a reboot in a previous
             # deploy step.
@@ -722,10 +722,15 @@ class AgentDeployMixin(HeartbeatMixin):
                        'steps': previous_steps})
 
         call = getattr(self._client, 'get_%s_steps' % step_type)
-        # TODO(dtantsur): remove the error handling in the V release.
         try:
             agent_result = call(node, task.ports).get('command_result', {})
         except exception.AgentAPIError as exc:
+            if 'agent is busy' in str(exc):
+                LOG.debug('Agent is busy with a command, will refresh steps '
+                          'on the next heartbeat')
+                return
+
+            # TODO(dtantsur): change to just 'raise'
             if step_type == 'clean':
                 raise
             else:
@@ -411,6 +411,39 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin,
         mock_execute.assert_called_once_with(mock.ANY, task,
                                              self.deploy_steps[0])
 
+    @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step',
+                autospec=True)
+    def test__do_next_deploy_step_in_deploywait(self, mock_execute):
+        driver_internal_info = {'deploy_step_index': None,
+                                'deploy_steps': self.deploy_steps}
+        self._start_service()
+        node = obj_utils.create_test_node(
+            self.context, driver='fake-hardware',
+            driver_internal_info=driver_internal_info,
+            deploy_step={})
+
+        def fake_execute(interface, task, step):
+            # A deploy step leaves the node in DEPLOYWAIT
+            task.process_event('wait')
+            return states.DEPLOYWAIT
+
+        mock_execute.side_effect = fake_execute
+        expected_first_step = node.driver_internal_info['deploy_steps'][0]
+        task = task_manager.TaskManager(self.context, node.uuid)
+        task.process_event('deploy')
+
+        deployments.do_next_deploy_step(task, 0, self.service.conductor.id)
+
+        node.refresh()
+        self.assertIsNone(node.last_error)
+        self.assertEqual(states.DEPLOYWAIT, node.provision_state)
+        self.assertEqual(states.ACTIVE, node.target_provision_state)
+        self.assertEqual(expected_first_step, node.deploy_step)
+        self.assertEqual(0, node.driver_internal_info['deploy_step_index'])
+        self.assertEqual(self.service.conductor.id, node.conductor_affinity)
+        mock_execute.assert_called_once_with(mock.ANY, task,
+                                             self.deploy_steps[0])
+
     @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step',
                 autospec=True)
     def test__do_next_deploy_step_continue_from_last_step(self, mock_execute):
@@ -492,7 +492,7 @@ class TestAgentDeploy(db_base.DbTestCase):
         self.node.save()
         with task_manager.acquire(
                 self.context, self.node['uuid'], shared=False) as task:
-            self.driver.deploy(task)
+            self.assertEqual(states.DEPLOYWAIT, self.driver.deploy(task))
             self.assertFalse(power_mock.called)
             self.assertFalse(mock_pxe_instance.called)
             task.node.refresh()
@@ -1739,6 +1739,27 @@ class TestAgentDeploy(db_base.DbTestCase):
                                           'command_status': 'RUNNING'}]
             self.assertFalse(task.driver.deploy.deploy_is_done(task))
 
+    @mock.patch.object(agent_client.AgentClient, 'get_commands_status',
+                       autospec=True)
+    def test_deploy_is_done_several_results(self, mock_get_cmd):
+        with task_manager.acquire(self.context, self.node.uuid) as task:
+            mock_get_cmd.return_value = [
+                {'command_name': 'prepare_image', 'command_status': 'SUCCESS'},
+                {'command_name': 'other_command', 'command_status': 'SUCCESS'},
+                {'command_name': 'prepare_image', 'command_status': 'RUNNING'},
+            ]
+            self.assertFalse(task.driver.deploy.deploy_is_done(task))
+
+    @mock.patch.object(agent_client.AgentClient, 'get_commands_status',
+                       autospec=True)
+    def test_deploy_is_done_not_the_last(self, mock_get_cmd):
+        with task_manager.acquire(self.context, self.node.uuid) as task:
+            mock_get_cmd.return_value = [
+                {'command_name': 'prepare_image', 'command_status': 'SUCCESS'},
+                {'command_name': 'other_command', 'command_status': 'SUCCESS'},
+            ]
+            self.assertTrue(task.driver.deploy.deploy_is_done(task))
+
     @mock.patch.object(manager_utils, 'restore_power_state_if_needed',
                        autospec=True)
     @mock.patch.object(manager_utils, 'power_on_node_if_needed',
@@ -2235,6 +2235,25 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
             self.assertEqual([self.clean_steps['clean_steps'][
                 'SpecificHardwareManager'][1]], steps['raid'])
 
+    @mock.patch.object(agent_base.LOG, 'warning', autospec=True)
+    @mock.patch.object(agent_client.AgentClient, 'get_deploy_steps',
+                       autospec=True)
+    def test_refresh_steps_busy(self, client_mock, log_mock):
+        client_mock.side_effect = exception.AgentAPIError(
+            node="node", status="500", error='agent is busy')
+
+        with task_manager.acquire(
+                self.context, self.node.uuid, shared=False) as task:
+            self.deploy.refresh_steps(task, 'deploy')
+
+            client_mock.assert_called_once_with(mock.ANY, task.node,
+                                                task.ports)
+            self.assertNotIn('agent_cached_deploy_steps_refreshed',
+                             task.node.driver_internal_info)
+            self.assertIsNone(task.node.driver_internal_info.get(
+                'agent_cached_deploy_steps'))
+            self.assertFalse(log_mock.called)
+
     @mock.patch.object(agent_client.AgentClient, 'get_clean_steps',
                        autospec=True)
     def test_refresh_steps_missing_steps(self, client_mock):
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    Fixes fast-track deployments with the ``direct`` deploy interface that
+    used to hang previously.