diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index fc53595908..47318d47a5 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -3585,7 +3585,10 @@ class ConductorManager(base_manager.BaseConductorManager): # pre-generation of tokens with virtual media usage. node.set_driver_internal_info('agent_secret_token', "******") return node - task.upgrade_lock() + # Do not retry the lock, fail immediately otherwise + # we can cause these requests to stack up on the API, + # all thinking they can process the node. + task.upgrade_lock(retry=False) LOG.debug('Generating agent token for node %(node)s', {'node': task.node.uuid}) utils.add_secret_token(task.node) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 582c87d80c..fd2e43a50e 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -3717,6 +3717,16 @@ class MiscTestCase(mgr_utils.ServiceSetUpMixin, mgr_utils.CommonMixIn, self.assertEqual('******', res.driver_internal_info['agent_secret_token']) + def test_node_with_token_already_locked(self): + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + network_interface='noop', + reservation='meow') + exc = self.assertRaises(messaging.rpc.ExpectedException, + self.service.get_node_with_token, + self.context, node.id) + self.assertEqual(exception.NodeLocked, exc.exc_info[0]) + @mgr_utils.mock_record_keepalive class ConsoleTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): diff --git a/releasenotes/notes/fail-fast-on-lookup-lock-a408feac87890050.yaml b/releasenotes/notes/fail-fast-on-lookup-lock-a408feac87890050.yaml new file mode 100644 index 0000000000..1f905bbcaa --- /dev/null +++ b/releasenotes/notes/fail-fast-on-lookup-lock-a408feac87890050.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Fixes an issue where lookups to generate an agent token would stack up as + the internal lock upgrade 
logic silently holds on to the request while trying + to obtain a lock. The lock upgrade now fails immediately with a + ``NodeLocked`` exception, and the agent retries the lookup.