From fb253a670fc66d6e1946f6435448d45c9c13f5dd Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Mon, 1 Aug 2022 18:18:53 -0700 Subject: [PATCH] Suppress Chassis Not Found on API Operation When you have a multi-db deployment, or even just many different threads operating on the same server with different transactions, you can run into a situation where one thread initiates a transaction to get a list of nodes, and then another triggers a delete of the chassis (and most likely node, but hey, there is really no way to detect that and work.) So as the API is processing the response and making the json result set, the query to resolve a chassis_id on a node object can begin to fail. Before this patch, this would raise an exception to the client. Now, we just suppress the error, and return the field value as None. In the grand scheme, the node is likely has also already been deleted as well. Change-Id: I3594ac580c01454c70922a965a2a653a8b568cbb Closes-Bug: 1508995 Story: 1508995 Task: 10038 --- ironic/api/controllers/v1/node.py | 12 ++++++++++-- .../tests/unit/api/controllers/v1/test_node.py | 17 +++++++++++++++++ ...hassis_not_found_error-99ee4b902d504ec7.yaml | 9 +++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/suppress_chassis_not_found_error-99ee4b902d504ec7.yaml diff --git a/ironic/api/controllers/v1/node.py b/ironic/api/controllers/v1/node.py index 2ef369e136..dab1342588 100644 --- a/ironic/api/controllers/v1/node.py +++ b/ironic/api/controllers/v1/node.py @@ -1172,8 +1172,16 @@ def _get_chassis_uuid(node): """ if not node.chassis_id: return - chassis = objects.Chassis.get_by_id(api.request.context, node.chassis_id) - return chassis.uuid + try: + chassis = objects.Chassis.get_by_id(api.request.context, + node.chassis_id) + return chassis.uuid + except exception.ChassisNotFound: + # NOTE(TheJulia): This is a case where multiple threads are racing + # and the chassis was not found... or somebody edited the database + # directly. Regardless, operationally, there is no chassis, and we + # return as there is nothing to actually return to the API consumer. + return def _replace_chassis_uuid_with_id(node_dict): diff --git a/ironic/tests/unit/api/controllers/v1/test_node.py b/ironic/tests/unit/api/controllers/v1/test_node.py index 3c913834eb..d7a3d474eb 100644 --- a/ironic/tests/unit/api/controllers/v1/test_node.py +++ b/ironic/tests/unit/api/controllers/v1/test_node.py @@ -600,6 +600,23 @@ class TestListNodes(test_api_base.BaseApiTest): self.assertCountEqual(['driver_info', 'links'], data) self.assertEqual('******', data['driver_info']['fake_password']) + def test_get_one_with_deleted_chassis(self): + node = obj_utils.create_test_node(self.context, + chassis_id=self.chassis.id) + with mock.patch.object(self.dbapi, + 'get_chassis_by_id', + autospec=True) as mock_gc: + # Explicitly return a chassis not found, and make sure the API + # hides this from the API consumer as this is likely just an + # in-flight deletion across multiple DB sessions or different + # API surfaces (or, just slow DB replication.) + mock_gc.side_effect = exception.ChassisNotFound( + chassis=self.chassis.id) + data = self.get_json( + '/nodes/%s' % node.uuid, + headers={api_base.Version.string: str(api_v1.max_version())}) + self.assertIsNone(data['chassis_uuid']) + def test_get_network_interface_fields_invalid_api_version(self): node = obj_utils.create_test_node(self.context, chassis_id=self.chassis.id) diff --git a/releasenotes/notes/suppress_chassis_not_found_error-99ee4b902d504ec7.yaml b/releasenotes/notes/suppress_chassis_not_found_error-99ee4b902d504ec7.yaml new file mode 100644 index 0000000000..bc55ec3a2a --- /dev/null +++ b/releasenotes/notes/suppress_chassis_not_found_error-99ee4b902d504ec7.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fixes an issue where an API user, when requesting a node list or single + node object, could get an error indicating that the request was bad as + the chassis was not found. This can occur when in-flight delete + operations are in progress on another thread. Instead of surfacing a + request breaking error, the API now suppresses the error and just + treats it as if there is no Chassis.