diff --git a/etc/sample_reconciler_config.json b/etc/sample_reconciler_config.json new file mode 100644 index 0000000..90e5a24 --- /dev/null +++ b/etc/sample_reconciler_config.json @@ -0,0 +1,12 @@ +{ + "nova": { + "RegionOne": { + "username": "admin", + "project_id": "admin", + "api_key": "some_key", + "auth_url": "http://identity.example.com:5000/v2.0", + "auth_system": "keystone" + } + }, + "region_mapping_loc": "/etc/stacktach/region_mapping.json" +} diff --git a/etc/sample_region_mapping.json b/etc/sample_region_mapping.json new file mode 100644 index 0000000..675c817 --- /dev/null +++ b/etc/sample_region_mapping.json @@ -0,0 +1,6 @@ +{ + "RegionOne.dev.global": "RegionOne", + "RegionOne.dev.cell1": "RegionOne", + "RegionTwo.dev.global": "RegionTwo", + "RegionTwo.dev.cell1": "RegionTwo" +} diff --git a/reports/nova_usage_audit.py b/reports/nova_usage_audit.py index 6a7eff4..a4e54f3 100644 --- a/reports/nova_usage_audit.py +++ b/reports/nova_usage_audit.py @@ -30,12 +30,21 @@ from django.db.models import F from stacktach import datetime_to_decimal as dt from stacktach import models +from stacktach.reconciler import Reconciler -OLD_LAUNCHES_QUERY = "select * from stacktach_instanceusage " \ - "where launched_at is not null and " \ - "launched_at < %s and instance not in " \ - "(select distinct(instance) " \ - "from stacktach_instancedeletes where deleted_at < %s)" +OLD_LAUNCHES_QUERY = """ +select * from stacktach_instanceusage where + launched_at is not null and + launched_at < %s and + instance not in + (select distinct(instance) + from stacktach_instancedeletes where + deleted_at < %s union + select distinct(instance) + from stacktach_instancereconcile where + deleted_at < %s);""" + +reconciler = None def _get_new_launches(beginning, ending): @@ -63,25 +72,34 @@ def _get_exists(beginning, ending): return models.InstanceExists.objects.filter(**filters) -def _audit_launches_to_exists(launches, exists): +def _audit_launches_to_exists(launches, exists, beginning): fails = [] for (instance, launches) in launches.items(): if instance in exists: - for launch1 in launches: + for expected in launches: found = False - for launch2 in exists[instance]: - if int(launch1['launched_at']) == int(launch2['launched_at']): + for actual in exists[instance]: + if int(expected['launched_at']) == \ + int(actual['launched_at']): # HACK (apmelton): Truncate the decimal because we may not # have the milliseconds. found = True if not found: + rec = False + if reconciler: + args = (expected['id'], beginning) + rec = reconciler.missing_exists_for_instance(*args) msg = "Couldn't find exists for launch (%s, %s)" - msg = msg % (instance, launch1['launched_at']) - fails.append(['Launch', launch1['id'], msg]) + msg = msg % (instance, expected['launched_at']) + fails.append(['Launch', expected['id'], msg, 'Y' if rec else 'N']) else: + rec = False + if reconciler: + args = (launches[0]['id'], beginning) + rec = reconciler.missing_exists_for_instance(*args) msg = "No exists for instance (%s)" % instance - fails.append(['Launch', '-', msg]) + fails.append(['Launch', '-', msg, 'Y' if rec else 'N']) return fails @@ -175,8 +193,13 @@ def _launch_audit_for_period(beginning, ending): else: launches_dict[instance] = [l, ] - old_launches = models.InstanceUsage.objects.raw(OLD_LAUNCHES_QUERY, - [beginning, beginning]) + # NOTE (apmelton) + # Django's safe substitution doesn't allow dict substitution... + # Thus, we send it 'beginning' three times... + old_launches = models.InstanceUsage.objects\ + .raw(OLD_LAUNCHES_QUERY, + [beginning, beginning, beginning]) + old_launches_dict = {} for launch in old_launches: instance = launch.instance @@ -205,7 +228,8 @@ def _launch_audit_for_period(beginning, ending): exists_dict[instance] = [e, ] launch_to_exists_fails = _audit_launches_to_exists(launches_dict, - exists_dict) + exists_dict, + beginning) return launch_to_exists_fails, new_launches.count(), len(old_launches_dict) @@ -222,11 +246,11 @@ def audit_for_period(beginning, ending): summary = { 'verifier': verify_summary, - 'launch_fails': { - 'total_failures': len(detail), + 'launch_summary': { 'new_launches': new_count, - 'old_launches': old_count - } + 'old_launches': old_count, + 'failures': len(detail) + }, } details = { @@ -276,7 +300,7 @@ def store_results(start, end, summary, details): def make_json_report(summary, details): report = [{'summary': summary}, - ['Object', 'ID', 'Error Description']] + ['Object', 'ID', 'Error Description', 'Reconciled?']] report.extend(details['exist_fails']) report.extend(details['launch_fails']) return json.dumps(report) @@ -302,8 +326,20 @@ if __name__ == '__main__': help="If set to true, report will be stored. " "Otherwise, it will just be printed", type=bool, default=False) + parser.add_argument('--reconcile', + help="Enabled reconciliation", + type=bool, default=False) + parser.add_argument('--reconciler_config', + help="Location of the reconciler config file", + type=str, + default='/etc/stacktach/reconciler-config.json') args = parser.parse_args() + if args.reconcile: + with open(args.reconciler_config) as f: + reconciler_config = json.load(f) + reconciler = Reconciler(reconciler_config) + if args.utcdatetime is not None: time = args.utcdatetime else: diff --git a/stacktach/reconciler.py b/stacktach/reconciler.py index 9446ea8..1c3645f 100644 --- a/stacktach/reconciler.py +++ b/stacktach/reconciler.py @@ -94,12 +94,13 @@ class Reconciler(object): def missing_exists_for_instance(self, launched_id, period_beginning): reconciled = False - launch = models.InstanceUsage.objects.get(launched_id) + launch = models.InstanceUsage.objects.get(id=launched_id) region = self._region_for_launch(launch) nova = self._get_nova(region) try: server = nova.servers.get(launch.instance) - if TERMINATED_AT_KEY in server._info: + if (server.status == 'DELETED' and + TERMINATED_AT_KEY in server._info): # Check to see if instance has been deleted terminated_at = server._info[TERMINATED_AT_KEY] terminated_at = utils.str_time_to_unix(terminated_at) diff --git a/tests/unit/test_reconciler.py b/tests/unit/test_reconciler.py index cf65427..994c5c3 100644 --- a/tests/unit/test_reconciler.py +++ b/tests/unit/test_reconciler.py @@ -51,7 +51,6 @@ config = { }, 'region_mapping_loc': '/etc/stacktach/region_mapping.json', - 'flavor_mapping_loc': '/etc/stacktach/flavor_mapping.json', } region_mapping = { @@ -220,7 +219,7 @@ class ReconcilerTestCase(unittest.TestCase): launch = self.mox.CreateMockAnything() launch.instance = INSTANCE_ID_1 - models.InstanceUsage.objects.get(1).AndReturn(launch) + models.InstanceUsage.objects.get(id=1).AndReturn(launch) self.mox.StubOutWithMock(self.reconciler, '_region_for_launch') self.reconciler._region_for_launch(launch).AndReturn('RegionOne') @@ -228,6 +227,7 @@ class ReconcilerTestCase(unittest.TestCase): nova = self._mocked_nova_client() self.reconciler._get_nova('RegionOne').AndReturn(nova) server = self.mox.CreateMockAnything() + server.status = 'DELETED' server._info = { 'OS-INST-USG:terminated_at': str(deleted_at_dt), } @@ -241,6 +241,32 @@ class ReconcilerTestCase(unittest.TestCase): self.assertTrue(result) self.mox.VerifyAll() + def test_missing_exists_for_instance_non_deleted_status(self): + now = datetime.datetime.utcnow() + beginning_dt = now - datetime.timedelta(days=1) + beginning_dec = utils.decimal_utc(beginning_dt) + + launch = self.mox.CreateMockAnything() + launch.instance = INSTANCE_ID_1 + models.InstanceUsage.objects.get(id=1).AndReturn(launch) + self.mox.StubOutWithMock(self.reconciler, '_region_for_launch') + self.reconciler._region_for_launch(launch).AndReturn('RegionOne') + + self.mox.StubOutWithMock(self.reconciler, '_get_nova') + nova = self._mocked_nova_client() + self.reconciler._get_nova('RegionOne').AndReturn(nova) + server = self.mox.CreateMockAnything() + server.status = 'ACTIVE' + server._info = { + 'OS-INST-USG:terminated_at': None, + } + nova.servers.get(INSTANCE_ID_1).AndReturn(server) + + self.mox.ReplayAll() + result = self.reconciler.missing_exists_for_instance(1, beginning_dec) + self.assertFalse(result) + self.mox.VerifyAll() + def test_missing_exists_for_instance_deleted_too_soon(self): now = datetime.datetime.utcnow() deleted_at_dt = now - datetime.timedelta(hours=4) @@ -249,7 +275,7 @@ class ReconcilerTestCase(unittest.TestCase): launch = self.mox.CreateMockAnything() launch.instance = INSTANCE_ID_1 - models.InstanceUsage.objects.get(1).AndReturn(launch) + models.InstanceUsage.objects.get(id=1).AndReturn(launch) self.mox.StubOutWithMock(self.reconciler, '_region_for_launch') self.reconciler._region_for_launch(launch).AndReturn('RegionOne') @@ -276,7 +302,7 @@ class ReconcilerTestCase(unittest.TestCase): launch = self.mox.CreateMockAnything() launch.instance = INSTANCE_ID_1 - models.InstanceUsage.objects.get(1).AndReturn(launch) + models.InstanceUsage.objects.get(id=1).AndReturn(launch) self.mox.StubOutWithMock(self.reconciler, '_region_for_launch') self.reconciler._region_for_launch(launch).AndReturn('RegionOne') @@ -301,7 +327,7 @@ class ReconcilerTestCase(unittest.TestCase): launch = self.mox.CreateMockAnything() launch.instance = INSTANCE_ID_1 - models.InstanceUsage.objects.get(1).AndReturn(launch) + models.InstanceUsage.objects.get(id=1).AndReturn(launch) self.mox.StubOutWithMock(self.reconciler, '_region_for_launch') self.reconciler._region_for_launch(launch).AndReturn('RegionOne')