Change image ID from int sequence to UUID
When we export and import image data (for backup/restore purposes), we need to reset the ZK sequence counter for image builds in order to avoid collisions. The only way we can do that is to create and then delete a large number of znodes. Some sites (including OpenDev) have sequence numbers that are in the hundreds of thousands. To avoid this time-consuming operation (which is only intended to be run to restore from backup -- when operators are already under additional stress!), this change switches the build IDs from integer sequences to UUIDs. This avoids the problem with collisions after import (at least, to the degree that UUIDs avoid collisions). The actual change is fairly simple, but many unit tests need to be updated. Since the change is user-visible in the command output (image lists, etc), a release note is added. A related change which updates all of the textual references of build "number" to build "id" follows this one for clarity and ease of review. Change-Id: Ie7c68b094bc9733914a808756eeee8b62f696713
This commit is contained in:
parent
426951af18
commit
3815cce7aa
@ -390,7 +390,7 @@ class CleanupWorker(BaseWorker):
|
|||||||
self.log.exception("Exception cleaning up local build %s:",
|
self.log.exception("Exception cleaning up local build %s:",
|
||||||
local_build)
|
local_build)
|
||||||
|
|
||||||
LOCAL_BUILDS_RE = re.compile(r'^(.*)-(\d+)\.(?:\w+)$')
|
LOCAL_BUILDS_RE = re.compile(r'^(.*)-([abcdef\d]+)\.(?:\w+)$')
|
||||||
|
|
||||||
def _getLocalBuilds(self):
|
def _getLocalBuilds(self):
|
||||||
seen = set()
|
seen = set()
|
||||||
@ -628,13 +628,14 @@ class BuildWorker(BaseWorker):
|
|||||||
return
|
return
|
||||||
log_dir = self._getBuildLogRoot(name)
|
log_dir = self._getBuildLogRoot(name)
|
||||||
keep = max(self._config.build_log_retention, 1)
|
keep = max(self._config.build_log_retention, 1)
|
||||||
existing = sorted(os.listdir(log_dir))
|
existing = os.listdir(log_dir)
|
||||||
existing = [f for f in existing if f.startswith(name)]
|
existing = [f for f in existing if f.startswith(name)]
|
||||||
|
existing = [os.path.join(log_dir, f) for f in existing]
|
||||||
|
existing = sorted(existing, key=os.path.getmtime)
|
||||||
delete = existing[:0 - keep]
|
delete = existing[:0 - keep]
|
||||||
for f in delete:
|
for f in delete:
|
||||||
fp = os.path.join(log_dir, f)
|
self.log.info("Deleting old build log %s", f)
|
||||||
self.log.info("Deleting old build log %s" % (fp,))
|
os.unlink(f)
|
||||||
os.unlink(fp)
|
|
||||||
|
|
||||||
def _getBuildLog(self, name, build_id):
|
def _getBuildLog(self, name, build_id):
|
||||||
log_dir = self._getBuildLogRoot(name)
|
log_dir = self._getBuildLogRoot(name)
|
||||||
|
@ -516,21 +516,32 @@ class DBTestCase(BaseTestCase):
|
|||||||
break
|
break
|
||||||
self.wait_for_threads()
|
self.wait_for_threads()
|
||||||
|
|
||||||
def waitForBuild(self, image_name, build_id, states=None,
|
def waitForBuild(self, image_name, states=None, check_files=True,
|
||||||
check_files=True):
|
ignore_list=None):
|
||||||
if states is None:
|
if states is None:
|
||||||
states = (zk.READY,)
|
states = (zk.READY,)
|
||||||
|
|
||||||
base = "-".join([image_name, build_id])
|
if ignore_list:
|
||||||
|
ignore_list = [b.id for b in ignore_list]
|
||||||
|
else:
|
||||||
|
ignore_list = []
|
||||||
|
|
||||||
for _ in iterate_timeout(ONE_MINUTE, Exception,
|
for _ in iterate_timeout(ONE_MINUTE, Exception,
|
||||||
"Image build record to reach state",
|
"Image build record to reach state",
|
||||||
interval=1):
|
interval=1):
|
||||||
self.wait_for_threads()
|
self.wait_for_threads()
|
||||||
build = self.zk.getBuild(image_name, build_id)
|
build = None
|
||||||
if build and build.state in states:
|
for b in self.zk.getBuilds(image_name):
|
||||||
|
if b.state not in states:
|
||||||
|
continue
|
||||||
|
if b.id in ignore_list:
|
||||||
|
continue
|
||||||
|
build = b
|
||||||
|
if build:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
base = "-".join([image_name, build.id])
|
||||||
|
|
||||||
# We should only expect a dib manifest with a successful build.
|
# We should only expect a dib manifest with a successful build.
|
||||||
while check_files and build.state == zk.READY:
|
while check_files and build.state == zk.READY:
|
||||||
self.wait_for_threads()
|
self.wait_for_threads()
|
||||||
|
@ -230,10 +230,10 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
configfile = self.setup_config('node_two_image.yaml')
|
configfile = self.setup_config('node_two_image.yaml')
|
||||||
self.useBuilder(configfile)
|
self.useBuilder(configfile)
|
||||||
self.waitForImage('fake-provider', 'fake-image')
|
self.waitForImage('fake-provider', 'fake-image')
|
||||||
self.waitForImage('fake-provider', 'fake-image2')
|
image2 = self.waitForImage('fake-provider', 'fake-image2')
|
||||||
self.replace_config(configfile, 'node_two_image_remove.yaml')
|
self.replace_config(configfile, 'node_two_image_remove.yaml')
|
||||||
self.waitForImageDeletion('fake-provider', 'fake-image2')
|
self.waitForImageDeletion('fake-provider', 'fake-image2')
|
||||||
self.waitForBuildDeletion('fake-image2', '0000000001')
|
self.waitForBuildDeletion('fake-image2', image2.build_id)
|
||||||
|
|
||||||
def test_image_removal_two_builders(self):
|
def test_image_removal_two_builders(self):
|
||||||
# This tests the gap between building and uploading an image.
|
# This tests the gap between building and uploading an image.
|
||||||
@ -246,7 +246,7 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
configfile1 = self.setup_config('builder_image1.yaml')
|
configfile1 = self.setup_config('builder_image1.yaml')
|
||||||
builder1 = self.useBuilder(configfile1)
|
builder1 = self.useBuilder(configfile1)
|
||||||
self.waitForImage('fake-provider1', 'fake-image1')
|
self.waitForImage('fake-provider1', 'fake-image1')
|
||||||
self.waitForBuild('fake-image1', '0000000001')
|
self.waitForBuild('fake-image1')
|
||||||
for worker in builder1._upload_workers:
|
for worker in builder1._upload_workers:
|
||||||
worker.shutdown()
|
worker.shutdown()
|
||||||
worker.join()
|
worker.join()
|
||||||
@ -260,7 +260,7 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
self.waitForImage('fake-provider2', 'fake-image2')
|
self.waitForImage('fake-provider2', 'fake-image2')
|
||||||
# Don't check files because the image path switched to the
|
# Don't check files because the image path switched to the
|
||||||
# second builder; we're really only interested in ZK.
|
# second builder; we're really only interested in ZK.
|
||||||
self.waitForBuild('fake-image1', '0000000001', check_files=False)
|
self.waitForBuild('fake-image1', check_files=False)
|
||||||
|
|
||||||
def test_image_removal_dib_deletes_first(self):
|
def test_image_removal_dib_deletes_first(self):
|
||||||
# Break cloud image deleting
|
# Break cloud image deleting
|
||||||
@ -285,13 +285,13 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
'DIB disk files did not delete first'):
|
'DIB disk files did not delete first'):
|
||||||
self.wait_for_threads()
|
self.wait_for_threads()
|
||||||
files = builder.DibImageFile.from_image_id(
|
files = builder.DibImageFile.from_image_id(
|
||||||
self._config_images_dir.path, 'fake-image2-0000000001')
|
self._config_images_dir.path, f'fake-image2-{img.build_id}')
|
||||||
if not files:
|
if not files:
|
||||||
break
|
break
|
||||||
# Check image is still in fake-provider cloud
|
# Check image is still in fake-provider cloud
|
||||||
img.state = zk.DELETING
|
img.state = zk.DELETING
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.zk.getImageUpload('fake-image2', '0000000001',
|
self.zk.getImageUpload('fake-image2', img.build_id,
|
||||||
'fake-provider', '0000000001'),
|
'fake-provider', '0000000001'),
|
||||||
img)
|
img)
|
||||||
|
|
||||||
@ -301,7 +301,7 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
# Check image is removed from cloud and zk
|
# Check image is removed from cloud and zk
|
||||||
self.waitForImageDeletion('fake-provider', 'fake-image2', match=img)
|
self.waitForImageDeletion('fake-provider', 'fake-image2', match=img)
|
||||||
# Check build is removed from zk
|
# Check build is removed from zk
|
||||||
self.waitForBuildDeletion('fake-image2', '0000000001')
|
self.waitForBuildDeletion('fake-image2', img.build_id)
|
||||||
|
|
||||||
def test_image_rebuild_age(self):
|
def test_image_rebuild_age(self):
|
||||||
self._test_image_rebuild_age()
|
self._test_image_rebuild_age()
|
||||||
@ -309,40 +309,40 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
def _test_image_rebuild_age(self, expire=86400):
|
def _test_image_rebuild_age(self, expire=86400):
|
||||||
configfile = self.setup_config('node.yaml')
|
configfile = self.setup_config('node.yaml')
|
||||||
self.useBuilder(configfile)
|
self.useBuilder(configfile)
|
||||||
build = self.waitForBuild('fake-image', '0000000001')
|
build1 = self.waitForBuild('fake-image')
|
||||||
log_path1 = os.path.join(self._config_build_log_dir.path,
|
log_path1 = os.path.join(self._config_build_log_dir.path,
|
||||||
'fake-image-0000000001.log')
|
f'fake-image-{build1.id}.log')
|
||||||
self.assertTrue(os.path.exists(log_path1))
|
self.assertTrue(os.path.exists(log_path1))
|
||||||
image = self.waitForImage('fake-provider', 'fake-image')
|
image = self.waitForImage('fake-provider', 'fake-image')
|
||||||
# Expire rebuild-age (default: 1day) to force a new build.
|
# Expire rebuild-age (default: 1day) to force a new build.
|
||||||
build.state_time -= expire
|
build1.state_time -= expire
|
||||||
with self.zk.imageBuildLock('fake-image', blocking=True, timeout=1):
|
with self.zk.imageBuildLock('fake-image', blocking=True, timeout=1):
|
||||||
self.zk.storeBuild('fake-image', build, '0000000001')
|
self.zk.storeBuild('fake-image', build1, build1.id)
|
||||||
self.waitForBuild('fake-image', '0000000002')
|
build2 = self.waitForBuild('fake-image', ignore_list=[build1])
|
||||||
log_path2 = os.path.join(self._config_build_log_dir.path,
|
log_path2 = os.path.join(self._config_build_log_dir.path,
|
||||||
'fake-image-0000000002.log')
|
f'fake-image-{build2.id}.log')
|
||||||
self.assertTrue(os.path.exists(log_path2))
|
self.assertTrue(os.path.exists(log_path2))
|
||||||
self.waitForImage('fake-provider', 'fake-image', [image])
|
self.waitForImage('fake-provider', 'fake-image', [image])
|
||||||
builds = self.zk.getBuilds('fake-image', zk.READY)
|
builds = self.zk.getBuilds('fake-image', zk.READY)
|
||||||
self.assertEqual(len(builds), 2)
|
self.assertEqual(len(builds), 2)
|
||||||
return (build, image)
|
return (build1, build2, image)
|
||||||
|
|
||||||
def test_image_rotation(self):
|
def test_image_rotation(self):
|
||||||
# Expire rebuild-age (2days), to avoid problems when expiring 2 images.
|
# Expire rebuild-age (2days), to avoid problems when expiring 2 images.
|
||||||
self._test_image_rebuild_age(expire=172800)
|
build1, build2, image = self._test_image_rebuild_age(expire=172800)
|
||||||
build = self.waitForBuild('fake-image', '0000000002')
|
|
||||||
# Expire rebuild-age (default: 1day) to force a new build.
|
# Expire rebuild-age (default: 1day) to force a new build.
|
||||||
build.state_time -= 86400
|
build2.state_time -= 86400
|
||||||
with self.zk.imageBuildLock('fake-image', blocking=True, timeout=1):
|
with self.zk.imageBuildLock('fake-image', blocking=True, timeout=1):
|
||||||
self.zk.storeBuild('fake-image', build, '0000000002')
|
self.zk.storeBuild('fake-image', build2, build2.id)
|
||||||
self.waitForBuildDeletion('fake-image', '0000000001')
|
self.waitForBuildDeletion('fake-image', build1.id)
|
||||||
self.waitForBuild('fake-image', '0000000003')
|
build3 = self.waitForBuild('fake-image',
|
||||||
|
ignore_list=[build1, build2])
|
||||||
log_path1 = os.path.join(self._config_build_log_dir.path,
|
log_path1 = os.path.join(self._config_build_log_dir.path,
|
||||||
'fake-image-0000000001.log')
|
f'fake-image-{build1.id}.log')
|
||||||
log_path2 = os.path.join(self._config_build_log_dir.path,
|
log_path2 = os.path.join(self._config_build_log_dir.path,
|
||||||
'fake-image-0000000002.log')
|
f'fake-image-{build2.id}.log')
|
||||||
log_path3 = os.path.join(self._config_build_log_dir.path,
|
log_path3 = os.path.join(self._config_build_log_dir.path,
|
||||||
'fake-image-0000000003.log')
|
f'fake-image-{build3.id}.log')
|
||||||
# Our log retention is set to 1, so the first log should be deleted.
|
# Our log retention is set to 1, so the first log should be deleted.
|
||||||
self.assertFalse(os.path.exists(log_path1))
|
self.assertFalse(os.path.exists(log_path1))
|
||||||
self.assertTrue(os.path.exists(log_path2))
|
self.assertTrue(os.path.exists(log_path2))
|
||||||
@ -361,16 +361,15 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
# total of 2 diskimages on disk at all times.
|
# total of 2 diskimages on disk at all times.
|
||||||
|
|
||||||
# Expire rebuild-age (2days), to avoid problems when expiring 2 images.
|
# Expire rebuild-age (2days), to avoid problems when expiring 2 images.
|
||||||
build001, image001 = self._test_image_rebuild_age(expire=172800)
|
build1, build2, image1 = self._test_image_rebuild_age(expire=172800)
|
||||||
build002 = self.waitForBuild('fake-image', '0000000002')
|
|
||||||
|
|
||||||
# Make sure 2rd diskimage build was uploaded.
|
# Make sure 2rd diskimage build was uploaded.
|
||||||
image002 = self.waitForImage('fake-provider', 'fake-image', [image001])
|
image2 = self.waitForImage('fake-provider', 'fake-image',
|
||||||
self.assertEqual(image002.build_id, '0000000002')
|
ignore_list=[image1])
|
||||||
|
|
||||||
# Delete external name / id so we can test exception handlers.
|
# Delete external name / id so we can test exception handlers.
|
||||||
upload = self.zk.getUploads(
|
upload = self.zk.getUploads(
|
||||||
'fake-image', '0000000001', 'fake-provider', zk.READY)[0]
|
'fake-image', build1.id, 'fake-provider', zk.READY)[0]
|
||||||
upload.external_name = None
|
upload.external_name = None
|
||||||
upload.external_id = None
|
upload.external_id = None
|
||||||
with self.zk.imageUploadLock(upload.image_name, upload.build_id,
|
with self.zk.imageUploadLock(upload.image_name, upload.build_id,
|
||||||
@ -380,32 +379,33 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
upload.provider_name, upload, upload.id)
|
upload.provider_name, upload, upload.id)
|
||||||
|
|
||||||
# Expire rebuild-age (default: 1day) to force a new build.
|
# Expire rebuild-age (default: 1day) to force a new build.
|
||||||
build002.state_time -= 86400
|
build2.state_time -= 86400
|
||||||
with self.zk.imageBuildLock('fake-image', blocking=True, timeout=1):
|
with self.zk.imageBuildLock('fake-image', blocking=True, timeout=1):
|
||||||
self.zk.storeBuild('fake-image', build002, '0000000002')
|
self.zk.storeBuild('fake-image', build2, build2.id)
|
||||||
self.waitForBuildDeletion('fake-image', '0000000001')
|
self.waitForBuildDeletion('fake-image', build1.id)
|
||||||
|
|
||||||
# Make sure fake-image for fake-provider is removed from zookeeper.
|
# Make sure fake-image for fake-provider is removed from zookeeper.
|
||||||
upload = self.zk.getUploads(
|
upload = self.zk.getUploads(
|
||||||
'fake-image', '0000000001', 'fake-provider')
|
'fake-image', build1.id, 'fake-provider')
|
||||||
self.assertEqual(len(upload), 0)
|
self.assertEqual(len(upload), 0)
|
||||||
self.waitForBuild('fake-image', '0000000003')
|
build3 = self.waitForBuild('fake-image',
|
||||||
|
ignore_list=[build1, build2])
|
||||||
|
|
||||||
# Ensure we only have 2 builds on disk.
|
# Ensure we only have 2 builds on disk.
|
||||||
builds = self.zk.getBuilds('fake-image', zk.READY)
|
builds = self.zk.getBuilds('fake-image', zk.READY)
|
||||||
self.assertEqual(len(builds), 2)
|
self.assertEqual(len(builds), 2)
|
||||||
|
|
||||||
# Make sure 3rd diskimage build was uploaded.
|
# Make sure 3rd diskimage build was uploaded.
|
||||||
image003 = self.waitForImage(
|
image3 = self.waitForImage(
|
||||||
'fake-provider', 'fake-image', [image001, image002])
|
'fake-provider', 'fake-image', ignore_list=[image1, image2])
|
||||||
self.assertEqual(image003.build_id, '0000000003')
|
self.assertEqual(image3.build_id, build3.id)
|
||||||
|
|
||||||
def test_cleanup_hard_upload_fails(self):
|
def test_cleanup_hard_upload_fails(self):
|
||||||
configfile = self.setup_config('node.yaml')
|
configfile = self.setup_config('node.yaml')
|
||||||
self.useBuilder(configfile)
|
self.useBuilder(configfile)
|
||||||
self.waitForImage('fake-provider', 'fake-image')
|
image = self.waitForImage('fake-provider', 'fake-image')
|
||||||
|
|
||||||
upload = self.zk.getUploads('fake-image', '0000000001',
|
upload = self.zk.getUploads('fake-image', image.build_id,
|
||||||
'fake-provider', zk.READY)[0]
|
'fake-provider', zk.READY)[0]
|
||||||
|
|
||||||
# Store a new ZK node as UPLOADING to represent a hard fail
|
# Store a new ZK node as UPLOADING to represent a hard fail
|
||||||
@ -433,16 +433,8 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
found = False
|
found = False
|
||||||
for _ in iterate_timeout(10, Exception, 'image builds to fail', 0.1):
|
for _ in iterate_timeout(10, Exception, 'image builds to fail', 0.1):
|
||||||
builds = self.zk.getBuilds('fake-image')
|
builds = self.zk.getBuilds('fake-image')
|
||||||
for build in builds:
|
if builds:
|
||||||
# Lexicographical order
|
found = builds[0].id
|
||||||
if build and build.id > '0000000001':
|
|
||||||
# We know we've built more than one image and we know
|
|
||||||
# they have all failed. We can't check if they have
|
|
||||||
# failed directly because they may be cleaned up.
|
|
||||||
found = build.id
|
|
||||||
break
|
|
||||||
time.sleep(0.1)
|
|
||||||
if found:
|
|
||||||
break
|
break
|
||||||
|
|
||||||
# Now replace the config with a valid config and check that the image
|
# Now replace the config with a valid config and check that the image
|
||||||
@ -460,7 +452,7 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
def test_diskimage_build_only(self):
|
def test_diskimage_build_only(self):
|
||||||
configfile = self.setup_config('node_diskimage_only.yaml')
|
configfile = self.setup_config('node_diskimage_only.yaml')
|
||||||
self.useBuilder(configfile)
|
self.useBuilder(configfile)
|
||||||
build_tar = self.waitForBuild('fake-image', '0000000001')
|
build_tar = self.waitForBuild('fake-image')
|
||||||
|
|
||||||
self.assertEqual(build_tar._formats, ['tar'])
|
self.assertEqual(build_tar._formats, ['tar'])
|
||||||
self.assertReportedStat('nodepool.dib_image_build.'
|
self.assertReportedStat('nodepool.dib_image_build.'
|
||||||
@ -476,9 +468,8 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
def test_diskimage_build_formats(self):
|
def test_diskimage_build_formats(self):
|
||||||
configfile = self.setup_config('node_diskimage_formats.yaml')
|
configfile = self.setup_config('node_diskimage_formats.yaml')
|
||||||
self.useBuilder(configfile)
|
self.useBuilder(configfile)
|
||||||
build_default = self.waitForBuild('fake-image-default-format',
|
build_default = self.waitForBuild('fake-image-default-format')
|
||||||
'0000000001')
|
build_vhd = self.waitForBuild('fake-image-vhd')
|
||||||
build_vhd = self.waitForBuild('fake-image-vhd', '0000000001')
|
|
||||||
|
|
||||||
self.assertEqual(build_default._formats, ['qcow2'])
|
self.assertEqual(build_default._formats, ['qcow2'])
|
||||||
self.assertEqual(build_vhd._formats, ['vhd'])
|
self.assertEqual(build_vhd._formats, ['vhd'])
|
||||||
@ -491,15 +482,15 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
def test_diskimage_build_parents(self):
|
def test_diskimage_build_parents(self):
|
||||||
configfile = self.setup_config('node_diskimage_parents.yaml')
|
configfile = self.setup_config('node_diskimage_parents.yaml')
|
||||||
self.useBuilder(configfile)
|
self.useBuilder(configfile)
|
||||||
self.waitForBuild('parent-image-1', '0000000001')
|
self.waitForBuild('parent-image-1')
|
||||||
self.waitForBuild('parent-image-2', '0000000001')
|
self.waitForBuild('parent-image-2')
|
||||||
|
|
||||||
@mock.patch('select.poll')
|
@mock.patch('select.poll')
|
||||||
def test_diskimage_build_timeout(self, mock_poll):
|
def test_diskimage_build_timeout(self, mock_poll):
|
||||||
configfile = self.setup_config('diskimage_build_timeout.yaml')
|
configfile = self.setup_config('diskimage_build_timeout.yaml')
|
||||||
builder.BUILD_PROCESS_POLL_TIMEOUT = 500
|
builder.BUILD_PROCESS_POLL_TIMEOUT = 500
|
||||||
self.useBuilder(configfile, cleanup_interval=0)
|
self.useBuilder(configfile, cleanup_interval=0)
|
||||||
self.waitForBuild('fake-image', '0000000001', states=(zk.FAILED,))
|
self.waitForBuild('fake-image', states=(zk.FAILED,))
|
||||||
|
|
||||||
def test_session_loss_during_build(self):
|
def test_session_loss_during_build(self):
|
||||||
configfile = self.setup_config('node.yaml')
|
configfile = self.setup_config('node.yaml')
|
||||||
@ -513,7 +504,7 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
|
|
||||||
# Disable cleanup thread to verify builder cleans up after itself
|
# Disable cleanup thread to verify builder cleans up after itself
|
||||||
bldr = self.useBuilder(configfile, cleanup_interval=0)
|
bldr = self.useBuilder(configfile, cleanup_interval=0)
|
||||||
self.waitForBuild('fake-image', '0000000001', states=(zk.BUILDING,))
|
self.waitForBuild('fake-image', states=(zk.BUILDING,))
|
||||||
|
|
||||||
# The build should now be paused just before writing out any DIB files.
|
# The build should now be paused just before writing out any DIB files.
|
||||||
# Mock the next call to storeBuild() which is supposed to be the update
|
# Mock the next call to storeBuild() which is supposed to be the update
|
||||||
@ -587,9 +578,9 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
def test_post_upload_hook(self):
|
def test_post_upload_hook(self):
|
||||||
configfile = self.setup_config('node_upload_hook.yaml')
|
configfile = self.setup_config('node_upload_hook.yaml')
|
||||||
bldr = self.useBuilder(configfile)
|
bldr = self.useBuilder(configfile)
|
||||||
self.waitForImage('fake-provider', 'fake-image')
|
image = self.waitForImage('fake-provider', 'fake-image')
|
||||||
|
|
||||||
images_dir = bldr._config.images_dir
|
images_dir = bldr._config.images_dir
|
||||||
post_file = os.path.join(
|
post_file = os.path.join(
|
||||||
images_dir, 'fake-image-0000000001.qcow2.post')
|
images_dir, f'fake-image-{image.build_id}.qcow2.post')
|
||||||
self.assertTrue(os.path.exists(post_file), 'Post hook file exists')
|
self.assertTrue(os.path.exists(post_file), 'Post hook file exists')
|
||||||
|
@ -254,10 +254,10 @@ class TestNodepoolCMD(tests.DBTestCase):
|
|||||||
self.patch_argv('-c', configfile, 'dib-image-delete',
|
self.patch_argv('-c', configfile, 'dib-image-delete',
|
||||||
'fake-image-%s' % (builds[0].id))
|
'fake-image-%s' % (builds[0].id))
|
||||||
nodepoolcmd.main()
|
nodepoolcmd.main()
|
||||||
self.waitForBuildDeletion('fake-image', '0000000001')
|
self.waitForBuildDeletion('fake-image', builds[0].id)
|
||||||
# Check that fake-image-0000000001 doesn't exist
|
# Check that fake-image-0000000001 doesn't exist
|
||||||
self.assert_listed(
|
self.assert_listed(
|
||||||
configfile, ['dib-image-list'], 0, 'fake-image-0000000001', 0)
|
configfile, ['dib-image-list'], 0, f'fake-image-{builds[0].id}', 0)
|
||||||
|
|
||||||
def test_dib_image_delete_custom_image_creation(self):
|
def test_dib_image_delete_custom_image_creation(self):
|
||||||
# Test deletion of images without a .d manifest folder
|
# Test deletion of images without a .d manifest folder
|
||||||
@ -274,22 +274,22 @@ class TestNodepoolCMD(tests.DBTestCase):
|
|||||||
builds = self.zk.getMostRecentBuilds(1, 'fake-image', zk.READY)
|
builds = self.zk.getMostRecentBuilds(1, 'fake-image', zk.READY)
|
||||||
# Delete manifest folder to simulate custom image creation
|
# Delete manifest folder to simulate custom image creation
|
||||||
shutil.rmtree(os.path.join(builder._config.images_dir,
|
shutil.rmtree(os.path.join(builder._config.images_dir,
|
||||||
'fake-image-0000000001.d'))
|
f'fake-image-{builds[0].id}.d'))
|
||||||
# But ensure image is still present
|
# But ensure image is still present
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
os.path.exists(os.path.join(builder._config.images_dir,
|
os.path.exists(os.path.join(builder._config.images_dir,
|
||||||
'fake-image-0000000001.qcow2')))
|
f'fake-image-{builds[0].id}.qcow2')))
|
||||||
# Delete the image
|
# Delete the image
|
||||||
self.patch_argv('-c', configfile, 'dib-image-delete',
|
self.patch_argv('-c', configfile, 'dib-image-delete',
|
||||||
'fake-image-%s' % (builds[0].id))
|
'fake-image-%s' % (builds[0].id))
|
||||||
nodepoolcmd.main()
|
nodepoolcmd.main()
|
||||||
self.waitForBuildDeletion('fake-image', '0000000001')
|
self.waitForBuildDeletion('fake-image', builds[0].id)
|
||||||
# Check that fake-image-0000000001 doesn't exist
|
# Check that fake-image-0000000001 doesn't exist
|
||||||
self.assert_listed(
|
self.assert_listed(
|
||||||
configfile, ['dib-image-list'], 0, 'fake-image-0000000001', 0)
|
configfile, ['dib-image-list'], 0, f'fake-image-{builds[0].id}', 0)
|
||||||
self.assertFalse(
|
self.assertFalse(
|
||||||
os.path.exists(os.path.join(builder._config.images_dir,
|
os.path.exists(os.path.join(builder._config.images_dir,
|
||||||
'fake-image-0000000001.qcow2')))
|
f'fake-image-{builds[0].id}.qcow2')))
|
||||||
|
|
||||||
def test_dib_image_delete_two_builders(self):
|
def test_dib_image_delete_two_builders(self):
|
||||||
# This tests deleting an image when its builder is offline
|
# This tests deleting an image when its builder is offline
|
||||||
@ -316,19 +316,20 @@ class TestNodepoolCMD(tests.DBTestCase):
|
|||||||
nodepoolcmd.main()
|
nodepoolcmd.main()
|
||||||
|
|
||||||
# 4. Verify builder2 deleted the ZK records, but image is still on disk
|
# 4. Verify builder2 deleted the ZK records, but image is still on disk
|
||||||
self.waitForBuildDeletion('fake-image', '0000000001')
|
self.waitForBuildDeletion('fake-image', builds[0].id)
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
os.path.exists(os.path.join(builder1._config.images_dir,
|
os.path.exists(os.path.join(builder1._config.images_dir,
|
||||||
'fake-image-0000000001.d')))
|
f'fake-image-{builds[0].id}.d')))
|
||||||
self.assertFalse(
|
self.assertFalse(
|
||||||
os.path.exists(os.path.join(builder2._config.images_dir,
|
os.path.exists(os.path.join(builder2._config.images_dir,
|
||||||
'fake-image-0000000001.d')))
|
f'fake-image-{builds[0].id}.d')))
|
||||||
|
|
||||||
# 5. Start builder1 and verify it deletes image on disk
|
# 5. Start builder1 and verify it deletes image on disk
|
||||||
builder1 = self.useBuilder(configfile1)
|
builder1 = self.useBuilder(configfile1)
|
||||||
for _ in iterate_timeout(30, AssertionError, 'image file delete'):
|
for _ in iterate_timeout(30, AssertionError, 'image file delete'):
|
||||||
if not os.path.exists(os.path.join(builder1._config.images_dir,
|
if not os.path.exists(
|
||||||
'fake-image-0000000001.d')):
|
os.path.join(builder1._config.images_dir,
|
||||||
|
f'fake-image-{builds[0].id}.d')):
|
||||||
break
|
break
|
||||||
|
|
||||||
def test_delete(self):
|
def test_delete(self):
|
||||||
@ -650,15 +651,15 @@ class TestNodepoolCMD(tests.DBTestCase):
|
|||||||
self.startPool(pool)
|
self.startPool(pool)
|
||||||
self.waitForNodes('fake-label')
|
self.waitForNodes('fake-label')
|
||||||
|
|
||||||
build = self.waitForBuild('fake-image', '0000000001')
|
build = self.waitForBuild('fake-image')
|
||||||
# Delete the first build so that we have a hole in our
|
# Delete the first build so that we have a hole in our
|
||||||
# numbering. This lets us validate that we reconstruct the
|
# numbering. This lets us validate that we reconstruct the
|
||||||
# sequence state correctly.
|
# sequence state correctly.
|
||||||
build.state = zk.DELETING
|
build.state = zk.DELETING
|
||||||
with self.zk.imageBuildLock('fake-image', blocking=True, timeout=1):
|
with self.zk.imageBuildLock('fake-image', blocking=True, timeout=1):
|
||||||
self.zk.storeBuild('fake-image', build, '0000000001')
|
self.zk.storeBuild('fake-image', build, build.id)
|
||||||
self.waitForBuildDeletion('fake-image', '0000000001')
|
self.waitForBuildDeletion('fake-image', build.id)
|
||||||
self.waitForBuild('fake-image', '0000000002')
|
build2 = self.waitForBuild('fake-image', ignore_list=[build])
|
||||||
|
|
||||||
pool.stop()
|
pool.stop()
|
||||||
for worker in builder._upload_workers:
|
for worker in builder._upload_workers:
|
||||||
@ -668,7 +669,7 @@ class TestNodepoolCMD(tests.DBTestCase):
|
|||||||
# Save a copy of the data in ZK
|
# Save a copy of the data in ZK
|
||||||
old_data = self.getZKTree('/nodepool/images')
|
old_data = self.getZKTree('/nodepool/images')
|
||||||
# We aren't backing up the lock data
|
# We aren't backing up the lock data
|
||||||
old_data.pop('/nodepool/images/fake-image/builds/0000000002'
|
old_data.pop(f'/nodepool/images/fake-image/builds/{build2.id}'
|
||||||
'/providers/fake-provider/images/lock')
|
'/providers/fake-provider/images/lock')
|
||||||
old_data.pop('/nodepool/images/fake-image/builds/lock')
|
old_data.pop('/nodepool/images/fake-image/builds/lock')
|
||||||
|
|
||||||
@ -692,7 +693,7 @@ class TestNodepoolCMD(tests.DBTestCase):
|
|||||||
# First test a new upload of the existing image and make sure
|
# First test a new upload of the existing image and make sure
|
||||||
# it uses the correct sequence number.
|
# it uses the correct sequence number.
|
||||||
upload = self.waitForUpload('fake-provider', 'fake-image',
|
upload = self.waitForUpload('fake-provider', 'fake-image',
|
||||||
'0000000002', '0000000001')
|
build2.id, '0000000001')
|
||||||
upload.state = zk.DELETING
|
upload.state = zk.DELETING
|
||||||
with self.zk.imageUploadLock(upload.image_name, upload.build_id,
|
with self.zk.imageUploadLock(upload.image_name, upload.build_id,
|
||||||
upload.provider_name, blocking=True,
|
upload.provider_name, blocking=True,
|
||||||
@ -702,15 +703,12 @@ class TestNodepoolCMD(tests.DBTestCase):
|
|||||||
# We skip at least one number because upload lock is a sequence
|
# We skip at least one number because upload lock is a sequence
|
||||||
# node too (this is why builds and uploads start at 1 instead of 0).
|
# node too (this is why builds and uploads start at 1 instead of 0).
|
||||||
upload = self.waitForUpload('fake-provider', 'fake-image',
|
upload = self.waitForUpload('fake-provider', 'fake-image',
|
||||||
'0000000002', '0000000003')
|
build2.id, '0000000003')
|
||||||
|
|
||||||
# Now build a new image and make sure it uses the correct
|
# Now build a new image and make sure it uses a different id.
|
||||||
# sequence number.
|
build2 = self.waitForBuild('fake-image', ignore_list=[build])
|
||||||
build = self.waitForBuild('fake-image', '0000000002')
|
|
||||||
# Expire rebuild-age (default: 1day) to force a new build.
|
# Expire rebuild-age (default: 1day) to force a new build.
|
||||||
build.state_time -= 86400
|
build2.state_time -= 86400
|
||||||
with self.zk.imageBuildLock('fake-image', blocking=True, timeout=1):
|
with self.zk.imageBuildLock('fake-image', blocking=True, timeout=1):
|
||||||
self.zk.storeBuild('fake-image', build, '0000000002')
|
self.zk.storeBuild('fake-image', build2, build2.id)
|
||||||
# We skip at least one number because build lock is a sequence
|
self.waitForBuild('fake-image', ignore_list=[build, build2])
|
||||||
# node too (this is why builds and uploads start at 1 instead of 0).
|
|
||||||
self.waitForBuild('fake-image', '0000000004')
|
|
||||||
|
@ -80,7 +80,7 @@ class TestWebApp(tests.DBTestCase):
|
|||||||
# cache update
|
# cache update
|
||||||
self.useBuilder(configfile)
|
self.useBuilder(configfile)
|
||||||
|
|
||||||
self.waitForImage('fake-provider', 'fake-image')
|
image = self.waitForImage('fake-provider', 'fake-image')
|
||||||
self.waitForNodes('fake-label')
|
self.waitForNodes('fake-label')
|
||||||
|
|
||||||
req = request.Request(
|
req = request.Request(
|
||||||
@ -90,7 +90,7 @@ class TestWebApp(tests.DBTestCase):
|
|||||||
self.assertEqual(f.info().get('Content-Type'),
|
self.assertEqual(f.info().get('Content-Type'),
|
||||||
'text/plain; charset=UTF-8')
|
'text/plain; charset=UTF-8')
|
||||||
data = f.read()
|
data = f.read()
|
||||||
self.assertIn("| 0000000001 | fake-image | ready |",
|
self.assertIn(f"| {image.build_id} | fake-image | ready |",
|
||||||
data.decode('utf8'))
|
data.decode('utf8'))
|
||||||
|
|
||||||
def test_image_list_json(self):
|
def test_image_list_json(self):
|
||||||
@ -105,7 +105,7 @@ class TestWebApp(tests.DBTestCase):
|
|||||||
# cache update
|
# cache update
|
||||||
self.useBuilder(configfile)
|
self.useBuilder(configfile)
|
||||||
|
|
||||||
self.waitForImage('fake-provider', 'fake-image')
|
image = self.waitForImage('fake-provider', 'fake-image')
|
||||||
self.waitForNodes('fake-label')
|
self.waitForNodes('fake-label')
|
||||||
|
|
||||||
req = request.Request(
|
req = request.Request(
|
||||||
@ -116,7 +116,7 @@ class TestWebApp(tests.DBTestCase):
|
|||||||
'application/json')
|
'application/json')
|
||||||
data = f.read()
|
data = f.read()
|
||||||
objs = json.loads(data.decode('utf8'))
|
objs = json.loads(data.decode('utf8'))
|
||||||
self.assertDictContainsSubset({'id': '0000000001',
|
self.assertDictContainsSubset({'id': image.build_id,
|
||||||
'image': 'fake-image',
|
'image': 'fake-image',
|
||||||
'provider': 'fake-provider',
|
'provider': 'fake-provider',
|
||||||
'state': 'ready'}, objs[0])
|
'state': 'ready'}, objs[0])
|
||||||
@ -133,7 +133,7 @@ class TestWebApp(tests.DBTestCase):
|
|||||||
# cache update
|
# cache update
|
||||||
self.useBuilder(configfile)
|
self.useBuilder(configfile)
|
||||||
|
|
||||||
self.waitForImage('fake-provider', 'fake-image')
|
image = self.waitForImage('fake-provider', 'fake-image')
|
||||||
self.waitForNodes('fake-label')
|
self.waitForNodes('fake-label')
|
||||||
|
|
||||||
req = request.Request(
|
req = request.Request(
|
||||||
@ -146,7 +146,7 @@ class TestWebApp(tests.DBTestCase):
|
|||||||
objs = json.loads(data.decode('utf8'))
|
objs = json.loads(data.decode('utf8'))
|
||||||
# make sure this is valid json and has some of the
|
# make sure this is valid json and has some of the
|
||||||
# non-changing keys
|
# non-changing keys
|
||||||
self.assertDictContainsSubset({'id': 'fake-image-0000000001',
|
self.assertDictContainsSubset({'id': f'fake-image-{image.build_id}',
|
||||||
'formats': ['qcow2'],
|
'formats': ['qcow2'],
|
||||||
'state': 'ready'}, objs[0])
|
'state': 'ready'}, objs[0])
|
||||||
|
|
||||||
|
@ -262,7 +262,7 @@ class TestZooKeeper(tests.DBTestCase):
|
|||||||
image = "ubuntu-trusty"
|
image = "ubuntu-trusty"
|
||||||
b1 = self.zk.storeBuild(image, zk.ImageBuild())
|
b1 = self.zk.storeBuild(image, zk.ImageBuild())
|
||||||
b2 = self.zk.storeBuild(image, zk.ImageBuild())
|
b2 = self.zk.storeBuild(image, zk.ImageBuild())
|
||||||
self.assertLess(int(b1), int(b2))
|
self.assertNotEqual(b1, b2)
|
||||||
|
|
||||||
def test_store_and_get_build(self):
|
def test_store_and_get_build(self):
|
||||||
image = "ubuntu-trusty"
|
image = "ubuntu-trusty"
|
||||||
|
@ -1857,12 +1857,12 @@ class ZooKeeper(ZooKeeperBase):
|
|||||||
build_path = self._imageBuildsPath(image) + "/"
|
build_path = self._imageBuildsPath(image) + "/"
|
||||||
|
|
||||||
if build_number is None:
|
if build_number is None:
|
||||||
path = self.kazoo_client.create(
|
build_number = uuid.uuid4().hex
|
||||||
build_path,
|
path = build_path + build_number
|
||||||
|
self.kazoo_client.create(
|
||||||
|
path,
|
||||||
value=build_data.serialize(),
|
value=build_data.serialize(),
|
||||||
sequence=True,
|
|
||||||
makepath=True)
|
makepath=True)
|
||||||
build_number = path.split("/")[-1]
|
|
||||||
else:
|
else:
|
||||||
path = build_path + build_number
|
path = build_path + build_number
|
||||||
self.kazoo_client.set(path, build_data.serialize())
|
self.kazoo_client.set(path, build_data.serialize())
|
||||||
@ -2960,34 +2960,33 @@ class ZooKeeper(ZooKeeperBase):
|
|||||||
# We do some extra work to ensure that the sequence numbers
|
# We do some extra work to ensure that the sequence numbers
|
||||||
# don't collide. ZK sequence numbers are stored in the parent
|
# don't collide. ZK sequence numbers are stored in the parent
|
||||||
# node and ZK isn't smart enough to avoid collisions if there
|
# node and ZK isn't smart enough to avoid collisions if there
|
||||||
# are missing entries. So if we restore build 1, and then the
|
# are missing entries. So if we restore upload 1, and then the
|
||||||
# builder later wants to create a new build, it will attempt
|
# builder later wants to create a new upload, it will attempt
|
||||||
# to create build 1, and fail since the node already exists.
|
# to create upload 1, and fail since the node already exists.
|
||||||
#
|
#
|
||||||
# NB: The behavior is slightly different for sequence number 1
|
# NB: The behavior is slightly different for sequence number 1
|
||||||
# vs others; if 2 is the lowest number, then ZK will create
|
# vs others; if 2 is the lowest number, then ZK will create
|
||||||
# node 0 and 1 before colliding with 2. This is further
|
# node 0 and 1 before colliding with 2. This is further
|
||||||
# complicated in the nodepool context since we create lock
|
# complicated in the nodepool context since we create lock
|
||||||
# entries under the build/upload znodes which also seem to
|
# entries under the upload znodes which also seem to
|
||||||
# have an effect on the counter.
|
# have an effect on the counter.
|
||||||
#
|
#
|
||||||
# Regardless, if we pre-create sequence nodes up to our
|
# Regardless, if we pre-create sequence nodes up to our
|
||||||
# highest node numbers for builds and uploads, we are
|
# highest node numbers for uploads, we are guaranteed that the
|
||||||
# guaranteed that the next sequence node created will be
|
# next sequence node created will be greater. So we look at
|
||||||
# greater. So we look at all the sequence nodes in our import
|
# all the sequence nodes in our import data set and pre-create
|
||||||
# data set and pre-create sequence nodes up to that number.
|
# sequence nodes up to that number.
|
||||||
|
#
|
||||||
|
# Build ids are not affected since they are not sequence nodes
|
||||||
|
# (though they used to be).
|
||||||
|
|
||||||
highest_num = {}
|
highest_num = {}
|
||||||
# 0 1 2 3 4 5
|
# 0 1 2 3 4 5
|
||||||
# /nodepool/images/fake-image/builds/0000000002/
|
# /nodepool/images/fake-image/builds/UUID/
|
||||||
# 6 7 8 9
|
# 6 7 8 9
|
||||||
# providers/fake-provider/images/0000000001
|
# providers/fake-provider/images/0000000001
|
||||||
for path, data in import_data.items():
|
for path, data in import_data.items():
|
||||||
parts = path.split('/')
|
parts = path.split('/')
|
||||||
if len(parts) == 6:
|
|
||||||
key = '/'.join(parts[:5])
|
|
||||||
num = int(parts[5])
|
|
||||||
highest_num[key] = max(highest_num.get(key, num), num)
|
|
||||||
if len(parts) == 10:
|
if len(parts) == 10:
|
||||||
key = '/'.join(parts[:9])
|
key = '/'.join(parts[:9])
|
||||||
num = int(parts[9])
|
num = int(parts[9])
|
||||||
|
8
releasenotes/notes/buildid-4131ffe6f4cea09f.yaml
Normal file
8
releasenotes/notes/buildid-4131ffe6f4cea09f.yaml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
---
|
||||||
|
upgrade:
|
||||||
|
- |
|
||||||
|
New diskimage build ids are now in UUID format instead of integer
|
||||||
|
sequences with leading zeroes. This facilitates faster
|
||||||
|
restoration of images from backup data on systems with large
|
||||||
|
numbers of image builds. Existing builds will continue to use the
|
||||||
|
integer format and can coexist with the new format.
|
Loading…
x
Reference in New Issue
Block a user