From e7210dfb6eae26c9aa4884952e5e529c21d3cffe Mon Sep 17 00:00:00 2001 From: Phil Bridges Date: Thu, 2 Jun 2016 18:19:58 -0500 Subject: [PATCH] Pile of patches for pushing out to the wider world; adopted Prashanth Pai's dup fd fix patch (https://review.openstack.org/#/c/289773/), changed metadata serialization to dodge an HPSS UDA bug, and fixed up the package metadata some among other things --- setup.py | 4 +- swiftonhpss/swift/__init__.py | 6 +-- swiftonhpss/swift/common/utils.py | 46 +++++++++++---------- swiftonhpss/swift/obj/diskfile.py | 57 ++++++++++++++++---------- swiftonhpss/swift/obj/server.py | 41 ++++++++++-------- test/functional/swift_on_file_tests.py | 18 ++++---- test/functional/swift_on_hpss_tests.py | 28 ++++++------- 7 files changed, 112 insertions(+), 88 deletions(-) diff --git a/setup.py b/setup.py index eb9ec2c..c40f9ac 100644 --- a/setup.py +++ b/setup.py @@ -25,12 +25,12 @@ setup( version=_pkginfo.full_version, description='SwiftOnHPSS', license='Apache License (2.0)', - author='IBM Corporation; Red Hat, Inc.', + author='HPSS Collaboration', url='https://github.com/hpss-collaboration/swiftonhpss', packages=find_packages(exclude=['test', 'bin']), test_suite='nose.collector', classifiers=[ - 'Development Status :: 2 - Pre-Alpha', + 'Development Status :: 3 - Alpha', 'Environment :: OpenStack', 'Intended Audience :: Information Technology', 'Intended Audience :: System Administrators', diff --git a/swiftonhpss/swift/__init__.py b/swiftonhpss/swift/__init__.py index 1aa0b0b..13a9151 100644 --- a/swiftonhpss/swift/__init__.py +++ b/swiftonhpss/swift/__init__.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" SwiftOnHPSS """ +""" SwiftOnHPSS package information, stashed here """ class PkgInfo(object): @@ -22,7 +22,7 @@ class PkgInfo(object): self.release = release self.name = name self.final = final - self.full_version = self.canonical_version + '-' + self.release + self.full_version = self.canonical_version + '.' + self.release def save_config(self, filename): """ @@ -44,7 +44,7 @@ class PkgInfo(object): # Change the Package version here _pkginfo = PkgInfo(canonical_version='2.5.0', - release='0', + release='1', name='swiftonhpss', final=False) __version__ = _pkginfo.pretty_version diff --git a/swiftonhpss/swift/common/utils.py b/swiftonhpss/swift/common/utils.py index d33873c..94660ec 100644 --- a/swiftonhpss/swift/common/utils.py +++ b/swiftonhpss/swift/common/utils.py @@ -32,6 +32,7 @@ from swiftonhpss.swift.common.fs_utils import do_stat, \ do_walk, do_rmdir, do_log_rl, get_filename_from_fd, do_open, \ do_getxattr, do_setxattr, do_removexattr, do_read, \ do_close, do_dup, do_lseek, do_fstat, do_fsync, do_rename +from urllib import quote, unquote X_CONTENT_TYPE = 'Content-Type' X_CONTENT_LENGTH = 'Content-Length' @@ -105,16 +106,18 @@ pickle.loads = SafeUnpickler.loads def serialize_metadata(metadata): - return json.dumps(metadata, separators=(',', ':')) + return quote(json.dumps(metadata, separators=(',', ':'))) -def deserialize_metadata(metastr): +def deserialize_metadata(in_metastr): """ Returns dict populated with metadata if deserializing is successful. Returns empty dict if deserialzing fails. """ global read_pickled_metadata + metastr = unquote(in_metastr) + if metastr.startswith('\x80\x02}') and metastr.endswith('.') and \ read_pickled_metadata: # Assert that the serialized metadata is pickled using @@ -236,32 +239,34 @@ def _read_for_etag(fp): return etag.hexdigest() -def get_etag(fd_or_path): +def get_etag(path_or_fd): """ - Either read the ETag from HPSS metadata, or read the entire file to - generate it. - """ - # Try to just get the MD5 sum from HPSS. We're assuming that we recheck - # this checksum every time we actually open the file for read/write. - attrs = xattr.xattr(fd_or_path) - if 'system.hpss.hash' in attrs: - return attrs['system.hpss.hash'] - elif 'user.hash.checksum' in attrs: - return attrs['user.hash.checksum'] + FIXME: It would be great to have a translator that returns the md5sum() of + the file as an xattr that can be simply fetched. - if isinstance(fd_or_path, int): + Since we don't have that we should yield after each chunk read and + computed so that we don't consume the worker thread. + """ + etag = '' + if isinstance(path_or_fd, int): # We are given a file descriptor, so this is an invocation from the # DiskFile.open() method. - etag = _read_for_etag(do_dup(fd_or_path)) - do_lseek(fd_or_path, 0, os.SEEK_SET) - + fd = path_or_fd + dup_fd = do_dup(fd) + try: + etag = _read_for_etag(dup_fd) + do_lseek(fd, 0, os.SEEK_SET) + finally: + do_close(dup_fd) else: # We are given a path to the object when the DiskDir.list_objects_iter # method invokes us. - path = fd_or_path + path = path_or_fd fd = do_open(path, os.O_RDONLY) - etag = _read_for_etag(fd) - do_close(fd) + try: + etag = _read_for_etag(fd) + finally: + do_close(fd) return etag @@ -270,7 +275,6 @@ def get_object_metadata(obj_path_or_fd, stats=None): """ Return metadata of object. """ - logging.error('Entering get_object_metadata for %s' % obj_path_or_fd) if not stats: if isinstance(obj_path_or_fd, int): # We are given a file descriptor, so this is an invocation from the diff --git a/swiftonhpss/swift/obj/diskfile.py b/swiftonhpss/swift/obj/diskfile.py index 07d742f..ff72c7a 100644 --- a/swiftonhpss/swift/obj/diskfile.py +++ b/swiftonhpss/swift/obj/diskfile.py @@ -55,7 +55,7 @@ from swift.obj.diskfile import get_async_dir # FIXME: Hopefully we'll be able to move to Python 2.7+ where O_CLOEXEC will # be back ported. See http://www.python.org/dev/peps/pep-0433/ -O_CLOEXEC = 0o2000000 +O_CLOEXEC = 0o20000000 MAX_RENAME_ATTEMPTS = 10 MAX_OPEN_ATTEMPTS = 10 @@ -310,6 +310,15 @@ class DiskFileWriter(object): # clean). do_fsync(self._fd) + # (HPSS) Purge lock the file now if we're asked to. + if purgelock: + try: + hpssfs.ioctl(self._fd, hpssfs.HPSSFS_PURGE_LOCK, int(purgelock)) + except IOError as err: + raise SwiftOnFileSystemIOError(err.errno, + '%s, hpssfs.ioctl("%s", ...)' % ( + err.strerror, self._fd)) + # From the Department of the Redundancy Department, make sure # we call drop_cache() after fsync() to avoid redundant work # (pages all clean). @@ -383,15 +392,6 @@ class DiskFileWriter(object): # Success! break - # (HPSS) Purge lock the file now if we're asked to. - if purgelock: - try: - hpssfs.ioctl(self._fd, hpssfs.HPSSFS_PURGE_LOCK, int(purgelock)) - except IOError as err: - raise SwiftOnFileSystemIOError(err.errno, - '%s, hpssfs.ioctl("%s", ...)' % ( - err.strerror, self._fd)) - # Close here so the calling context does not have to perform this # in a thread. self.close() @@ -845,7 +845,6 @@ class DiskFile(object): except IOError as err: error_message = "Couldn't get HPSS xattr %s from file %s" \ % (xattr_to_get, self._data_file) - logging.error(error_message) raise SwiftOnFileSystemIOError(err.errno, error_message) return result @@ -896,15 +895,28 @@ class DiskFile(object): def read_metadata(self): """ - Return the metadata for an object without requiring the caller to open - the object first. + Return the metadata for an object without opening the object's file on + disk. :returns: metadata dictionary for an object :raises DiskFileError: this implementation will raise the same errors as the `open()` method. """ - with self.open(): - return self.get_metadata() + # FIXME: pull a lot of this and the copy of it from open() out to + # another function + + # Do not actually open the file, in order to duck hpssfs checksum + # validation and resulting timeouts + # This means we do a few things DiskFile.open() does. + try: + self._is_dir = os.path.isdir(self._data_file) + self._metadata = read_metadata(self._data_file) + except IOError: + raise DiskFileNotExist + if not self._validate_object_metadata(): + self._create_object_metadata(self._data_file) + self._filter_metadata() + return self._metadata def reader(self, iter_hook=None, keep_cache=False): """ @@ -1034,13 +1046,6 @@ class DiskFile(object): fd = do_open(tmppath, os.O_WRONLY | os.O_CREAT | os.O_EXCL | O_CLOEXEC) - if cos: - try: - hpssfs.ioctl(fd, hpssfs.HPSSFS_SET_COS_HINT, int(cos)) - except IOError as err: - raise SwiftOnFileSystemIOError(err.errno, - '%s, hpssfs.ioctl("%s", SET_COS)' % ( - err.strerror, fd)) if size: try: hpssfs.ioctl(fd, hpssfs.HPSSFS_SET_FSIZE_HINT, @@ -1050,6 +1055,14 @@ class DiskFile(object): '%s, hpssfs.ioctl("%s", SET_FSIZE)' % ( err.strerror, fd)) + if cos: + try: + hpssfs.ioctl(fd, hpssfs.HPSSFS_SET_COS_HINT, int(cos)) + except IOError as err: + raise SwiftOnFileSystemIOError(err.errno, + '%s, hpssfs.ioctl("%s", SET_COS)' % ( + err.strerror, fd)) + except SwiftOnFileSystemOSError as gerr: if gerr.errno in (errno.ENOSPC, errno.EDQUOT): # Raise DiskFileNoSpace to be handled by upper layers when diff --git a/swiftonhpss/swift/obj/server.py b/swiftonhpss/swift/obj/server.py index a1d5559..1c73754 100644 --- a/swiftonhpss/swift/obj/server.py +++ b/swiftonhpss/swift/obj/server.py @@ -21,6 +21,9 @@ import xattr import os import hpssfs import time + +import eventlet + from hashlib import md5 from swift.common.swob import HTTPConflict, HTTPBadRequest, HeaderKeyDict, \ HTTPInsufficientStorage, HTTPPreconditionFailed, HTTPRequestTimeout, \ @@ -37,9 +40,7 @@ from swiftonhpss.swift.common.exceptions import AlreadyExistsAsFile, \ from swift.common.exceptions import DiskFileDeviceUnavailable, \ DiskFileNotExist, DiskFileQuarantined, ChunkReadTimeout, DiskFileNoSpace, \ DiskFileXattrNotSupported, DiskFileExpired, DiskFileDeleted -from swift.common.constraints import valid_timestamp, check_account_format, \ - check_destination_header - +from swift.common.constraints import valid_timestamp, check_account_format from swift.obj import server from swift.common.ring import Ring @@ -90,6 +91,7 @@ class ObjectController(server.ObjectController): self.container_ring = Ring(self.swift_dir, ring_name='container') return self.container_ring + @public @timing_stats() def PUT(self, request): @@ -156,9 +158,9 @@ class ObjectController(server.ObjectController): elapsed_time = 0 # (HPSS) Check for HPSS-specific metadata headers - cos = request.headers.get('X-HPSS-Class-Of-Service-ID', None) - purgelock = request.headers.get('X-HPSS-Purgelock-Status', 'false') - purgelock = purgelock.lower() in ['true', '1', 'yes'] + cos = request.headers.get('X-Hpss-Class-Of-Service-Id', None) + purgelock = config_true_value( + request.headers.get('X-Hpss-Purgelock-Status', 'false')) try: # Feed DiskFile our HPSS-specific stuff @@ -198,9 +200,10 @@ class ObjectController(server.ObjectController): 'ETag': etag, 'Content-Length': str(upload_size), } - metadata.update( - val for val in request.headers.iteritems() - if is_sys_or_user_meta('object', val[0])) + meta_headers = {header: request.headers[header] for header + in request.headers + if is_sys_or_user_meta('object', header)} + metadata.update(meta_headers) backend_headers = \ request.headers.get('X-Backend-Replication-Headers') for header_key in (backend_headers or self.allowed_headers): @@ -214,7 +217,6 @@ class ObjectController(server.ObjectController): except DiskFileNoSpace: return HTTPInsufficientStorage(drive=device, request=request) except SwiftOnFileSystemIOError as e: - logging.debug('IOError in writing file') return HTTPServiceUnavailable(request=request) # FIXME: this stuff really should be handled in DiskFile somehow? @@ -352,7 +354,8 @@ class ObjectController(server.ObjectController): # Read DiskFile metadata try: - metadata = disk_file.read_metadata() + disk_file.open() + metadata = disk_file.get_metadata() except (DiskFileNotExist, DiskFileQuarantined) as e: headers = {} if hasattr(e, 'timestamp'): @@ -388,12 +391,14 @@ class ObjectController(server.ObjectController): hpss_headers = disk_file.read_hpss_system_metadata() response.headers.update(hpss_headers) except SwiftOnFileSystemIOError: + disk_file._close_fd() return HTTPServiceUnavailable(request=request) if 'X-Object-Sysmeta-Update-Container' in response.headers: self._sof_container_update(request, response) response.headers.pop('X-Object-Sysmeta-Update-Container') + disk_file._close_fd() return response @public @@ -407,6 +412,9 @@ class ObjectController(server.ObjectController): 'X-Storage-Token' not in request.headers ) + if 'X-Debug-Stop' in request.headers: + raise eventlet.StopServe() + # Get Diskfile try: disk_file = self.get_diskfile(device, partition, account, container, @@ -460,6 +468,7 @@ class ObjectController(server.ObjectController): return HTTPServiceUnavailable(request=request) return request.get_response(response) except (DiskFileNotExist, DiskFileQuarantined) as e: + disk_file._close_fd() headers = {} if hasattr(e, 'timestamp'): headers['X-Backend-Timestamp'] = e.timestamp.internal @@ -501,12 +510,11 @@ class ObjectController(server.ObjectController): cos = request.headers.get('X-HPSS-Class-Of-Service-ID') if cos: try: - hpssfs.ioctl(disk_file._fd, hpssfs.HPSSFS_SET_COS_HINT, - int(cos)) + xattr.setxattr(disk_file._fd, 'system.hpss.cos', int(cos)) except IOError as err: raise SwiftOnFileSystemIOError( err.errno, - '%s, xattr.getxattr("%s", ...)' % + '%s, xattr.setxattr("%s", ...)' % (err.strerror, disk_file._fd)) # Update metadata from request @@ -552,7 +560,8 @@ class ObjectController(server.ObjectController): return HTTPInsufficientStorage(drive=device, request=request) try: - orig_metadata = disk_file.read_metadata() + with disk_file.open(): + orig_metadata = disk_file.read_metadata() except DiskFileXattrNotSupported: return HTTPInsufficientStorage(drive=device, request=request) except DiskFileExpired as e: @@ -563,7 +572,6 @@ class ObjectController(server.ObjectController): orig_timestamp = e.timestamp orig_metadata = {} response_class = HTTPNotFound - # If the file got deleted outside of Swift, we won't see it. # So we say "file, what file?" and delete it from the container. except DiskFileNotExist: @@ -580,6 +588,7 @@ class ObjectController(server.ObjectController): response_class = HTTPNoContent else: response_class = HTTPConflict + response_timestamp = max(orig_timestamp, req_timestamp) orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) try: diff --git a/test/functional/swift_on_file_tests.py b/test/functional/swift_on_file_tests.py index ed2ee4d..d3c1404 100644 --- a/test/functional/swift_on_file_tests.py +++ b/test/functional/swift_on_file_tests.py @@ -28,6 +28,7 @@ import test.functional as tf # PGB - changed 'AUTH_' hardcoded reseller prefix to 'KEY_'. # TODO: read Swift proxy config for this + class TestSwiftOnFileEnv: @classmethod def setUp(cls): @@ -92,8 +93,8 @@ class TestSwiftOnFile(Base): set_up = False @classmethod - def tearDownClass(self): - self.env.account.delete_containers() + def tearDownClass(cls): + cls.env.account.delete_containers() #for account_dir in os.listdir(self.env.root_dir): # rmtree(os.path.join(self.env.root_dir, account_dir)) @@ -132,15 +133,14 @@ class TestSwiftOnFile(Base): file_item.write_random() self.assert_status(201) file_info = file_item.info() - fhOnMountPoint = open(os.path.join( - self.env.root_dir, - self.env.account.name, - self.env.container.name, - file_name), 'r') - data_read_from_mountP = fhOnMountPoint.read() + + with open(os.path.join(self.env.root_dir, + self.env.account.name, + self.env.container.name, + file_name), 'r') as fhOnMountPoint: + data_read_from_mountP = fhOnMountPoint.read() md5_returned = hashlib.md5(data_read_from_mountP).hexdigest() self.assertEquals(md5_returned, file_info['etag']) - fhOnMountPoint.close() def test_GET_on_file_created_over_mountpoint(self): file_name = Utils.create_name() diff --git a/test/functional/swift_on_hpss_tests.py b/test/functional/swift_on_hpss_tests.py index 4e944cf..05a8238 100644 --- a/test/functional/swift_on_hpss_tests.py +++ b/test/functional/swift_on_hpss_tests.py @@ -34,8 +34,7 @@ class TestSwiftOnHPSS(unittest.TestCase): tf.config.get('account', tf.config['username'])) cls.container = cls.account.container('swiftonhpss_test') - if not cls.container.create(hdrs={'X-Storage-Policy': 'hpss'}): - raise ResponseError(cls.connection.response) + cls.container.create(hdrs={'X-Storage-Policy': 'hpss'}) cls.hpss_dir = '/srv/swift/hpss' @classmethod @@ -46,18 +45,13 @@ class TestSwiftOnHPSS(unittest.TestCase): self.test_file = self.container.file('testfile') def tearDown(self): - try: - self.test_file.delete() - except ResponseError as e: - if e.status == 404: - pass - else: - raise + self.test_file.delete() def test_purge_lock(self): self.test_file.write(data='test', - hdrs={'X-HPSS-Purgelock-Status': 'true', - 'X-HPSS-Class-Of-Service-ID': '3'}) + hdrs={'X-Hpss-Purgelock-Status': 'true', + 'X-Hpss-Class-Of-Service-Id': '3'}) + test_file_name = os.path.join(self.hpss_dir, self.account.name, self.container.name, @@ -66,21 +60,25 @@ class TestSwiftOnHPSS(unittest.TestCase): xattrs = dict(xattr.xattr(test_file_name)) self.assertEqual(xattrs['system.hpss.purgelock'], '1') - self.test_file.post(hdrs={'X-HPSS-Purgelock-Status': 'false'}) - + self.test_file.post(hdrs={'X-Hpss-Purgelock-Status': 'false'}) + xattrs = dict(xattr.xattr(test_file_name)) self.assertEqual(xattrs['system.hpss.purgelock'], '0') def test_change_cos(self): self.test_file.write(data='asdfasdf', - hdrs={'X-HPSS-Class-Of-Service-ID': '3'}) + hdrs={'X-Hpss-Class-Of-Service-Id': '3'}) + test_file_name = os.path.join(self.hpss_dir, self.account.name, self.container.name, 'testfile') + + time.sleep(30) # It takes a long time for HPSS to get around to it. xattrs = dict(xattr.xattr(test_file_name)) self.assertEqual(xattrs['system.hpss.cos'], '3') - self.test_file.post(hdrs={'X-HPSS-Class-Of-Service-ID': '1'}) + self.test_file.post(hdrs={'X-HPSS-Class-Of-Service-ID': '1'}) + time.sleep(30) xattrs = dict(xattr.xattr(test_file_name)) self.assertEqual(xattrs['system.hpss.cos'], '1')