From b1e834ac1acdcf887e109289c0cdee9cec2767b9 Mon Sep 17 00:00:00 2001 From: Taylor McKinnon Date: Wed, 29 Nov 2023 11:04:30 -0800 Subject: [PATCH] switch to DelimiterVersions for object listing --- lib/reindex/s3_bucketd.py | 40 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/lib/reindex/s3_bucketd.py b/lib/reindex/s3_bucketd.py index 9be38e5b..3bfac118 100644 --- a/lib/reindex/s3_bucketd.py +++ b/lib/reindex/s3_bucketd.py @@ -233,10 +233,9 @@ def get_next_upload_id(p): def _sum_objects(self, bucket, listing): count = 0 total_size = 0 - last_master = None - last_size = None + last_key = None for status_code, payload in listing: - contents = payload['Contents'] if isinstance(payload, dict) else payload + contents = payload['Versions'] if isinstance(payload, dict) else payload if contents is None: _log.error('Invalid contents in listing. bucket:%s status_code:%s'%(bucket.name, status_code)) raise InvalidListing(bucket.name) @@ -250,41 +249,36 @@ def _sum_objects(self, bucket, listing): data = json.loads(obj['value']) size = data.get('content-length', 0) - if self._only_latest_when_locked and bucket.object_lock_enabled and '\x00' in obj['key']: + is_latest = obj['key'] != last_key + last_key = obj['key'] + + if self._only_latest_when_locked and bucket.object_lock_enabled and not is_latest: _log.debug('Skipping versioned key: %s'%obj['key']) continue count += 1 total_size += size - # If versioned, subtract the size of the master to avoid double counting - if last_master is not None and obj['key'].startswith(last_master + '\x00'): - _log.debug('Detected versioned key: %s - subtracting master size: %i'% ( - obj['key'], - last_size, - )) - total_size -= last_size - count -= 1 - last_master = None - - # Only save master versions - elif '\x00' not in obj['key']: - last_master = obj['key'] - last_size = size return count, total_size def count_bucket_contents(self, bucket): - def get_next_marker(p): + def get_key_marker(p): if p is None or len(p) == 0: return '' - return p[-1].get('key', '') + return p.get('NextKeyMarker', '') + + def get_vid_marker(p): + if p is None or len(p) == 0: + return '' + return p.get('NextVersionIdMarker', '') params = { - 'listingType': 'Basic', - 'maxKeys': 1000, - 'gt': get_next_marker, + 'listingType': 'DelimiterVersions', + 'maxKeys': 2, + 'keyMarker': get_key_marker, + 'versionIdMarker': get_vid_marker, } count, total_size = self._sum_objects(bucket, self._list_bucket(bucket.name, **params))