From 21ec92fea84c151e118a5f341d7f9fb8cf6b3b50 Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Tue, 11 Mar 2025 14:03:48 -0400 Subject: [PATCH 1/2] init: nuke_bucket() doesn't reuse deleted markers resolves an error from nuke_bucket() when there are lots of object versions to delete: > An error occurred (BucketNotEmpty) when calling the DeleteBucket operation the problem is that we tried to continue ListObjectVersions from the NextVersionIdMarker of the previous listing, but that version had been deleted by DeleteObjects. as a result, ListObjectVersions doesn't know where to resume, so it skips some entries. this is probably a ceph/rgw bug Signed-off-by: Casey Bodley --- s3tests_boto3/functional/__init__.py | 32 ++++++++++++---------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/s3tests_boto3/functional/__init__.py b/s3tests_boto3/functional/__init__.py index 5fd58913f..ad6d9a211 100644 --- a/s3tests_boto3/functional/__init__.py +++ b/s3tests_boto3/functional/__init__.py @@ -79,28 +79,18 @@ def get_objects_list(bucket, client=None, prefix=None): return objects_list -# generator function that returns object listings in batches, where each -# batch is a list of dicts compatible with delete_objects() -def list_versions(client, bucket, batch_size): - kwargs = {'Bucket': bucket, 'MaxKeys': batch_size} - truncated = True - while truncated: - listing = client.list_object_versions(**kwargs) - - kwargs['KeyMarker'] = listing.get('NextKeyMarker') - kwargs['VersionIdMarker'] = listing.get('NextVersionIdMarker') - truncated = listing['IsTruncated'] - - objs = listing.get('Versions', []) + listing.get('DeleteMarkers', []) - if len(objs): - yield [{'Key': o['Key'], 'VersionId': o['VersionId']} for o in objs] - def nuke_bucket(client, bucket): batch_size = 128 max_retain_date = None # list and delete objects in batches - for objects in list_versions(client, bucket, batch_size): + truncated = True + while truncated: + listing = 
client.list_object_versions(Bucket=bucket, MaxKeys=batch_size) + truncated = listing['IsTruncated'] + objs = listing.get('Versions', []) + listing.get('DeleteMarkers', []) + objects = [{'Key': o['Key'], 'VersionId': o['VersionId']} for o in objs] + delete = client.delete_objects(Bucket=bucket, Delete={'Objects': objects, 'Quiet': True}, BypassGovernanceRetention=True) @@ -131,7 +121,13 @@ def nuke_bucket(client, bucket): 'seconds for object locks to expire') time.sleep(delta.total_seconds()) - for objects in list_versions(client, bucket, batch_size): + truncated = True + while truncated: + listing = client.list_object_versions(Bucket=bucket, MaxKeys=batch_size) + truncated = listing['IsTruncated'] + objs = listing.get('Versions', []) + listing.get('DeleteMarkers', []) + objects = [{'Key': o['Key'], 'VersionId': o['VersionId']} for o in objs] + client.delete_objects(Bucket=bucket, Delete={'Objects': objects, 'Quiet': True}, BypassGovernanceRetention=True) From c4158effa91ecf32e75c78f4f01677dc12ff6277 Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Tue, 11 Mar 2025 17:14:08 -0400 Subject: [PATCH 2/2] s3: test listing when object has lots of versions this is the minimal reproducer i could find for https://tracker.ceph.com/issues/70399, but requires custom config for rgw: > rgw_list_bucket_min_readahead = 1 > rgw_override_bucket_index_max_shards = 1 Signed-off-by: Casey Bodley --- s3tests_boto3/functional/test_s3.py | 42 +++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/s3tests_boto3/functional/test_s3.py b/s3tests_boto3/functional/test_s3.py index 2e86d4431..4e3898d7e 100644 --- a/s3tests_boto3/functional/test_s3.py +++ b/s3tests_boto3/functional/test_s3.py @@ -1343,6 +1343,48 @@ def test_bucket_list_marker_not_in_list(): keys = _get_keys(response) assert keys == [ 'foo','quxx'] +def test_versioned_bucket_listing(): + client = get_client() + bucket_name = get_new_bucket(client) + count = 15 + all_versions = set() + + # write one object 
before enabling versioning + client.put_object(Bucket=bucket_name, Key='A') + # this gets converted to a "null" version + all_versions.add('null') + + check_configure_versioning_retry(bucket_name, "Enabled", "Enabled") + + for i in range(count): + response = client.put_object(Bucket=bucket_name, Key='A') + all_versions.add(response['VersionId']) + + response = client.put_object(Bucket=bucket_name, Key='B') + all_versions.add(response['VersionId']) + + last_marker = None + last_version = None + paginator = client.get_paginator('list_object_versions') + for page in paginator.paginate(Bucket=bucket_name, MaxKeys=1): + next_marker = page.get('NextKeyMarker') + next_version = page.get('NextVersionIdMarker') + assert last_marker != next_marker or last_version != next_version + last_marker = next_marker + last_version = next_version + for v in page['Versions']: + all_versions.remove(v['VersionId']) # fails if already removed + + assert not all_versions, 'list_object_versions() did not visit every version' + + response = client.list_objects(Bucket=bucket_name, Marker='A', MaxKeys=1) + assert response['Contents'][0]['Key'] == 'B' + assert not response['IsTruncated'] + + response = client.list_objects_v2(Bucket=bucket_name, StartAfter='A', MaxKeys=1) + assert response['Contents'][0]['Key'] == 'B' + assert not response['IsTruncated'] + @pytest.mark.list_objects_v2 def test_bucket_listv2_startafter_not_in_list(): key_names = ['bar', 'baz', 'foo', 'quxx']