Skip to content

Commit

Permalink
Multipart chunksize (#807)
Browse files (browse the repository at this point in the history)
* add multipart-chunksize parameter

* add multipart-chunksize parameter

---------

Co-authored-by: soaig <[email protected]>
Co-authored-by: Radovan <[email protected]>
  • Loading branch information
3 people authored Jan 8, 2025
1 parent 98d9b6f commit 1d86144
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 8 deletions.
7 changes: 5 additions & 2 deletions medusa/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@
'StorageConfig',
['bucket_name', 'key_file', 'prefix', 'fqdn', 'host_file_separator', 'storage_provider', 'storage_class',
'base_path', 'max_backup_age', 'max_backup_count', 'api_profile', 'transfer_max_bandwidth',
'concurrent_transfers', 'multi_part_upload_threshold', 'host', 'region', 'port', 'secure', 'ssl_verify',
'aws_cli_path', 'kms_id', 'backup_grace_period_in_days', 'use_sudo_for_restore', 'k8s_mode', 'read_timeout']
'concurrent_transfers', 'multi_part_upload_threshold', 'multipart_chunksize', 'host', 'region', 'port', 'secure',
'ssl_verify', 'aws_cli_path', 'kms_id', 'backup_grace_period_in_days', 'use_sudo_for_restore', 'k8s_mode',
'read_timeout']
)

CassandraConfig = collections.namedtuple(
Expand Down Expand Up @@ -118,6 +119,8 @@ def _build_default_config():
'region': 'default',
'backup_grace_period_in_days': 10,
'use_sudo_for_restore': 'True',
'multipart_chunksize': '50MB',
'read_timeout': 60
}

config['logging'] = {
Expand Down
3 changes: 3 additions & 0 deletions medusa/storage/s3_base_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ def _make_connection_arguments(self, config) -> t.Dict[str, str]:
def _make_transfer_config(self, config):

transfer_max_bandwidth = config.transfer_max_bandwidth or None
multipart_chunksize = config.multipart_chunksize or None

# we hard-code this one because the parallelism is for now applied to chunking the files
transfer_config = {
Expand All @@ -196,6 +197,8 @@ def _make_transfer_config(self, config):
if transfer_max_bandwidth is not None:
transfer_config['max_bandwidth'] = AbstractStorage._human_size_to_bytes(transfer_max_bandwidth)

if multipart_chunksize is not None:
transfer_config['multipart_chunksize'] = AbstractStorage._human_size_to_bytes(multipart_chunksize)
return TransferConfig(**transfer_config)

@staticmethod
Expand Down
1 change: 1 addition & 0 deletions tests/resources/config/medusa-s3_us_west_oregon-dse.ini
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ concurrent_transfers = 16
backup_grace_period_in_days = 0
max_backup_count = 1
region = us-west-2
multipart_chunksize = 50MB

[monitoring]
monitoring_provider = local
1 change: 1 addition & 0 deletions tests/resources/config/medusa-s3_us_west_oregon.ini
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ concurrent_transfers = 16
backup_grace_period_in_days = 0
max_backup_count = 1
region = us-west-2
multipart_chunksize = 50MB
read_timeout = 60

[monitoring]
Expand Down
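1 change: 1 addition & 0 deletions (file name missing from this capture; presumably another S3 test config under tests/resources/config/)
Original file line number Diff line number Diff line change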
Expand Up @@ -17,6 +17,7 @@ backup_grace_period_in_days = 0
max_backup_count = 1
kms_id = 939b70ee-a65e-46af-aecf-a20ef6a457b7
region = us-west-2
multipart_chunksize = 50MB

[monitoring]
monitoring_provider = local
18 changes: 12 additions & 6 deletions tests/storage/s3_storage_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ def test_make_s3_url(self):
'ssl_verify': 'False',
'host': None,
'port': None,
'concurrent_transfers': '1'
'concurrent_transfers': '1',
'multipart_chunksize': '5MB'
})
s3_storage = S3BaseStorage(config)
# there are no extra connection args when connecting to regular S3
Expand All @@ -244,7 +245,8 @@ def test_make_s3_url_without_secure(self):
'ssl_verify': 'False',
'host': None,
'port': None,
'concurrent_transfers': '1'
'concurrent_transfers': '1',
'multipart_chunksize': '5MB'
})
s3_storage = S3BaseStorage(config)
# again, no extra connection args when connecting to regular S3
Expand All @@ -269,7 +271,8 @@ def test_make_s3_compatible_url(self):
'ssl_verify': 'False',
'host': 's3.example.com',
'port': '443',
'concurrent_transfers': '1'
'concurrent_transfers': '1',
'multipart_chunksize': '5MB'
})
s3_storage = S3BaseStorage(config)
self.assertEqual(
Expand All @@ -292,7 +295,8 @@ def test_make_s3_compatible_url_without_secure(self):
'ssl_verify': 'False',
'host': 's3.example.com',
'port': '8080',
'concurrent_transfers': '1'
'concurrent_transfers': '1',
'multipart_chunksize': '5MB'
})
s3_storage = S3BaseStorage(config)
self.assertEqual(
Expand All @@ -314,7 +318,8 @@ def test_make_connection_arguments_without_ssl_verify(self):
'ssl_verify': 'False',
'host': 's3.example.com',
'port': '8080',
'concurrent_transfers': '1'
'concurrent_transfers': '1',
'multipart_chunksize': '5MB'
})
s3_storage = S3BaseStorage(config)
connection_args = s3_storage._make_connection_arguments(config)
Expand All @@ -334,7 +339,8 @@ def test_make_connection_arguments_with_ssl_verify(self):
'ssl_verify': 'True',
'host': 's3.example.com',
'port': '8080',
'concurrent_transfers': '1'
'concurrent_transfers': '1',
'multipart_chunksize': '5MB'
})
s3_storage = S3BaseStorage(config)
connection_args = s3_storage._make_connection_arguments(config)
Expand Down

0 comments on commit 1d86144

Please sign in to comment.