From ff3f91a6bfcba1a9999928c6e6e0b4ce68b8426a Mon Sep 17 00:00:00 2001 From: Ahmed Moustafa <35640105+aemous@users.noreply.github.com> Date: Wed, 20 Nov 2024 13:43:13 -0500 Subject: [PATCH 1/6] [v2] Add botocore config options for checksum updates. (#9084) --- awscli/botocore/args.py | 53 ++++++++++++++++++++++++- awscli/botocore/config.py | 33 ++++++++++++++++ awscli/botocore/configprovider.py | 14 ++++++- awscli/botocore/exceptions.py | 9 +++++ tests/unit/botocore/test_args.py | 64 +++++++++++++++++++++++++++++++ 5 files changed, 171 insertions(+), 2 deletions(-) diff --git a/awscli/botocore/args.py b/awscli/botocore/args.py index 7fb4721781a1..24ecb2db9236 100644 --- a/awscli/botocore/args.py +++ b/awscli/botocore/args.py @@ -36,6 +36,14 @@ # values result in a warning-level log message. USERAGENT_APPID_MAXLEN = 50 +VALID_REQUEST_CHECKSUM_CALCULATION_CONFIG = ( + "when_supported", + "when_required", +) +VALID_RESPONSE_CHECKSUM_VALIDATION_CONFIG = ( + "when_supported", + "when_required", +) class ClientArgsCreator(object): def __init__(self, event_emitter, user_agent, response_parser_factory, @@ -216,11 +224,18 @@ def compute_client_args(self, service_model, client_config, sigv4a_signing_region_set=( client_config.sigv4a_signing_region_set ), + request_checksum_calculation=( + client_config.request_checksum_calculation + ), + response_checksum_validation=( + client_config.response_checksum_validation + ), ) self._compute_retry_config(config_kwargs) self._compute_request_compression_config(config_kwargs) self._compute_user_agent_appid_config(config_kwargs) self._compute_sigv4a_signing_region_set_config(config_kwargs) + self._compute_checksum_config(config_kwargs) s3_config = self.compute_s3_config(client_config) is_s3_service = self._is_s3_service(service_name) @@ -588,4 +603,40 @@ def _compute_sigv4a_signing_region_set_config(self, config_kwargs): sigv4a_signing_region_set = self._config_store.get_config_variable( 'sigv4a_signing_region_set' ) - config_kwargs['sigv4a_signing_region_set'] = sigv4a_signing_region_set \ No newline at end of file + config_kwargs['sigv4a_signing_region_set'] = sigv4a_signing_region_set + + def _compute_checksum_config(self, config_kwargs): + self._handle_checksum_config( + config_kwargs, + config_key="request_checksum_calculation", + default_value="when_supported", + valid_options=VALID_REQUEST_CHECKSUM_CALCULATION_CONFIG, + ) + self._handle_checksum_config( + config_kwargs, + config_key="response_checksum_validation", + default_value="when_supported", + valid_options=VALID_RESPONSE_CHECKSUM_VALIDATION_CONFIG, + ) + + def _handle_checksum_config( + self, + config_kwargs, + config_key, + default_value, + valid_options, + ): + value = config_kwargs.get(config_key) + if value is None: + value = ( + self._config_store.get_config_variable(config_key) + or default_value + ) + value = value.lower() + if value not in valid_options: + raise botocore.exceptions.InvalidChecksumConfigError( + config_key=config_key, + config_value=value, + valid_options=valid_options, + ) + config_kwargs[config_key] = value \ No newline at end of file diff --git a/awscli/botocore/config.py b/awscli/botocore/config.py index 8fe5662b1fd9..ece4cb820e92 100644 --- a/awscli/botocore/config.py +++ b/awscli/botocore/config.py @@ -194,6 +194,37 @@ class Config(object): :param sigv4a_signing_region_set: A set of AWS regions to apply the signature for when using SigV4a for signing. Set to ``*`` to represent all regions. Defaults to None. + + :type request_checksum_calculation: str + :param request_checksum_calculation: Determines when a checksum will be + calculated for request payloads. Valid values are: + + * ``when_supported`` -- When set, a checksum will be calculated for + all request payloads of operations modeled with the ``httpChecksum`` + trait where ``requestChecksumRequired`` is ``true`` and/or a + ``requestAlgorithmMember`` is modeled. + + * ``when_required`` -- When set, a checksum will only be calculated + for request payloads of operations modeled with the ``httpChecksum`` + trait where ``requestChecksumRequired`` is ``true`` or where a + ``requestAlgorithmMember`` is modeled and supplied. + + Defaults to None. + + :type response_checksum_validation: str + :param response_checksum_validation: Determines when checksum validation + will be performed on response payloads. Valid values are: + + * ``when_supported`` -- When set, checksum validation is performed on + all response payloads of operations modeled with the ``httpChecksum`` + trait where ``responseAlgorithms`` is modeled, except when no modeled + checksum algorithms are supported. + + * ``when_required`` -- When set, checksum validation is not performed + on response payloads of operations unless the checksum algorithm is + supported and the ``requestValidationModeMember`` member is set to ``ENABLED``. + + Defaults to None. """ OPTION_DEFAULTS = OrderedDict([ ('region_name', None), @@ -218,6 +249,8 @@ class Config(object): ('request_min_compression_size_bytes', None), ('disable_request_compression', None), ('sigv4a_signing_region_set', None), + ('request_checksum_calculation', None), + ('response_checksum_validation', None), ]) def __init__(self, *args, **kwargs): diff --git a/awscli/botocore/configprovider.py b/awscli/botocore/configprovider.py index 63c75c27f796..86fea6bd1cdc 100644 --- a/awscli/botocore/configprovider.py +++ b/awscli/botocore/configprovider.py @@ -137,6 +137,18 @@ None, None, ), + 'request_checksum_calculation': ( + 'request_checksum_calculation', + 'AWS_REQUEST_CHECKSUM_CALCULATION', + "when_supported", + None, + ), + 'response_checksum_validation': ( + 'response_checksum_validation', + 'AWS_RESPONSE_CHECKSUM_VALIDATION', + "when_supported", + None, + ), } # A mapping for the s3 specific configuration vars. These are the configuration # vars that typically go in the s3 section of the config file. This mapping @@ -336,7 +348,7 @@ def __copy__(self): def get_config_variable(self, logical_name): """ - Retrieve the value associeated with the specified logical_name + Retrieve the value associated with the specified logical_name from the corresponding provider. If no value is found None will be returned. diff --git a/awscli/botocore/exceptions.py b/awscli/botocore/exceptions.py index 9adbf32070ef..b5c7ba3807a3 100644 --- a/awscli/botocore/exceptions.py +++ b/awscli/botocore/exceptions.py @@ -742,3 +742,12 @@ class EndpointResolutionError(EndpointProviderError): class UnknownEndpointResolutionBuiltInName(EndpointProviderError): fmt = 'Unknown builtin variable name: {name}' + + +class InvalidChecksumConfigError(BotoCoreError): + """Error when invalid value supplied for checksum config""" + + fmt = ( + 'Unsupported configuration value for {config_key}. ' + 'Expected one of {valid_options} but got {config_value}.' + ) diff --git a/tests/unit/botocore/test_args.py b/tests/unit/botocore/test_args.py index ac6ada14f51d..9a55474b942a 100644 --- a/tests/unit/botocore/test_args.py +++ b/tests/unit/botocore/test_args.py @@ -450,6 +450,70 @@ def test_bad_value_disable_request_compression(self): config = client_args['client_config'] self.assertFalse(config.disable_request_compression) + def test_checksum_default_client_config(self): + input_config = Config() + client_args = self.call_get_client_args(client_config=input_config) + config = client_args["client_config"] + self.assertEqual(config.request_checksum_calculation, "when_supported") + self.assertEqual(config.response_checksum_validation, "when_supported") + + def test_checksum_client_config(self): + input_config = Config( + request_checksum_calculation="when_required", + response_checksum_validation="when_required", + ) + client_args = self.call_get_client_args(client_config=input_config) + config = client_args['client_config'] + self.assertEqual(config.request_checksum_calculation, "when_required") + self.assertEqual(config.response_checksum_validation, "when_required") + + def test_checksum_config_store(self): + self.config_store.set_config_variable( + "request_checksum_calculation", "when_required" + ) + self.config_store.set_config_variable( + "response_checksum_validation", "when_required" + ) + config = self.call_get_client_args()['client_config'] + self.assertEqual(config.request_checksum_calculation, "when_required") + self.assertEqual(config.response_checksum_validation, "when_required") + + def test_checksum_client_config_overrides_config_store(self): + self.config_store.set_config_variable( + "request_checksum_calculation", "when_supported" + ) + self.config_store.set_config_variable( + "response_checksum_validation", "when_supported" + ) + input_config = Config( + request_checksum_calculation="when_required", + response_checksum_validation="when_required", + ) + client_args = self.call_get_client_args(client_config=input_config) + config = client_args['client_config'] + self.assertEqual(config.request_checksum_calculation, "when_required") + self.assertEqual(config.response_checksum_validation, "when_required") + + def test_request_checksum_calculation_invalid_client_config(self): + with self.assertRaises(exceptions.InvalidChecksumConfigError): + config = Config(request_checksum_calculation="invalid_config") + self.call_get_client_args(client_config=config) + self.config_store.set_config_variable( + 'request_checksum_calculation', "invalid_config" + ) + with self.assertRaises(exceptions.InvalidChecksumConfigError): + self.call_get_client_args() + + def test_response_checksum_validation_invalid_client_config(self): + with self.assertRaises(exceptions.InvalidChecksumConfigError): + config = Config(response_checksum_validation="invalid_config") + self.call_get_client_args(client_config=config) + self.config_store.set_config_variable( + 'response_checksum_validation', "invalid_config" + ) + with self.assertRaises(exceptions.InvalidChecksumConfigError): + self.call_get_client_args() + class TestEndpointResolverBuiltins(unittest.TestCase): def setUp(self): From 30374ebe97fc2720e813805c1a6834d4b16914f7 Mon Sep 17 00:00:00 2001 From: aemous Date: Wed, 6 Nov 2024 11:00:56 -0500 Subject: [PATCH 2/6] Initial progress on enabling request checksum calculation within botocore. --- awscli/botocore/handlers.py | 18 +- awscli/botocore/httpchecksum.py | 73 +-- awscli/botocore/utils.py | 27 +- .../functional/botocore/test_httpchecksum.py | 501 ++++++++++++++++++ tests/functional/botocore/test_s3.py | 127 +++-- tests/unit/botocore/test_handlers.py | 55 +- tests/unit/botocore/test_httpchecksum.py | 81 +-- 7 files changed, 756 insertions(+), 126 deletions(-) create mode 100644 tests/functional/botocore/test_httpchecksum.py diff --git a/awscli/botocore/handlers.py b/awscli/botocore/handlers.py index 8ba860957312..199366a01df1 100644 --- a/awscli/botocore/handlers.py +++ b/awscli/botocore/handlers.py @@ -60,8 +60,6 @@ from botocore.utils import ( SAFE_CHARS, ArnParser, - conditionally_calculate_checksum, - conditionally_calculate_md5, hyphenize_service_id, is_global_accesspoint, percent_encode, @@ -1151,6 +1149,17 @@ def _update_status_code(response, **kwargs): http_response.status_code = parsed_status_code +def handle_request_validation_mode_member(params, model, **kwargs): + client_config = kwargs.get("context", {}).get("client_config") + if client_config is None: + return + response_checksum_validation = client_config.response_checksum_validation + http_checksum = model.http_checksum + mode_member = http_checksum.get("requestValidationModeMember") + if mode_member and response_checksum_validation == "when_supported": + params.setdefault(mode_member, "ENABLED") + + # This is a list of (event_name, handler). # When a Session is created, everything in this list will be # automatically registered with that Session. @@ -1177,7 +1186,7 @@ def _update_status_code(response, **kwargs): ('before-parse.s3.*', handle_expires_header), ('before-parse.s3.*', _handle_200_error, REGISTER_FIRST), ('before-parameter-build', generate_idempotent_uuid), - + ('before-parameter-build', handle_request_validation_mode_member), ('before-parameter-build.s3', validate_bucket_name), ('before-parameter-build.s3', remove_bucket_from_url_paths_from_model), @@ -1205,10 +1214,7 @@ def _update_status_code(response, **kwargs): ('before-call.s3', add_expect_header), ('before-call.glacier', add_glacier_version), ('before-call.api-gateway', add_accept_header), - ('before-call.s3.PutObject', conditionally_calculate_checksum), - ('before-call.s3.UploadPart', conditionally_calculate_md5), ('before-call.s3.DeleteObjects', escape_xml_payload), - ('before-call.s3.DeleteObjects', conditionally_calculate_checksum), ('before-call.s3.PutBucketLifecycleConfiguration', escape_xml_payload), ('before-call.glacier.UploadArchive', add_glacier_checksums), ('before-call.glacier.UploadMultipartPart', add_glacier_checksums), diff --git a/awscli/botocore/httpchecksum.py b/awscli/botocore/httpchecksum.py index 4804483f7272..196237b0883c 100644 --- a/awscli/botocore/httpchecksum.py +++ b/awscli/botocore/httpchecksum.py @@ -25,15 +25,15 @@ from hashlib import sha1, sha256 from awscrt import checksums as crt_checksums +from botocore.compat import HAS_CRT, urlparse from botocore.exceptions import AwsChunkedWrapperError, FlexibleChecksumError from botocore.response import StreamingBody -from botocore.utils import ( - conditionally_calculate_md5, - determine_content_length, -) +from botocore.utils import determine_content_length, has_checksum_header logger = logging.getLogger(__name__) +DEFAULT_CHECKSUM_ALGORITHM = "CRC32" + class BaseChecksum: _CHUNK_SIZE = 1024 * 1024 @@ -246,7 +246,18 @@ def resolve_checksum_context(request, operation_model, params): def resolve_request_checksum_algorithm( request, operation_model, params, supported_algorithms=None, ): + # If the header is already set by the customer, skip calculation + if has_checksum_header(request): + return + + request_checksum_calculation = request["context"][ + "client_config" + ].request_checksum_calculation http_checksum = operation_model.http_checksum + request_checksum_required = ( + operation_model.http_checksum_required + or http_checksum.get("requestChecksumRequired") + ) algorithm_member = http_checksum.get("requestAlgorithmMember") if algorithm_member and algorithm_member in params: # If the client has opted into using flexible checksums and the @@ -259,35 +270,32 @@ def resolve_request_checksum_algorithm( raise FlexibleChecksumError( error_msg="Unsupported checksum algorithm: %s" % algorithm_name ) + elif request_checksum_required or ( + algorithm_member and request_checksum_calculation == "when_supported" + ): + algorithm_name = DEFAULT_CHECKSUM_ALGORITHM.lower() + else: + return - location_type = "header" - if operation_model.has_streaming_input: - # Operations with streaming input must support trailers. - if request["url"].startswith("https:"): - # We only support unsigned trailer checksums currently. As this - # disables payload signing we'll only use trailers over TLS. - location_type = "trailer" - - algorithm = { - "algorithm": algorithm_name, - "in": location_type, - "name": "x-amz-checksum-%s" % algorithm_name, - } + location_type = "header" + if ( + operation_model.has_streaming_input + and urlparse(request["url"]).scheme == "https" + ): + # Operations with streaming input must support trailers. + # We only support unsigned trailer checksums currently. As this + # disables payload signing we'll only use trailers over TLS. + location_type = "trailer" - if algorithm["name"] in request["headers"]: - # If the header is already set by the customer, skip calculation - return + algorithm = { + "algorithm": algorithm_name, + "in": location_type, + "name": f"x-amz-checksum-{algorithm_name}", + } - checksum_context = request["context"].get("checksum", {}) - checksum_context["request_algorithm"] = algorithm - request["context"]["checksum"] = checksum_context - elif operation_model.http_checksum_required or http_checksum.get( - "requestChecksumRequired" - ): - # Otherwise apply the old http checksum behavior via Content-MD5 - checksum_context = request["context"].get("checksum", {}) - checksum_context["request_algorithm"] = "conditional-md5" - request["context"]["checksum"] = checksum_context + checksum_context = request["context"].get("checksum", {}) + checksum_context["request_algorithm"] = algorithm + request["context"]["checksum"] = checksum_context def apply_request_checksum(request): @@ -297,10 +305,7 @@ def apply_request_checksum(request): if not algorithm: return - if algorithm == "conditional-md5": - # Special case to handle the http checksum required trait - conditionally_calculate_md5(request) - elif algorithm["in"] == "header": + if algorithm["in"] == "header": _apply_request_header_checksum(request) elif algorithm["in"] == "trailer": _apply_request_trailer_checksum(request) diff --git a/awscli/botocore/utils.py b/awscli/botocore/utils.py index 2f4e1ef4fd5e..36ac91029678 100644 --- a/awscli/botocore/utils.py +++ b/awscli/botocore/utils.py @@ -2962,6 +2962,7 @@ def get_encoding_from_headers(headers, default='ISO-8859-1'): def calculate_md5(body, **kwargs): + """This function has been deprecated, but is kept for backwards compatibility.""" if isinstance(body, (bytes, bytearray)): binary_md5 = _calculate_md5_from_bytes(body) else: @@ -2970,11 +2971,13 @@ def calculate_md5(body, **kwargs): def _calculate_md5_from_bytes(body_bytes): + """This function has been deprecated, but is kept for backwards compatibility.""" md5 = get_md5(body_bytes) return md5.digest() def _calculate_md5_from_file(fileobj): + """This function has been deprecated, but is kept for backwards compatibility.""" start_position = fileobj.tell() md5 = get_md5() for chunk in iter(lambda: fileobj.read(1024 * 1024), b''): @@ -2990,15 +2993,16 @@ def _is_s3express_request(params): return endpoint_properties.get('backend') == 'S3Express' -def _has_checksum_header(params): +def has_checksum_header(params): + """ + Checks if a header starting with "x-amz-checksum-" is provided in a request. + This class is considered private and subject to abrupt breaking changes or + removal without prior announcement. Please do not use it directly. + """ headers = params['headers'] - # If a user provided Content-MD5 is present, - # don't try to compute a new one. - if 'Content-MD5' in headers: - return True # If a header matching the x-amz-checksum-* pattern is present, we - # assume a checksum has already been provided and an md5 is not needed + # assume a checksum has already been provided by the user. for header in headers: if CHECKSUM_HEADER_PATTERN.match(header): return True @@ -3007,12 +3011,14 @@ def _has_checksum_header(params): def conditionally_calculate_checksum(params, **kwargs): - if not _has_checksum_header(params): + """This function has been deprecated, but is kept for backwards compatibility.""" + if not has_checksum_header(params): conditionally_calculate_md5(params, **kwargs) conditionally_enable_crc32(params, **kwargs) def conditionally_enable_crc32(params, **kwargs): + """This function has been deprecated, but is kept for backwards compatibility.""" checksum_context = params.get('context', {}).get('checksum', {}) checksum_algorithm = checksum_context.get('request_algorithm') if ( @@ -3030,7 +3036,10 @@ def conditionally_enable_crc32(params, **kwargs): def conditionally_calculate_md5(params, **kwargs): - """Only add a Content-MD5 if the system supports it.""" + """ + This function has been deprecated, but is kept for backwards compatibility. + Only add a Content-MD5 if the system supports it. + """ body = params['body'] checksum_context = params.get('context', {}).get('checksum', {}) checksum_algorithm = checksum_context.get('request_algorithm') @@ -3038,7 +3047,7 @@ def conditionally_calculate_md5(params, **kwargs): # Skip for requests that will have a flexible checksum applied return - if _has_checksum_header(params): + if has_checksum_header(params): # Don't add a new header if one is already available. return diff --git a/tests/functional/botocore/test_httpchecksum.py b/tests/functional/botocore/test_httpchecksum.py new file mode 100644 index 000000000000..d7f8ac2b8f84 --- /dev/null +++ b/tests/functional/botocore/test_httpchecksum.py @@ -0,0 +1,501 @@ +# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. + + +import pytest + +from botocore.compat import HAS_CRT +from botocore.exceptions import FlexibleChecksumError +from tests import ClientHTTPStubber, patch_load_service_model + +TEST_CHECKSUM_SERVICE_MODEL = { + "version": "2.0", + "documentation": "This is a test service.", + "metadata": { + "apiVersion": "2023-01-01", + "endpointPrefix": "test", + "protocol": "rest-json", + "jsonVersion": "1.1", + "serviceFullName": "Test Service", + "serviceId": "Test Service", + "signatureVersion": "v4", + "signingName": "testservice", + "uid": "testservice-2023-01-01", + }, + "operations": { + "HttpChecksumOperation": { + "name": "HttpChecksumOperation", + "http": {"method": "POST", "requestUri": "/HttpChecksumOperation"}, + "input": {"shape": "SomeInput"}, + "output": {"shape": "SomeOutput"}, + "httpChecksum": { + "requestChecksumRequired": True, + "requestAlgorithmMember": "checksumAlgorithm", + "requestValidationModeMember": "validationMode", + "responseAlgorithms": [ + "CRC32", + "CRC32C", + "CRC64NVME", + "SHA1", + "SHA256", + ], + }, + }, + "HttpChecksumStreamingOperation": { + "name": "HttpChecksumStreamingOperation", + "http": { + "method": "POST", + "requestUri": "/HttpChecksumStreamingOperation", + }, + "input": {"shape": "SomeStreamingInput"}, + "output": {"shape": "SomeStreamingOutput"}, + "httpChecksum": { + "requestChecksumRequired": True, + "requestAlgorithmMember": "checksumAlgorithm", + "requestValidationModeMember": "validationMode", + "responseAlgorithms": [ + "CRC32", + "CRC32C", + "CRC64NVME", + "SHA1", + "SHA256", + ], + }, + }, + }, + "shapes": { + "ChecksumAlgorithm": { + "type": "string", + "enum": {"CRC32", "CRC32C", "CRC64NVME", "SHA1", "SHA256"}, + "member": {"shape": "MockOpParam"}, + }, + "ValidationMode": {"type": "string", "enum": {"ENABLE"}}, + "String": {"type": "string"}, + "Blob": {"type": "blob"}, + "SomeStreamingOutput": { + "type": "structure", + "members": {"body": {"shape": "Blob", "streaming": True}}, + "payload": "body", + }, + "SomeStreamingInput": { + "type": "structure", + "required": ["body"], + "members": { + "body": { + "shape": "Blob", + "streaming": True, + }, + "validationMode": { + "shape": "ValidationMode", + "location": "header", + "locationName": "x-amz-response-validation-mode", + }, + "checksumAlgorithm": { + "shape": "ChecksumAlgorithm", + "location": "header", + "locationName": "x-amz-request-algorithm", + }, + }, + "payload": "body", + }, + "SomeInput": { + "type": "structure", + "required": ["body"], + "members": { + "body": {"shape": "String"}, + "validationMode": { + "shape": "ValidationMode", + "location": "header", + "locationName": "x-amz-response-validation-mode", + }, + "checksumAlgorithm": { + "shape": "ChecksumAlgorithm", + "location": "header", + "locationName": "x-amz-request-algorithm", + }, + }, + "payload": "body", + }, + "SomeOutput": { + "type": "structure", + }, + }, +} + +TEST_CHECKSUM_RULESET = { + "version": "1.0", + "parameters": {}, + "rules": [ + { + "conditions": [], + "type": "endpoint", + "endpoint": { + "url": "https://foo.bar", + "properties": {}, + "headers": {}, + }, + } + ], +} + + +def _request_checksum_calculation_cases(): + request_payload = "Hello world" + cases = [ + ( + "CRC32", + request_payload, + { + "x-amz-request-algorithm": "CRC32", + "x-amz-checksum-crc32": "i9aeUg==", + }, + ), + ( + "SHA1", + request_payload, + { + "x-amz-request-algorithm": "SHA1", + "x-amz-checksum-sha1": "e1AsOh9IyGCa4hLN+2Od7jlnP14=", + }, + ), + ( + "SHA256", + request_payload, + { + "x-amz-request-algorithm": "SHA256", + "x-amz-checksum-sha256": "ZOyIygCyaOW6GjVnihtTFtIS9PNmskdyMlNKiuyjfzw=", + }, + ), + ] + if HAS_CRT: + cases.extend( + [ + ( + "CRC32C", + request_payload, + { + "x-amz-request-algorithm": "CRC32C", + "x-amz-checksum-crc32c": "crUfeA==", + }, + ), + ( + "CRC64NVME", + request_payload, + { + "x-amz-request-algorithm": "CRC64NVME", + "x-amz-checksum-crc64nvme": "OOJZ0D8xKts=", + }, + ), + ] + ) + return cases + + +@pytest.mark.parametrize( + "checksum_algorithm, request_payload, expected_headers", + _request_checksum_calculation_cases(), +) +def test_request_checksum_calculation( + patched_session, + monkeypatch, + checksum_algorithm, + request_payload, + expected_headers, +): + patch_load_service_model( + patched_session, + monkeypatch, + TEST_CHECKSUM_SERVICE_MODEL, + TEST_CHECKSUM_RULESET, + ) + client = patched_session.create_client( + "testservice", + region_name="us-west-2", + ) + with ClientHTTPStubber(client, strict=True) as http_stubber: + http_stubber.add_response(status=200, body=b"") + client.http_checksum_operation( + body=request_payload, checksumAlgorithm=checksum_algorithm + ) + actual_headers = http_stubber.requests[0].headers + for key, val in expected_headers.items(): + assert key in actual_headers + assert actual_headers[key].decode() == val + + +def _streaming_request_checksum_calculation_cases(): + request_payload = "Hello world" + cases = [ + ( + "CRC32", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-crc32", + }, + {"x-amz-checksum-crc32": "i9aeUg=="}, + ), + ( + "SHA1", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-sha1", + }, + {"x-amz-checksum-sha1": "e1AsOh9IyGCa4hLN+2Od7jlnP14="}, + ), + ( + "SHA256", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-sha256", + }, + { + "x-amz-checksum-sha256": "ZOyIygCyaOW6GjVnihtTFtIS9PNmskdyMlNKiuyjfzw=" + }, + ), + ] + if HAS_CRT: + cases.extend( + [ + ( + "CRC32C", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-crc32c", + }, + {"x-amz-checksum-crc32c": "crUfeA=="}, + ), + ( + "CRC64NVME", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-crc64nvme", + }, + {"x-amz-checksum-crc64nvme": "OOJZ0D8xKts="}, + ), + ] + ) + return cases + + +@pytest.mark.parametrize( + "checksum_algorithm, request_payload, expected_headers, expected_trailers", + _streaming_request_checksum_calculation_cases(), +) +def test_streaming_request_checksum_calculation( + patched_session, + monkeypatch, + checksum_algorithm, + request_payload, + expected_headers, + expected_trailers, +): + patch_load_service_model( + patched_session, + monkeypatch, + TEST_CHECKSUM_SERVICE_MODEL, + TEST_CHECKSUM_RULESET, + ) + client = patched_session.create_client( + "testservice", + region_name="us-west-2", + ) + with ClientHTTPStubber(client, strict=True) as http_stubber: + http_stubber.add_response(status=200, body=b"") + client.http_checksum_streaming_operation( + body=request_payload, checksumAlgorithm=checksum_algorithm + ) + request = http_stubber.requests[0] + actual_headers = request.headers + for key, val in expected_headers.items(): + assert key in actual_headers + assert actual_headers[key].decode() == val + read_body = request.body.read() + for key, val in expected_trailers.items(): + assert f"{key}:{val}".encode() in read_body + + +def _response_checksum_validation_cases(): + response_payload = "Hello world" + cases = [ + ( + "CRC32", + response_payload, + {"x-amz-checksum-crc32": "i9aeUg=="}, + {"kind": "success"}, + ), + ( + "CRC32", + response_payload, + {"x-amz-checksum-crc32": "bm90LWEtY2hlY2tzdW0="}, + {"kind": "failure", "calculatedChecksum": "i9aeUg=="}, + ), + ( + "SHA1", + response_payload, + {"x-amz-checksum-sha1": "e1AsOh9IyGCa4hLN+2Od7jlnP14="}, + {"kind": "success"}, + ), + ( + "SHA1", + response_payload, + {"x-amz-checksum-sha1": "bm90LWEtY2hlY2tzdW0="}, + { + "kind": "failure", + "calculatedChecksum": "e1AsOh9IyGCa4hLN+2Od7jlnP14=", + }, + ), + ( + "SHA256", + response_payload, + { + "x-amz-checksum-sha256": "ZOyIygCyaOW6GjVnihtTFtIS9PNmskdyMlNKiuyjfzw=" + }, + {"kind": "success"}, + ), + ( + "SHA256", + response_payload, + {"x-amz-checksum-sha256": "bm90LWEtY2hlY2tzdW0="}, + { + "kind": "failure", + "calculatedChecksum": "ZOyIygCyaOW6GjVnihtTFtIS9PNmskdyMlNKiuyjfzw=", + }, + ), + ] + if HAS_CRT: + cases.extend( + [ + ( + "CRC32C", + response_payload, + {"x-amz-checksum-crc32c": "crUfeA=="}, + {"kind": "success"}, + ), + ( + "CRC32C", + response_payload, + {"x-amz-checksum-crc32c": "bm90LWEtY2hlY2tzdW0="}, + {"kind": "failure", "calculatedChecksum": "crUfeA=="}, + ), + ( + "CRC64NVME", + response_payload, + {"x-amz-checksum-crc64nvme": "OOJZ0D8xKts="}, + {"kind": "success"}, + ), + ( + "CRC64NVME", + response_payload, + {"x-amz-checksum-crc64nvme": "bm90LWEtY2hlY2tzdW0="}, + {"kind": "failure", "calculatedChecksum": "OOJZ0D8xKts="}, + ), + ] + ) + return cases + + +@pytest.mark.parametrize( + "checksum_algorithm, response_payload, response_headers, expected", + _response_checksum_validation_cases(), +) +def test_response_checksum_validation( + patched_session, + monkeypatch, + checksum_algorithm, + response_payload, + response_headers, + expected, +): + patch_load_service_model( + patched_session, + monkeypatch, + TEST_CHECKSUM_SERVICE_MODEL, + TEST_CHECKSUM_RULESET, + ) + client = patched_session.create_client( + "testservice", + region_name="us-west-2", + ) + with ClientHTTPStubber(client, strict=True) as http_stubber: + http_stubber.add_response( + status=200, + body=response_payload.encode(), + headers=response_headers, + ) + operation_kwargs = { + "body": response_payload, + "checksumAlgorithm": checksum_algorithm, + } + if expected["kind"] == "failure": + with pytest.raises(FlexibleChecksumError) as expected_error: + client.http_checksum_operation(**operation_kwargs) + error_msg = "Expected checksum {} did not match calculated checksum: {}".format( + response_headers[ + f'x-amz-checksum-{checksum_algorithm.lower()}' + ], + expected['calculatedChecksum'], + ) + assert str(expected_error.value) == error_msg + else: + client.http_checksum_operation(**operation_kwargs) + + +@pytest.mark.parametrize( + "checksum_algorithm, response_payload, response_headers, expected", + _response_checksum_validation_cases(), +) +def test_streaming_response_checksum_validation( + patched_session, + monkeypatch, + checksum_algorithm, + response_payload, + response_headers, + expected, +): + patch_load_service_model( + patched_session, + monkeypatch, + TEST_CHECKSUM_SERVICE_MODEL, + TEST_CHECKSUM_RULESET, + ) + client = patched_session.create_client( + "testservice", + region_name="us-west-2", + ) + with ClientHTTPStubber(client, strict=True) as http_stubber: + http_stubber.add_response( + status=200, + body=response_payload.encode(), + headers=response_headers, + ) + response = client.http_checksum_streaming_operation( + body=response_payload, + checksumAlgorithm=checksum_algorithm, + ) + if expected["kind"] == "failure": + with pytest.raises(FlexibleChecksumError) as expected_error: + response["body"].read() + error_msg = "Expected checksum {} did not match calculated checksum: {}".format( + response_headers[ + f'x-amz-checksum-{checksum_algorithm.lower()}' + ], + expected['calculatedChecksum'], + ) + assert str(expected_error.value) == error_msg + else: + response["body"].read() \ No newline at end of file diff --git a/tests/functional/botocore/test_s3.py b/tests/functional/botocore/test_s3.py index c51981c3ada2..0a9a6293e004 100644 --- a/tests/functional/botocore/test_s3.py +++ b/tests/functional/botocore/test_s3.py @@ -10,12 +10,12 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -import base64 import re import pytest from dateutil.tz import tzutc +from botocore.httpchecksum import HAS_CRT, Crc32Checksum, CrtCrc32Checksum from tests import ( create_session, mock, temporary_file, unittest, BaseSessionTest, ClientHTTPStubber, FreezeTime @@ -23,7 +23,7 @@ import botocore.session from botocore.config import Config -from botocore.compat import datetime, urlsplit, parse_qs, get_md5 +from botocore.compat import datetime, urlsplit, parse_qs from botocore.exceptions import ( ParamValidationError, ClientError, UnsupportedS3ConfigurationError, @@ -1389,31 +1389,68 @@ def setUp(self): def get_sent_headers(self): return self.http_stubber.requests[0].headers - def test_content_md5_set(self): + def test_trailing_checksum_set(self): with self.http_stubber: self.client.put_object(Bucket='foo', Key='bar', Body='baz') - self.assertIn('content-md5', self.get_sent_headers()) + sent_headers = self.get_sent_headers() + self.assertEqual(sent_headers["Content-Encoding"], b"aws-chunked") + self.assertEqual(sent_headers["Transfer-Encoding"], b"chunked") + self.assertEqual( + sent_headers["X-Amz-Trailer"], b"x-amz-checksum-crc32" + ) + self.assertEqual(sent_headers["X-Amz-Decoded-Content-Length"], b"3") + self.assertEqual( + sent_headers["x-amz-content-sha256"], + b"STREAMING-UNSIGNED-PAYLOAD-TRAILER", + ) + body = self.http_stubber.requests[0].body.read() + self.assertIn(b"x-amz-checksum-crc32", body) + - def test_content_md5_set_empty_body(self): + def test_trailing_checksum_set_empty_body(self): with self.http_stubber: self.client.put_object(Bucket='foo', Key='bar', Body='') - self.assertIn('content-md5', self.get_sent_headers()) + sent_headers = self.get_sent_headers() + self.assertEqual(sent_headers["Content-Encoding"], b"aws-chunked") + self.assertEqual(sent_headers["Transfer-Encoding"], b"chunked") + self.assertEqual( + sent_headers["X-Amz-Trailer"], b"x-amz-checksum-crc32" + ) + self.assertEqual(sent_headers["X-Amz-Decoded-Content-Length"], b"0") + self.assertEqual( + sent_headers["x-amz-content-sha256"], + b"STREAMING-UNSIGNED-PAYLOAD-TRAILER", + ) + body = self.http_stubber.requests[0].body.read() + self.assertIn(b"x-amz-checksum-crc32", body) - def test_content_md5_set_empty_file(self): + def test_trailing_checksum_set_empty_file(self): with self.http_stubber: with temporary_file('rb') as f: assert f.read() == b'' self.client.put_object(Bucket='foo', Key='bar', Body=f) - self.assertIn('content-md5', self.get_sent_headers()) - - def test_content_sha256_set_if_config_value_is_true(self): - # By default, put_object() does not include an x-amz-content-sha256 - # header because it also includes a `Content-MD5` header. The - # `payload_signing_enabled` config overrides this logic and forces the - # header. - config = Config(signature_version='s3v4', s3={ - 'payload_signing_enabled': True - }) + body = self.http_stubber.requests[0].body.read() + sent_headers = self.get_sent_headers() + self.assertEqual(sent_headers["Content-Encoding"], b"aws-chunked") + self.assertEqual(sent_headers["Transfer-Encoding"], b"chunked") + self.assertEqual( + sent_headers["X-Amz-Trailer"], b"x-amz-checksum-crc32" + ) + self.assertEqual(sent_headers["X-Amz-Decoded-Content-Length"], b"0") + self.assertEqual( + sent_headers["x-amz-content-sha256"], + b"STREAMING-UNSIGNED-PAYLOAD-TRAILER", + ) + self.assertIn(b"x-amz-checksum-crc32", body) + + def test_content_sha256_not_set_if_config_value_is_true(self): + # By default, put_object() provides a trailing checksum and includes the + # x-amz-content-sha256 header with a value of "STREAMING-UNSIGNED-PAYLOAD-TRAILER". + # We do not support payload signing for streaming so the `payload_signing_enabled` + # config has no effect here. + config = Config( + signature_version='s3v4', + s3={'payload_signing_enabled': True}) self.client = self.session.create_client( 's3', self.region, config=config) self.http_stubber = ClientHTTPStubber(self.client) @@ -1422,12 +1459,17 @@ def test_content_sha256_set_if_config_value_is_true(self): self.client.put_object(Bucket='foo', Key='bar', Body='baz') sent_headers = self.get_sent_headers() sha_header = sent_headers.get('x-amz-content-sha256') - self.assertNotEqual(sha_header, b'UNSIGNED-PAYLOAD') + self.assertEqual(sha_header, b"STREAMING-UNSIGNED-PAYLOAD-TRAILER") def test_content_sha256_not_set_if_config_value_is_false(self): - config = Config(signature_version='s3v4', s3={ - 'payload_signing_enabled': False - }) + # By default, put_object() provides a trailing checksum and includes the + # x-amz-content-sha256 header with a value of "STREAMING-UNSIGNED-PAYLOAD-TRAILER". + # We do not support payload signing for streaming so the `payload_signing_enabled` + # config has no effect here. + config = Config( + signature_version='s3v4', + s3={'payload_signing_enabled': False}, + ) self.client = self.session.create_client( 's3', self.region, config=config) self.http_stubber = ClientHTTPStubber(self.client) @@ -1436,15 +1478,18 @@ def test_content_sha256_not_set_if_config_value_is_false(self): self.client.put_object(Bucket='foo', Key='bar', Body='baz') sent_headers = self.get_sent_headers() sha_header = sent_headers.get('x-amz-content-sha256') - self.assertEqual(sha_header, b'UNSIGNED-PAYLOAD') + self.assertEqual(sha_header, b"STREAMING-UNSIGNED-PAYLOAD-TRAILER") - def test_content_sha256_set_if_config_value_not_set_put_object(self): - # The default behavior matches payload_signing_enabled=False. For - # operations where the `Content-MD5` is present this means that - # `x-amz-content-sha256` is present but not set. + def test_content_sha256_not_set_if_config_value_not_set_put_object(self): + # By default, put_object() provides a trailing checksum and includes the + # x-amz-content-sha256 header with a value of "STREAMING-UNSIGNED-PAYLOAD-TRAILER". + # We do not support payload signing for streaming so the `payload_signing_enabled` + # config has no effect here. config = Config(signature_version='s3v4') self.client = self.session.create_client( - 's3', self.region, config=config + "s3", + self.region, + config=config, ) self.http_stubber = ClientHTTPStubber(self.client) self.http_stubber.add_response() @@ -1452,7 +1497,7 @@ def test_content_sha256_set_if_config_value_not_set_put_object(self): self.client.put_object(Bucket='foo', Key='bar', Body='baz') sent_headers = self.get_sent_headers() sha_header = sent_headers.get('x-amz-content-sha256') - self.assertEqual(sha_header, b'UNSIGNED-PAYLOAD') + self.assertEqual(sha_header, b"STREAMING-UNSIGNED-PAYLOAD-TRAILER") def test_content_sha256_set_if_config_value_not_set_list_objects(self): # The default behavior matches payload_signing_enabled=False. For @@ -1488,16 +1533,6 @@ def test_content_sha256_set_s3_on_outpost(self): sha_header = sent_headers.get('x-amz-content-sha256') self.assertNotEqual(sha_header, b'UNSIGNED-PAYLOAD') - def test_content_sha256_set_if_md5_is_unavailable(self): - with mock.patch('botocore.auth.MD5_AVAILABLE', False): - with mock.patch('botocore.utils.MD5_AVAILABLE', False): - with self.http_stubber: - self.client.put_object(Bucket='foo', Key='bar', Body='baz') - sent_headers = self.get_sent_headers() - unsigned = 'UNSIGNED-PAYLOAD' - self.assertNotEqual(sent_headers['x-amz-content-sha256'], unsigned) - self.assertNotIn('content-md5', sent_headers) - class TestCanSendIntegerHeaders(BaseSessionTest): @@ -2027,7 +2062,7 @@ def test_checksums_included_in_expected_operations(operation, operation_kwargs): stub.add_response() call = getattr(client, operation) call(**operation_kwargs) - assert 'Content-MD5' in stub.requests[-1].headers + assert "x-amz-checksum-crc32" in stub.requests[-1].headers @pytest.mark.parametrize( @@ -2992,10 +3027,12 @@ def test_can_provide_request_payer_get_tagging(self): class TestS3XMLPayloadEscape(BaseS3OperationTest): - def assert_correct_content_md5(self, request): - content_md5_bytes = get_md5(request.body).digest() - content_md5 = base64.b64encode(content_md5_bytes) - self.assertEqual(content_md5, request.headers['Content-MD5']) + def assert_correct_crc32_checksum(self, request): + checksum = CrtCrc32Checksum() if HAS_CRT else Crc32Checksum() + crc32_checksum = checksum.handle(request.body).encode() + self.assertEqual( + crc32_checksum, request.headers["x-amz-checksum-crc32"] + ) def test_escape_keys_in_xml_delete_objects(self): self.http_stubber.add_response() @@ -3009,7 +3046,7 @@ def test_escape_keys_in_xml_delete_objects(self): request = self.http_stubber.requests[0] self.assertNotIn(b'\r\n\r', request.body) self.assertIn(b' ', request.body) - self.assert_correct_content_md5(request) + self.assert_correct_crc32_checksum(request) def test_escape_keys_in_xml_put_bucket_lifecycle_configuration(self): self.http_stubber.add_response() @@ -3026,7 +3063,7 @@ def test_escape_keys_in_xml_put_bucket_lifecycle_configuration(self): request = self.http_stubber.requests[0] self.assertNotIn(b'my\r\n\rprefix', request.body) self.assertIn(b'my prefix', request.body) - self.assert_correct_content_md5(request) + self.assert_correct_crc32_checksum(request) @pytest.mark.parametrize( diff --git a/tests/unit/botocore/test_handlers.py b/tests/unit/botocore/test_handlers.py index 564abdbf0ad2..7008fe03b319 100644 --- a/tests/unit/botocore/test_handlers.py +++ b/tests/unit/botocore/test_handlers.py @@ -1698,4 +1698,57 @@ def test_document_response_params_without_expires(document_expires_mocks): ) mocks['section'].get_section.assert_not_called() mocks['param_section'].add_new_section.assert_not_called() - mocks['doc_section'].write.assert_not_called() \ No newline at end of file + mocks['doc_section'].write.assert_not_called() + + +@pytest.fixture() +def checksum_operation_model(): + operation_model = mock.Mock(spec=OperationModel) + operation_model.http_checksum = { + "requestValidationModeMember": "ChecksumMode", + } + return operation_model + + +def create_checksum_context( + request_checksum_calculation="when_supported", + response_checksum_validation="when_supported", +): + context = { + "client_config": Config( + request_checksum_calculation=request_checksum_calculation, + response_checksum_validation=response_checksum_validation, + ) + } + return context + + +def test_request_validation_mode_member_default(checksum_operation_model): + params = {} + handlers.handle_request_validation_mode_member( + params, checksum_operation_model, context=create_checksum_context() + ) + assert params["ChecksumMode"] == "ENABLED" + + +def test_request_validation_mode_member_when_required( + checksum_operation_model, +): + params = {} + context = create_checksum_context( + response_checksum_validation="when_required" + ) + handlers.handle_request_validation_mode_member( + params, checksum_operation_model, context=context + ) + assert "ChecksumMode" not in params + + +def test_request_validation_mode_member_is_not_enabled( + checksum_operation_model, +): + params = {"ChecksumMode": "FAKE_VALUE"} + handlers.handle_request_validation_mode_member( + params, checksum_operation_model, context=create_checksum_context() + ) + assert params["ChecksumMode"] == "FAKE_VALUE" \ No newline at end of file diff --git a/tests/unit/botocore/test_httpchecksum.py b/tests/unit/botocore/test_httpchecksum.py index 1df140242d84..d6b7a1a82b08 100644 --- a/tests/unit/botocore/test_httpchecksum.py +++ b/tests/unit/botocore/test_httpchecksum.py @@ -17,6 +17,7 @@ from botocore.awsrequest import AWSResponse from botocore.model import OperationModel +from botocore.config import Config from botocore.exceptions import AwsChunkedWrapperError from botocore.exceptions import FlexibleChecksumError from botocore.httpchecksum import AwsChunkedWrapper @@ -83,7 +84,11 @@ def _build_request(self, body): request = { "headers": {}, "body": body, - "context": {}, + "context": { + "client_config": Config( + request_checksum_calculation="when_supported", + ) + }, "url": "https://example.com", } return request @@ -95,25 +100,31 @@ def test_request_checksum_algorithm_no_model(self): resolve_request_checksum_algorithm(request, operation_model, params) self.assertNotIn("checksum", request["context"]) - def test_request_checksum_algorithm_model_opt_in(self): + def test_request_checksum_algorithm_model_default(self): operation_model = self._make_operation_model( http_checksum={"requestAlgorithmMember": "Algorithm"} ) - # Param is not present, no checksum will be set + # Param is not present, crc32 checksum will be set by default params = {} request = self._build_request(b"") resolve_request_checksum_algorithm(request, operation_model, params) - self.assertNotIn("checksum", request["context"]) + expected_algorithm = { + "algorithm": "crc32", + "in": "header", + "name": "x-amz-checksum-crc32", + } + actual_algorithm = request["context"]["checksum"]["request_algorithm"] + self.assertEqual(actual_algorithm, expected_algorithm) - # Param is present, crc32 checksum will be set - params = {"Algorithm": "crc32"} + # Param is present, sha256 checksum will be set + params = {"Algorithm": "sha256"} request = self._build_request(b"") resolve_request_checksum_algorithm(request, operation_model, params) expected_algorithm = { - "algorithm": "crc32", + "algorithm": "sha256", "in": "header", - "name": "x-amz-checksum-crc32", + "name": "x-amz-checksum-sha256", } actual_algorithm = request["context"]["checksum"]["request_algorithm"] self.assertEqual(actual_algorithm, expected_algorithm) @@ -125,21 +136,16 @@ def test_request_checksum_algorithm_model_opt_in(self): resolve_request_checksum_algorithm(request, operation_model, params) self.assertNotIn("checksum", request["context"]) - def test_request_checksum_algorithm_model_opt_in_streaming(self): + def test_request_checksum_algorithm_model_default_streaming(self): request = self._build_request(b"") operation_model = self._make_operation_model( http_checksum={"requestAlgorithmMember": "Algorithm"}, streaming_input=True, ) - # Param is not present, no checksum will be set + # Param is not present, crc32 checksum will be set params = {} resolve_request_checksum_algorithm(request, operation_model, params) - self.assertNotIn("checksum", request["context"]) - - # Param is present, crc32 checksum will be set in the trailer - params = {"Algorithm": "crc32"} - resolve_request_checksum_algorithm(request, operation_model, params) expected_algorithm = { "algorithm": "crc32", "in": "trailer", @@ -148,14 +154,25 @@ def test_request_checksum_algorithm_model_opt_in_streaming(self): actual_algorithm = request["context"]["checksum"]["request_algorithm"] self.assertEqual(actual_algorithm, expected_algorithm) + # Param is present, sha256 checksum will be set in the trailer + params = {"Algorithm": "sha256"} + resolve_request_checksum_algorithm(request, operation_model, params) + expected_algorithm = { + "algorithm": "sha256", + "in": "trailer", + "name": "x-amz-checksum-sha256", + } + actual_algorithm = request["context"]["checksum"]["request_algorithm"] + self.assertEqual(actual_algorithm, expected_algorithm) + # Trailer should not be used for http endpoints request = self._build_request(b"") request["url"] = "http://example.com" resolve_request_checksum_algorithm(request, operation_model, params) expected_algorithm = { - "algorithm": "crc32", + "algorithm": "sha256", "in": "header", - "name": "x-amz-checksum-crc32", + "name": "x-amz-checksum-sha256", } actual_algorithm = request["context"]["checksum"]["request_algorithm"] self.assertEqual(actual_algorithm, expected_algorithm) @@ -172,17 +189,21 @@ def test_request_checksum_algorithm_model_unsupported_algorithm(self): request, operation_model, params, supported_algorithms=[] ) - def test_request_checksum_algorithm_model_legacy_md5(self): + def test_request_checksum_algorithm_model_legacy_crc32(self): request = self._build_request(b"") operation_model = self._make_operation_model(required=True) params = {} resolve_request_checksum_algorithm(request, operation_model, params) + expected_algorithm = { + "algorithm": "crc32", + "in": "header", + "name": "x-amz-checksum-crc32", + } actual_algorithm = request["context"]["checksum"]["request_algorithm"] - expected_algorithm = "conditional-md5" self.assertEqual(actual_algorithm, expected_algorithm) - def test_request_checksum_algorithm_model_new_md5(self): + def test_request_checksum_algorithm_model_new_crc32(self): request = self._build_request(b"") operation_model = self._make_operation_model( http_checksum={"requestChecksumRequired": True} @@ -191,7 +212,11 @@ def test_request_checksum_algorithm_model_new_md5(self): resolve_request_checksum_algorithm(request, operation_model, params) actual_algorithm = request["context"]["checksum"]["request_algorithm"] - expected_algorithm = "conditional-md5" + expected_algorithm = { + "algorithm": "crc32", + "in": "header", + "name": "x-amz-checksum-crc32", + } self.assertEqual(actual_algorithm, expected_algorithm) def test_apply_request_checksum_handles_no_checksum_context(self): @@ -199,7 +224,9 @@ def test_apply_request_checksum_handles_no_checksum_context(self): apply_request_checksum(request) # Build another request and assert the original request is the same expected_request = self._build_request(b"") - self.assertEqual(request, expected_request) + self.assertEqual(request["headers"], expected_request["headers"]) + self.assertEqual(request["body"], expected_request["body"]) + self.assertEqual(request["url"], expected_request["url"]) def test_apply_request_checksum_handles_invalid_context(self): request = self._build_request(b"") @@ -213,14 +240,6 @@ def test_apply_request_checksum_handles_invalid_context(self): with self.assertRaises(FlexibleChecksumError): apply_request_checksum(request) - def test_apply_request_checksum_conditional_md5(self): - request = self._build_request(b"") - request["context"]["checksum"] = { - "request_algorithm": "conditional-md5" - } - apply_request_checksum(request) - self.assertIn("Content-MD5", request["headers"]) - def test_apply_request_checksum_flex_header_bytes(self): request = self._build_request(b"") request["context"]["checksum"] = { @@ -336,7 +355,7 @@ def test_response_checksum_algorithm_no_model(self): resolve_response_checksum_algorithms(request, operation_model, params) self.assertNotIn("checksum", request["context"]) - def test_response_checksum_algorithm_model_opt_in(self): + def test_response_checksum_algorithm_model_default(self): request = self._build_request(b"") operation_model = self._make_operation_model( http_checksum={ From 173eea15cb94289b0a02d9aae53774dc26f27b2e Mon Sep 17 00:00:00 2001 From: aemous Date: Mon, 18 Nov 2024 10:49:06 -0500 Subject: [PATCH 3/6] More progress on checksum request/response calulation. --- awscli/botocore/utils.py | 2 ++ tests/functional/botocore/test_s3.py | 12 +++++++++--- tests/unit/botocore/test_httpchecksum.py | 25 ++++++++++++++++++++---- tests/utils/botocore/__init__.py | 22 +++++++++++++++++++-- 4 files changed, 52 insertions(+), 9 deletions(-) diff --git a/awscli/botocore/utils.py b/awscli/botocore/utils.py index 36ac91029678..c48ae7d82a2b 100644 --- a/awscli/botocore/utils.py +++ b/awscli/botocore/utils.py @@ -2996,6 +2996,7 @@ def _is_s3express_request(params): def has_checksum_header(params): """ Checks if a header starting with "x-amz-checksum-" is provided in a request. + This class is considered private and subject to abrupt breaking changes or removal without prior announcement. Please do not use it directly. """ @@ -3038,6 +3039,7 @@ def conditionally_enable_crc32(params, **kwargs): def conditionally_calculate_md5(params, **kwargs): """ This function has been deprecated, but is kept for backwards compatibility. + Only add a Content-MD5 if the system supports it. """ body = params['body'] diff --git a/tests/functional/botocore/test_s3.py b/tests/functional/botocore/test_s3.py index 0a9a6293e004..c01b2a888fda 100644 --- a/tests/functional/botocore/test_s3.py +++ b/tests/functional/botocore/test_s3.py @@ -17,8 +17,14 @@ from botocore.httpchecksum import HAS_CRT, Crc32Checksum, CrtCrc32Checksum from tests import ( - create_session, mock, temporary_file, unittest, - BaseSessionTest, ClientHTTPStubber, FreezeTime + BaseSessionTest, + ClientHTTPStubber, + FreezeTime, + create_session, + get_checksum_cls, + mock, + temporary_file, unittest, + ) import botocore.session @@ -3028,7 +3034,7 @@ def test_can_provide_request_payer_get_tagging(self): class TestS3XMLPayloadEscape(BaseS3OperationTest): def assert_correct_crc32_checksum(self, request): - checksum = CrtCrc32Checksum() if HAS_CRT else Crc32Checksum() + checksum = get_checksum_cls()() crc32_checksum = checksum.handle(request.body).encode() self.assertEqual( crc32_checksum, request.headers["x-amz-checksum-crc32"] diff --git a/tests/unit/botocore/test_httpchecksum.py b/tests/unit/botocore/test_httpchecksum.py index d6b7a1a82b08..ddb7f491c1f3 100644 --- a/tests/unit/botocore/test_httpchecksum.py +++ b/tests/unit/botocore/test_httpchecksum.py @@ -14,6 +14,7 @@ from io import BytesIO from tests import mock +from tests.utils.botocore import get_checksum_cls, requires_crt from botocore.awsrequest import AWSResponse from botocore.model import OperationModel @@ -27,14 +28,13 @@ Sha256Checksum, CrtCrc64NvmeChecksum, Sha1Checksum, - CrtCrc32Checksum, CrtCrc32cChecksum, -) -from botocore.httpchecksum import ( + CrtCrc32Checksum, + CrtCrc64NvmeChecksum, apply_request_checksum, + handle_checksum_body, resolve_request_checksum_algorithm, resolve_response_checksum_algorithms, - handle_checksum_body, ) @@ -642,6 +642,23 @@ def test_crt_crc64nvme(self): self.assert_base64_checksum(CrtCrc64NvmeChecksum, "jSnVw/bqjr4=") +class TestCrtChecksumOverrides(unittest.TestCase): + @requires_crt() + def test_crt_crc32_available(self): + actual_cls = get_checksum_cls("crc32") + self.assertEqual(actual_cls, CrtCrc32Checksum) + + @requires_crt() + def test_crt_crc32c_available(self): + actual_cls = get_checksum_cls("crc32c") + self.assertEqual(actual_cls, CrtCrc32cChecksum) + + @requires_crt() + def test_crt_crc64nvme_available(self): + actual_cls = get_checksum_cls("crc64nvme") + self.assertEqual(actual_cls, CrtCrc64NvmeChecksum) + + class TestStreamingChecksumBody(unittest.TestCase): def setUp(self): self.raw_bytes = b"hello world" diff --git a/tests/utils/botocore/__init__.py b/tests/utils/botocore/__init__.py index 106736f3e7b7..eb023efb77d4 100644 --- a/tests/utils/botocore/__init__.py +++ b/tests/utils/botocore/__init__.py @@ -39,10 +39,10 @@ import botocore.loaders import botocore.session from botocore.awsrequest import AWSResponse -from botocore.compat import urlparse -from botocore.compat import parse_qs +from botocore.compat import HAS_CRT, parse_qs, urlparse from botocore import utils from botocore import credentials +from botocore.httpchecksum import _CHECKSUM_CLS, DEFAULT_CHECKSUM_ALGORITHM from botocore.stub import Stubber @@ -84,6 +84,14 @@ def decorator(func): platform.system() not in ['Darwin', 'Linux'], reason)(func) return decorator +def requires_crt(reason=None): + if reason is None: + reason = "Test requires awscrt to be installed" + + def decorator(func): + return unittest.skipIf(not HAS_CRT, reason)(func) + + return decorator def random_chars(num_chars): """Returns random hex characters. @@ -590,3 +598,13 @@ def mock_load_service_model(service_name, type_name, api_version=None): loader = session.get_component('data_loader') monkeypatch.setattr(loader, 'load_service_model', mock_load_service_model) + + +def get_checksum_cls(algorithm=DEFAULT_CHECKSUM_ALGORITHM.lower()): + """ + This pass through is grabbing our internally supported list of checksums + to ensure we stay in sync, while not exposing them publicly. + + Returns the default checksum algorithm class if none is specified. + """ + return _CHECKSUM_CLS[algorithm] \ No newline at end of file From ca3ce0416e77e81e09361658059be3c7d7827bbc Mon Sep 17 00:00:00 2001 From: aemous Date: Mon, 18 Nov 2024 16:38:44 -0500 Subject: [PATCH 4/6] Update ports based on crc64nvme updates --- awscli/botocore/httpchecksum.py | 2 +- .../functional/botocore/test_httpchecksum.py | 141 ++++++++---------- tests/functional/botocore/test_s3.py | 7 +- tests/unit/botocore/test_httpchecksum.py | 19 +-- tests/utils/botocore/__init__.py | 11 +- 5 files changed, 66 insertions(+), 114 deletions(-) diff --git a/awscli/botocore/httpchecksum.py b/awscli/botocore/httpchecksum.py index 196237b0883c..8b20f76d6b73 100644 --- a/awscli/botocore/httpchecksum.py +++ b/awscli/botocore/httpchecksum.py @@ -25,7 +25,7 @@ from hashlib import sha1, sha256 from awscrt import checksums as crt_checksums -from botocore.compat import HAS_CRT, urlparse +from botocore.compat import urlparse from botocore.exceptions import AwsChunkedWrapperError, FlexibleChecksumError from botocore.response import StreamingBody from botocore.utils import determine_content_length, has_checksum_header diff --git a/tests/functional/botocore/test_httpchecksum.py b/tests/functional/botocore/test_httpchecksum.py index d7f8ac2b8f84..5b56da39b5bb 100644 --- a/tests/functional/botocore/test_httpchecksum.py +++ b/tests/functional/botocore/test_httpchecksum.py @@ -14,7 +14,6 @@ import pytest -from botocore.compat import HAS_CRT from botocore.exceptions import FlexibleChecksumError from tests import ClientHTTPStubber, patch_load_service_model @@ -151,7 +150,7 @@ def _request_checksum_calculation_cases(): request_payload = "Hello world" - cases = [ + return [ ( "CRC32", request_payload, @@ -176,29 +175,23 @@ def _request_checksum_calculation_cases(): "x-amz-checksum-sha256": "ZOyIygCyaOW6GjVnihtTFtIS9PNmskdyMlNKiuyjfzw=", }, ), + ( + "CRC32C", + request_payload, + { + "x-amz-request-algorithm": "CRC32C", + "x-amz-checksum-crc32c": "crUfeA==", + }, + ), + ( + "CRC64NVME", + request_payload, + { + "x-amz-request-algorithm": "CRC64NVME", + "x-amz-checksum-crc64nvme": "OOJZ0D8xKts=", + }, + ), ] - if HAS_CRT: - cases.extend( - [ - ( - "CRC32C", - request_payload, - { - "x-amz-request-algorithm": "CRC32C", - "x-amz-checksum-crc32c": "crUfeA==", - }, - ), - ( - "CRC64NVME", - request_payload, - { - "x-amz-request-algorithm": "CRC64NVME", - "x-amz-checksum-crc64nvme": "OOJZ0D8xKts=", - }, - ), - ] - ) - return cases @pytest.mark.parametrize( @@ -235,7 +228,7 @@ def test_request_checksum_calculation( def _streaming_request_checksum_calculation_cases(): request_payload = "Hello world" - cases = [ + return [ ( "CRC32", request_payload, @@ -265,31 +258,25 @@ def _streaming_request_checksum_calculation_cases(): "x-amz-checksum-sha256": "ZOyIygCyaOW6GjVnihtTFtIS9PNmskdyMlNKiuyjfzw=" }, ), + ( + "CRC32C", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-crc32c", + }, + {"x-amz-checksum-crc32c": "crUfeA=="}, + ), + ( + "CRC64NVME", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-crc64nvme", + }, + {"x-amz-checksum-crc64nvme": "OOJZ0D8xKts="}, + ), ] - if HAS_CRT: - cases.extend( - [ - ( - "CRC32C", - request_payload, - { - "content-encoding": "aws-chunked", - "x-amz-trailer": "x-amz-checksum-crc32c", - }, - {"x-amz-checksum-crc32c": "crUfeA=="}, - ), - ( - "CRC64NVME", - request_payload, - { - "content-encoding": "aws-chunked", - "x-amz-trailer": "x-amz-checksum-crc64nvme", - }, - {"x-amz-checksum-crc64nvme": "OOJZ0D8xKts="}, - ), - ] - ) - return cases @pytest.mark.parametrize( @@ -331,7 +318,7 @@ def test_streaming_request_checksum_calculation( def _response_checksum_validation_cases(): response_payload = "Hello world" - cases = [ + return [ ( "CRC32", response_payload, @@ -376,37 +363,31 @@ def _response_checksum_validation_cases(): "calculatedChecksum": "ZOyIygCyaOW6GjVnihtTFtIS9PNmskdyMlNKiuyjfzw=", }, ), + ( + "CRC32C", + response_payload, + {"x-amz-checksum-crc32c": "crUfeA=="}, + {"kind": "success"}, + ), + ( + "CRC32C", + response_payload, + {"x-amz-checksum-crc32c": "bm90LWEtY2hlY2tzdW0="}, + {"kind": "failure", "calculatedChecksum": "crUfeA=="}, + ), + ( + "CRC64NVME", + response_payload, + {"x-amz-checksum-crc64nvme": "OOJZ0D8xKts="}, + {"kind": "success"}, + ), + ( + "CRC64NVME", + response_payload, + {"x-amz-checksum-crc64nvme": "bm90LWEtY2hlY2tzdW0="}, + {"kind": "failure", "calculatedChecksum": "OOJZ0D8xKts="}, + ), ] - if HAS_CRT: - cases.extend( - [ - ( - "CRC32C", - response_payload, - {"x-amz-checksum-crc32c": "crUfeA=="}, - {"kind": "success"}, - ), - ( - "CRC32C", - response_payload, - {"x-amz-checksum-crc32c": "bm90LWEtY2hlY2tzdW0="}, - {"kind": "failure", "calculatedChecksum": "crUfeA=="}, - ), - ( - "CRC64NVME", - response_payload, - {"x-amz-checksum-crc64nvme": "OOJZ0D8xKts="}, - {"kind": "success"}, - ), - ( - "CRC64NVME", - response_payload, - {"x-amz-checksum-crc64nvme": "bm90LWEtY2hlY2tzdW0="}, - {"kind": "failure", "calculatedChecksum": "OOJZ0D8xKts="}, - ), - ] - ) - return cases @pytest.mark.parametrize( diff --git a/tests/functional/botocore/test_s3.py b/tests/functional/botocore/test_s3.py index c01b2a888fda..c369803b3a85 100644 --- a/tests/functional/botocore/test_s3.py +++ b/tests/functional/botocore/test_s3.py @@ -15,21 +15,19 @@ import pytest from dateutil.tz import tzutc -from botocore.httpchecksum import HAS_CRT, Crc32Checksum, CrtCrc32Checksum from tests import ( BaseSessionTest, ClientHTTPStubber, FreezeTime, create_session, - get_checksum_cls, mock, temporary_file, unittest, - ) +from tests.utils.botocore import get_checksum_cls import botocore.session from botocore.config import Config -from botocore.compat import datetime, urlsplit, parse_qs +from botocore.compat import datetime, parse_qs, urlsplit from botocore.exceptions import ( ParamValidationError, ClientError, UnsupportedS3ConfigurationError, @@ -1412,7 +1410,6 @@ def test_trailing_checksum_set(self): body = self.http_stubber.requests[0].body.read() self.assertIn(b"x-amz-checksum-crc32", body) - def test_trailing_checksum_set_empty_body(self): with self.http_stubber: self.client.put_object(Bucket='foo', Key='bar', Body='') diff --git a/tests/unit/botocore/test_httpchecksum.py b/tests/unit/botocore/test_httpchecksum.py index ddb7f491c1f3..2b999fad28cf 100644 --- a/tests/unit/botocore/test_httpchecksum.py +++ b/tests/unit/botocore/test_httpchecksum.py @@ -14,7 +14,7 @@ from io import BytesIO from tests import mock -from tests.utils.botocore import get_checksum_cls, requires_crt +from tests.utils.botocore import get_checksum_cls from botocore.awsrequest import AWSResponse from botocore.model import OperationModel @@ -642,23 +642,6 @@ def test_crt_crc64nvme(self): self.assert_base64_checksum(CrtCrc64NvmeChecksum, "jSnVw/bqjr4=") -class TestCrtChecksumOverrides(unittest.TestCase): - @requires_crt() - def test_crt_crc32_available(self): - actual_cls = get_checksum_cls("crc32") - self.assertEqual(actual_cls, CrtCrc32Checksum) - - @requires_crt() - def test_crt_crc32c_available(self): - actual_cls = get_checksum_cls("crc32c") - self.assertEqual(actual_cls, CrtCrc32cChecksum) - - @requires_crt() - def test_crt_crc64nvme_available(self): - actual_cls = get_checksum_cls("crc64nvme") - self.assertEqual(actual_cls, CrtCrc64NvmeChecksum) - - class TestStreamingChecksumBody(unittest.TestCase): def setUp(self): self.raw_bytes = b"hello world" diff --git a/tests/utils/botocore/__init__.py b/tests/utils/botocore/__init__.py index eb023efb77d4..3c85cd0b84f6 100644 --- a/tests/utils/botocore/__init__.py +++ b/tests/utils/botocore/__init__.py @@ -39,7 +39,7 @@ import botocore.loaders import botocore.session from botocore.awsrequest import AWSResponse -from botocore.compat import HAS_CRT, parse_qs, urlparse +from botocore.compat import parse_qs, urlparse from botocore import utils from botocore import credentials from botocore.httpchecksum import _CHECKSUM_CLS, DEFAULT_CHECKSUM_ALGORITHM @@ -84,15 +84,6 @@ def decorator(func): platform.system() not in ['Darwin', 'Linux'], reason)(func) return decorator -def requires_crt(reason=None): - if reason is None: - reason = "Test requires awscrt to be installed" - - def decorator(func): - return unittest.skipIf(not HAS_CRT, reason)(func) - - return decorator - def random_chars(num_chars): """Returns random hex characters. From 7d7265a36b92274f2e785bc1150709da8bb0e97f Mon Sep 17 00:00:00 2001 From: aemous Date: Tue, 19 Nov 2024 11:56:51 -0500 Subject: [PATCH 5/6] Fix failing tests --- tests/functional/s3/test_cp_command.py | 2 +- tests/functional/s3api/test_get_object.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/functional/s3/test_cp_command.py b/tests/functional/s3/test_cp_command.py index 683a426aad1c..13661d84d2bc 100644 --- a/tests/functional/s3/test_cp_command.py +++ b/tests/functional/s3/test_cp_command.py @@ -2207,7 +2207,7 @@ def test_streaming_upload_using_crt_client(self): expected_type=S3RequestType.PUT_OBJECT, expected_host=self.get_virtual_s3_host('bucket'), expected_path='/key', - expected_body_content=b'foo', + expected_body_content=b'3\r\nfoo\r\n0\r\nx-amz-checksum-crc32:jHNlIQ==\r\n\r\n', ) def test_streaming_download_using_crt_client(self): diff --git a/tests/functional/s3api/test_get_object.py b/tests/functional/s3api/test_get_object.py index f5be0844600b..462934264d5c 100644 --- a/tests/functional/s3api/test_get_object.py +++ b/tests/functional/s3api/test_get_object.py @@ -38,6 +38,7 @@ def test_simple(self): cmdline += ' outfile' self.addCleanup(self.remove_file_if_exists, 'outfile') self.assert_params_for_cmd(cmdline, {'Bucket': 'mybucket', + 'ChecksumMode': 'ENABLED', 'Key': 'mykey'}) def test_range(self): @@ -48,6 +49,7 @@ def test_range(self): cmdline += ' outfile' self.addCleanup(self.remove_file_if_exists, 'outfile') self.assert_params_for_cmd(cmdline, {'Bucket': 'mybucket', + 'ChecksumMode': 'ENABLED', 'Key': 'mykey', 'Range': 'bytes=0-499'}) @@ -61,7 +63,9 @@ def test_response_headers(self): self.addCleanup(self.remove_file_if_exists, 'outfile') self.assert_params_for_cmd( cmdline, { - 'Bucket': 'mybucket', 'Key': 'mykey', + 'Bucket': 'mybucket', + 'ChecksumMode': 'ENABLED', + 'Key': 'mykey', 'ResponseCacheControl': 'No-cache', 'ResponseContentEncoding': 'x-gzip' } @@ -83,7 +87,7 @@ def test_streaming_output_arg_with_error_response(self): cmdline += ' outfile' self.addCleanup(self.remove_file_if_exists, 'outfile') self.assert_params_for_cmd( - cmdline, {'Bucket': 'mybucket', 'Key': 'mykey'}) + cmdline, {'Bucket': 'mybucket', 'ChecksumMode': 'ENABLED', 'Key': 'mykey'}) if __name__ == "__main__": From a22536a60b367bdda082986b4f9b5a387530c467 Mon Sep 17 00:00:00 2001 From: aemous Date: Wed, 20 Nov 2024 10:30:27 -0500 Subject: [PATCH 6/6] Add back removed newlines. --- tests/utils/botocore/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/utils/botocore/__init__.py b/tests/utils/botocore/__init__.py index 3c85cd0b84f6..07714439685a 100644 --- a/tests/utils/botocore/__init__.py +++ b/tests/utils/botocore/__init__.py @@ -84,6 +84,7 @@ def decorator(func): platform.system() not in ['Darwin', 'Linux'], reason)(func) return decorator + def random_chars(num_chars): """Returns random hex characters.