From d589fdefcee051fc04c36fe1e6497e5b62f93424 Mon Sep 17 00:00:00 2001 From: majormark Date: Fri, 8 Nov 2024 19:06:20 +0100 Subject: [PATCH 1/4] boto glue standard retry policy configurable max retry --- mkdocs/docs/configuration.md | 19 ++++++++++--------- pyiceberg/catalog/__init__.py | 1 + pyiceberg/catalog/glue.py | 16 +++++++++++++++- tests/catalog/test_glue.py | 1 - 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 2404f28b30..44b36e357a 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -331,16 +331,17 @@ catalog: -| Key | Example | Description | -| ---------------------- | ------------------------------------ | ------------------------------------------------------------------------------- | -| glue.id | 111111111111 | Configure the 12-digit ID of the Glue Catalog | -| glue.skip-archive | true | Configure whether to skip the archival of older table versions. Default to true | +| Key | Example | Description | +|------------------------|----------------------------------------|---------------------------------------------------------------------------------| +| glue.id | 111111111111 | Configure the 12-digit ID of the Glue Catalog | +| glue.skip-archive | true | Configure whether to skip the archival of older table versions. Default to true | | glue.endpoint | | Configure an alternative endpoint of the Glue service for GlueCatalog to access | -| glue.profile-name | default | Configure the static profile used to access the Glue Catalog | -| glue.region | us-east-1 | Set the region of the Glue Catalog | -| glue.access-key-id | admin | Configure the static access key id used to access the Glue Catalog | -| glue.secret-access-key | password | Configure the static secret access key used to access the Glue Catalog | -| glue.session-token | AQoDYXdzEJr... | Configure the static session token used to access the Glue Catalog | +| glue.profile-name | default | Configure the static profile used to access the Glue Catalog | +| glue.region | us-east-1 | Set the region of the Glue Catalog | +| glue.access-key-id | admin | Configure the static access key id used to access the Glue Catalog | +| glue.secret-access-key | password | Configure the static secret access key used to access the Glue Catalog | +| glue.session-token | AQoDYXdzEJr... | Configure the static session token used to access the Glue Catalog | +| glue.max-retries | 5 | Configure the maximum number of retries for the Glue service calls | diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index b189b4094d..0cfdfb4b6a 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -94,6 +94,7 @@ URI = "uri" LOCATION = "location" EXTERNAL_TABLE = "EXTERNAL_TABLE" +MAX_RETRIES = 10 TABLE_METADATA_FILE_NAME_REGEX = re.compile( r""" diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 5742173fa6..e6da0dc359 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -29,6 +29,7 @@ ) import boto3 +from botocore.config import Config from mypy_boto3_glue.client import GlueClient from mypy_boto3_glue.type_defs import ( ColumnTypeDef, @@ -44,6 +45,7 @@ EXTERNAL_TABLE, ICEBERG, LOCATION, + MAX_RETRIES, METADATA_LOCATION, PREVIOUS_METADATA_LOCATION, TABLE_TYPE, @@ -128,6 +130,7 @@ GLUE_ACCESS_KEY_ID = "glue.access-key-id" GLUE_SECRET_ACCESS_KEY = "glue.secret-access-key" GLUE_SESSION_TOKEN = "glue.session-token" +GLUE_MAX_RETRIES = "glue.max-retries" def _construct_parameters( @@ -305,7 +308,18 @@ def __init__(self, name: str, **properties: Any): aws_secret_access_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), aws_session_token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN), ) - self.glue: GlueClient = session.client("glue", endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT)) + self.glue: GlueClient = session.client( + "glue", + endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT), + config=Config( + retries={ + "max_attempts": get_first_property_value(properties, GLUE_MAX_RETRIES) + if get_first_property_value(properties, GLUE_MAX_RETRIES) + else MAX_RETRIES, + "mode": "standard", + } + ), + ) if glue_catalog_id := properties.get(GLUE_ID): _register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id) diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index 26c80bc968..891a59714e 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -442,7 +442,6 @@ def test_list_tables( moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, - table_name: str, table_list: List[str], ) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}/"}) From c9bfeeb851de34a2422fc1dace9ef11ebf2e5eb4 Mon Sep 17 00:00:00 2001 From: Mark Major <32452238+mark-major@users.noreply.github.com> Date: Sat, 9 Nov 2024 00:35:31 +0100 Subject: [PATCH 2/4] Update configuration.md Co-authored-by: Fokko Driesprong --- mkdocs/docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 44b36e357a..6690d08fc4 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -341,7 +341,7 @@ catalog: | glue.access-key-id | admin | Configure the static access key id used to access the Glue Catalog | | glue.secret-access-key | password | Configure the static secret access key used to access the Glue Catalog | | glue.session-token | AQoDYXdzEJr... | Configure the static session token used to access the Glue Catalog | -| glue.max-retries | 5 | Configure the maximum number of retries for the Glue service calls | +| glue.max-retries | 10 | Configure the maximum number of retries for the Glue service calls | From cd21f9706cd4cdc7ee9427b603b3949b005ecda7 Mon Sep 17 00:00:00 2001 From: Mark Major <32452238+mark-major@users.noreply.github.com> Date: Sat, 9 Nov 2024 00:35:36 +0100 Subject: [PATCH 3/4] Update glue.py Co-authored-by: Fokko Driesprong --- pyiceberg/catalog/glue.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index e6da0dc359..532eafd684 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -313,9 +313,7 @@ def __init__(self, name: str, **properties: Any): endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT), config=Config( retries={ - "max_attempts": get_first_property_value(properties, GLUE_MAX_RETRIES) - if get_first_property_value(properties, GLUE_MAX_RETRIES) - else MAX_RETRIES, + "max_attempts": properties.get(GLUE_MAX_RETRIES, MAX_RETRIES), "mode": "standard", } ), From 2ba50243615e6550963f31cf0c5dc33c396216cb Mon Sep 17 00:00:00 2001 From: majormark Date: Wed, 20 Nov 2024 07:27:19 +0100 Subject: [PATCH 4/4] boto glue retry mode configurable --- mkdocs/docs/configuration.md | 1 + pyiceberg/catalog/__init__.py | 1 - pyiceberg/catalog/glue.py | 12 ++++++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 6690d08fc4..133f02060a 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -342,6 +342,7 @@ catalog: | glue.secret-access-key | password | Configure the static secret access key used to access the Glue Catalog | | glue.session-token | AQoDYXdzEJr... | Configure the static session token used to access the Glue Catalog | | glue.max-retries | 10 | Configure the maximum number of retries for the Glue service calls | +| glue.retry-mode | standard | Configure the retry mode for the Glue service. Default to standard. | diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index 0cfdfb4b6a..b189b4094d 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -94,7 +94,6 @@ URI = "uri" LOCATION = "location" EXTERNAL_TABLE = "EXTERNAL_TABLE" -MAX_RETRIES = 10 TABLE_METADATA_FILE_NAME_REGEX = re.compile( r""" diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 532eafd684..65028d6b92 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -45,7 +45,6 @@ EXTERNAL_TABLE, ICEBERG, LOCATION, - MAX_RETRIES, METADATA_LOCATION, PREVIOUS_METADATA_LOCATION, TABLE_TYPE, @@ -131,6 +130,13 @@ GLUE_SECRET_ACCESS_KEY = "glue.secret-access-key" GLUE_SESSION_TOKEN = "glue.session-token" GLUE_MAX_RETRIES = "glue.max-retries" +GLUE_RETRY_MODE = "glue.retry-mode" + +MAX_RETRIES = 10 +STANDARD_RETRY_MODE = "standard" +ADAPTIVE_RETRY_MODE = "adaptive" +LEGACY_RETRY_MODE = "legacy" +EXISTING_RETRY_MODES = [STANDARD_RETRY_MODE, ADAPTIVE_RETRY_MODE, LEGACY_RETRY_MODE] def _construct_parameters( @@ -300,6 +306,8 @@ class GlueCatalog(MetastoreCatalog): def __init__(self, name: str, **properties: Any): super().__init__(name, **properties) + retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE) + session = boto3.Session( profile_name=properties.get(GLUE_PROFILE_NAME), region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION), @@ -314,7 +322,7 @@ def __init__(self, name: str, **properties: Any): config=Config( retries={ "max_attempts": properties.get(GLUE_MAX_RETRIES, MAX_RETRIES), - "mode": "standard", + "mode": retry_mode_prop_value if retry_mode_prop_value in EXISTING_RETRY_MODES else STANDARD_RETRY_MODE, } ), )