From 9a453dd7b5e0b38cbfefb8c46e021cb6d6e37cf1 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 6 Nov 2024 03:30:43 +0100 Subject: [PATCH] Allow passing in ARN Role and Session name (#1296) --- mkdocs/docs/configuration.md | 20 +++++++++++--------- pyiceberg/io/__init__.py | 4 ++++ pyiceberg/io/pyarrow.py | 10 ++++++++++ 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 606a18ce91..ba77867ba7 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -101,18 +101,20 @@ For the FileIO there are several configuration options available: -| Key | Example | Description | -| -------------------- | ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Key | Example | Description | +|----------------------|----------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | s3.endpoint | | Configure an alternative endpoint of the S3 service for the FileIO to access. This could be used to use S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | -| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. | -| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. | -| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | -| s3.signer | bearer | Configure the signature version of the FileIO. 
| +| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. | +| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. | +| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | +| s3.session-name | session | An optional identifier for the assumed role session. | +| s3.role-arn | arn:aws:... | AWS Role ARN. If provided instead of access_key and secret_key, temporary credentials will be fetched by assuming this role. | +| s3.signer | bearer | Configure the signature version of the FileIO. | | s3.signer.uri | | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/<s3.signer.endpoint>`. | -| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/<s3.signer.endpoint>`. (default : v1/aws/s3/sign). | -| s3.region | us-west-2 | Sets the region of the bucket | +| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/<s3.signer.endpoint>`. (default : v1/aws/s3/sign). | +| s3.region | us-west-2 | Sets the region of the bucket | | s3.proxy-uri | | Configure the proxy server to be used by the FileIO. | -| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. 
| diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index fe3ea43e10..23a2cf3594 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -60,6 +60,8 @@ AWS_ACCESS_KEY_ID = "client.access-key-id" AWS_SECRET_ACCESS_KEY = "client.secret-access-key" AWS_SESSION_TOKEN = "client.session-token" +AWS_ROLE_ARN = "aws.role-arn" +AWS_SESSION_NAME = "aws.session-name" S3_ENDPOINT = "s3.endpoint" S3_ACCESS_KEY_ID = "s3.access-key-id" S3_SECRET_ACCESS_KEY = "s3.secret-access-key" @@ -70,6 +72,8 @@ S3_SIGNER_URI = "s3.signer.uri" S3_SIGNER_ENDPOINT = "s3.signer.endpoint" S3_SIGNER_ENDPOINT_DEFAULT = "v1/aws/s3/sign" +S3_ROLE_ARN = "s3.role-arn" +S3_SESSION_NAME = "s3.session-name" HDFS_HOST = "hdfs.host" HDFS_PORT = "hdfs.port" HDFS_USER = "hdfs.user" diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index ab4de5185b..a053b83ac9 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -85,7 +85,9 @@ from pyiceberg.io import ( AWS_ACCESS_KEY_ID, AWS_REGION, + AWS_ROLE_ARN, AWS_SECRET_ACCESS_KEY, + AWS_SESSION_NAME, AWS_SESSION_TOKEN, GCS_DEFAULT_LOCATION, GCS_ENDPOINT, @@ -101,7 +103,9 @@ S3_ENDPOINT, S3_PROXY_URI, S3_REGION, + S3_ROLE_ARN, S3_SECRET_ACCESS_KEY, + S3_SESSION_NAME, S3_SESSION_TOKEN, FileIO, InputFile, @@ -362,6 +366,12 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste if connect_timeout := self.properties.get(S3_CONNECT_TIMEOUT): client_kwargs["connect_timeout"] = float(connect_timeout) + if role_arn := get_first_property_value(self.properties, S3_ROLE_ARN, AWS_ROLE_ARN): + client_kwargs["role_arn"] = role_arn + + if session_name := get_first_property_value(self.properties, S3_SESSION_NAME, AWS_SESSION_NAME): + client_kwargs["session_name"] = session_name + return S3FileSystem(**client_kwargs) elif scheme in ("hdfs", "viewfs"): from pyarrow.fs import HadoopFileSystem