-
Notifications
You must be signed in to change notification settings - Fork 207
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Glue: Allow for assuming role for Glue #1299
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,7 @@ | |
) | ||
|
||
import boto3 | ||
from botocore.credentials import AssumeRoleCredentialFetcher, Credentials, DeferredRefreshableCredentials | ||
from mypy_boto3_glue.client import GlueClient | ||
from mypy_boto3_glue.type_defs import ( | ||
ColumnTypeDef, | ||
|
@@ -59,7 +60,14 @@ | |
NoSuchTableError, | ||
TableAlreadyExistsError, | ||
) | ||
from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN | ||
from pyiceberg.io import ( | ||
AWS_ACCESS_KEY_ID, | ||
AWS_REGION, | ||
AWS_ROLE_ARN, | ||
AWS_ROLE_SESSION_NAME, | ||
AWS_SECRET_ACCESS_KEY, | ||
AWS_SESSION_TOKEN, | ||
) | ||
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec | ||
from pyiceberg.schema import Schema, SchemaVisitor, visit | ||
from pyiceberg.serializers import FromInputFile | ||
|
@@ -127,6 +135,8 @@ | |
GLUE_ACCESS_KEY_ID = "glue.access-key-id" | ||
GLUE_SECRET_ACCESS_KEY = "glue.secret-access-key" | ||
GLUE_SESSION_TOKEN = "glue.session-token" | ||
GLUE_ROLE_ARN = "glue.role-arn" | ||
GLUE_ROLE_SESSION_NAME = "glue.role-session-name" | ||
|
||
|
||
def _construct_parameters( | ||
|
@@ -296,13 +306,48 @@ class GlueCatalog(MetastoreCatalog): | |
def __init__(self, name: str, **properties: Any): | ||
super().__init__(name, **properties) | ||
|
||
credentials = Credentials( | ||
access_key=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), | ||
secret_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), | ||
token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN), | ||
) | ||
|
||
session = boto3.Session( | ||
profile_name=properties.get(GLUE_PROFILE_NAME), | ||
region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION), | ||
aws_access_key_id=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), | ||
aws_secret_access_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), | ||
aws_session_token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN), | ||
aws_access_key_id=credentials.access_key, | ||
aws_secret_access_key=credentials.secret_key, | ||
aws_session_token=credentials.token, | ||
) | ||
|
||
if role_arn := get_first_property_value(properties, GLUE_ROLE_ARN, AWS_ROLE_ARN): | ||
extra_args = {} | ||
if role_session_name := get_first_property_value(properties, GLUE_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME): | ||
extra_args["RoleSessionName"] = role_session_name | ||
|
||
fetcher = AssumeRoleCredentialFetcher( | ||
client_creator=session.client, | ||
source_credentials=credentials, | ||
role_arn=role_arn, | ||
extra_args=extra_args, | ||
) | ||
refreshable_credentials = DeferredRefreshableCredentials( | ||
method="assume-role", | ||
refresh_using=fetcher.fetch_credentials, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm relying on calling an in-house webservice that will provide new credentials. As I see it, this implementation will not allow a custom implementation of |
||
) | ||
from botocore.session import Session as BotoSession | ||
|
||
botocore_session = BotoSession() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about accepting a boto3.Session as input? Then the client can configure it as needed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. First of all, thanks @cshenrik for the quick reply! I think we could do both, so you can also configure the Glue client similar to the S3 one (it uses similar properties). @HonahX @kevinjqliu How strong are we on only passing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Im leaning towards loading custom clients instead of passing everything through properties. Every time we add another property, we need to map a name, a possible default, and pass to the underlying object. The logic above is already pretty complicated, might as well split it out and call it something like |
||
botocore_session._credentials = refreshable_credentials # noqa: SLF001 | ||
session = boto3.Session( | ||
profile_name=properties.get(GLUE_PROFILE_NAME), | ||
region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION), | ||
aws_access_key_id=credentials.access_key, | ||
aws_secret_access_key=credentials.secret_key, | ||
aws_session_token=credentials.token, | ||
botocore_session=botocore_session, | ||
) | ||
|
||
self.glue: GlueClient = session.client("glue", endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT)) | ||
|
||
if glue_catalog_id := properties.get(GLUE_ID): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,7 +61,7 @@ | |
AWS_SECRET_ACCESS_KEY = "client.secret-access-key" | ||
AWS_SESSION_TOKEN = "client.session-token" | ||
AWS_ROLE_ARN = "aws.role-arn" | ||
AWS_SESSION_NAME = "aws.session-name" | ||
AWS_ROLE_SESSION_NAME = "aws.role-session-name" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is originally from #1296, which has not been released yet. So we don't need to worry about backwards compatibility |
||
S3_ENDPOINT = "s3.endpoint" | ||
S3_ACCESS_KEY_ID = "s3.access-key-id" | ||
S3_SECRET_ACCESS_KEY = "s3.secret-access-key" | ||
|
@@ -73,7 +73,7 @@ | |
S3_SIGNER_ENDPOINT = "s3.signer.endpoint" | ||
S3_SIGNER_ENDPOINT_DEFAULT = "v1/aws/s3/sign" | ||
S3_ROLE_ARN = "s3.role-arn" | ||
S3_SESSION_NAME = "s3.session-name" | ||
S3_ROLE_SESSION_NAME = "s3.role-session-name" | ||
HDFS_HOST = "hdfs.host" | ||
HDFS_PORT = "hdfs.port" | ||
HDFS_USER = "hdfs.user" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
took me a long time to find this, but it looks like
token
is correct herehttps://github.com/boto/botocore/blob/179e8b5361cd83d4c4acdf0c1bc1708b4a6966e9/botocore/session.py#L944-L948