diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py
index 013fc5d88f..c072282ece 100644
--- a/tests/io/test_fsspec.py
+++ b/tests/io/test_fsspec.py
@@ -19,6 +19,7 @@
 import pickle
 import tempfile
 import uuid
+from unittest import mock
 
 import pytest
 from botocore.awsrequest import AWSRequest
@@ -29,6 +30,7 @@
 from pyiceberg.io import fsspec
 from pyiceberg.io.fsspec import FsspecFileIO, s3v4_rest_signer
 from pyiceberg.io.pyarrow import PyArrowFileIO
+from pyiceberg.typedef import Properties
 
 
 def test_fsspec_infer_local_fs_from_path(fsspec_fileio: FsspecFileIO) -> None:
@@ -235,6 +237,66 @@ def test_fsspec_pickle_round_trip_s3(fsspec_fileio: FsspecFileIO) -> None:
     _test_fsspec_pickle_round_trip(fsspec_fileio, "s3://warehouse/foo.txt")
 
 
+@pytest.mark.s3
+def test_fsspec_s3_session_properties() -> None:
+    session_properties: Properties = {
+        "s3.endpoint": "http://localhost:9000",
+        "s3.access-key-id": "admin",
+        "s3.secret-access-key": "password",
+        "s3.region": "us-east-1",
+        "s3.session-token": "s3.session-token",
+        "client.access-key-id": "client.access-key-id",
+        "client.secret-access-key": "client.secret-access-key",
+        "client.region": "client.region",
+        "client.session-token": "client.session-token",
+    }
+
+    with mock.patch("s3fs.S3FileSystem") as mock_s3fs:
+        s3_fileio = FsspecFileIO(properties=session_properties)
+        filename = str(uuid.uuid4())
+
+        s3_fileio.new_input(location=f"s3://warehouse/{filename}")
+
+        mock_s3fs.assert_called_with(
+            client_kwargs={
+                "endpoint_url": "http://localhost:9000",
+                "aws_access_key_id": "admin",
+                "aws_secret_access_key": "password",
+                "region_name": "us-east-1",
+                "aws_session_token": "s3.session-token",
+            },
+            config_kwargs={},
+        )
+
+
+@pytest.mark.s3
+def test_fsspec_unified_session_properties() -> None:
+    session_properties: Properties = {
+        "s3.endpoint": "http://localhost:9000",
+        "client.access-key-id": "admin",
+        "client.secret-access-key": "password",
+        "client.region": "us-east-1",
+        "client.session-token": "client.session-token",
+    }
+
+    with mock.patch("s3fs.S3FileSystem") as mock_s3fs:
+        s3_fileio = FsspecFileIO(properties=session_properties)
+        filename = str(uuid.uuid4())
+
+        s3_fileio.new_input(location=f"s3://warehouse/{filename}")
+
+        mock_s3fs.assert_called_with(
+            client_kwargs={
+                "endpoint_url": "http://localhost:9000",
+                "aws_access_key_id": "admin",
+                "aws_secret_access_key": "password",
+                "region_name": "us-east-1",
+                "aws_session_token": "client.session-token",
+            },
+            config_kwargs={},
+        )
+
+
 @pytest.mark.adlfs
 def test_fsspec_new_input_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None:
     """Test creating a new input file from an fsspec file-io"""
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
index 37198b7edb..69bfcbab2b 100644
--- a/tests/io/test_pyarrow.py
+++ b/tests/io/test_pyarrow.py
@@ -18,6 +18,7 @@
 import os
 import tempfile
+import uuid
 from datetime import date
 from typing import Any, List, Optional
 from unittest.mock import MagicMock, patch
 
@@ -77,7 +78,7 @@
 from pyiceberg.table import FileScanTask, TableProperties
 from pyiceberg.table.metadata import TableMetadataV2
 from pyiceberg.transforms import IdentityTransform
-from pyiceberg.typedef import UTF8, Record
+from pyiceberg.typedef import UTF8, Properties, Record
 from pyiceberg.types import (
     BinaryType,
     BooleanType,
@@ -348,6 +349,58 @@ def test_deleting_hdfs_file_not_found() -> None:
     assert "Cannot delete file, does not exist:" in str(exc_info.value)
 
 
+def test_pyarrow_s3_session_properties() -> None:
+    session_properties: Properties = {
+        "s3.endpoint": "http://localhost:9000",
+        "s3.access-key-id": "admin",
+        "s3.secret-access-key": "password",
+        "s3.region": "us-east-1",
+        "s3.session-token": "s3.session-token",
+        "client.access-key-id": "client.access-key-id",
+        "client.secret-access-key": "client.secret-access-key",
+        "client.region": "client.region",
+        "client.session-token": "client.session-token",
+    }
+
+    with patch("pyarrow.fs.S3FileSystem") as mock_s3fs:
+        s3_fileio = PyArrowFileIO(properties=session_properties)
+        filename = str(uuid.uuid4())
+
+        s3_fileio.new_input(location=f"s3://warehouse/{filename}")
+
+        mock_s3fs.assert_called_with(
+            endpoint_override="http://localhost:9000",
+            access_key="admin",
+            secret_key="password",
+            region="us-east-1",
+            session_token="s3.session-token",
+        )
+
+
+def test_pyarrow_unified_session_properties() -> None:
+    session_properties: Properties = {
+        "s3.endpoint": "http://localhost:9000",
+        "client.access-key-id": "admin",
+        "client.secret-access-key": "password",
+        "client.region": "us-east-1",
+        "client.session-token": "client.session-token",
+    }
+
+    with patch("pyarrow.fs.S3FileSystem") as mock_s3fs:
+        s3_fileio = PyArrowFileIO(properties=session_properties)
+        filename = str(uuid.uuid4())
+
+        s3_fileio.new_input(location=f"s3://warehouse/{filename}")
+
+        mock_s3fs.assert_called_with(
+            endpoint_override="http://localhost:9000",
+            access_key="admin",
+            secret_key="password",
+            region="us-east-1",
+            session_token="client.session-token",
+        )
+
+
 def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema) -> None:
     actual = schema_to_pyarrow(table_schema_nested)
     expected = """foo: large_string