Skip to content

Commit

Permalink
add unit tests for fileio
Browse files Browse the repository at this point in the history
  • Loading branch information
HonahX committed Jul 14, 2024
1 parent a67f549 commit 7a85275
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 1 deletion.
62 changes: 62 additions & 0 deletions tests/io/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import pickle
import tempfile
import uuid
from unittest import mock

import pytest
from botocore.awsrequest import AWSRequest
Expand All @@ -29,6 +30,7 @@
from pyiceberg.io import fsspec
from pyiceberg.io.fsspec import FsspecFileIO, s3v4_rest_signer
from pyiceberg.io.pyarrow import PyArrowFileIO
from pyiceberg.typedef import Properties


def test_fsspec_infer_local_fs_from_path(fsspec_fileio: FsspecFileIO) -> None:
Expand Down Expand Up @@ -235,6 +237,66 @@ def test_fsspec_pickle_round_trip_s3(fsspec_fileio: FsspecFileIO) -> None:
_test_fsspec_pickle_round_trip(fsspec_fileio, "s3://warehouse/foo.txt")


@pytest.mark.s3
def test_fsspec_s3_session_properties() -> None:
    """When both s3.* and client.* properties are set, FsspecFileIO must
    pass the s3.* values through to s3fs.S3FileSystem (s3.* wins)."""
    session_properties: Properties = {
        "s3.endpoint": "http://localhost:9000",
        "s3.access-key-id": "admin",
        "s3.secret-access-key": "password",
        "s3.region": "us-east-1",
        "s3.session-token": "s3.session-token",
        "client.access-key-id": "client.access-key-id",
        "client.secret-access-key": "client.secret-access-key",
        "client.region": "client.region",
        "client.session-token": "client.session-token",
    }

    with mock.patch("s3fs.S3FileSystem") as mock_s3fs:
        s3_fileio = FsspecFileIO(properties=session_properties)
        filename = str(uuid.uuid4())

        # Fix: the location string must interpolate the random filename
        # (previously the f-string had no placeholder).
        s3_fileio.new_input(location=f"s3://warehouse/{filename}")

        # The s3.* properties must be mapped to s3fs client kwargs,
        # shadowing the client.* fallbacks.
        mock_s3fs.assert_called_with(
            client_kwargs={
                "endpoint_url": "http://localhost:9000",
                "aws_access_key_id": "admin",
                "aws_secret_access_key": "password",
                "region_name": "us-east-1",
                "aws_session_token": "s3.session-token",
            },
            config_kwargs={},
        )


@pytest.mark.s3
def test_fsspec_unified_session_properties() -> None:
    """When only client.* properties are set (plus s3.endpoint), FsspecFileIO
    must fall back to the unified client.* values for the s3fs client kwargs."""
    session_properties: Properties = {
        "s3.endpoint": "http://localhost:9000",
        "client.access-key-id": "admin",
        "client.secret-access-key": "password",
        "client.region": "us-east-1",
        "client.session-token": "client.session-token",
    }

    with mock.patch("s3fs.S3FileSystem") as mock_s3fs:
        s3_fileio = FsspecFileIO(properties=session_properties)
        filename = str(uuid.uuid4())

        # Fix: the location string must interpolate the random filename
        # (previously the f-string had no placeholder).
        s3_fileio.new_input(location=f"s3://warehouse/{filename}")

        # With no s3.* credentials present, the client.* values are used.
        mock_s3fs.assert_called_with(
            client_kwargs={
                "endpoint_url": "http://localhost:9000",
                "aws_access_key_id": "admin",
                "aws_secret_access_key": "password",
                "region_name": "us-east-1",
                "aws_session_token": "client.session-token",
            },
            config_kwargs={},
        )


@pytest.mark.adlfs
def test_fsspec_new_input_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None:
"""Test creating a new input file from an fsspec file-io"""
Expand Down
55 changes: 54 additions & 1 deletion tests/io/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import os
import tempfile
import uuid
from datetime import date
from typing import Any, List, Optional
from unittest.mock import MagicMock, patch
Expand Down Expand Up @@ -77,7 +78,7 @@
from pyiceberg.table import FileScanTask, TableProperties
from pyiceberg.table.metadata import TableMetadataV2
from pyiceberg.transforms import IdentityTransform
from pyiceberg.typedef import UTF8, Record
from pyiceberg.typedef import UTF8, Properties, Record
from pyiceberg.types import (
BinaryType,
BooleanType,
Expand Down Expand Up @@ -348,6 +349,58 @@ def test_deleting_hdfs_file_not_found() -> None:
assert "Cannot delete file, does not exist:" in str(exc_info.value)


def test_pyarrow_s3_session_properties() -> None:
    """When both s3.* and client.* properties are set, PyArrowFileIO must
    pass the s3.* values through to pyarrow.fs.S3FileSystem (s3.* wins)."""
    session_properties: Properties = {
        "s3.endpoint": "http://localhost:9000",
        "s3.access-key-id": "admin",
        "s3.secret-access-key": "password",
        "s3.region": "us-east-1",
        "s3.session-token": "s3.session-token",
        "client.access-key-id": "client.access-key-id",
        "client.secret-access-key": "client.secret-access-key",
        "client.region": "client.region",
        "client.session-token": "client.session-token",
    }

    with patch("pyarrow.fs.S3FileSystem") as mock_s3fs:
        s3_fileio = PyArrowFileIO(properties=session_properties)
        filename = str(uuid.uuid4())

        # Fix: the location string must interpolate the random filename
        # (previously the f-string had no placeholder).
        s3_fileio.new_input(location=f"s3://warehouse/{filename}")

        # The s3.* properties must be mapped to pyarrow S3FileSystem kwargs,
        # shadowing the client.* fallbacks.
        mock_s3fs.assert_called_with(
            endpoint_override="http://localhost:9000",
            access_key="admin",
            secret_key="password",
            region="us-east-1",
            session_token="s3.session-token",
        )


def test_pyarrow_unified_session_properties() -> None:
    """When only client.* properties are set (plus s3.endpoint), PyArrowFileIO
    must fall back to the unified client.* values for the S3FileSystem kwargs."""
    session_properties: Properties = {
        "s3.endpoint": "http://localhost:9000",
        "client.access-key-id": "admin",
        "client.secret-access-key": "password",
        "client.region": "us-east-1",
        "client.session-token": "client.session-token",
    }

    with patch("pyarrow.fs.S3FileSystem") as mock_s3fs:
        s3_fileio = PyArrowFileIO(properties=session_properties)
        filename = str(uuid.uuid4())

        # Fix: the location string must interpolate the random filename
        # (previously the f-string had no placeholder).
        s3_fileio.new_input(location=f"s3://warehouse/{filename}")

        # With no s3.* credentials present, the client.* values are used.
        mock_s3fs.assert_called_with(
            endpoint_override="http://localhost:9000",
            access_key="admin",
            secret_key="password",
            region="us-east-1",
            session_token="client.session-token",
        )


def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema) -> None:
actual = schema_to_pyarrow(table_schema_nested)
expected = """foo: large_string
Expand Down

0 comments on commit 7a85275

Please sign in to comment.