Skip to content

Commit

Permalink
Enable nanosecond (ns) timestamp downcasting in pyiceberg.table.Table.add_files
Browse files (browse the repository at this point in the history)
  • Loading branch information
Matej Šrubař committed Jan 24, 2025
1 parent 5df7468 commit 1b715a9
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2422,6 +2422,8 @@ def _check_pyarrow_schema_compatible(


def parquet_files_to_data_files(io: FileIO, table_metadata: TableMetadata, file_paths: Iterator[str]) -> Iterator[DataFile]:
from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE

for file_path in file_paths:
input_file = io.new_input(file_path)
with input_file.open() as input_stream:
Expand All @@ -2432,7 +2434,12 @@ def parquet_files_to_data_files(io: FileIO, table_metadata: TableMetadata, file_
f"Cannot add file {file_path} because it has field IDs. `add_files` only supports addition of files without field_ids"
)
schema = table_metadata.schema()
_check_pyarrow_schema_compatible(schema, parquet_metadata.schema.to_arrow_schema())
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
_check_pyarrow_schema_compatible(
schema,
parquet_metadata.schema.to_arrow_schema(),
downcast_ns_timestamp_to_us
)

statistics = data_file_statistics_from_parquet_metadata(
parquet_metadata=parquet_metadata,
Expand Down

0 comments on commit 1b715a9

Please sign in to comment.