Skip to content

Commit

Permalink
Do not set Parquet version (apache#377)
Browse files (browse the repository at this point in the history)
* Do not set Parquet version

* Fix filesizes

---------

Co-authored-by: Fokko Driesprong <[email protected]>
  • Loading branch information
jonashaag and Fokko authored Feb 6, 2024
1 parent 8789fc2 commit 12e3353
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 10 deletions.
2 changes: 1 addition & 1 deletion pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1734,7 +1734,7 @@ def write_file(table: Table, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:

 fo = table.io.new_output(file_path)
 with fo.create(overwrite=True) as fos:
-    with pq.ParquetWriter(fos, schema=file_schema, version="1.0", **parquet_writer_kwargs) as writer:
+    with pq.ParquetWriter(fos, schema=file_schema, **parquet_writer_kwargs) as writer:
         writer.write_table(task.df)

data_file = DataFile(
Expand Down
18 changes: 9 additions & 9 deletions tests/integration/test_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,39 +357,39 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi

 assert summaries[0] == {
     'added-data-files': '1',
-    'added-files-size': '5437',
+    'added-files-size': '5459',
     'added-records': '3',
     'total-data-files': '1',
     'total-delete-files': '0',
     'total-equality-deletes': '0',
-    'total-files-size': '5437',
+    'total-files-size': '5459',
     'total-position-deletes': '0',
     'total-records': '3',
 }

 assert summaries[1] == {
     'added-data-files': '1',
-    'added-files-size': '5437',
+    'added-files-size': '5459',
     'added-records': '3',
     'total-data-files': '2',
     'total-delete-files': '0',
     'total-equality-deletes': '0',
-    'total-files-size': '10874',
+    'total-files-size': '10918',
     'total-position-deletes': '0',
     'total-records': '6',
 }

 assert summaries[2] == {
     'added-data-files': '1',
-    'added-files-size': '5437',
+    'added-files-size': '5459',
     'added-records': '3',
     'deleted-data-files': '2',
     'deleted-records': '6',
-    'removed-files-size': '10874',
+    'removed-files-size': '10918',
     'total-data-files': '1',
     'total-delete-files': '0',
     'total-equality-deletes': '0',
-    'total-files-size': '5437',
+    'total-files-size': '5459',
     'total-position-deletes': '0',
     'total-records': '3',
 }
Expand Down Expand Up @@ -555,12 +555,12 @@ def test_summaries_with_only_nulls(

 assert summaries[1] == {
     'added-data-files': '1',
-    'added-files-size': '4217',
+    'added-files-size': '4239',
     'added-records': '2',
     'total-data-files': '1',
     'total-delete-files': '0',
     'total-equality-deletes': '0',
-    'total-files-size': '4217',
+    'total-files-size': '4239',
     'total-position-deletes': '0',
     'total-records': '2',
 }
Expand Down

0 comments on commit 12e3353

Please sign in to comment.