From 3042de208f1e91bf9125be44f4d5df65a99f3f92 Mon Sep 17 00:00:00 2001
From: Kevin Liu
Date: Sat, 30 Mar 2024 18:39:17 -0700
Subject: [PATCH 1/4] call as_arrow() once

---
 pyiceberg/table/__init__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 18fac99312..b20ea22674 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -1131,8 +1131,9 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT)
         _check_schema_compatible(self.schema(), other_schema=df.schema)
         # cast if the two schemas are compatible but not equal
-        if self.schema().as_arrow() != df.schema:
-            df = df.cast(self.schema().as_arrow())
+        table_arrow_schema = self.schema().as_arrow()
+        if table_arrow_schema != df.schema:
+            df = df.cast(table_arrow_schema)
 
         with self.transaction() as txn:
             with txn.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:

From 7989672dd4a3ea27e5199fc75c571680bf9d6fb6 Mon Sep 17 00:00:00 2001
From: Kevin Liu
Date: Sat, 30 Mar 2024 18:39:40 -0700
Subject: [PATCH 2/4] remove print in test

---
 tests/catalog/test_sql.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py
index b20f617e32..01cfa2255e 100644
--- a/tests/catalog/test_sql.py
+++ b/tests/catalog/test_sql.py
@@ -220,9 +220,6 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, random_identifier: Identifier
     database_name, _table_name = random_identifier
     catalog.create_namespace(database_name)
     table = catalog.create_table(random_identifier, pyarrow_table.schema)
-    print(pyarrow_table.schema)
-    print(table.schema().as_struct())
-    print()
     table.overwrite(pyarrow_table)

From 9bcce50c1c9acb7d5676dae92b39d143a2ff6855 Mon Sep 17 00:00:00 2001
From: Kevin Liu
Date: Sat, 30 Mar 2024 18:47:45 -0700
Subject: [PATCH 3/4] also test for sqlite

---
 tests/catalog/test_sql.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py
index 01cfa2255e..99b8550602 100644
--- a/tests/catalog/test_sql.py
+++ b/tests/catalog/test_sql.py
@@ -197,7 +197,7 @@ def test_create_table_with_pyarrow_schema(
     'catalog',
     [
         lazy_fixture('catalog_memory'),
-        # lazy_fixture('catalog_sqlite'),
+        lazy_fixture('catalog_sqlite'),
     ],
 )
 def test_write_pyarrow_schema(catalog: SqlCatalog, random_identifier: Identifier) -> None:

From 4ad9fe168ecc7124b15c6bf6cc1b0d7b11b7c01d Mon Sep 17 00:00:00 2001
From: Kevin Liu
Date: Sat, 30 Mar 2024 19:14:50 -0700
Subject: [PATCH 4/4] remove all print in test

---
 tests/integration/test_reads.py  | 1 -
 tests/integration/test_writes.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index fdc13ae752..c670bc4846 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -274,7 +274,6 @@ def test_ray_nan_rewritten(catalog: Catalog) -> None:
 def test_ray_not_nan_count(catalog: Catalog) -> None:
     table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten")
     ray_dataset = table_test_null_nan_rewritten.scan(row_filter=NotNaN("col_numeric"), selected_fields=("idx",)).to_ray()
-    print(ray_dataset.take())
     assert ray_dataset.count() == 2

diff --git a/tests/integration/test_writes.py b/tests/integration/test_writes.py
index 5d6be0a7a4..0186e662dc 100644
--- a/tests/integration/test_writes.py
+++ b/tests/integration/test_writes.py
@@ -480,7 +480,6 @@ def test_write_parquet_other_properties(
     properties: Dict[str, Any],
     expected_kwargs: Dict[str, Any],
 ) -> None:
-    print(type(mocker))
     identifier = "default.test_write_parquet_other_properties"
     # The properties we test cannot be checked on the resulting Parquet file, so we spy on the ParquetWriter call instead
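
A minimal sketch (not part of the patches above) of the logic PATCH 1/4 touches: the table's Iceberg schema is converted to an Arrow schema once, the result is reused for both the comparison and the cast, and the incoming dataframe is cast only when the two schemas are compatible but not equal. The helper name cast_if_needed is hypothetical and exists only for illustration; only pyarrow is required to run it.

import pyarrow as pa


def cast_if_needed(table_arrow_schema: pa.Schema, df: pa.Table) -> pa.Table:
    # Reuse the Arrow schema computed once by the caller instead of
    # re-deriving it for the comparison and again for the cast.
    if table_arrow_schema != df.schema:
        df = df.cast(table_arrow_schema)
    return df


# Example: the logical schema matches, but the incoming column arrives as
# large_string while the table expects string; cast() reconciles the two.
incoming = pa.table({"name": pa.array(["a", "b"], type=pa.large_string())})
target = pa.schema([pa.field("name", pa.string())])
print(cast_if_needed(target, incoming).schema)  # name: string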