
Commit dafcf22

Treat warning as error in CI/Dev (#973)
* Treat warning as error in CI/Dev. This will help us avoid propagating warnings to our users, as occurred in #971.
* fixup! Treat warning as error in CI/Dev
1 parent 3809708 commit dafcf22


5 files changed: +32 -19 lines changed


pyproject.toml

Lines changed: 15 additions & 8 deletions
@@ -29,15 +29,18 @@ classifiers = [
   "Programming Language :: Python :: 3.8",
   "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
-  "Programming Language :: Python :: 3.11"
+  "Programming Language :: Python :: 3.11",
 ]
 packages = [
   { include = "pyiceberg" },
   { from = "vendor", include = "fb303" },
   { from = "vendor", include = "hive_metastore" },
   { include = "tests", format = "sdist" },
   { include = "Makefile", format = "sdist" },
-  { include = "NOTICE", format = ["sdist", "wheel"] }
+  { include = "NOTICE", format = [
+    "sdist",
+    "wheel",
+  ] },
 ]
 include = [
   { path = "dev", format = "sdist" },
@@ -62,8 +65,8 @@ pyarrow = { version = ">=9.0.0,<18.0.0", optional = true }
 pandas = { version = ">=1.0.0,<3.0.0", optional = true }
 duckdb = { version = ">=0.5.0,<2.0.0", optional = true }
 ray = [
-  { version = "==2.10.0", python = "<3.9", optional = true},
-  { version = ">=2.10.0,<3.0.0", python = ">=3.9", optional = true}
+  { version = "==2.10.0", python = "<3.9", optional = true },
+  { version = ">=2.10.0,<3.0.0", python = ">=3.9", optional = true },
 ]
 python-snappy = { version = ">=0.6.0,<1.0.0", optional = true }
 thrift = { version = ">=0.13.0,<1.0.0", optional = true }
@@ -599,13 +602,17 @@ markers = [
   "s3: marks a test as requiring access to s3 compliant storage (use with --aws-access-key-id, --aws-secret-access-key, and --endpoint args)",
   "adlfs: marks a test as requiring access to adlfs compliant storage (use with --adlfs.account-name, --adlfs.account-key, and --adlfs.endpoint args)",
   "integration: marks integration tests against Apache Spark",
-  "gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)"
+  "gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)",
 ]

 # Turns a warning into an error
-#filterwarnings = [
-#    "error"
-#]
+filterwarnings = [
+  "error",
+  "ignore:A plugin raised an exception during an old-style hookwrapper teardown.",
+  "ignore:unclosed <socket.socket",
+  # Remove this in a future release of PySpark.
+  "ignore:distutils Version classes are deprecated. Use packaging.version instead.",
+]

 [tool.black]
 line-length = 130
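
A note on how pytest reads the new `filterwarnings` list above: each entry is a standard Python warning filter, `"error"` promotes every warning raised during a test to an exception, and the later `ignore:<message>` entries take precedence for warnings whose message starts with the given text (the message part is matched as a regular expression against the beginning of the warning). A minimal sketch of that behaviour, using a hypothetical test file that is not part of this commit:

```python
# sketch_filterwarnings.py -- hypothetical example, not part of this commit.
# Assumes the pyproject.toml configuration above: filterwarnings = ["error", ...].
import warnings


def legacy_helper() -> int:
    # Hypothetical helper that still emits a DeprecationWarning.
    warnings.warn("legacy_helper is deprecated", DeprecationWarning)
    return 42


def test_fails_under_error_filter() -> None:
    # With the blanket "error" filter, the DeprecationWarning above is raised
    # as an exception and this test fails instead of silently passing.
    assert legacy_helper() == 42


def test_passes_when_message_matches_an_ignore_entry() -> None:
    # An "ignore:<message>" entry matches the *start* of the warning text as a
    # regex, which is why a prefix such as "ignore:unclosed <socket.socket" in
    # the config above is enough to exempt this warning from "error".
    warnings.warn("unclosed <socket.socket fd=7>", ResourceWarning)
```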

tests/catalog/test_sql.py

Lines changed: 1 addition & 1 deletion
@@ -355,7 +355,7 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, table_identifier: Identifier)
     namespace = Catalog.namespace_from(table_identifier_nocatalog)
     catalog.create_namespace(namespace)
     table = catalog.create_table(table_identifier, pyarrow_table.schema)
-    table.overwrite(pyarrow_table)
+    table.append(pyarrow_table)


 @pytest.mark.parametrize(
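
The `overwrite` to `append` swaps in this and the following test files appear to be driven by the new `error` filter: `Table.overwrite` deletes existing data before writing, and on a freshly created, empty table that delete matches nothing, which is presumably what emits the `Delete operation did not match any records` warning that other tests in this commit explicitly ignore. A rough, self-contained sketch of the pattern against a throwaway SQLite-backed catalog (warehouse path, namespace, and table name are illustrative, not taken from the commit):

```python
# Illustrative only: requires pyarrow and SQLAlchemy (pyiceberg's SQL catalog
# dependency). All names below are made up for the example.
import tempfile

import pyarrow as pa

from pyiceberg.catalog.sql import SqlCatalog

warehouse = tempfile.mkdtemp(prefix="pyiceberg_example_")
catalog = SqlCatalog(
    "default",
    uri=f"sqlite:///{warehouse}/catalog.db",
    warehouse=f"file://{warehouse}",
)
catalog.create_namespace("default")

arrow_table = pa.Table.from_pydict({"city": ["Amsterdam", "Drachten"], "population": [921402, 45019]})
tbl = catalog.create_table("default.cities", schema=arrow_table.schema)

tbl.append(arrow_table)     # first write: nothing to delete, no warning expected
tbl.overwrite(arrow_table)  # later overwrite: the delete now has rows to replace
```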

tests/integration/test_deletes.py

Lines changed: 4 additions & 0 deletions
@@ -145,6 +145,7 @@ def test_rewrite_partitioned_table_with_null(spark: SparkSession, session_catalo

 @pytest.mark.integration
 @pytest.mark.parametrize("format_version", [1, 2])
+@pytest.mark.filterwarnings("ignore:Delete operation did not match any records")
 def test_partitioned_table_no_match(spark: SparkSession, session_catalog: RestCatalog, format_version: int) -> None:
     identifier = "default.table_partitioned_delete"

@@ -175,6 +176,7 @@ def test_partitioned_table_no_match(spark: SparkSession, session_catalog: RestCa


 @pytest.mark.integration
+@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
 def test_delete_partitioned_table_positional_deletes(spark: SparkSession, session_catalog: RestCatalog) -> None:
     identifier = "default.table_partitioned_delete"

@@ -223,6 +225,7 @@ def test_delete_partitioned_table_positional_deletes(spark: SparkSession, sessio


 @pytest.mark.integration
+@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
 def test_overwrite_partitioned_table(spark: SparkSession, session_catalog: RestCatalog) -> None:
     identifier = "default.table_partitioned_delete"

@@ -274,6 +277,7 @@ def test_overwrite_partitioned_table(spark: SparkSession, session_catalog: RestC


 @pytest.mark.integration
+@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
 def test_partitioned_table_positional_deletes_sequence_number(spark: SparkSession, session_catalog: RestCatalog) -> None:
     identifier = "default.table_partitioned_delete_sequence_number"

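
The decorators added above scope the exemption to a single test rather than weakening the global filter: `@pytest.mark.filterwarnings("ignore:<message prefix>")` applies only to the decorated test, so every other test still treats warnings as errors. Where a test should also assert that the warning actually fires, `pytest.warns` is the stricter alternative. A small sketch with a hypothetical `delete_rows` helper standing in for a delete that matches nothing:

```python
# Hypothetical helper, not part of this commit; the warning text mirrors the
# one ignored in the tests above.
import warnings

import pytest


def delete_rows(matched: int) -> int:
    if matched == 0:
        warnings.warn("Delete operation did not match any records", UserWarning)
    return matched


@pytest.mark.filterwarnings("ignore:Delete operation did not match any records")
def test_no_match_is_tolerated() -> None:
    # The per-test ignore lets this expected warning through even when the
    # global filterwarnings list starts with "error".
    assert delete_rows(0) == 0


def test_no_match_warns() -> None:
    # Stricter variant: this test fails if the warning stops being emitted.
    with pytest.warns(UserWarning, match="did not match any records"):
        delete_rows(0)
```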

tests/integration/test_inspect_table.py

Lines changed: 3 additions & 3 deletions
@@ -79,7 +79,7 @@ def test_inspect_snapshots(
     identifier = "default.table_metadata_snapshots"
     tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
     # should produce a DELETE entry
     tbl.overwrite(arrow_table_with_null)
     # Since we don't rewrite, this should produce a new manifest with an ADDED entry
@@ -295,7 +295,7 @@ def test_inspect_refs(
     tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

     # write data to create snapshot
-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)

     # create a test branch
     spark.sql(
@@ -667,7 +667,7 @@ def test_inspect_files(

     tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)

     # append more data
     tbl.append(arrow_table_with_null)

tests/integration/test_writes/test_writes.py

Lines changed: 9 additions & 7 deletions
@@ -256,7 +256,7 @@ def test_data_files(spark: SparkSession, session_catalog: Catalog, arrow_table_w
     identifier = "default.arrow_data_files"
     tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [])

-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
     # should produce a DELETE entry
     tbl.overwrite(arrow_table_with_null)
     # Since we don't rewrite, this should produce a new manifest with an ADDED entry
@@ -288,7 +288,7 @@ def get_current_snapshot_id(identifier: str) -> int:
            .snapshot_id
        )

-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
     assert tbl.current_snapshot().snapshot_id == get_current_snapshot_id(identifier)  # type: ignore
     tbl.overwrite(arrow_table_with_null)
     assert tbl.current_snapshot().snapshot_id == get_current_snapshot_id(identifier)  # type: ignore
@@ -330,7 +330,7 @@ def test_python_writes_special_character_column_with_spark_reads(
     arrow_table_with_special_character_column = pa.Table.from_pydict(TEST_DATA_WITH_SPECIAL_CHARACTER_COLUMN, schema=pa_schema)
     tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)

-    tbl.overwrite(arrow_table_with_special_character_column)
+    tbl.append(arrow_table_with_special_character_column)
     spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
     pyiceberg_df = tbl.scan().to_pandas()
     assert spark_df.equals(pyiceberg_df)
@@ -354,7 +354,7 @@ def test_python_writes_dictionary_encoded_column_with_spark_reads(

     tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)

-    tbl.overwrite(arrow_table)
+    tbl.append(arrow_table)
     spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
     pyiceberg_df = tbl.scan().to_pandas()
     assert spark_df.equals(pyiceberg_df)
@@ -393,7 +393,7 @@ def test_python_writes_with_small_and_large_types_spark_reads(
     arrow_table = pa.Table.from_pydict(TEST_DATA, schema=pa_schema)
     tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)

-    tbl.overwrite(arrow_table)
+    tbl.append(arrow_table)
     spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
     pyiceberg_df = tbl.scan().to_pandas()
     assert spark_df.equals(pyiceberg_df)
@@ -429,7 +429,7 @@ def get_data_files_count(identifier: str) -> int:

     # writes 1 data file since the table is smaller than default target file size
     assert arrow_table_with_null.nbytes < TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT
-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
     assert get_data_files_count(identifier) == 1

     # writes 1 data file as long as table is smaller than default target file size
@@ -820,7 +820,7 @@ def test_inspect_snapshots(
     identifier = "default.table_metadata_snapshots"
     tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
     # should produce a DELETE entry
     tbl.overwrite(arrow_table_with_null)
     # Since we don't rewrite, this should produce a new manifest with an ADDED entry
@@ -979,6 +979,7 @@ def test_table_write_subset_of_schema(session_catalog: Catalog, arrow_table_with

 @pytest.mark.integration
 @pytest.mark.parametrize("format_version", [1, 2])
+@pytest.mark.filterwarnings("ignore:Delete operation did not match any records")
 def test_table_write_out_of_order_schema(session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int) -> None:
     identifier = "default.test_table_write_out_of_order_schema"
     # rotate the schema fields by 1
@@ -989,6 +990,7 @@ def test_table_write_out_of_order_schema(session_catalog: Catalog, arrow_table_w
     tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=rotated_schema)

     tbl.overwrite(arrow_table_with_null)
+
     tbl.append(arrow_table_with_null)
     # overwrite and then append should produce twice the data
     assert len(tbl.scan().to_arrow()) == len(arrow_table_with_null) * 2
