Skip to content

Commit a403c65

Browse files
authored
Include DELETE entries when inspecting (apache#1731)
While doing some checks, I noticed that DELETE entries were missing from the inspection output.
1 parent e9d35d5 commit a403c65

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

pyiceberg/table/inspect.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,7 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
161161
entries = []
162162
snapshot = self._get_snapshot(snapshot_id)
163163
for manifest in snapshot.manifests(self.tbl.io):
164-
for entry in manifest.fetch_manifest_entry(io=self.tbl.io):
164+
for entry in manifest.fetch_manifest_entry(io=self.tbl.io, discard_deleted=False):
165165
column_sizes = entry.data_file.column_sizes or {}
166166
value_counts = entry.data_file.value_counts or {}
167167
null_value_counts = entry.data_file.null_value_counts or {}

tests/integration/test_inspect_table.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,8 @@ def test_inspect_entries(
164164

165165
# Write some data
166166
tbl.append(arrow_table_with_null)
167+
# Generate a DELETE entry
168+
tbl.overwrite(arrow_table_with_null)
167169

168170
def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> None:
169171
assert df.column_names == [
@@ -185,6 +187,8 @@ def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> Non
185187

186188
lhs = df.to_pandas()
187189
rhs = spark_df.toPandas()
190+
assert len(lhs) == len(rhs)
191+
188192
for column in df.column_names:
189193
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
190194
if column == "data_file":

0 commit comments

Comments (0)