Skip to content

Commit 0a7db61

Browse files
committed
Migrate implementation of files() table from __init__.py
1 parent fb68c6b commit 0a7db61

File tree

1 file changed

+19
-8
lines changed

1 file changed

+19
-8
lines changed

pyiceberg/table/inspect.py

+19 -8
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
17 17
from __future__ import annotations
18 18

19 19
from datetime import datetime
20-
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
20+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
21 21

22 22
from pyiceberg.conversions import from_bytes
23 23
from pyiceberg.manifest import DataFile, DataFileContent, ManifestContent, PartitionFieldSummary
@@ -473,7 +473,7 @@ def history(self) -> "pa.Table":
473 473

474 474
return pa.Table.from_pylist(history, schema=history_schema)
475 475

476-
def files(self, snapshot_id: Optional[int] = None) -> "pa.Table":
476+
def _files(self, snapshot_id: Optional[int] = None, data_file_filter: Optional[Set[DataFileContent]] = None) -> "pa.Table":
477 477
import pyarrow as pa
478 478

479 479
from pyiceberg.io.pyarrow import schema_to_pyarrow
@@ -530,6 +530,8 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
530 530
for manifest_list in snapshot.manifests(io):
531 531
for manifest_entry in manifest_list.fetch_manifest_entry(io):
532 532
data_file = manifest_entry.data_file
533+
if data_file_filter and data_file.content not in data_file_filter:
534+
continue
533 535
column_sizes = data_file.column_sizes or {}
534 536
value_counts = data_file.value_counts or {}
535 537
null_value_counts = data_file.null_value_counts or {}
@@ -558,12 +560,12 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
558 560
"spec_id": data_file.spec_id,
559 561
"record_count": data_file.record_count,
560 562
"file_size_in_bytes": data_file.file_size_in_bytes,
561-
"column_sizes": dict(data_file.column_sizes),
562-
"value_counts": dict(data_file.value_counts),
563-
"null_value_counts": dict(data_file.null_value_counts),
564-
"nan_value_counts": dict(data_file.nan_value_counts),
565-
"lower_bounds": dict(data_file.lower_bounds),
566-
"upper_bounds": dict(data_file.upper_bounds),
563+
"column_sizes": dict(data_file.column_sizes) if data_file.column_sizes is not None else None,
564+
"value_counts": dict(data_file.value_counts) if data_file.value_counts is not None else None,
565+
"null_value_counts": dict(data_file.null_value_counts) if data_file.null_value_counts is not None else None,
566+
"nan_value_counts": dict(data_file.nan_value_counts) if data_file.nan_value_counts is not None else None,
567+
"lower_bounds": dict(data_file.lower_bounds) if data_file.lower_bounds is not None else None,
568+
"upper_bounds": dict(data_file.upper_bounds) if data_file.upper_bounds is not None else None,
567 569
"key_metadata": data_file.key_metadata,
568 570
"split_offsets": data_file.split_offsets,
569 571
"equality_ids": data_file.equality_ids,
@@ -575,3 +577,12 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
575 577
files,
576 578
schema=files_schema,
577 579
)
580+
581+
def files(self, snapshot_id: Optional[int] = None) -> "pa.Table":
582+
return self._files(snapshot_id)
583+
584+
def data_files(self, snapshot_id: Optional[int] = None) -> "pa.Table":
585+
return self._files(snapshot_id, {DataFileContent.DATA})
586+
587+
def delete_files(self, snapshot_id: Optional[int] = None) -> "pa.Table":
588+
return self._files(snapshot_id, {DataFileContent.POSITION_DELETES, DataFileContent.EQUALITY_DELETES})

0 commit comments

Comments
 (0)