diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 0b01e9cf24..4b3b4c22c0 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -1140,6 +1140,7 @@ table.update_statistics().set_statistics(snapshot_id, statistics_file).commit() table.update_statistics() .set_statistics(snapshot_id1, statistics_file1) .remove_statistics(snapshot_id2) + .commit() # Operations are applied on commit. ``` @@ -1147,8 +1148,8 @@ You can also use context managers to make more changes: ```python with table.update_statistics() as update: - update.set_statistics(1, statistics_file) - update.remove_statistics(2) + update.set_statistics(snapshot_id1, statistics_file) + update.remove_statistics(snapshot_id2) ``` ## Query the data diff --git a/pyiceberg/table/statistics.py b/pyiceberg/table/statistics.py index 93132cf6a8..a6c73e4aa7 100644 --- a/pyiceberg/table/statistics.py +++ b/pyiceberg/table/statistics.py @@ -42,7 +42,7 @@ class StatisticsFile(IcebergBaseModel): blob_metadata: List[BlobMetadata] = Field(alias="blob-metadata") -def reject_statistics( +def filter_statistics_by_snapshot_id( statistics: List[StatisticsFile], reject_snapshot_id: int, ) -> List[StatisticsFile]: diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index 533efe8b8f..073ad9dd81 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -37,7 +37,7 @@ SnapshotLogEntry, ) from pyiceberg.table.sorting import SortOrder -from pyiceberg.table.statistics import StatisticsFile, reject_statistics +from pyiceberg.table.statistics import StatisticsFile, filter_statistics_by_snapshot_id from pyiceberg.typedef import ( IcebergBaseModel, Properties, @@ -496,7 +496,7 @@ def _(update: SetStatisticsUpdate, base_metadata: TableMetadata, context: _Table if update.snapshot_id != update.statistics.snapshot_id: raise ValueError("Snapshot id in statistics does not match the snapshot id in the update") - statistics = reject_statistics(base_metadata.statistics, 
update.snapshot_id) + statistics = filter_statistics_by_snapshot_id(base_metadata.statistics, update.snapshot_id) context.add_update(update) return base_metadata.model_copy(update={"statistics": statistics + [update.statistics]}) @@ -507,7 +507,7 @@ def _(update: RemoveStatisticsUpdate, base_metadata: TableMetadata, context: _Ta if not any(stat.snapshot_id == update.snapshot_id for stat in base_metadata.statistics): raise ValueError(f"Statistics with snapshot id {update.snapshot_id} does not exist") - statistics = reject_statistics(base_metadata.statistics, update.snapshot_id) + statistics = filter_statistics_by_snapshot_id(base_metadata.statistics, update.snapshot_id) context.add_update(update) return base_metadata.model_copy(update={"statistics": statistics})