Skip to content

Commit

Permalink
Remove redundant snapshot_id from SetStatisticsUpdate
Browse files Browse the repository at this point in the history
  • Loading branch information
ndrluis committed Jan 23, 2025
1 parent 36d383d commit a7d4814
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 23 deletions.
6 changes: 1 addition & 5 deletions pyiceberg/table/update/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,6 @@ class RemovePropertiesUpdate(IcebergBaseModel):

class SetStatisticsUpdate(IcebergBaseModel):
action: Literal["set-statistics"] = Field(default="set-statistics")
snapshot_id: int = Field(alias="snapshot-id")
statistics: StatisticsFile


Expand Down Expand Up @@ -491,10 +490,7 @@ def _(

@_apply_table_update.register(SetStatisticsUpdate)
def _(update: SetStatisticsUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
if update.snapshot_id != update.statistics.snapshot_id:
raise ValueError("Snapshot id in statistics does not match the snapshot id in the update")

statistics = filter_statistics_by_snapshot_id(base_metadata.statistics, update.snapshot_id)
statistics = filter_statistics_by_snapshot_id(base_metadata.statistics, update.statistics.snapshot_id)
context.add_update(update)

return base_metadata.model_copy(update={"statistics": statistics + [update.statistics]})
Expand Down
3 changes: 1 addition & 2 deletions pyiceberg/table/update/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,9 @@ class UpdateStatistics(UpdateTableMetadata["UpdateStatistics"]):
def __init__(self, transaction: "Transaction") -> None:
super().__init__(transaction)

def set_statistics(self, snapshot_id: int, statistics_file: StatisticsFile) -> "UpdateStatistics":
def set_statistics(self, statistics_file: StatisticsFile) -> "UpdateStatistics":
self._updates += (
SetStatisticsUpdate(
snapshot_id=snapshot_id,
statistics=statistics_file,
),
)
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_statistics_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def create_statistics_file(snapshot_id: int, type_name: str) -> StatisticsFile:
statistics_file_snap_2 = create_statistics_file(add_snapshot_id_2, "deletion-vector-v1")

with tbl.update_statistics() as update:
update.set_statistics(add_snapshot_id_1, statistics_file_snap_1)
update.set_statistics(add_snapshot_id_2, statistics_file_snap_2)
update.set_statistics(statistics_file_snap_1)
update.set_statistics(statistics_file_snap_2)

assert len(tbl.metadata.statistics) == 2

Expand Down
14 changes: 0 additions & 14 deletions tests/table/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -1310,20 +1310,6 @@ def test_set_statistics_update(table_v2_with_statistics: Table) -> None:
assert len(updated_statistics) == 1
assert json.loads(updated_statistics[0].model_dump_json()) == json.loads(expected)

update = SetStatisticsUpdate(
snapshot_id=123456789,
statistics=statistics_file,
)

with pytest.raises(
ValueError,
match="Snapshot id in statistics does not match the snapshot id in the update",
):
update_table_metadata(
table_v2_with_statistics.metadata,
(update,),
)


def test_remove_statistics_update(table_v2_with_statistics: Table) -> None:
update = RemoveStatisticsUpdate(
Expand Down

0 comments on commit a7d4814

Please sign in to comment.