Skip to content

Commit 19148d3

Browse files
authored
Implement update for remove-snapshots action (#1561)
1 parent 7200a92 commit 19148d3

File tree

2 files changed

+78
-0
lines changed

2 files changed

+78
-0
lines changed

pyiceberg/table/update/__init__.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# under the License.
1717
from __future__ import annotations
1818

19+
import itertools
1920
import uuid
2021
from abc import ABC, abstractmethod
2122
from datetime import datetime
@@ -466,6 +467,46 @@ def _(update: SetSnapshotRefUpdate, base_metadata: TableMetadata, context: _Tabl
466467
return base_metadata.model_copy(update=metadata_updates)
467468

468469

470+
@_apply_table_update.register(RemoveSnapshotsUpdate)
471+
def _(update: RemoveSnapshotsUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
472+
for remove_snapshot_id in update.snapshot_ids:
473+
if not any(snapshot.snapshot_id == remove_snapshot_id for snapshot in base_metadata.snapshots):
474+
raise ValueError(f"Snapshot with snapshot id {remove_snapshot_id} does not exist: {base_metadata.snapshots}")
475+
476+
snapshots = [
477+
(
478+
snapshot.model_copy(update={"parent_snapshot_id": None})
479+
if snapshot.parent_snapshot_id in update.snapshot_ids
480+
else snapshot
481+
)
482+
for snapshot in base_metadata.snapshots
483+
if snapshot.snapshot_id not in update.snapshot_ids
484+
]
485+
snapshot_log = [
486+
snapshot_log_entry
487+
for snapshot_log_entry in base_metadata.snapshot_log
488+
if snapshot_log_entry.snapshot_id not in update.snapshot_ids
489+
]
490+
491+
remove_ref_updates = (
492+
RemoveSnapshotRefUpdate(ref_name=ref_name)
493+
for ref_name, ref in base_metadata.refs.items()
494+
if ref.snapshot_id in update.snapshot_ids
495+
)
496+
remove_statistics_updates = (
497+
RemoveStatisticsUpdate(statistics_file.snapshot_id)
498+
for statistics_file in base_metadata.statistics
499+
if statistics_file.snapshot_id in update.snapshot_ids
500+
)
501+
updates = itertools.chain(remove_ref_updates, remove_statistics_updates)
502+
new_metadata = base_metadata
503+
for upd in updates:
504+
new_metadata = _apply_table_update(upd, new_metadata, context)
505+
506+
context.add_update(update)
507+
return new_metadata.model_copy(update={"snapshots": snapshots, "snapshot_log": snapshot_log})
508+
509+
469510
@_apply_table_update.register(RemoveSnapshotRefUpdate)
470511
def _(update: RemoveSnapshotRefUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
471512
if update.ref_name not in base_metadata.refs:

tests/table/test_init.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
AssertTableUUID,
8080
RemovePropertiesUpdate,
8181
RemoveSnapshotRefUpdate,
82+
RemoveSnapshotsUpdate,
8283
RemoveStatisticsUpdate,
8384
SetDefaultSortOrderUpdate,
8485
SetPropertiesUpdate,
@@ -795,6 +796,42 @@ def test_update_metadata_set_snapshot_ref(table_v2: Table) -> None:
795796

796797

797798
def test_update_remove_snapshots(table_v2: Table) -> None:
799+
REMOVE_SNAPSHOT = 3051729675574597004
800+
KEEP_SNAPSHOT = 3055729675574597004
801+
# assert fixture data to easily understand the test assumptions
802+
assert len(table_v2.metadata.snapshots) == 2
803+
assert len(table_v2.metadata.snapshot_log) == 2
804+
assert len(table_v2.metadata.refs) == 2
805+
update = RemoveSnapshotsUpdate(snapshot_ids=[REMOVE_SNAPSHOT])
806+
new_metadata = update_table_metadata(table_v2.metadata, (update,))
807+
assert len(new_metadata.snapshots) == 1
808+
assert new_metadata.snapshots[0].snapshot_id == KEEP_SNAPSHOT
809+
assert new_metadata.snapshots[0].parent_snapshot_id is None
810+
assert new_metadata.current_snapshot_id == KEEP_SNAPSHOT
811+
assert new_metadata.last_updated_ms > table_v2.metadata.last_updated_ms
812+
assert len(new_metadata.snapshot_log) == 1
813+
assert new_metadata.snapshot_log[0].snapshot_id == KEEP_SNAPSHOT
814+
assert len(new_metadata.refs) == 1
815+
assert new_metadata.refs["main"].snapshot_id == KEEP_SNAPSHOT
816+
817+
818+
def test_update_remove_snapshots_doesnt_exist(table_v2: Table) -> None:
819+
update = RemoveSnapshotsUpdate(
820+
snapshot_ids=[123],
821+
)
822+
with pytest.raises(ValueError, match="Snapshot with snapshot id 123 does not exist"):
823+
update_table_metadata(table_v2.metadata, (update,))
824+
825+
826+
def test_update_remove_snapshots_remove_current_snapshot_id(table_v2: Table) -> None:
827+
update = RemoveSnapshotsUpdate(snapshot_ids=[3055729675574597004])
828+
new_metadata = update_table_metadata(table_v2.metadata, (update,))
829+
assert len(new_metadata.refs) == 1
830+
assert new_metadata.refs["test"].snapshot_id == 3051729675574597004
831+
assert new_metadata.current_snapshot_id is None
832+
833+
834+
def test_update_remove_snapshot_ref(table_v2: Table) -> None:
798835
# assert fixture data to easily understand the test assumptions
799836
assert len(table_v2.metadata.refs) == 2
800837
update = RemoveSnapshotRefUpdate(ref_name="test")

0 commit comments

Comments
 (0)