Skip to content

Commit 18448fd

Browse files
authored
Support getting snapshot at or right before the given timestamp (#748)
1 parent e61ef57 commit 18448fd

File tree

3 files changed

+64
-1
lines changed

3 files changed

+64
-1
lines changed

pyiceberg/table/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,6 +1302,18 @@ def snapshot_by_name(self, name: str) -> Optional[Snapshot]:
13021302
return self.snapshot_by_id(ref.snapshot_id)
13031303
return None
13041304

1305+
def snapshot_as_of_timestamp(self, timestamp_ms: int, inclusive: bool = True) -> Optional[Snapshot]:
    """Look up the snapshot that was current at, or just before, a point in time.

    The history log is scanned from its last entry backwards and the first
    entry that satisfies the timestamp bound decides the result.

    Args:
        timestamp_ms: Upper bound used when comparing against each history entry's ``timestamp_ms``.
        inclusive: When True an entry whose timestamp equals ``timestamp_ms`` matches;
            when False only strictly smaller timestamps match.

    Returns:
        The result of ``snapshot_by_id`` for the matching history entry, or None
        when no entry satisfies the bound.
    """
    for entry in reversed(self.history()):
        if inclusive:
            matches = entry.timestamp_ms <= timestamp_ms
        else:
            matches = entry.timestamp_ms < timestamp_ms
        if matches:
            return self.snapshot_by_id(entry.snapshot_id)
    return None
1316+
13051317
def history(self) -> List[SnapshotLogEntry]:
    """Return the snapshot log stored on this table's metadata."""
    snapshot_log = self.metadata.snapshot_log
    return snapshot_log

pyiceberg/table/snapshots.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,22 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17+
from __future__ import annotations
18+
1719
import time
1820
from collections import defaultdict
1921
from enum import Enum
20-
from typing import Any, DefaultDict, Dict, List, Mapping, Optional
22+
from typing import TYPE_CHECKING, Any, DefaultDict, Dict, Iterable, List, Mapping, Optional
2123

2224
from pydantic import Field, PrivateAttr, model_serializer
2325

2426
from pyiceberg.io import FileIO
2527
from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, read_manifest_list
2628
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
2729
from pyiceberg.schema import Schema
30+
31+
if TYPE_CHECKING:
32+
from pyiceberg.table.metadata import TableMetadata
2833
from pyiceberg.typedef import IcebergBaseModel
2934

3035
ADDED_DATA_FILES = "added-data-files"
@@ -412,3 +417,12 @@ def _update_totals(total_property: str, added_property: str, removed_property: s
412417
def set_when_positive(properties: Dict[str, str], num: int, property_name: str) -> None:
    """Store ``str(num)`` under ``property_name`` in ``properties`` when ``num`` is greater than zero.

    The mapping is mutated in place; zero and negative values leave it untouched.
    """
    if not num > 0:
        return
    properties[property_name] = str(num)
420+
421+
422+
def ancestors_of(current_snapshot: Optional[Snapshot], table_metadata: TableMetadata) -> Iterable[Snapshot]:
    """Get the ancestors of and including the given snapshot.

    The parent chain is walked with a loop rather than recursive ``yield from``
    so that a lineage longer than the interpreter's recursion limit does not
    raise ``RecursionError``.

    Args:
        current_snapshot: Snapshot to start from. Nothing is yielded when it is None.
        table_metadata: Metadata whose ``snapshot_by_id`` resolves each ``parent_snapshot_id``.

    Yields:
        ``current_snapshot`` first, then each parent in turn (child before parent).
        The walk stops at a snapshot with no parent id, or when a parent id
        cannot be resolved by ``table_metadata``.
    """
    snapshot = current_snapshot
    while snapshot is not None:
        yield snapshot
        if snapshot.parent_snapshot_id is None:
            break
        # A parent id that is not found resolves to None, which ends the loop.
        snapshot = table_metadata.snapshot_by_id(snapshot.parent_snapshot_id)

tests/table/test_init.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
Snapshot,
7777
SnapshotLogEntry,
7878
Summary,
79+
ancestors_of,
7980
)
8081
from pyiceberg.table.sorting import (
8182
NullOrder,
@@ -204,6 +205,42 @@ def test_snapshot_by_id(table_v2: Table) -> None:
204205
)
205206

206207

208+
def test_snapshot_by_timestamp(table_v2: Table) -> None:
    """Check inclusive and exclusive timestamp lookups against the fixture table."""
    lookup_ts = 1515100955770
    expected = Snapshot(
        snapshot_id=3051729675574597004,
        parent_snapshot_id=None,
        sequence_number=0,
        timestamp_ms=lookup_ts,
        manifest_list="s3://a/b/1.avro",
        summary=Summary(Operation.APPEND),
        schema_id=None,
    )

    # Inclusive lookup (the default) is expected to match the snapshot at exactly this timestamp.
    assert table_v2.snapshot_as_of_timestamp(lookup_ts) == expected

    # Exclusive lookup is expected to find nothing strictly before this timestamp.
    assert table_v2.snapshot_as_of_timestamp(lookup_ts, inclusive=False) is None
219+
220+
221+
def test_ancestors_of(table_v2: Table) -> None:
    """Check that ancestors_of yields the current snapshot followed by its parent."""
    current = Snapshot(
        snapshot_id=3055729675574597004,
        parent_snapshot_id=3051729675574597004,
        sequence_number=1,
        timestamp_ms=1555100955770,
        manifest_list="s3://a/b/2.avro",
        summary=Summary(Operation.APPEND),
        schema_id=1,
    )
    parent = Snapshot(
        snapshot_id=3051729675574597004,
        parent_snapshot_id=None,
        sequence_number=0,
        timestamp_ms=1515100955770,
        manifest_list="s3://a/b/1.avro",
        summary=Summary(Operation.APPEND),
        schema_id=None,
    )

    lineage = list(ancestors_of(table_v2.current_snapshot(), table_v2.metadata))

    assert lineage == [current, parent]
242+
243+
207244
def test_snapshot_by_id_does_not_exist(table_v2: Table) -> None:
    """Check that looking up an id the fixture is expected not to contain yields None."""
    missing = table_v2.snapshot_by_id(-1)
    assert missing is None
209246

0 commit comments

Comments
 (0)