Skip to content

Commit e15f355

Browse files
authored
Snapshot: Make manifest-list required (#1385)
* Snapshot: Make manifest-list required * Remove default * Update tests
1 parent b0ea716 commit e15f355

File tree

5 files changed

+10
-15
lines changed

5 files changed

+10
-15
lines changed

pyiceberg/catalog/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -818,8 +818,7 @@ def purge_table(self, identifier: Union[str, Identifier]) -> None:
818818
manifests_to_delete: List[ManifestFile] = []
819819
for snapshot in metadata.snapshots:
820820
manifests_to_delete += snapshot.manifests(io)
821-
if snapshot.manifest_list is not None:
822-
manifest_lists_to_delete.add(snapshot.manifest_list)
821+
manifest_lists_to_delete.add(snapshot.manifest_list)
823822

824823
manifest_paths_to_delete = {manifest.manifest_path for manifest in manifests_to_delete}
825824
prev_metadata_files = {log.metadata_file for log in metadata.metadata_log}

pyiceberg/cli/output.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,7 @@ def describe_table(self, table: Table) -> None:
112112

113113
snapshot_tree = Tree("Snapshots")
114114
for snapshot in metadata.snapshots:
115-
manifest_list_str = f": {snapshot.manifest_list}" if snapshot.manifest_list else ""
116-
snapshot_tree.add(f"Snapshot {snapshot.snapshot_id}, schema {snapshot.schema_id}{manifest_list_str}")
115+
snapshot_tree.add(f"Snapshot {snapshot.snapshot_id}, schema {snapshot.schema_id}: {snapshot.manifest_list}")
117116

118117
output_table = self._table
119118
output_table.add_row("Table format version", str(metadata.format_version))
@@ -141,8 +140,9 @@ def files(self, table: Table, history: bool) -> None:
141140
io = table.io
142141

143142
for snapshot in snapshots:
144-
manifest_list_str = f": {snapshot.manifest_list}" if snapshot.manifest_list else ""
145-
list_tree = snapshot_tree.add(f"Snapshot {snapshot.snapshot_id}, schema {snapshot.schema_id}{manifest_list_str}")
143+
list_tree = snapshot_tree.add(
144+
f"Snapshot {snapshot.snapshot_id}, schema {snapshot.schema_id}: {snapshot.manifest_list}"
145+
)
146146

147147
manifest_list = snapshot.manifests(io)
148148
for manifest in manifest_list:

pyiceberg/table/snapshots.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -239,9 +239,7 @@ class Snapshot(IcebergBaseModel):
239239
parent_snapshot_id: Optional[int] = Field(alias="parent-snapshot-id", default=None)
240240
sequence_number: Optional[int] = Field(alias="sequence-number", default=INITIAL_SEQUENCE_NUMBER)
241241
timestamp_ms: int = Field(alias="timestamp-ms", default_factory=lambda: int(time.time() * 1000))
242-
manifest_list: Optional[str] = Field(
243-
alias="manifest-list", description="Location of the snapshot's manifest list file", default=None
244-
)
242+
manifest_list: str = Field(alias="manifest-list", description="Location of the snapshot's manifest list file")
245243
summary: Optional[Summary] = Field(default=None)
246244
schema_id: Optional[int] = Field(alias="schema-id", default=None)
247245

@@ -255,9 +253,7 @@ def __str__(self) -> str:
255253

256254
def manifests(self, io: FileIO) -> List[ManifestFile]:
257255
"""Return the manifests for the given snapshot."""
258-
if self.manifest_list:
259-
return list(_manifests(io, self.manifest_list))
260-
return []
256+
return list(_manifests(io, self.manifest_list))
261257

262258

263259
class MetadataLogEntry(IcebergBaseModel):

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,7 @@ def all_avro_types() -> Dict[str, Any]:
625625
"partition-spec": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}],
626626
"properties": {},
627627
"current-snapshot-id": -1,
628-
"snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}],
628+
"snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822, "manifest-list": "s3://bucket/test/manifest-list"}],
629629
}
630630

631631

tests/table/test_metadata.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def test_updating_metadata(example_table_metadata_v2: Dict[str, Any]) -> None:
168168
def test_serialize_v1(example_table_metadata_v1: Dict[str, Any]) -> None:
169169
table_metadata = TableMetadataV1(**example_table_metadata_v1)
170170
table_metadata_json = table_metadata.model_dump_json()
171-
expected = """{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{},"snapshots":[{"snapshot-id":1925,"timestamp-ms":1602638573822}],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"format-version":1,"schema":{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},"partition-spec":[{"name":"x","transform":"identity","source-id":1,"field-id":1000}]}"""
171+
expected = """{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{},"snapshots":[{"snapshot-id":1925,"timestamp-ms":1602638573822,"manifest-list":"s3://bucket/test/manifest-list"}],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"format-version":1,"schema":{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},"partition-spec":[{"name":"x","transform":"identity","source-id":1,"field-id":1000}]}"""
172172
assert table_metadata_json == expected
173173

174174

@@ -497,7 +497,7 @@ def test_v1_write_metadata_for_v2() -> None:
497497
"partition-spec": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}],
498498
"properties": {},
499499
"current-snapshot-id": -1,
500-
"snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}],
500+
"snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822, "manifest-list": "s3://bucket/test/manifests"}],
501501
}
502502

503503
table_metadata = TableMetadataV1(**minimal_example_v1).to_v2()

0 commit comments

Comments
 (0)