Skip to content

Commit 9da253c

Browse files
committed
fix: formatting
1 parent 7bf4170 commit 9da253c

File tree

1 file changed

+52
-108
lines changed

1 file changed

+52
-108
lines changed

pyiceberg/manifest.py

Lines changed: 52 additions & 108 deletions
Original file line number | Diff line number | Diff line change
@@ -21,23 +21,22 @@
2121
from copy import copy
2222
from enum import Enum
2323
from types import TracebackType
24-
from typing import Any, Generator
25-
from typing import Callable
26-
from typing import Dict
27-
from typing import Iterator
28-
from typing import List
29-
from typing import Literal
30-
from typing import Optional
31-
from typing import Type
24+
from typing import (
25+
Any,
26+
Dict,
27+
Iterator,
28+
List,
29+
Literal,
30+
Optional,
31+
Type,
32+
)
3233

3334
from pydantic_core import to_json
3435

3536
from pyiceberg.avro.file import AvroFile, AvroOutputFile
3637
from pyiceberg.conversions import to_bytes
3738
from pyiceberg.exceptions import ValidationError
38-
from pyiceberg.io import FileIO
39-
from pyiceberg.io import InputFile
40-
from pyiceberg.io import OutputFile
39+
from pyiceberg.io import FileIO, InputFile, OutputFile
4140
from pyiceberg.partitioning import PartitionSpec
4241
from pyiceberg.schema import Schema
4342
from pyiceberg.typedef import Record, TableVersion
@@ -53,7 +52,6 @@
5352
StringType,
5453
StructType,
5554
)
56-
from pyiceberg.typedef import EMPTY_DICT
5755

5856
UNASSIGNED_SEQ = -1
5957
DEFAULT_BLOCK_SIZE = 67108864 # 64 * 1024 * 1024
@@ -103,9 +101,7 @@ def __repr__(self) -> str:
103101

104102
DATA_FILE_TYPE: Dict[int, StructType] = {
105103
1: StructType(
106-
NestedField(
107-
field_id=100, name="file_path", field_type=StringType(), required=True, doc="Location URI with FS scheme"
108-
),
104+
NestedField(field_id=100, name="file_path", field_type=StringType(), required=True, doc="Location URI with FS scheme"),
109105
NestedField(
110106
field_id=101,
111107
name="file_format",
@@ -120,15 +116,9 @@ def __repr__(self) -> str:
120116
required=True,
121117
doc="Partition data tuple, schema based on the partition spec",
122118
),
119+
NestedField(field_id=103, name="record_count", field_type=LongType(), required=True, doc="Number of records in the file"),
123120
NestedField(
124-
field_id=103, name="record_count", field_type=LongType(), required=True, doc="Number of records in the file"
125-
),
126-
NestedField(
127-
field_id=104,
128-
name="file_size_in_bytes",
129-
field_type=LongType(),
130-
required=True,
131-
doc="Total file size in bytes",
121+
field_id=104, name="file_size_in_bytes", field_type=LongType(), required=True, doc="Total file size in bytes"
132122
),
133123
NestedField(
134124
field_id=105,
@@ -181,11 +171,7 @@ def __repr__(self) -> str:
181171
doc="Map of column id to upper bound",
182172
),
183173
NestedField(
184-
field_id=131,
185-
name="key_metadata",
186-
field_type=BinaryType(),
187-
required=False,
188-
doc="Encryption key metadata blob",
174+
field_id=131, name="key_metadata", field_type=BinaryType(), required=False, doc="Encryption key metadata blob"
189175
),
190176
NestedField(
191177
field_id=132,
@@ -205,9 +191,7 @@ def __repr__(self) -> str:
205191
doc="File format name: avro, orc, or parquet",
206192
initial_default=DataFileContent.DATA,
207193
),
208-
NestedField(
209-
field_id=100, name="file_path", field_type=StringType(), required=True, doc="Location URI with FS scheme"
210-
),
194+
NestedField(field_id=100, name="file_path", field_type=StringType(), required=True, doc="Location URI with FS scheme"),
211195
NestedField(
212196
field_id=101,
213197
name="file_format",
@@ -222,15 +206,9 @@ def __repr__(self) -> str:
222206
required=True,
223207
doc="Partition data tuple, schema based on the partition spec",
224208
),
209+
NestedField(field_id=103, name="record_count", field_type=LongType(), required=True, doc="Number of records in the file"),
225210
NestedField(
226-
field_id=103, name="record_count", field_type=LongType(), required=True, doc="Number of records in the file"
227-
),
228-
NestedField(
229-
field_id=104,
230-
name="file_size_in_bytes",
231-
field_type=LongType(),
232-
required=True,
233-
doc="Total file size in bytes",
211+
field_id=104, name="file_size_in_bytes", field_type=LongType(), required=True, doc="Total file size in bytes"
234212
),
235213
NestedField(
236214
field_id=108,
@@ -275,11 +253,7 @@ def __repr__(self) -> str:
275253
doc="Map of column id to upper bound",
276254
),
277255
NestedField(
278-
field_id=131,
279-
name="key_metadata",
280-
field_type=BinaryType(),
281-
required=False,
282-
doc="Encryption key metadata blob",
256+
field_id=131, name="key_metadata", field_type=BinaryType(), required=False, doc="Encryption key metadata blob"
283257
),
284258
NestedField(
285259
field_id=132,
@@ -307,34 +281,28 @@ def __repr__(self) -> str:
307281

308282

309283
def data_file_with_partition(partition_type: StructType, format_version: TableVersion) -> StructType:
310-
data_file_partition_type = StructType(
311-
*[
312-
NestedField(
313-
field_id=field.field_id,
314-
name=field.name,
315-
field_type=field.field_type,
316-
required=field.required,
317-
)
318-
for field in partition_type.fields
319-
]
320-
)
284+
data_file_partition_type = StructType(*[
285+
NestedField(
286+
field_id=field.field_id,
287+
name=field.name,
288+
field_type=field.field_type,
289+
required=field.required,
290+
)
291+
for field in partition_type.fields
292+
])
321293

322-
return StructType(
323-
*[
324-
(
325-
NestedField(
326-
field_id=102,
327-
name="partition",
328-
field_type=data_file_partition_type,
329-
required=True,
330-
doc="Partition data tuple, schema based on the partition spec",
331-
)
332-
if field.field_id == 102
333-
else field
334-
)
335-
for field in DATA_FILE_TYPE[format_version].fields
336-
]
337-
)
294+
return StructType(*[
295+
NestedField(
296+
field_id=102,
297+
name="partition",
298+
field_type=data_file_partition_type,
299+
required=True,
300+
doc="Partition data tuple, schema based on the partition spec",
301+
)
302+
if field.field_id == 102
303+
else field
304+
for field in DATA_FILE_TYPE[format_version].fields
305+
])
338306

339307

340308
class DataFile(Record):
@@ -415,18 +383,14 @@ def __eq__(self, other: Any) -> bool:
415383
),
416384
}
417385

418-
MANIFEST_ENTRY_SCHEMAS_STRUCT = {
419-
format_version: schema.as_struct() for format_version, schema in MANIFEST_ENTRY_SCHEMAS.items()
420-
}
386+
MANIFEST_ENTRY_SCHEMAS_STRUCT = {format_version: schema.as_struct() for format_version, schema in MANIFEST_ENTRY_SCHEMAS.items()}
421387

422388

423389
def manifest_entry_schema_with_data_file(format_version: TableVersion, data_file: StructType) -> Schema:
424-
return Schema(
425-
*[
426-
NestedField(2, "data_file", data_file, required=True) if field.field_id == 2 else field
427-
for field in MANIFEST_ENTRY_SCHEMAS[format_version].fields
428-
]
429-
)
390+
return Schema(*[
391+
NestedField(2, "data_file", data_file, required=True) if field.field_id == 2 else field
392+
for field in MANIFEST_ENTRY_SCHEMAS[format_version].fields
393+
])
430394

431395

432396
class ManifestEntry(Record):
@@ -496,9 +460,7 @@ def update(self, value: Any) -> None:
496460
self._min = min(self._min, value)
497461

498462

499-
def construct_partition_summaries(
500-
spec: PartitionSpec, schema: Schema, partitions: List[Record]
501-
) -> List[PartitionFieldSummary]:
463+
def construct_partition_summaries(spec: PartitionSpec, schema: Schema, partitions: List[Record]) -> List[PartitionFieldSummary]:
502464
types = [field.field_type for field in spec.partition_type(schema).fields]
503465
field_stats = [PartitionFieldStats(field_type) for field_type in types]
504466
for partition_keys in partitions:
@@ -522,9 +484,7 @@ def construct_partition_summaries(
522484
NestedField(512, "added_rows_count", LongType(), required=False),
523485
NestedField(513, "existing_rows_count", LongType(), required=False),
524486
NestedField(514, "deleted_rows_count", LongType(), required=False),
525-
NestedField(
526-
507, "partitions", ListType(508, PARTITION_FIELD_SUMMARY_TYPE, element_required=True), required=False
527-
),
487+
NestedField(507, "partitions", ListType(508, PARTITION_FIELD_SUMMARY_TYPE, element_required=True), required=False),
528488
NestedField(519, "key_metadata", BinaryType(), required=False),
529489
),
530490
2: Schema(
@@ -541,16 +501,12 @@ def construct_partition_summaries(
541501
NestedField(512, "added_rows_count", LongType(), required=True),
542502
NestedField(513, "existing_rows_count", LongType(), required=True),
543503
NestedField(514, "deleted_rows_count", LongType(), required=True),
544-
NestedField(
545-
507, "partitions", ListType(508, PARTITION_FIELD_SUMMARY_TYPE, element_required=True), required=False
546-
),
504+
NestedField(507, "partitions", ListType(508, PARTITION_FIELD_SUMMARY_TYPE, element_required=True), required=False),
547505
NestedField(519, "key_metadata", BinaryType(), required=False),
548506
),
549507
}
550508

551-
MANIFEST_LIST_FILE_STRUCTS = {
552-
format_version: schema.as_struct() for format_version, schema in MANIFEST_LIST_FILE_SCHEMAS.items()
553-
}
509+
MANIFEST_LIST_FILE_STRUCTS = {format_version: schema.as_struct() for format_version, schema in MANIFEST_LIST_FILE_SCHEMAS.items()}
554510

555511

556512
POSITIONAL_DELETE_SCHEMA = Schema(
@@ -669,16 +625,12 @@ def _inherit_from_manifest(entry: ManifestEntry, manifest: ManifestFile) -> Mani
669625

670626
# in v1 tables, the data sequence number is not persisted and can be safely defaulted to 0
671627
# in v2 tables, the data sequence number should be inherited iff the entry status is ADDED
672-
if entry.data_sequence_number is None and (
673-
manifest.sequence_number == 0 or entry.status == ManifestEntryStatus.ADDED
674-
):
628+
if entry.data_sequence_number is None and (manifest.sequence_number == 0 or entry.status == ManifestEntryStatus.ADDED):
675629
entry.data_sequence_number = manifest.sequence_number
676630

677631
# in v1 tables, the file sequence number is not persisted and can be safely defaulted to 0
678632
# in v2 tables, the file sequence number should be inherited iff the entry status is ADDED
679-
if entry.file_sequence_number is None and (
680-
manifest.sequence_number == 0 or entry.status == ManifestEntryStatus.ADDED
681-
):
633+
if entry.file_sequence_number is None and (manifest.sequence_number == 0 or entry.status == ManifestEntryStatus.ADDED):
682634
# Only available in V2, always 0 in V1
683635
entry.file_sequence_number = manifest.sequence_number
684636

@@ -1001,11 +953,7 @@ def __init__(self, output_file: OutputFile, snapshot_id: int, parent_snapshot_id
1001953
super().__init__(
1002954
format_version=1,
1003955
output_file=output_file,
1004-
meta={
1005-
"snapshot-id": str(snapshot_id),
1006-
"parent-snapshot-id": str(parent_snapshot_id),
1007-
"format-version": "1",
1008-
},
956+
meta={"snapshot-id": str(snapshot_id), "parent-snapshot-id": str(parent_snapshot_id), "format-version": "1"},
1009957
)
1010958

1011959
def prepare_manifest(self, manifest_file: ManifestFile) -> ManifestFile:
@@ -1018,9 +966,7 @@ class ManifestListWriterV2(ManifestListWriter):
1018966
_commit_snapshot_id: int
1019967
_sequence_number: int
1020968

1021-
def __init__(
1022-
self, output_file: OutputFile, snapshot_id: int, parent_snapshot_id: Optional[int], sequence_number: int
1023-
):
969+
def __init__(self, output_file: OutputFile, snapshot_id: int, parent_snapshot_id: Optional[int], sequence_number: int):
1024970
super().__init__(
1025971
format_version=2,
1026972
output_file=output_file,
@@ -1072,5 +1018,3 @@ def write_manifest_list(
10721018
return ManifestListWriterV2(output_file, snapshot_id, parent_snapshot_id, sequence_number)
10731019
else:
10741020
raise ValueError(f"Cannot write manifest list for table version: {format_version}")
1075-
1076-

0 commit comments

Comments
 (0)