@@ -402,6 +402,46 @@ class ManifestEntry(Record):
402
402
def __init__(self, *data: Any, **named_data: Any) -> None:
    """Initialise the entry, defaulting ``struct`` to the default read version's schema.

    Args:
        *data: Positional field values, forwarded unchanged to the parent
            initialiser.
        **named_data: Keyword field values. A caller-supplied ``struct``
            takes precedence over the default because it is merged last.
    """
    default_struct = MANIFEST_ENTRY_SCHEMAS_STRUCT[DEFAULT_READ_VERSION]
    init_kwargs = {"struct": default_struct, **named_data}
    super().__init__(*data, **init_kwargs)
404
404
405
+ def _wrap (
406
+ self ,
407
+ new_status : ManifestEntryStatus ,
408
+ new_snapshot_id : int ,
409
+ new_data_sequence_number : Optional [int ],
410
+ new_file_sequence_number : Optional [int ],
411
+ new_file : DataFile ,
412
+ ) -> ManifestEntry :
413
+ self .status = new_status
414
+ self .snapshot_id = new_snapshot_id
415
+ self .data_sequence_number = new_data_sequence_number
416
+ self .file_sequence_number = new_file_sequence_number
417
+ self .data_file = new_file
418
+ return self
419
+
420
def _wrap_append(self, new_snapshot_id: int, new_data_sequence_number: Optional[int], new_file: DataFile) -> ManifestEntry:
    """Re-point this entry at ``new_file`` with status ``ADDED``.

    The file sequence number is always reset to ``None`` here.

    Args:
        new_snapshot_id: Snapshot id to record on the entry.
        new_data_sequence_number: Data sequence number to record, or ``None``.
        new_file: Data file the entry should describe.

    Returns:
        This same instance, mutated in place.
    """
    return self._wrap(
        new_status=ManifestEntryStatus.ADDED,
        new_snapshot_id=new_snapshot_id,
        new_data_sequence_number=new_data_sequence_number,
        new_file_sequence_number=None,
        new_file=new_file,
    )
422
+
423
def _wrap_delete(
    self,
    new_snapshot_id: int,
    new_data_sequence_number: Optional[int],
    new_file_sequence_number: Optional[int],
    new_file: DataFile,
) -> ManifestEntry:
    """Re-point this entry at ``new_file`` with status ``DELETED``.

    Args:
        new_snapshot_id: Snapshot id to record on the entry.
        new_data_sequence_number: Data sequence number to record, or ``None``.
        new_file_sequence_number: File sequence number to record, or ``None``.
        new_file: Data file the entry should describe.

    Returns:
        This same instance, mutated in place.
    """
    return self._wrap(
        new_status=ManifestEntryStatus.DELETED,
        new_snapshot_id=new_snapshot_id,
        new_data_sequence_number=new_data_sequence_number,
        new_file_sequence_number=new_file_sequence_number,
        new_file=new_file,
    )
433
+
434
def _wrap_existing(
    self,
    new_snapshot_id: int,
    new_data_sequence_number: Optional[int],
    new_file_sequence_number: Optional[int],
    new_file: DataFile,
) -> ManifestEntry:
    """Re-point this entry at ``new_file`` with status ``EXISTING``.

    Args:
        new_snapshot_id: Snapshot id to record on the entry.
        new_data_sequence_number: Data sequence number to record, or ``None``.
        new_file_sequence_number: File sequence number to record, or ``None``.
        new_file: Data file the entry should describe.

    Returns:
        This same instance, mutated in place.
    """
    return self._wrap(
        new_status=ManifestEntryStatus.EXISTING,
        new_snapshot_id=new_snapshot_id,
        new_data_sequence_number=new_data_sequence_number,
        new_file_sequence_number=new_file_sequence_number,
        new_file=new_file,
    )
444
+
405
445
406
446
PARTITION_FIELD_SUMMARY_TYPE = StructType (
407
447
NestedField (509 , "contains_null" , BooleanType (), required = True ),
@@ -654,6 +694,7 @@ class ManifestWriter(ABC):
654
694
_deleted_rows : int
655
695
_min_data_sequence_number : Optional [int ]
656
696
_partitions : List [Record ]
697
+ _reused_entry_wrapper : ManifestEntry
657
698
658
699
def __init__ (
659
700
self , spec : PartitionSpec , schema : Schema , output_file : OutputFile , snapshot_id : int , meta : Dict [str , str ] = EMPTY_DICT
@@ -673,6 +714,7 @@ def __init__(
673
714
self ._deleted_rows = 0
674
715
self ._min_data_sequence_number = None
675
716
self ._partitions = []
717
+ self ._reused_entry_wrapper = ManifestEntry ()
676
718
677
719
def __enter__ (self ) -> ManifestWriter :
678
720
"""Open the writer."""
@@ -763,6 +805,31 @@ def add_entry(self, entry: ManifestEntry) -> ManifestWriter:
763
805
self ._writer .write_block ([self .prepare_entry (entry )])
764
806
return self
765
807
808
def add(self, entry: ManifestEntry) -> ManifestWriter:
    """Record ``entry``'s data file as an append under this writer's snapshot id.

    The entry's data sequence number is carried over only when it is set
    and non-negative; otherwise ``None`` is recorded instead. The caller's
    ``entry`` is not modified: its values are copied onto the writer's
    shared wrapper entry, which is then handed to ``add_entry``.

    Args:
        entry: Source entry supplying the data file and, possibly, a data
            sequence number.

    Returns:
        This writer, so calls can be chained.
    """
    candidate = entry.data_sequence_number
    carried_sequence_number = candidate if (candidate is not None and candidate >= 0) else None
    appended = self._reused_entry_wrapper._wrap_append(self._snapshot_id, carried_sequence_number, entry.data_file)
    self.add_entry(appended)
    return self
816
+
817
def delete(self, entry: ManifestEntry) -> ManifestWriter:
    """Record ``entry``'s data file as deleted under this writer's snapshot id.

    Both sequence numbers are taken from ``entry`` unchanged. The caller's
    ``entry`` is not modified: its values are copied onto the writer's
    shared wrapper entry, which is then handed to ``add_entry``.

    Args:
        entry: Source entry supplying the data file and sequence numbers.

    Returns:
        This writer, so calls can be chained.
    """
    shared_wrapper = self._reused_entry_wrapper
    deleted = shared_wrapper._wrap_delete(
        self._snapshot_id,
        entry.data_sequence_number,
        entry.file_sequence_number,
        entry.data_file,
    )
    self.add_entry(deleted)
    return self
824
+
825
def existing(self, entry: ManifestEntry) -> ManifestWriter:
    """Record ``entry``'s data file as existing under this writer's snapshot id.

    Both sequence numbers are taken from ``entry`` unchanged. The caller's
    ``entry`` is not modified: its values are copied onto the writer's
    shared wrapper entry, which is then handed to ``add_entry``.

    Args:
        entry: Source entry supplying the data file and sequence numbers.

    Returns:
        This writer, so calls can be chained.
    """
    shared_wrapper = self._reused_entry_wrapper
    carried_over = shared_wrapper._wrap_existing(
        self._snapshot_id,
        entry.data_sequence_number,
        entry.file_sequence_number,
        entry.data_file,
    )
    self.add_entry(carried_over)
    return self
832
+
766
833
767
834
class ManifestWriterV1 (ManifestWriter ):
768
835
def __init__ (self , spec : PartitionSpec , schema : Schema , output_file : OutputFile , snapshot_id : int ):
0 commit comments