@@ -569,6 +569,27 @@ def add_files(self, file_paths: List[str], snapshot_properties: Dict[str, str] =
569
569
for data_file in data_files :
570
570
update_snapshot .append_data_file (data_file )
571
571
572
def add_files_overwrite(self, file_paths: List[str], snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None:
    """
    Shorthand API for adding files as data files and overwriting the table.

    The given files are resolved into data files first; only then is a delete
    with the ``ALWAYS_TRUE`` filter staged, followed by a fast-append of the
    resolved data files.

    Args:
        file_paths: The list of full file paths to be added as data files to the table
        snapshot_properties: Custom properties to be added to the snapshot summary

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    if self._table.name_mapping() is None:
        self.set_properties(**{TableProperties.DEFAULT_NAME_MAPPING: self._table.schema().name_mapping.model_dump_json()})

    # Resolve every path BEFORE staging the destructive delete. The helper may
    # hand back a lazy iterable, in which case a bad path would otherwise only
    # surface while appending, i.e. after the delete has already been queued.
    data_files = list(
        _parquet_files_to_data_files(table_metadata=self._table.metadata, file_paths=file_paths, io=self._table.io)
    )

    # Same snapshot properties are attached to both the delete and the append.
    self.delete(delete_filter=ALWAYS_TRUE, snapshot_properties=snapshot_properties)
    with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
        for data_file in data_files:
            update_snapshot.append_data_file(data_file)
592
+
572
593
def update_spec (self ) -> UpdateSpec :
573
594
"""Create a new UpdateSpec to update the partitioning of the table.
574
595
@@ -1480,6 +1501,20 @@ def add_files(self, file_paths: List[str], snapshot_properties: Dict[str, str] =
1480
1501
with self .transaction () as tx :
1481
1502
tx .add_files (file_paths = file_paths , snapshot_properties = snapshot_properties )
1482
1503
1504
def add_files_overwrite(self, file_paths: List[str], snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None:
    """
    Shorthand API for adding files as data files and overwriting the table.

    Opens a transaction on this table and delegates to the transaction's
    ``add_files_overwrite`` with the same arguments.

    Args:
        file_paths: The list of full file paths to be added as data files to the table
        snapshot_properties: Custom properties to be added to the snapshot summary

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    with self.transaction() as transaction:
        transaction.add_files_overwrite(
            file_paths=file_paths,
            snapshot_properties=snapshot_properties,
        )
1517
+
1483
1518
def update_spec(self, case_sensitive: bool = True) -> UpdateSpec:
    """Create an UpdateSpec backed by a new autocommit transaction on this table.

    Args:
        case_sensitive: Forwarded unchanged to the UpdateSpec constructor.

    Returns:
        The newly constructed UpdateSpec.
    """
    autocommit_transaction = Transaction(self, autocommit=True)
    return UpdateSpec(autocommit_transaction, case_sensitive=case_sensitive)
1485
1520
@@ -3273,9 +3308,9 @@ def fast_append(self) -> FastAppendFiles:
3273
3308
def overwrite (self , commit_uuid : Optional [uuid .UUID ] = None ) -> OverwriteFiles :
3274
3309
return OverwriteFiles (
3275
3310
commit_uuid = commit_uuid ,
3276
- operation = Operation . OVERWRITE
3277
- if self ._transaction .table_metadata .current_snapshot () is not None
3278
- else Operation . APPEND ,
3311
+ operation = (
3312
+ Operation . OVERWRITE if self ._transaction .table_metadata .current_snapshot () is not None else Operation . APPEND
3313
+ ) ,
3279
3314
transaction = self ._transaction ,
3280
3315
io = self ._io ,
3281
3316
snapshot_properties = self ._snapshot_properties ,
@@ -3665,12 +3700,16 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
3665
3700
"null_value_count" : null_value_counts .get (field .field_id ),
3666
3701
"nan_value_count" : nan_value_counts .get (field .field_id ),
3667
3702
# Makes them readable
3668
- "lower_bound" : from_bytes (field .field_type , lower_bound )
3669
- if (lower_bound := lower_bounds .get (field .field_id ))
3670
- else None ,
3671
- "upper_bound" : from_bytes (field .field_type , upper_bound )
3672
- if (upper_bound := upper_bounds .get (field .field_id ))
3673
- else None ,
3703
+ "lower_bound" : (
3704
+ from_bytes (field .field_type , lower_bound )
3705
+ if (lower_bound := lower_bounds .get (field .field_id ))
3706
+ else None
3707
+ ),
3708
+ "upper_bound" : (
3709
+ from_bytes (field .field_type , upper_bound )
3710
+ if (upper_bound := upper_bounds .get (field .field_id ))
3711
+ else None
3712
+ ),
3674
3713
}
3675
3714
for field in self .tbl .metadata .schema ().fields
3676
3715
}
@@ -3905,9 +3944,11 @@ def _partition_summaries_to_rows(
3905
3944
"added_delete_files_count" : manifest .added_files_count if is_delete_file else 0 ,
3906
3945
"existing_delete_files_count" : manifest .existing_files_count if is_delete_file else 0 ,
3907
3946
"deleted_delete_files_count" : manifest .deleted_files_count if is_delete_file else 0 ,
3908
- "partition_summaries" : _partition_summaries_to_rows (specs [manifest .partition_spec_id ], manifest .partitions )
3909
- if manifest .partitions
3910
- else [],
3947
+ "partition_summaries" : (
3948
+ _partition_summaries_to_rows (specs [manifest .partition_spec_id ], manifest .partitions )
3949
+ if manifest .partitions
3950
+ else []
3951
+ ),
3911
3952
})
3912
3953
3913
3954
return pa .Table .from_pylist (
0 commit comments