def add_files_overwrite(
    self,
    file_paths: List[str],
    overwrite_filter: Union[BooleanExpression, str] = ALWAYS_TRUE,
    snapshot_properties: Dict[str, str] = EMPTY_DICT,
) -> None:
    """Shorthand API for adding files as data files and overwriting the table.

    The call is carried out as a ``delete`` using ``overwrite_filter`` followed by a
    fast-append of the given files. ``snapshot_properties`` is passed to both the
    delete and the append.

    Args:
        file_paths: The list of full file paths to be added as data files to the table
        overwrite_filter: ALWAYS_TRUE when you overwrite all the data,
            or a boolean expression (given as an expression object or a string)
            in case of a partial overwrite
        snapshot_properties: Custom properties to be added to the snapshot summary

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    # Same preamble as add_files: make sure a default name mapping is set before
    # the files are registered.
    if self._table.name_mapping() is None:
        self.set_properties(**{TableProperties.DEFAULT_NAME_MAPPING: self._table.schema().name_mapping.model_dump_json()})

    # Resolve every path into a data file *before* staging the delete, so that a
    # missing or unreadable file fails the call while the existing data is still
    # untouched. list() forces evaluation in case the helper yields lazily.
    data_files = list(
        _parquet_files_to_data_files(table_metadata=self._table.metadata, file_paths=file_paths, io=self._table.io)
    )

    # Remove the data selected by the filter, then append the already-resolved files.
    self.delete(delete_filter=overwrite_filter, snapshot_properties=snapshot_properties)
    with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
        for data_file in data_files:
            update_snapshot.append_data_file(data_file)