Skip to content

Commit

Permalink
Adding add_files_overwrite method
Browse files Browse the repository at this point in the history
  • Loading branch information
enkidulan committed Jul 25, 2024
1 parent ee7e9f0 commit a9d8a1a
Show file tree
Hide file tree
Showing 2 changed files with 440 additions and 1 deletion.
49 changes: 49 additions & 0 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,32 @@ def add_files(self, file_paths: List[str], snapshot_properties: Dict[str, str] =
for data_file in data_files:
update_snapshot.append_data_file(data_file)

def add_files_overwrite(
    self,
    file_paths: List[str],
    overwrite_filter: Union[BooleanExpression, str] = ALWAYS_TRUE,
    snapshot_properties: Dict[str, str] = EMPTY_DICT,
) -> None:
    """Shorthand API for adding files as data files and overwriting the table.

    The given paths are first resolved into data files, then ``self.delete`` is
    called with ``overwrite_filter``, and finally the resolved data files are
    appended through a fast-append snapshot update.

    Args:
        file_paths: The list of full file paths to be added as data files to the table
        overwrite_filter: ALWAYS_TRUE when you overwrite all the data,
            or a boolean expression in case of a partial overwrite
        snapshot_properties: Custom properties to be added to the snapshot summary;
            passed to both the delete and the append step

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    # Same name-mapping bootstrap the sibling add_files performs: when the table has
    # no name mapping yet, store one derived from the current schema.
    if self._table.name_mapping() is None:
        self.set_properties(**{TableProperties.DEFAULT_NAME_MAPPING: self._table.schema().name_mapping.model_dump_json()})

    # Resolve every input path BEFORE issuing the delete. If a path is bad, the error
    # surfaces here, while the existing rows are still untouched, instead of after the
    # delete has already been staged with nothing to replace it.
    # NOTE(review): list() forces evaluation in case the helper is a lazy generator — confirm.
    data_files = list(
        _parquet_files_to_data_files(table_metadata=self._table.metadata, file_paths=file_paths, io=self._table.io)
    )

    self.delete(delete_filter=overwrite_filter, snapshot_properties=snapshot_properties)

    with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
        for data_file in data_files:
            update_snapshot.append_data_file(data_file)

def update_spec(self) -> UpdateSpec:
"""Create a new UpdateSpec to update the partitioning of the table.
Expand Down Expand Up @@ -1613,6 +1639,29 @@ def add_files(self, file_paths: List[str], snapshot_properties: Dict[str, str] =
with self.transaction() as tx:
tx.add_files(file_paths=file_paths, snapshot_properties=snapshot_properties)

def add_files_overwrite(
    self,
    file_paths: List[str],
    overwrite_filter: Union[BooleanExpression, str] = ALWAYS_TRUE,
    snapshot_properties: Dict[str, str] = EMPTY_DICT,
) -> None:
    """Shorthand API for adding files as data files and overwriting the table.

    Opens a transaction on this table and forwards all arguments to the
    transaction's ``add_files_overwrite``.

    Args:
        file_paths: The list of full file paths to be added as data files to the table
        overwrite_filter: ALWAYS_TRUE when you overwrite all the data,
            or a boolean expression in case of a partial overwrite
        snapshot_properties: Custom properties to be added to the snapshot summary

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    with self.transaction() as txn:
        txn.add_files_overwrite(
            file_paths=file_paths,
            overwrite_filter=overwrite_filter,
            snapshot_properties=snapshot_properties,
        )

def update_spec(self, case_sensitive: bool = True) -> UpdateSpec:
    """Create an UpdateSpec bound to a new transaction on this table.

    The transaction is constructed with ``autocommit=True``.

    Args:
        case_sensitive: Forwarded unchanged to ``UpdateSpec``.

    Returns:
        The newly constructed UpdateSpec.
    """
    autocommit_txn = Transaction(self, autocommit=True)
    return UpdateSpec(autocommit_txn, case_sensitive=case_sensitive)

Expand Down
Loading

0 comments on commit a9d8a1a

Please sign in to comment.