Skip to content

Commit

Permalink
Enable case_sensitive delete for Transaction.delete, `Table.delete` and `Transaction.overwrite`
Browse files Browse the repository at this point in the history
  • Loading branch information
jiakai-li committed Dec 11, 2024
1 parent 21fae76 commit f484697
Showing 1 changed file with 17 additions and 5 deletions.
22 changes: 17 additions & 5 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ def overwrite(
self,
df: pa.Table,
overwrite_filter: Union[BooleanExpression, str] = ALWAYS_TRUE,
case_sensitive: bool = True,
snapshot_properties: Dict[str, str] = EMPTY_DICT,
) -> None:
"""
Expand All @@ -436,6 +437,7 @@ def overwrite(
df: The Arrow dataframe that will be used to overwrite the table
overwrite_filter: ALWAYS_TRUE when you overwrite all the data,
or a boolean expression in case of a partial overwrite
case_sensitive: A bool that determines whether the provided `overwrite_filter` is case-sensitive
snapshot_properties: Custom properties to be added to the snapshot summary
"""
try:
Expand All @@ -459,7 +461,7 @@ def overwrite(
self.table_metadata.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us
)

self.delete(delete_filter=overwrite_filter, snapshot_properties=snapshot_properties)
self.delete(delete_filter=overwrite_filter, case_sensitive=case_sensitive, snapshot_properties=snapshot_properties)

with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
# skip writing data files if the dataframe is empty
Expand All @@ -470,7 +472,12 @@ def overwrite(
for data_file in data_files:
update_snapshot.append_data_file(data_file)

def delete(self, delete_filter: Union[str, BooleanExpression], snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None:
def delete(
self,
delete_filter: Union[str, BooleanExpression],
case_sensitive: bool = True,
snapshot_properties: Dict[str, str] = EMPTY_DICT,
) -> None:
"""
Shorthand for deleting records from a table.
Expand All @@ -481,6 +488,7 @@ def delete(self, delete_filter: Union[str, BooleanExpression], snapshot_properti
Args:
delete_filter: A boolean expression to delete rows from a table
case_sensitive: A bool that determines whether the provided `delete_filter` is case-sensitive
snapshot_properties: Custom properties to be added to the snapshot summary
"""
from pyiceberg.io.pyarrow import (
Expand All @@ -503,7 +511,7 @@ def delete(self, delete_filter: Union[str, BooleanExpression], snapshot_properti

# Check if there are any files that require an actual rewrite of a data file
if delete_snapshot.rewrites_needed is True:
bound_delete_filter = bind(self.table_metadata.schema(), delete_filter, case_sensitive=True)
bound_delete_filter = bind(self.table_metadata.schema(), delete_filter, case_sensitive)
preserve_row_filter = _expression_to_complementary_pyarrow(bound_delete_filter)

files = self._scan(row_filter=delete_filter).plan_files()
Expand Down Expand Up @@ -1008,17 +1016,21 @@ def overwrite(
tx.overwrite(df=df, overwrite_filter=overwrite_filter, snapshot_properties=snapshot_properties)

def delete(
self, delete_filter: Union[BooleanExpression, str] = ALWAYS_TRUE, snapshot_properties: Dict[str, str] = EMPTY_DICT
self,
delete_filter: Union[BooleanExpression, str] = ALWAYS_TRUE,
case_sensitive: bool = True,
snapshot_properties: Dict[str, str] = EMPTY_DICT,
) -> None:
"""
Shorthand for deleting rows from the table.
Args:
delete_filter: The predicate that is used to remove rows
case_sensitive: A bool that determines whether the provided `delete_filter` is case-sensitive
snapshot_properties: Custom properties to be added to the snapshot summary
"""
with self.transaction() as tx:
tx.delete(delete_filter=delete_filter, snapshot_properties=snapshot_properties)
tx.delete(delete_filter=delete_filter, case_sensitive=case_sensitive, snapshot_properties=snapshot_properties)

def add_files(
self, file_paths: List[str], snapshot_properties: Dict[str, str] = EMPTY_DICT, check_duplicate_files: bool = True
Expand Down

0 comments on commit f484697

Please sign in to comment.