Skip to content

Commit 4feb4b9

Browse files
committed
add sql expression for repartition
1 parent 29bcb0f commit 4feb4b9

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

python/datafusion/dataframe.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -914,17 +914,20 @@ def repartition(self, num: int) -> DataFrame:
914914
"""
915915
return DataFrame(self.df.repartition(num))
916916

917-
def repartition_by_hash(self, *exprs: Expr, num: int) -> DataFrame:
917+
def repartition_by_hash(self, *exprs: Expr | str, num: int) -> DataFrame:
918918
"""Repartition a DataFrame using a hash partitioning scheme.
919919
920920
Args:
921-
exprs: Expressions to evaluate and perform hashing on.
921+
exprs: Expressions or a SQL expression string to evaluate
922+
and perform hashing on.
922923
num: Number of partitions to repartition the DataFrame into.
923924
924925
Returns:
925926
Repartitioned DataFrame.
926927
"""
927-
exprs = [expr.expr for expr in exprs]
928+
exprs = [self.parse_sql_expr(e) if isinstance(e, str) else e for e in exprs]
929+
exprs = expr_list_to_raw_expr_list(exprs)
930+
928931
return DataFrame(self.df.repartition_by_hash(*exprs, num=num))
929932

930933
def union(self, other: DataFrame, distinct: bool = False) -> DataFrame:

python/tests/test_dataframe.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1613,6 +1613,11 @@ def test_repartition(df):
16131613
def test_repartition_by_hash(df):
16141614
df.repartition_by_hash(column("a"), num=2)
16151615

1616+
def test_repartition_by_hash_sql_expression(df):
1617+
df.repartition_by_hash("a", num=2)
1618+
1619+
def test_repartition_by_hash_mix(df):
1620+
df.repartition_by_hash(column("a"), "b", num=2)
16161621

16171622
def test_intersect():
16181623
ctx = SessionContext()

0 commit comments

Comments
 (0)