From b69ad8e70fc5b50b90b8f1b0983f728e6fc27c0b Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Wed, 5 Nov 2025 13:12:53 +0100 Subject: [PATCH 1/4] feat(let): add strftime function --- .../xorq/backends/let/datafusion/compiler.py | 4 +++- python/xorq/backends/let/datafusion/udfs.py | 12 ++++++++++++ .../xorq/backends/let/tests/test_temporal.py | 18 ++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/python/xorq/backends/let/datafusion/compiler.py b/python/xorq/backends/let/datafusion/compiler.py index 8531d048a..730a9b3fa 100644 --- a/python/xorq/backends/let/datafusion/compiler.py +++ b/python/xorq/backends/let/datafusion/compiler.py @@ -93,7 +93,6 @@ class DataFusionCompiler(SQLGlotCompiler): ops.CountDistinctStar, ops.DateDelta, ops.RowID, - ops.Strftime, ops.TimeDelta, ops.TimestampDelta, ) @@ -664,5 +663,8 @@ def visit_ArrayAny(self, op, *, arg): ), ) + def visit_Strftime(self, op, *, arg, format_str): + return self.f.temporal_strftime(arg, format_str) + compiler = DataFusionCompiler() diff --git a/python/xorq/backends/let/datafusion/udfs.py b/python/xorq/backends/let/datafusion/udfs.py index 1c2035433..18f601e5a 100644 --- a/python/xorq/backends/let/datafusion/udfs.py +++ b/python/xorq/backends/let/datafusion/udfs.py @@ -129,3 +129,15 @@ def regex_split(s: str, pattern: str) -> list[str]: ) pattern = patterns[0].as_py() return pc.split_pattern_regex(s, pattern) + + +def temporal_strftime(array: dt.Timestamp(scale=9), pattern: str) -> dt.string: + patterns = pattern.unique() + + if len(patterns) != 1: + raise com.XorqError( + "Only a single scalar pattern is supported for DataFusion strftime" + ) + + pattern = patterns[0].as_py() + return pc.strftime(array, pattern) diff --git a/python/xorq/backends/let/tests/test_temporal.py b/python/xorq/backends/let/tests/test_temporal.py index b1adabdea..30ac4e316 100644 --- a/python/xorq/backends/let/tests/test_temporal.py +++ b/python/xorq/backends/let/tests/test_temporal.py @@ -158,6 +158,24 @@ def test_timestamp_extract_week_of_year(alltypes, alltypes_df): assert_series_equal(result, expected) +@pytest.mark.parametrize( + ("expr_fn", "pandas_pattern"), + [ + param( + lambda t: t.timestamp_col.strftime("%Y%m%d").name("formatted"), + "%Y%m%d", + id="literal_format_str", + ), + ], +) +def test_strftime(alltypes, alltypes_df, expr_fn, pandas_pattern): + expr = expr_fn(alltypes) + expected = alltypes_df.timestamp_col.dt.strftime(pandas_pattern).rename("formatted") + + result = expr.execute() + assert_series_equal(result, expected) + + PANDAS_UNITS = { "m": "Min", "ms": "L", From 6073e8a5639f9ae8048679ff6dc5abb2446cf8e3 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Thu, 6 Nov 2025 09:22:25 +0100 Subject: [PATCH 2/4] chore: use dt.string --- python/xorq/backends/let/datafusion/udfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/xorq/backends/let/datafusion/udfs.py b/python/xorq/backends/let/datafusion/udfs.py index 18f601e5a..079a5ab00 100644 --- a/python/xorq/backends/let/datafusion/udfs.py +++ b/python/xorq/backends/let/datafusion/udfs.py @@ -131,7 +131,7 @@ def regex_split(s: str, pattern: str) -> list[str]: return pc.split_pattern_regex(s, pattern) -def temporal_strftime(array: dt.Timestamp(scale=9), pattern: str) -> dt.string: +def temporal_strftime(array: dt.Timestamp(scale=9), pattern: dt.string) -> dt.string: patterns = pattern.unique() if len(patterns) != 1: From cce7a08c27c8a88720bbf7ecd84621c6b3813615 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Thu, 6 Nov 2025 09:32:14 +0100 Subject: [PATCH 3/4] fix: cast to StringArray, so it is clear the type expected --- python/xorq/backends/let/datafusion/udfs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/xorq/backends/let/datafusion/udfs.py b/python/xorq/backends/let/datafusion/udfs.py index 079a5ab00..b7b1c82b1 100644 --- a/python/xorq/backends/let/datafusion/udfs.py +++ b/python/xorq/backends/let/datafusion/udfs.py @@ -1,6 +1,7 @@ from __future__ import annotations import itertools +import typing from urllib.parse import parse_qs, urlsplit import pyarrow as pa @@ -132,12 +133,11 @@ def regex_split(s: str, pattern: str) -> list[str]: def temporal_strftime(array: dt.Timestamp(scale=9), pattern: dt.string) -> dt.string: - patterns = pattern.unique() + pattern, *_rest = typing.cast(pa.StringArray, pattern).unique().to_pylist() - if len(patterns) != 1: + if len(_rest) > 0: raise com.XorqError( "Only a single scalar pattern is supported for DataFusion strftime" ) - pattern = patterns[0].as_py() return pc.strftime(array, pattern) From 6659431d7291d3094df76264376f02a6421f127a Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Tue, 11 Nov 2025 15:09:29 +0100 Subject: [PATCH 4/4] ref: use truth value --- python/xorq/backends/let/datafusion/udfs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/xorq/backends/let/datafusion/udfs.py b/python/xorq/backends/let/datafusion/udfs.py index b7b1c82b1..c5df5edf2 100644 --- a/python/xorq/backends/let/datafusion/udfs.py +++ b/python/xorq/backends/let/datafusion/udfs.py @@ -133,9 +133,9 @@ def regex_split(s: str, pattern: str) -> list[str]: def temporal_strftime(array: dt.Timestamp(scale=9), pattern: dt.string) -> dt.string: - pattern, *_rest = typing.cast(pa.StringArray, pattern).unique().to_pylist() + pattern, *rest = typing.cast(pa.StringArray, pattern).unique().to_pylist() - if len(_rest) > 0: + if rest: raise com.XorqError( "Only a single scalar pattern is supported for DataFusion strftime" )