Skip to content

Commit 98f02ff

Browse files
authored
Clean internal implementation of WindowUDF (#8746)
* Clean internal implementation of WindowUDF * fix doc
1 parent 29f23eb commit 98f02ff

File tree

5 files changed

+103
-50
lines changed

5 files changed

+103
-50
lines changed

datafusion-examples/examples/advanced_udwf.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ use datafusion_expr::{
3434
/// a function `partition_evaluator` that returns the `MyPartitionEvaluator` instance.
3535
///
3636
/// To do so, we must implement the `WindowUDFImpl` trait.
37+
#[derive(Debug, Clone)]
3738
struct SmoothItUdf {
3839
signature: Signature,
3940
}

datafusion/core/tests/user_defined/user_defined_window_functions.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,7 @@ impl OddCounter {
471471
}
472472

473473
fn register(ctx: &mut SessionContext, test_state: Arc<TestState>) {
474+
#[derive(Debug, Clone)]
474475
struct SimpleWindowUDF {
475476
signature: Signature,
476477
return_type: DataType,

datafusion/expr/src/expr_fn.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use crate::{ColumnarValue, ScalarUDFImpl, WindowUDF, WindowUDFImpl};
3232
use arrow::datatypes::DataType;
3333
use datafusion_common::{Column, Result};
3434
use std::any::Any;
35+
use std::fmt::Debug;
3536
use std::ops::Not;
3637
use std::sync::Arc;
3738

@@ -1078,6 +1079,17 @@ pub struct SimpleWindowUDF {
10781079
partition_evaluator_factory: PartitionEvaluatorFactory,
10791080
}
10801081

1082+
impl Debug for SimpleWindowUDF {
1083+
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1084+
f.debug_struct("WindowUDF")
1085+
.field("name", &self.name)
1086+
.field("signature", &self.signature)
1087+
.field("return_type", &"<func>")
1088+
.field("partition_evaluator_factory", &"<FUNC>")
1089+
.finish()
1090+
}
1091+
}
1092+
10811093
impl SimpleWindowUDF {
10821094
/// Create a new `SimpleWindowUDF` from a name, input types, return type and
10831095
/// implementation. Implementing [`WindowUDFImpl`] allows more flexibility

datafusion/expr/src/udwf.rs

Lines changed: 88 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -34,49 +34,42 @@ use std::{
3434
///
3535
/// See the documentation on [`PartitionEvaluator`] for more details
3636
///
37+
/// 1. For simple (less performant) use cases, use [`create_udwf`] and [`simple_udwf.rs`].
38+
///
39+
/// 2. For advanced use cases, use [`WindowUDFImpl`] and [`advanced_udwf.rs`].
40+
///
41+
/// # API Note
42+
/// This is a separate struct from `WindowUDFImpl` to maintain backwards
43+
/// compatibility with the older API.
44+
///
3745
/// [`PartitionEvaluator`]: crate::PartitionEvaluator
38-
#[derive(Clone)]
46+
/// [`create_udwf`]: crate::expr_fn::create_udwf
47+
/// [`simple_udwf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udwf.rs
48+
/// [`advanced_udwf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs
49+
#[derive(Debug, Clone)]
3950
pub struct WindowUDF {
40-
/// name
41-
name: String,
42-
/// signature
43-
signature: Signature,
44-
/// Return type
45-
return_type: ReturnTypeFunction,
46-
/// Return the partition evaluator
47-
partition_evaluator_factory: PartitionEvaluatorFactory,
48-
}
49-
50-
impl Debug for WindowUDF {
51-
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
52-
f.debug_struct("WindowUDF")
53-
.field("name", &self.name)
54-
.field("signature", &self.signature)
55-
.field("return_type", &"<func>")
56-
.field("partition_evaluator_factory", &"<func>")
57-
.finish_non_exhaustive()
58-
}
51+
inner: Arc<dyn WindowUDFImpl>,
5952
}
6053

6154
/// Defines how the WindowUDF is shown to users
6255
impl Display for WindowUDF {
6356
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
64-
write!(f, "{}", self.name)
57+
write!(f, "{}", self.name())
6558
}
6659
}
6760

6861
impl PartialEq for WindowUDF {
6962
fn eq(&self, other: &Self) -> bool {
70-
self.name == other.name && self.signature == other.signature
63+
self.name() == other.name() && self.signature() == other.signature()
7164
}
7265
}
7366

7467
impl Eq for WindowUDF {}
7568

7669
impl std::hash::Hash for WindowUDF {
7770
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
78-
self.name.hash(state);
79-
self.signature.hash(state);
71+
self.name().hash(state);
72+
self.signature().hash(state);
8073
}
8174
}
8275

@@ -92,40 +85,31 @@ impl WindowUDF {
9285
return_type: &ReturnTypeFunction,
9386
partition_evaluator_factory: &PartitionEvaluatorFactory,
9487
) -> Self {
95-
Self {
96-
name: name.to_string(),
88+
Self::new_from_impl(WindowUDFLegacyWrapper {
89+
name: name.to_owned(),
9790
signature: signature.clone(),
9891
return_type: return_type.clone(),
9992
partition_evaluator_factory: partition_evaluator_factory.clone(),
100-
}
93+
})
10194
}
10295

10396
/// Create a new `WindowUDF` from a [`WindowUDFImpl`] trait object
10497
///
10598
/// Note this is the same as using the `From` impl (`WindowUDF::from`)
10699
pub fn new_from_impl<F>(fun: F) -> WindowUDF
107100
where
108-
F: WindowUDFImpl + Send + Sync + 'static,
101+
F: WindowUDFImpl + 'static,
109102
{
110-
let arc_fun = Arc::new(fun);
111-
let captured_self = arc_fun.clone();
112-
let return_type: ReturnTypeFunction = Arc::new(move |arg_types| {
113-
let return_type = captured_self.return_type(arg_types)?;
114-
Ok(Arc::new(return_type))
115-
});
116-
117-
let captured_self = arc_fun.clone();
118-
let partition_evaluator_factory: PartitionEvaluatorFactory =
119-
Arc::new(move || captured_self.partition_evaluator());
120-
121103
Self {
122-
name: arc_fun.name().to_string(),
123-
signature: arc_fun.signature().clone(),
124-
return_type: return_type.clone(),
125-
partition_evaluator_factory,
104+
inner: Arc::new(fun),
126105
}
127106
}
128107

108+
/// Return the underlying [`WindowUDFImpl`] trait object for this function
109+
pub fn inner(&self) -> Arc<dyn WindowUDFImpl> {
110+
self.inner.clone()
111+
}
112+
129113
/// creates a [`Expr`] that calls the window function given
130114
/// the `partition_by`, `order_by`, and `window_frame` definition
131115
///
@@ -150,25 +134,29 @@ impl WindowUDF {
150134
}
151135

152136
/// Returns this function's name
137+
///
138+
/// See [`WindowUDFImpl::name`] for more details.
153139
pub fn name(&self) -> &str {
154-
&self.name
140+
self.inner.name()
155141
}
156142

157143
/// Returns this function's signature (what input types are accepted)
144+
///
145+
/// See [`WindowUDFImpl::signature`] for more details.
158146
pub fn signature(&self) -> &Signature {
159-
&self.signature
147+
self.inner.signature()
160148
}
161149

162150
/// Return the type of the function given its input types
151+
///
152+
/// See [`WindowUDFImpl::return_type`] for more details.
163153
pub fn return_type(&self, args: &[DataType]) -> Result<DataType> {
164-
// Old API returns an Arc of the datatype for some reason
165-
let res = (self.return_type)(args)?;
166-
Ok(res.as_ref().clone())
154+
self.inner.return_type(args)
167155
}
168156

169157
/// Return a `PartitionEvaluator` for evaluating this window function
170158
pub fn partition_evaluator_factory(&self) -> Result<Box<dyn PartitionEvaluator>> {
171-
(self.partition_evaluator_factory)()
159+
self.inner.partition_evaluator()
172160
}
173161
}
174162

@@ -198,6 +186,7 @@ where
198186
/// # use datafusion_common::{DataFusionError, plan_err, Result};
199187
/// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame};
200188
/// # use datafusion_expr::{WindowUDFImpl, WindowUDF};
189+
/// #[derive(Debug, Clone)]
201190
/// struct SmoothIt {
202191
/// signature: Signature
203192
/// };
@@ -236,7 +225,7 @@ where
236225
/// WindowFrame::new(false),
237226
/// );
238227
/// ```
239-
pub trait WindowUDFImpl {
228+
pub trait WindowUDFImpl: Debug + Send + Sync {
240229
/// Returns this object as an [`Any`] trait object
241230
fn as_any(&self) -> &dyn Any;
242231

@@ -254,3 +243,52 @@ pub trait WindowUDFImpl {
254243
/// Invoke the function, returning the [`PartitionEvaluator`] instance
255244
fn partition_evaluator(&self) -> Result<Box<dyn PartitionEvaluator>>;
256245
}
246+
247+
/// Implementation of [`WindowUDFImpl`] that wraps the function style pointers
248+
/// of the older API (see <https://github.com/apache/arrow-datafusion/pull/8719>
249+
/// for more details)
250+
pub struct WindowUDFLegacyWrapper {
251+
/// name
252+
name: String,
253+
/// signature
254+
signature: Signature,
255+
/// Return type
256+
return_type: ReturnTypeFunction,
257+
/// Return the partition evaluator
258+
partition_evaluator_factory: PartitionEvaluatorFactory,
259+
}
260+
261+
impl Debug for WindowUDFLegacyWrapper {
262+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
263+
f.debug_struct("WindowUDF")
264+
.field("name", &self.name)
265+
.field("signature", &self.signature)
266+
.field("return_type", &"<func>")
267+
.field("partition_evaluator_factory", &"<func>")
268+
.finish_non_exhaustive()
269+
}
270+
}
271+
272+
impl WindowUDFImpl for WindowUDFLegacyWrapper {
273+
fn as_any(&self) -> &dyn Any {
274+
self
275+
}
276+
277+
fn name(&self) -> &str {
278+
&self.name
279+
}
280+
281+
fn signature(&self) -> &Signature {
282+
&self.signature
283+
}
284+
285+
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
286+
// Old API returns an Arc of the datatype for some reason
287+
let res = (self.return_type)(arg_types)?;
288+
Ok(res.as_ref().clone())
289+
}
290+
291+
fn partition_evaluator(&self) -> Result<Box<dyn PartitionEvaluator>> {
292+
(self.partition_evaluator_factory)()
293+
}
294+
}

datafusion/proto/tests/cases/roundtrip_logical_plan.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,6 +1787,7 @@ fn roundtrip_window() {
17871787
}
17881788
}
17891789

1790+
#[derive(Debug, Clone)]
17901791
struct SimpleWindowUDF {
17911792
signature: Signature,
17921793
}

0 commit comments

Comments
 (0)