Skip to content

Commit 383f279

Browse files
Chen-Yuan-Lai and Cheng-Yuan-Lai authored
doc-gen: migrate scalar functions (array) documentation 2/3 (#13929)
* doc-gen: migrate scalar functions (array) documentation 2/3
* fix: import doc and macro, fix typo and update function docs

---------

Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
1 parent 2b4e200 commit 383f279

File tree

9 files changed

+243
-342
lines changed

9 files changed

+243
-342
lines changed

datafusion/functions-nested/src/flatten.rs

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ use datafusion_common::cast::{
2626
as_generic_list_array, as_large_list_array, as_list_array,
2727
};
2828
use datafusion_common::{exec_err, Result};
29-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
3029
use datafusion_expr::{
3130
ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature,
3231
TypeSignature, Volatility,
3332
};
33+
use datafusion_macros::user_doc;
3434
use std::any::Any;
35-
use std::sync::{Arc, OnceLock};
35+
use std::sync::Arc;
3636

3737
make_udf_expr_and_func!(
3838
Flatten,
@@ -42,6 +42,23 @@ make_udf_expr_and_func!(
4242
flatten_udf
4343
);
4444

45+
#[user_doc(
46+
doc_section(label = "Array Functions"),
47+
description = "Converts an array of arrays to a flat array.\n\n- Applies to any depth of nested arrays\n- Does not change arrays that are already flat\n\nThe flattened array contains all the elements from all source arrays.",
48+
syntax_example = "flatten(array)",
49+
sql_example = r#"```sql
50+
> select flatten([[1, 2], [3, 4]]);
51+
+------------------------------+
52+
| flatten(List([1,2], [3,4])) |
53+
+------------------------------+
54+
| [1, 2, 3, 4] |
55+
+------------------------------+
56+
```"#,
57+
argument(
58+
name = "array",
59+
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
60+
)
61+
)]
4562
#[derive(Debug)]
4663
pub struct Flatten {
4764
signature: Signature,
@@ -118,35 +135,9 @@ impl ScalarUDFImpl for Flatten {
118135
}
119136

120137
fn documentation(&self) -> Option<&Documentation> {
121-
Some(get_flatten_doc())
138+
self.doc()
122139
}
123140
}
124-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
125-
126-
fn get_flatten_doc() -> &'static Documentation {
127-
DOCUMENTATION.get_or_init(|| {
128-
Documentation::builder(
129-
DOC_SECTION_ARRAY,
130-
"Converts an array of arrays to a flat array.\n\n- Applies to any depth of nested arrays\n- Does not change arrays that are already flat\n\nThe flattened array contains all the elements from all source arrays.",
131-
132-
"flatten(array)")
133-
.with_sql_example(
134-
r#"```sql
135-
> select flatten([[1, 2], [3, 4]]);
136-
+------------------------------+
137-
| flatten(List([1,2], [3,4])) |
138-
+------------------------------+
139-
| [1, 2, 3, 4] |
140-
+------------------------------+
141-
```"#,
142-
)
143-
.with_argument(
144-
"array",
145-
"Array expression. Can be a constant, column, or function, and any combination of array operators.",
146-
)
147-
.build()
148-
})
149-
}
150141

151142
/// Flatten SQL function
152143
pub fn flatten_inner(args: &[ArrayRef]) -> Result<ArrayRef> {

datafusion/functions-nested/src/length.rs

Lines changed: 21 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@ use arrow_schema::DataType;
2525
use arrow_schema::DataType::{FixedSizeList, LargeList, List, UInt64};
2626
use datafusion_common::cast::{as_generic_list_array, as_int64_array};
2727
use datafusion_common::{exec_err, internal_datafusion_err, plan_err, Result};
28-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
2928
use datafusion_expr::{
3029
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
3130
};
3231
use datafusion_functions::{downcast_arg, downcast_named_arg};
32+
use datafusion_macros::user_doc;
3333
use std::any::Any;
34-
use std::sync::{Arc, OnceLock};
34+
use std::sync::Arc;
3535

3636
make_udf_expr_and_func!(
3737
ArrayLength,
@@ -41,6 +41,24 @@ make_udf_expr_and_func!(
4141
array_length_udf
4242
);
4343

44+
#[user_doc(
45+
doc_section(label = "Array Functions"),
46+
description = "Returns the length of the array dimension.",
47+
syntax_example = "array_length(array, dimension)",
48+
sql_example = r#"```sql
49+
> select array_length([1, 2, 3, 4, 5], 1);
50+
+-------------------------------------------+
51+
| array_length(List([1,2,3,4,5]), 1) |
52+
+-------------------------------------------+
53+
| 5 |
54+
+-------------------------------------------+
55+
```"#,
56+
argument(
57+
name = "array",
58+
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
59+
),
60+
argument(name = "dimension", description = "Array dimension.")
61+
)]
4462
#[derive(Debug)]
4563
pub struct ArrayLength {
4664
signature: Signature,
@@ -96,41 +114,10 @@ impl ScalarUDFImpl for ArrayLength {
96114
}
97115

98116
fn documentation(&self) -> Option<&Documentation> {
99-
Some(get_array_length_doc())
117+
self.doc()
100118
}
101119
}
102120

103-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
104-
105-
fn get_array_length_doc() -> &'static Documentation {
106-
DOCUMENTATION.get_or_init(|| {
107-
Documentation::builder(
108-
DOC_SECTION_ARRAY,
109-
"Returns the length of the array dimension.",
110-
111-
"array_length(array, dimension)")
112-
.with_sql_example(
113-
r#"```sql
114-
> select array_length([1, 2, 3, 4, 5], 1);
115-
+-------------------------------------------+
116-
| array_length(List([1,2,3,4,5]), 1) |
117-
+-------------------------------------------+
118-
| 5 |
119-
+-------------------------------------------+
120-
```"#,
121-
)
122-
.with_argument(
123-
"array",
124-
"Array expression. Can be a constant, column, or function, and any combination of array operators.",
125-
)
126-
.with_argument(
127-
"dimension",
128-
"Array dimension.",
129-
)
130-
.build()
131-
})
132-
}
133-
134121
/// Array_length SQL function
135122
pub fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
136123
if args.len() != 1 && args.len() != 2 {

datafusion/functions-nested/src/make_array.rs

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@
1818
//! [`ScalarUDFImpl`] definitions for `make_array` function.
1919
2020
use std::any::Any;
21-
use std::sync::{Arc, OnceLock};
21+
use std::sync::Arc;
2222
use std::vec;
2323

24+
use crate::utils::make_scalar_function;
2425
use arrow::array::{ArrayData, Capacities, MutableArrayData};
2526
use arrow_array::{
2627
new_null_array, Array, ArrayRef, GenericListArray, NullArray, OffsetSizeTrait,
@@ -33,13 +34,11 @@ use datafusion_common::{plan_err, Result};
3334
use datafusion_expr::binary::{
3435
try_type_union_resolution_with_struct, type_union_resolution,
3536
};
36-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
3737
use datafusion_expr::TypeSignature;
3838
use datafusion_expr::{
3939
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
4040
};
41-
42-
use crate::utils::make_scalar_function;
41+
use datafusion_macros::user_doc;
4342

4443
make_udf_expr_and_func!(
4544
MakeArray,
@@ -48,6 +47,23 @@ make_udf_expr_and_func!(
4847
make_array_udf
4948
);
5049

50+
#[user_doc(
51+
doc_section(label = "Array Functions"),
52+
description = "Returns an array using the specified input expressions.",
53+
syntax_example = "make_array(expression1[, ..., expression_n])",
54+
sql_example = r#"```sql
55+
> select make_array(1, 2, 3, 4, 5);
56+
+----------------------------------------------------------+
57+
| make_array(Int64(1),Int64(2),Int64(3),Int64(4),Int64(5)) |
58+
+----------------------------------------------------------+
59+
| [1, 2, 3, 4, 5] |
60+
+----------------------------------------------------------+
61+
```"#,
62+
argument(
63+
name = "expression_n",
64+
description = "Expression to include in the output array. Can be a constant, column, or function, and any combination of arithmetic or string operators."
65+
)
66+
)]
5167
#[derive(Debug)]
5268
pub struct MakeArray {
5369
signature: Signature,
@@ -139,37 +155,10 @@ impl ScalarUDFImpl for MakeArray {
139155
}
140156

141157
fn documentation(&self) -> Option<&Documentation> {
142-
Some(get_make_array_doc())
158+
self.doc()
143159
}
144160
}
145161

146-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
147-
148-
fn get_make_array_doc() -> &'static Documentation {
149-
DOCUMENTATION.get_or_init(|| {
150-
Documentation::builder(
151-
DOC_SECTION_ARRAY,
152-
"Returns an array using the specified input expressions.",
153-
154-
"make_array(expression1[, ..., expression_n])")
155-
.with_sql_example(
156-
r#"```sql
157-
> select make_array(1, 2, 3, 4, 5);
158-
+----------------------------------------------------------+
159-
| make_array(Int64(1),Int64(2),Int64(3),Int64(4),Int64(5)) |
160-
+----------------------------------------------------------+
161-
| [1, 2, 3, 4, 5] |
162-
+----------------------------------------------------------+
163-
```"#,
164-
)
165-
.with_argument(
166-
"expression_n",
167-
"Expression to include in the output array. Can be a constant, column, or function, and any combination of arithmetic or string operators.",
168-
)
169-
.build()
170-
})
171-
}
172-
173162
// Empty array is a special case that is useful for many other array functions
174163
pub(super) fn empty_array_type() -> DataType {
175164
List(Arc::new(Field::new_list_field(DataType::Int64, true)))

datafusion/functions-nested/src/map.rs

Lines changed: 47 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use std::any::Any;
1919
use std::collections::VecDeque;
20-
use std::sync::{Arc, OnceLock};
20+
use std::sync::Arc;
2121

2222
use arrow::array::ArrayData;
2323
use arrow_array::{Array, ArrayRef, MapArray, OffsetSizeTrait, StructArray};
@@ -27,10 +27,10 @@ use arrow_schema::{DataType, Field, SchemaBuilder};
2727
use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays};
2828
use datafusion_common::{exec_err, HashSet, Result, ScalarValue};
2929
use datafusion_expr::expr::ScalarFunction;
30-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP;
3130
use datafusion_expr::{
3231
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
3332
};
33+
use datafusion_macros::user_doc;
3434

3535
use crate::make_array::make_array;
3636

@@ -181,6 +181,50 @@ fn make_map_batch_internal(
181181
})
182182
}
183183

184+
#[user_doc(
185+
doc_section(label = "Map Functions"),
186+
description = "Returns an Arrow map with the specified key-value pairs.\n\n\
187+
The `make_map` function creates a map from two lists: one for keys and one for values. Each key must be unique and non-null.",
188+
syntax_example = "map(key, value)\nmap(key: value)\nmake_map(['key1', 'key2'], ['value1', 'value2'])",
189+
sql_example = r#"
190+
```sql
191+
-- Using map function
192+
SELECT MAP('type', 'test');
193+
----
194+
{type: test}
195+
196+
SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]);
197+
----
198+
{POST: 41, HEAD: 33, PATCH: }
199+
200+
SELECT MAP([[1,2], [3,4]], ['a', 'b']);
201+
----
202+
{[1, 2]: a, [3, 4]: b}
203+
204+
SELECT MAP { 'a': 1, 'b': 2 };
205+
----
206+
{a: 1, b: 2}
207+
208+
-- Using make_map function
209+
SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]);
210+
----
211+
{POST: 41, HEAD: 33}
212+
213+
SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]);
214+
----
215+
{key1: value1, key2: }
216+
```"#,
217+
argument(
218+
name = "key",
219+
description = "For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\
220+
For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null."
221+
),
222+
argument(
223+
name = "value",
224+
description = "For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\
225+
For `make_map`: The list of values to be mapped to the corresponding keys."
226+
)
227+
)]
184228
#[derive(Debug)]
185229
pub struct MapFunc {
186230
signature: Signature,
@@ -247,65 +291,10 @@ impl ScalarUDFImpl for MapFunc {
247291
}
248292

249293
fn documentation(&self) -> Option<&Documentation> {
250-
Some(get_map_doc())
294+
self.doc()
251295
}
252296
}
253297

254-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
255-
256-
fn get_map_doc() -> &'static Documentation {
257-
DOCUMENTATION.get_or_init(|| {
258-
Documentation::builder(
259-
DOC_SECTION_MAP,
260-
"Returns an Arrow map with the specified key-value pairs.\n\n\
261-
The `make_map` function creates a map from two lists: one for keys and one for values. Each key must be unique and non-null.",
262-
263-
"map(key, value)\nmap(key: value)\nmake_map(['key1', 'key2'], ['value1', 'value2'])"
264-
)
265-
.with_sql_example(
266-
r#"
267-
```sql
268-
-- Using map function
269-
SELECT MAP('type', 'test');
270-
----
271-
{type: test}
272-
273-
SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]);
274-
----
275-
{POST: 41, HEAD: 33, PATCH: }
276-
277-
SELECT MAP([[1,2], [3,4]], ['a', 'b']);
278-
----
279-
{[1, 2]: a, [3, 4]: b}
280-
281-
SELECT MAP { 'a': 1, 'b': 2 };
282-
----
283-
{a: 1, b: 2}
284-
285-
-- Using make_map function
286-
SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]);
287-
----
288-
{POST: 41, HEAD: 33}
289-
290-
SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]);
291-
----
292-
{key1: value1, key2: }
293-
```"#,
294-
)
295-
.with_argument(
296-
"key",
297-
"For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\
298-
For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null."
299-
)
300-
.with_argument(
301-
"value",
302-
"For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\
303-
For `make_map`: The list of values to be mapped to the corresponding keys."
304-
)
305-
.build()
306-
})
307-
}
308-
309298
fn get_element_type(data_type: &DataType) -> Result<&DataType> {
310299
match data_type {
311300
DataType::List(element) => Ok(element.data_type()),

0 commit comments

Comments
 (0)