Skip to content

Commit a203c2b

Browse files
Chen-Yuan-Lai and Cheng-Yuan-Lai authored
doc-gen: migrate scalar functions (datetime) documentation 1/2 (#13920)
* doc-gen: migrate scalar functions (datetime) documentation 1/2
* fix: fix typo and update function docs
---------
Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
1 parent fb5378d commit a203c2b

File tree

7 files changed

+175
-218
lines changed

7 files changed

+175
-218
lines changed

datafusion/functions/src/datetime/current_date.rs

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,21 @@ use arrow::datatypes::DataType::Date32;
2222
use chrono::{Datelike, NaiveDate};
2323

2424
use datafusion_common::{internal_err, Result, ScalarValue};
25-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
2625
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
2726
use datafusion_expr::{
2827
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
2928
};
30-
use std::sync::OnceLock;
29+
use datafusion_macros::user_doc;
3130

31+
#[user_doc(
32+
doc_section(label = "Time and Date Functions"),
33+
description = r#"
34+
Returns the current UTC date.
35+
36+
The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes.
37+
"#,
38+
syntax_example = "current_date()"
39+
)]
3240
#[derive(Debug)]
3341
pub struct CurrentDateFunc {
3442
signature: Signature,
@@ -105,22 +113,6 @@ impl ScalarUDFImpl for CurrentDateFunc {
105113
}
106114

107115
fn documentation(&self) -> Option<&Documentation> {
108-
Some(get_current_date_doc())
116+
self.doc()
109117
}
110118
}
111-
112-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
113-
114-
fn get_current_date_doc() -> &'static Documentation {
115-
DOCUMENTATION.get_or_init(|| {
116-
Documentation::builder(
117-
DOC_SECTION_DATETIME,
118-
r#"
119-
Returns the current UTC date.
120-
121-
The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes.
122-
"#,
123-
"current_date()")
124-
.build()
125-
})
126-
}

datafusion/functions/src/datetime/current_time.rs

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,23 @@ use arrow::datatypes::DataType;
1919
use arrow::datatypes::DataType::Time64;
2020
use arrow::datatypes::TimeUnit::Nanosecond;
2121
use std::any::Any;
22-
use std::sync::OnceLock;
2322

2423
use datafusion_common::{internal_err, Result, ScalarValue};
25-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
2624
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
2725
use datafusion_expr::{
2826
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
2927
};
28+
use datafusion_macros::user_doc;
3029

30+
#[user_doc(
31+
doc_section(label = "Time and Date Functions"),
32+
description = r#"
33+
Returns the current UTC time.
34+
35+
The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes.
36+
"#,
37+
syntax_example = "current_time()"
38+
)]
3139
#[derive(Debug)]
3240
pub struct CurrentTimeFunc {
3341
signature: Signature,
@@ -93,22 +101,6 @@ impl ScalarUDFImpl for CurrentTimeFunc {
93101
}
94102

95103
fn documentation(&self) -> Option<&Documentation> {
96-
Some(get_current_time_doc())
104+
self.doc()
97105
}
98106
}
99-
100-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
101-
102-
fn get_current_time_doc() -> &'static Documentation {
103-
DOCUMENTATION.get_or_init(|| {
104-
Documentation::builder(
105-
DOC_SECTION_DATETIME,
106-
r#"
107-
Returns the current UTC time.
108-
109-
The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes.
110-
"#,
111-
"current_time()")
112-
.build()
113-
})
114-
}

datafusion/functions/src/datetime/date_bin.rs

Lines changed: 57 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use std::any::Any;
19-
use std::sync::{Arc, OnceLock};
19+
use std::sync::Arc;
2020

2121
use arrow::array::temporal_conversions::NANOSECONDS;
2222
use arrow::array::types::{
@@ -37,10 +37,64 @@ use datafusion_expr::TypeSignature::Exact;
3737
use datafusion_expr::{
3838
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
3939
};
40+
use datafusion_macros::user_doc;
4041

4142
use chrono::{DateTime, Datelike, Duration, Months, TimeDelta, Utc};
42-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
4343

44+
#[user_doc(
45+
doc_section(label = "Time and Date Functions"),
46+
description = r#"
47+
Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window.
48+
49+
For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`.
50+
"#,
51+
syntax_example = "date_bin(interval, expression, origin-timestamp)",
52+
sql_example = r#"```sql
53+
-- Bin the timestamp into 1 day intervals
54+
> SELECT date_bin(interval '1 day', time) as bin
55+
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
56+
+---------------------+
57+
| bin |
58+
+---------------------+
59+
| 2023-01-01T00:00:00 |
60+
| 2023-01-03T00:00:00 |
61+
+---------------------+
62+
2 row(s) fetched.
63+
64+
-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01
65+
> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin
66+
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
67+
+---------------------+
68+
| bin |
69+
+---------------------+
70+
| 2023-01-01T03:00:00 |
71+
| 2023-01-03T03:00:00 |
72+
+---------------------+
73+
2 row(s) fetched.
74+
```"#,
75+
argument(name = "interval", description = "Bin interval."),
76+
argument(
77+
name = "expression",
78+
description = "Time expression to operate on. Can be a constant, column, or function."
79+
),
80+
argument(
81+
name = "origin-timestamp",
82+
description = r#"Optional. Starting point used to determine bin boundaries. If not specified, defaults to 1970-01-01T00:00:00Z (the UNIX epoch in UTC). The following intervals are supported:
83+
84+
- nanoseconds
85+
- microseconds
86+
- milliseconds
87+
- seconds
88+
- minutes
89+
- hours
90+
- days
91+
- weeks
92+
- months
93+
- years
94+
- century
95+
"#
96+
)
97+
)]
4498
#[derive(Debug)]
4599
pub struct DateBinFunc {
46100
signature: Signature,
@@ -169,68 +223,10 @@ impl ScalarUDFImpl for DateBinFunc {
169223
}
170224
}
171225
fn documentation(&self) -> Option<&Documentation> {
172-
Some(get_date_bin_doc())
226+
self.doc()
173227
}
174228
}
175229

176-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
177-
178-
fn get_date_bin_doc() -> &'static Documentation {
179-
DOCUMENTATION.get_or_init(|| {
180-
Documentation::builder(
181-
DOC_SECTION_DATETIME,
182-
r#"
183-
Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window.
184-
185-
For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`.
186-
"#,
187-
"date_bin(interval, expression, origin-timestamp)")
188-
.with_sql_example(r#"```sql
189-
-- Bin the timestamp into 1 day intervals
190-
> SELECT date_bin(interval '1 day', time) as bin
191-
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
192-
+---------------------+
193-
| bin |
194-
+---------------------+
195-
| 2023-01-01T00:00:00 |
196-
| 2023-01-03T00:00:00 |
197-
+---------------------+
198-
2 row(s) fetched.
199-
200-
-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01
201-
> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin
202-
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
203-
+---------------------+
204-
| bin |
205-
+---------------------+
206-
| 2023-01-01T03:00:00 |
207-
| 2023-01-03T03:00:00 |
208-
+---------------------+
209-
2 row(s) fetched.
210-
```
211-
"#)
212-
.with_argument("interval", "Bin interval.")
213-
.with_argument("expression", "Time expression to operate on. Can be a constant, column, or function.")
214-
.with_argument("origin-timestamp", "Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC).
215-
216-
The following intervals are supported:
217-
218-
- nanoseconds
219-
- microseconds
220-
- milliseconds
221-
- seconds
222-
- minutes
223-
- hours
224-
- days
225-
- weeks
226-
- months
227-
- years
228-
- century
229-
")
230-
.build()
231-
})
232-
}
233-
234230
enum Interval {
235231
Nanoseconds(i64),
236232
Months(i64),

datafusion/functions/src/datetime/date_part.rs

Lines changed: 36 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use std::any::Any;
1919
use std::str::FromStr;
20-
use std::sync::{Arc, OnceLock};
20+
use std::sync::Arc;
2121

2222
use arrow::array::{Array, ArrayRef, Float64Array, Int32Array};
2323
use arrow::compute::kernels::cast_utils::IntervalUnit;
@@ -41,11 +41,42 @@ use datafusion_common::{
4141
ExprSchema, Result, ScalarValue,
4242
};
4343
use datafusion_expr::{
44-
scalar_doc_sections::DOC_SECTION_DATETIME, ColumnarValue, Documentation, Expr,
45-
ScalarUDFImpl, Signature, TypeSignature, Volatility,
44+
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, TypeSignature,
45+
Volatility,
4646
};
4747
use datafusion_expr_common::signature::TypeSignatureClass;
48-
48+
use datafusion_macros::user_doc;
49+
50+
#[user_doc(
51+
doc_section(label = "Time and Date Functions"),
52+
description = "Returns the specified part of the date as an integer.",
53+
syntax_example = "date_part(part, expression)",
54+
alternative_syntax = "extract(field FROM source)",
55+
argument(
56+
name = "part",
57+
description = r#"Part of the date to return. The following date parts are supported:
58+
59+
- year
60+
- quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in)
61+
- month
62+
- week (week of the year)
63+
- day (day of the month)
64+
- hour
65+
- minute
66+
- second
67+
- millisecond
68+
- microsecond
69+
- nanosecond
70+
- dow (day of the week)
71+
- doy (day of the year)
72+
- epoch (seconds since Unix epoch)
73+
"#
74+
),
75+
argument(
76+
name = "expression",
77+
description = "Time expression to operate on. Can be a constant, column, or function."
78+
)
79+
)]
4980
#[derive(Debug)]
5081
pub struct DatePartFunc {
5182
signature: Signature,
@@ -190,7 +221,7 @@ impl ScalarUDFImpl for DatePartFunc {
190221
&self.aliases
191222
}
192223
fn documentation(&self) -> Option<&Documentation> {
193-
Some(get_date_part_doc())
224+
self.doc()
194225
}
195226
}
196227

@@ -206,43 +237,6 @@ fn part_normalization(part: &str) -> &str {
206237
.unwrap_or(part)
207238
}
208239

209-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
210-
211-
fn get_date_part_doc() -> &'static Documentation {
212-
DOCUMENTATION.get_or_init(|| {
213-
Documentation::builder(
214-
DOC_SECTION_DATETIME,
215-
"Returns the specified part of the date as an integer.",
216-
"date_part(part, expression)")
217-
.with_argument(
218-
"part",
219-
r#"Part of the date to return. The following date parts are supported:
220-
221-
- year
222-
- quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in)
223-
- month
224-
- week (week of the year)
225-
- day (day of the month)
226-
- hour
227-
- minute
228-
- second
229-
- millisecond
230-
- microsecond
231-
- nanosecond
232-
- dow (day of the week)
233-
- doy (day of the year)
234-
- epoch (seconds since Unix epoch)
235-
"#,
236-
)
237-
.with_argument(
238-
"expression",
239-
"Time expression to operate on. Can be a constant, column, or function.",
240-
)
241-
.with_alternative_syntax("extract(field FROM source)")
242-
.build()
243-
})
244-
}
245-
246240
/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the
247241
/// result to a total number of seconds, milliseconds, microseconds or
248242
/// nanoseconds

0 commit comments

Comments
 (0)