Skip to content

Commit 3ad7734

Browse files
authored
Update sql doc (#6025)
* Update/touch-up user-guide sql doc pages * Update ddl doc
1 parent 18c0f1c commit 3ad7734

File tree

8 files changed

+251
-72
lines changed

8 files changed

+251
-72
lines changed

datafusion/expr/src/aggregate_function.rs

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -77,30 +77,34 @@ impl FromStr for AggregateFunction {
7777
type Err = DataFusionError;
7878
fn from_str(name: &str) -> Result<AggregateFunction> {
7979
Ok(match name {
80-
"min" => AggregateFunction::Min,
81-
"max" => AggregateFunction::Max,
82-
"count" => AggregateFunction::Count,
80+
// general
8381
"avg" => AggregateFunction::Avg,
82+
"count" => AggregateFunction::Count,
83+
"max" => AggregateFunction::Max,
8484
"mean" => AggregateFunction::Avg,
85-
"sum" => AggregateFunction::Sum,
8685
"median" => AggregateFunction::Median,
87-
"approx_distinct" => AggregateFunction::ApproxDistinct,
86+
"min" => AggregateFunction::Min,
87+
"sum" => AggregateFunction::Sum,
8888
"array_agg" => AggregateFunction::ArrayAgg,
89-
"var" => AggregateFunction::Variance,
90-
"var_samp" => AggregateFunction::Variance,
91-
"var_pop" => AggregateFunction::VariancePop,
92-
"stddev" => AggregateFunction::Stddev,
93-
"stddev_samp" => AggregateFunction::Stddev,
94-
"stddev_pop" => AggregateFunction::StddevPop,
89+
// statistical
90+
"corr" => AggregateFunction::Correlation,
9591
"covar" => AggregateFunction::Covariance,
96-
"covar_samp" => AggregateFunction::Covariance,
9792
"covar_pop" => AggregateFunction::CovariancePop,
98-
"corr" => AggregateFunction::Correlation,
93+
"covar_samp" => AggregateFunction::Covariance,
94+
"stddev" => AggregateFunction::Stddev,
95+
"stddev_pop" => AggregateFunction::StddevPop,
96+
"stddev_samp" => AggregateFunction::Stddev,
97+
"var" => AggregateFunction::Variance,
98+
"var_pop" => AggregateFunction::VariancePop,
99+
"var_samp" => AggregateFunction::Variance,
100+
// approximate
101+
"approx_distinct" => AggregateFunction::ApproxDistinct,
102+
"approx_median" => AggregateFunction::ApproxMedian,
99103
"approx_percentile_cont" => AggregateFunction::ApproxPercentileCont,
100104
"approx_percentile_cont_with_weight" => {
101105
AggregateFunction::ApproxPercentileContWithWeight
102106
}
103-
"approx_median" => AggregateFunction::ApproxMedian,
107+
// other
104108
"grouping" => AggregateFunction::Grouping,
105109
_ => {
106110
return Err(DataFusionError::Plan(format!(

datafusion/expr/src/built_in_function.rs

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -313,12 +313,13 @@ impl FromStr for BuiltinScalarFunction {
313313
// math functions
314314
"abs" => BuiltinScalarFunction::Abs,
315315
"acos" => BuiltinScalarFunction::Acos,
316-
"asin" => BuiltinScalarFunction::Asin,
317-
"atan" => BuiltinScalarFunction::Atan,
318316
"acosh" => BuiltinScalarFunction::Acosh,
317+
"asin" => BuiltinScalarFunction::Asin,
319318
"asinh" => BuiltinScalarFunction::Asinh,
319+
"atan" => BuiltinScalarFunction::Atan,
320320
"atanh" => BuiltinScalarFunction::Atanh,
321321
"atan2" => BuiltinScalarFunction::Atan2,
322+
"cbrt" => BuiltinScalarFunction::Cbrt,
322323
"ceil" => BuiltinScalarFunction::Ceil,
323324
"cos" => BuiltinScalarFunction::Cos,
324325
"cosh" => BuiltinScalarFunction::Cosh,
@@ -330,21 +331,19 @@ impl FromStr for BuiltinScalarFunction {
330331
"log2" => BuiltinScalarFunction::Log2,
331332
"pi" => BuiltinScalarFunction::Pi,
332333
"power" | "pow" => BuiltinScalarFunction::Power,
334+
"random" => BuiltinScalarFunction::Random,
333335
"round" => BuiltinScalarFunction::Round,
334336
"signum" => BuiltinScalarFunction::Signum,
335337
"sin" => BuiltinScalarFunction::Sin,
336338
"sinh" => BuiltinScalarFunction::Sinh,
337339
"sqrt" => BuiltinScalarFunction::Sqrt,
338-
"cbrt" => BuiltinScalarFunction::Cbrt,
339340
"tan" => BuiltinScalarFunction::Tan,
340341
"tanh" => BuiltinScalarFunction::Tanh,
341342
"trunc" => BuiltinScalarFunction::Trunc,
342343

343344
// conditional functions
344345
"coalesce" => BuiltinScalarFunction::Coalesce,
345-
346-
// array functions
347-
"make_array" => BuiltinScalarFunction::MakeArray,
346+
"nullif" => BuiltinScalarFunction::NullIf,
348347

349348
// string functions
350349
"ascii" => BuiltinScalarFunction::Ascii,
@@ -355,51 +354,61 @@ impl FromStr for BuiltinScalarFunction {
355354
"concat" => BuiltinScalarFunction::Concat,
356355
"concat_ws" => BuiltinScalarFunction::ConcatWithSeparator,
357356
"chr" => BuiltinScalarFunction::Chr,
358-
"current_date" => BuiltinScalarFunction::CurrentDate,
359-
"current_time" => BuiltinScalarFunction::CurrentTime,
360-
"date_part" | "datepart" => BuiltinScalarFunction::DatePart,
361-
"date_trunc" | "datetrunc" => BuiltinScalarFunction::DateTrunc,
362-
"date_bin" => BuiltinScalarFunction::DateBin,
363357
"initcap" => BuiltinScalarFunction::InitCap,
364358
"left" => BuiltinScalarFunction::Left,
365359
"length" => BuiltinScalarFunction::CharacterLength,
366360
"lower" => BuiltinScalarFunction::Lower,
367361
"lpad" => BuiltinScalarFunction::Lpad,
368362
"ltrim" => BuiltinScalarFunction::Ltrim,
369-
"md5" => BuiltinScalarFunction::MD5,
370-
"nullif" => BuiltinScalarFunction::NullIf,
371363
"octet_length" => BuiltinScalarFunction::OctetLength,
372-
"random" => BuiltinScalarFunction::Random,
373-
"regexp_replace" => BuiltinScalarFunction::RegexpReplace,
374364
"repeat" => BuiltinScalarFunction::Repeat,
375365
"replace" => BuiltinScalarFunction::Replace,
376366
"reverse" => BuiltinScalarFunction::Reverse,
377367
"right" => BuiltinScalarFunction::Right,
378368
"rpad" => BuiltinScalarFunction::Rpad,
379369
"rtrim" => BuiltinScalarFunction::Rtrim,
380-
"sha224" => BuiltinScalarFunction::SHA224,
381-
"sha256" => BuiltinScalarFunction::SHA256,
382-
"sha384" => BuiltinScalarFunction::SHA384,
383-
"sha512" => BuiltinScalarFunction::SHA512,
384-
"digest" => BuiltinScalarFunction::Digest,
385370
"split_part" => BuiltinScalarFunction::SplitPart,
386371
"starts_with" => BuiltinScalarFunction::StartsWith,
387372
"strpos" => BuiltinScalarFunction::Strpos,
388373
"substr" => BuiltinScalarFunction::Substr,
389374
"to_hex" => BuiltinScalarFunction::ToHex,
390-
"to_timestamp" => BuiltinScalarFunction::ToTimestamp,
391-
"to_timestamp_millis" => BuiltinScalarFunction::ToTimestampMillis,
392-
"to_timestamp_micros" => BuiltinScalarFunction::ToTimestampMicros,
393-
"to_timestamp_seconds" => BuiltinScalarFunction::ToTimestampSeconds,
394-
"now" => BuiltinScalarFunction::Now,
395375
"translate" => BuiltinScalarFunction::Translate,
396376
"trim" => BuiltinScalarFunction::Trim,
397377
"upper" => BuiltinScalarFunction::Upper,
398378
"uuid" => BuiltinScalarFunction::Uuid,
379+
380+
// regex functions
399381
"regexp_match" => BuiltinScalarFunction::RegexpMatch,
400-
"struct" => BuiltinScalarFunction::Struct,
382+
"regexp_replace" => BuiltinScalarFunction::RegexpReplace,
383+
384+
// time/date functions
385+
"now" => BuiltinScalarFunction::Now,
386+
"current_date" => BuiltinScalarFunction::CurrentDate,
387+
"current_time" => BuiltinScalarFunction::CurrentTime,
388+
"date_bin" => BuiltinScalarFunction::DateBin,
389+
"date_trunc" | "datetrunc" => BuiltinScalarFunction::DateTrunc,
390+
"date_part" | "datepart" => BuiltinScalarFunction::DatePart,
391+
"to_timestamp" => BuiltinScalarFunction::ToTimestamp,
392+
"to_timestamp_millis" => BuiltinScalarFunction::ToTimestampMillis,
393+
"to_timestamp_micros" => BuiltinScalarFunction::ToTimestampMicros,
394+
"to_timestamp_seconds" => BuiltinScalarFunction::ToTimestampSeconds,
401395
"from_unixtime" => BuiltinScalarFunction::FromUnixtime,
396+
397+
// hashing functions
398+
"digest" => BuiltinScalarFunction::Digest,
399+
"md5" => BuiltinScalarFunction::MD5,
400+
"sha224" => BuiltinScalarFunction::SHA224,
401+
"sha256" => BuiltinScalarFunction::SHA256,
402+
"sha384" => BuiltinScalarFunction::SHA384,
403+
"sha512" => BuiltinScalarFunction::SHA512,
404+
405+
// other functions
406+
"struct" => BuiltinScalarFunction::Struct,
402407
"arrow_typeof" => BuiltinScalarFunction::ArrowTypeof,
408+
409+
// array functions
410+
"make_array" => BuiltinScalarFunction::MakeArray,
411+
403412
_ => {
404413
return Err(DataFusionError::Plan(format!(
405414
"There is no built-in function named {name}"

docs/source/user-guide/sql/aggregate_functions.md

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Aggregate functions operate on a set of values to compute a single result.
2727
- [count](#count)
2828
- [max](#max)
2929
- [mean](#mean)
30+
- [median](#median)
3031
- [min](#min)
3132
- [sum](#sum)
3233
- [array_agg](#array_agg)
@@ -82,6 +83,19 @@ max(expression)
8283

8384
_Alias of [avg](#avg)._
8485

86+
### `median`
87+
88+
Returns the median value in the specified column.
89+
90+
```
91+
median(expression)
92+
```
93+
94+
#### Arguments
95+
96+
- **expression**: Expression to operate on.
97+
Can be a constant, column, or function, and any combination of arithmetic operators.
98+
8599
### `min`
86100

87101
Returns the minimum value in the specified column.
@@ -110,7 +124,16 @@ sum(expression)
110124

111125
### `array_agg`
112126

113-
<!-- TODO: Add array_agg documentation -->
127+
Returns an array created from the expression elements.
128+
129+
```
130+
array_agg(expression)
131+
```
132+
133+
#### Arguments
134+
135+
- **expression**: Expression to operate on.
136+
Can be a constant, column, or function, and any combination of arithmetic operators.
114137

115138
## Statistical
116139

docs/source/user-guide/sql/data_types.md

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -60,20 +60,20 @@ For example, to cast the output of `now()` to a `Timestamp` with second precisio
6060

6161
## Numeric Types
6262

63-
| SQL DataType | Arrow DataType | Notes |
64-
| ------------------------------------ | :---------------------------- | ----------------------------------------------------------------------------------------------------------- |
65-
| `TINYINT` | `Int8` | |
66-
| `SMALLINT` | `Int16` | |
67-
| `INT` or `INTEGER` | `Int32` | |
68-
| `BIGINT` | `Int64` | |
69-
| `TINYINT UNSIGNED` | `UInt8` | |
70-
| `SMALLINT UNSIGNED` | `UInt16` | |
71-
| `INT UNSIGNED` or `INTEGER UNSIGNED` | `UInt32` | |
72-
| `BIGINT UNSIGNED` | `UInt64` | |
73-
| `FLOAT` | `Float32` | |
74-
| `REAL` | `Float32` | |
75-
| `DOUBLE` | `Float64` | |
76-
| `DECIMAL(precision,scale)` | `Decimal128(precision,scale)` | Decimal support is currently experimental ([#3523](https://github.com/apache/arrow-datafusion/issues/3523)) |
63+
| SQL DataType | Arrow DataType | Notes |
64+
| ------------------------------------ | :----------------------------- | ----------------------------------------------------------------------------------------------------------- |
65+
| `TINYINT` | `Int8` | |
66+
| `SMALLINT` | `Int16` | |
67+
| `INT` or `INTEGER` | `Int32` | |
68+
| `BIGINT` | `Int64` | |
69+
| `TINYINT UNSIGNED` | `UInt8` | |
70+
| `SMALLINT UNSIGNED` | `UInt16` | |
71+
| `INT UNSIGNED` or `INTEGER UNSIGNED` | `UInt32` | |
72+
| `BIGINT UNSIGNED` | `UInt64` | |
73+
| `FLOAT` | `Float32` | |
74+
| `REAL` | `Float32` | |
75+
| `DOUBLE` | `Float64` | |
76+
| `DECIMAL(precision, scale)` | `Decimal128(precision, scale)` | Decimal support is currently experimental ([#3523](https://github.com/apache/arrow-datafusion/issues/3523)) |
7777

7878
## Date/Time Types
7979

docs/source/user-guide/sql/ddl.md

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,32 @@
1919

2020
# DDL
2121

22+
## CREATE DATABASE
23+
24+
Create a catalog with the specified name.
25+
26+
<pre>
27+
CREATE DATABASE [ IF NOT EXISTS ] <i><b>catalog</b></i>
28+
</pre>
29+
30+
```sql
31+
-- create catalog cat
32+
CREATE DATABASE cat;
33+
```
34+
35+
## CREATE SCHEMA
36+
37+
Create a schema under the specified catalog, or under the default DataFusion catalog if no catalog is specified.
38+
39+
<pre>
40+
CREATE SCHEMA [ IF NOT EXISTS ] [ <i><b>catalog.</b></i> ] <b><i>schema_name</i></b>
41+
</pre>
42+
43+
```sql
44+
-- create schema emu under catalog cat
45+
CREATE SCHEMA cat.emu;
46+
```
47+
2248
## CREATE EXTERNAL TABLE
2349

2450
Parquet data sources can be registered by executing a `CREATE EXTERNAL TABLE` SQL statement. It is not necessary
@@ -67,7 +93,7 @@ When creating an output from a data source that is already ordered by an express
6793
the data using the `WITH ORDER` clause. This applies even if the expression used for sorting is complex,
6894
allowing for greater flexibility.
6995

70-
Here's an example of how to use `WITH ORDER` query
96+
Here's an example of how to use the `WITH ORDER` clause.
7197

7298
```sql
7399
CREATE EXTERNAL TABLE test (
@@ -91,14 +117,14 @@ WITH ORDER (c2 ASC, c5 + c8 DESC NULL FIRST)
91117
LOCATION '/path/to/aggregate_test_100.csv';
92118
```
93119

94-
where `WITH ORDER` clause specifies the sort order:
120+
where the `WITH ORDER` clause specifies the sort order:
95121

96122
```sql
97123
WITH ORDER (sort_expression1 [ASC | DESC] [NULLS { FIRST | LAST }]
98124
[, sort_expression2 [ASC | DESC] [NULLS { FIRST | LAST }] ...])
99125
```
100126

101-
### Cautions When Using the WITH ORDER Clause
127+
### Cautions when using the WITH ORDER Clause
102128

103129
- It's important to understand that using the `WITH ORDER` clause in the `CREATE EXTERNAL TABLE` statement only specifies the order in which the data should be read from the external file. If the data in the file is not already sorted according to the specified order, then the results may not be correct.
104130

@@ -153,7 +179,7 @@ DROP TABLE IF EXISTS nonexistent_table;
153179
A view is a virtual table based on the result of a SQL query. It can be created from an existing table or a list of values.
154180

155181
<pre>
156-
CREATE VIEW <i><b>view_name</b></i> AS statement;
182+
CREATE [ OR REPLACE ] VIEW <i><b>view_name</b></i> AS statement;
157183
</pre>
158184

159185
```sql

docs/source/user-guide/sql/explain.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ EXPLAIN [ANALYZE] [VERBOSE] statement
2828
## EXPLAIN
2929

3030
Shows the execution plan of a statement.
31-
If you need more details output, try to use `EXPLAIN VERBOSE`.
31+
If you need more detailed output, use `EXPLAIN VERBOSE`.
3232

3333
```sql
3434
EXPLAIN SELECT SUM(x) FROM table GROUP BY b;
@@ -52,7 +52,7 @@ EXPLAIN SELECT SUM(x) FROM table GROUP BY b;
5252
## EXPLAIN ANALYZE
5353

5454
Shows the execution plan and metrics of a statement.
55-
If you need more information output, try to use `EXPLAIN ANALYZE VERBOSE`.
55+
If you need more detailed output, use `EXPLAIN ANALYZE VERBOSE`.
5656

5757
```sql
5858
EXPLAIN ANALYZE SELECT SUM(x) FROM table GROUP BY b;

0 commit comments

Comments
 (0)