Skip to content

Update sql doc #6025

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 18 additions & 14 deletions datafusion/expr/src/aggregate_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,30 +77,34 @@ impl FromStr for AggregateFunction {
type Err = DataFusionError;
fn from_str(name: &str) -> Result<AggregateFunction> {
Ok(match name {
"min" => AggregateFunction::Min,
"max" => AggregateFunction::Max,
"count" => AggregateFunction::Count,
// general
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just reorganizing, to be similar to how it looks in the sql doc, and group them together here a bit better

"avg" => AggregateFunction::Avg,
"count" => AggregateFunction::Count,
"max" => AggregateFunction::Max,
"mean" => AggregateFunction::Avg,
"sum" => AggregateFunction::Sum,
"median" => AggregateFunction::Median,
"approx_distinct" => AggregateFunction::ApproxDistinct,
"min" => AggregateFunction::Min,
"sum" => AggregateFunction::Sum,
"array_agg" => AggregateFunction::ArrayAgg,
"var" => AggregateFunction::Variance,
"var_samp" => AggregateFunction::Variance,
"var_pop" => AggregateFunction::VariancePop,
"stddev" => AggregateFunction::Stddev,
"stddev_samp" => AggregateFunction::Stddev,
"stddev_pop" => AggregateFunction::StddevPop,
// statistical
"corr" => AggregateFunction::Correlation,
"covar" => AggregateFunction::Covariance,
"covar_samp" => AggregateFunction::Covariance,
"covar_pop" => AggregateFunction::CovariancePop,
"corr" => AggregateFunction::Correlation,
"covar_samp" => AggregateFunction::Covariance,
"stddev" => AggregateFunction::Stddev,
"stddev_pop" => AggregateFunction::StddevPop,
"stddev_samp" => AggregateFunction::Stddev,
"var" => AggregateFunction::Variance,
"var_pop" => AggregateFunction::VariancePop,
"var_samp" => AggregateFunction::Variance,
// approximate
"approx_distinct" => AggregateFunction::ApproxDistinct,
"approx_median" => AggregateFunction::ApproxMedian,
"approx_percentile_cont" => AggregateFunction::ApproxPercentileCont,
"approx_percentile_cont_with_weight" => {
AggregateFunction::ApproxPercentileContWithWeight
}
"approx_median" => AggregateFunction::ApproxMedian,
// other
"grouping" => AggregateFunction::Grouping,
_ => {
return Err(DataFusionError::Plan(format!(
Expand Down
61 changes: 35 additions & 26 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -313,12 +313,13 @@ impl FromStr for BuiltinScalarFunction {
// math functions
"abs" => BuiltinScalarFunction::Abs,
"acos" => BuiltinScalarFunction::Acos,
"asin" => BuiltinScalarFunction::Asin,
"atan" => BuiltinScalarFunction::Atan,
"acosh" => BuiltinScalarFunction::Acosh,
"asin" => BuiltinScalarFunction::Asin,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

"asinh" => BuiltinScalarFunction::Asinh,
"atan" => BuiltinScalarFunction::Atan,
"atanh" => BuiltinScalarFunction::Atanh,
"atan2" => BuiltinScalarFunction::Atan2,
"cbrt" => BuiltinScalarFunction::Cbrt,
"ceil" => BuiltinScalarFunction::Ceil,
"cos" => BuiltinScalarFunction::Cos,
"cosh" => BuiltinScalarFunction::Cosh,
Expand All @@ -330,21 +331,19 @@ impl FromStr for BuiltinScalarFunction {
"log2" => BuiltinScalarFunction::Log2,
"pi" => BuiltinScalarFunction::Pi,
"power" | "pow" => BuiltinScalarFunction::Power,
"random" => BuiltinScalarFunction::Random,
"round" => BuiltinScalarFunction::Round,
"signum" => BuiltinScalarFunction::Signum,
"sin" => BuiltinScalarFunction::Sin,
"sinh" => BuiltinScalarFunction::Sinh,
"sqrt" => BuiltinScalarFunction::Sqrt,
"cbrt" => BuiltinScalarFunction::Cbrt,
"tan" => BuiltinScalarFunction::Tan,
"tanh" => BuiltinScalarFunction::Tanh,
"trunc" => BuiltinScalarFunction::Trunc,

// conditional functions
"coalesce" => BuiltinScalarFunction::Coalesce,

// array functions
"make_array" => BuiltinScalarFunction::MakeArray,
"nullif" => BuiltinScalarFunction::NullIf,

// string functions
"ascii" => BuiltinScalarFunction::Ascii,
Expand All @@ -355,51 +354,61 @@ impl FromStr for BuiltinScalarFunction {
"concat" => BuiltinScalarFunction::Concat,
"concat_ws" => BuiltinScalarFunction::ConcatWithSeparator,
"chr" => BuiltinScalarFunction::Chr,
"current_date" => BuiltinScalarFunction::CurrentDate,
"current_time" => BuiltinScalarFunction::CurrentTime,
"date_part" | "datepart" => BuiltinScalarFunction::DatePart,
"date_trunc" | "datetrunc" => BuiltinScalarFunction::DateTrunc,
"date_bin" => BuiltinScalarFunction::DateBin,
"initcap" => BuiltinScalarFunction::InitCap,
"left" => BuiltinScalarFunction::Left,
"length" => BuiltinScalarFunction::CharacterLength,
"lower" => BuiltinScalarFunction::Lower,
"lpad" => BuiltinScalarFunction::Lpad,
"ltrim" => BuiltinScalarFunction::Ltrim,
"md5" => BuiltinScalarFunction::MD5,
"nullif" => BuiltinScalarFunction::NullIf,
"octet_length" => BuiltinScalarFunction::OctetLength,
"random" => BuiltinScalarFunction::Random,
"regexp_replace" => BuiltinScalarFunction::RegexpReplace,
"repeat" => BuiltinScalarFunction::Repeat,
"replace" => BuiltinScalarFunction::Replace,
"reverse" => BuiltinScalarFunction::Reverse,
"right" => BuiltinScalarFunction::Right,
"rpad" => BuiltinScalarFunction::Rpad,
"rtrim" => BuiltinScalarFunction::Rtrim,
"sha224" => BuiltinScalarFunction::SHA224,
"sha256" => BuiltinScalarFunction::SHA256,
"sha384" => BuiltinScalarFunction::SHA384,
"sha512" => BuiltinScalarFunction::SHA512,
"digest" => BuiltinScalarFunction::Digest,
"split_part" => BuiltinScalarFunction::SplitPart,
"starts_with" => BuiltinScalarFunction::StartsWith,
"strpos" => BuiltinScalarFunction::Strpos,
"substr" => BuiltinScalarFunction::Substr,
"to_hex" => BuiltinScalarFunction::ToHex,
"to_timestamp" => BuiltinScalarFunction::ToTimestamp,
"to_timestamp_millis" => BuiltinScalarFunction::ToTimestampMillis,
"to_timestamp_micros" => BuiltinScalarFunction::ToTimestampMicros,
"to_timestamp_seconds" => BuiltinScalarFunction::ToTimestampSeconds,
"now" => BuiltinScalarFunction::Now,
"translate" => BuiltinScalarFunction::Translate,
"trim" => BuiltinScalarFunction::Trim,
"upper" => BuiltinScalarFunction::Upper,
"uuid" => BuiltinScalarFunction::Uuid,

// regex functions
"regexp_match" => BuiltinScalarFunction::RegexpMatch,
"struct" => BuiltinScalarFunction::Struct,
"regexp_replace" => BuiltinScalarFunction::RegexpReplace,

// time/date functions
"now" => BuiltinScalarFunction::Now,
"current_date" => BuiltinScalarFunction::CurrentDate,
"current_time" => BuiltinScalarFunction::CurrentTime,
"date_bin" => BuiltinScalarFunction::DateBin,
"date_trunc" | "datetrunc" => BuiltinScalarFunction::DateTrunc,
"date_part" | "datepart" => BuiltinScalarFunction::DatePart,
"to_timestamp" => BuiltinScalarFunction::ToTimestamp,
"to_timestamp_millis" => BuiltinScalarFunction::ToTimestampMillis,
"to_timestamp_micros" => BuiltinScalarFunction::ToTimestampMicros,
"to_timestamp_seconds" => BuiltinScalarFunction::ToTimestampSeconds,
"from_unixtime" => BuiltinScalarFunction::FromUnixtime,

// hashing functions
"digest" => BuiltinScalarFunction::Digest,
"md5" => BuiltinScalarFunction::MD5,
"sha224" => BuiltinScalarFunction::SHA224,
"sha256" => BuiltinScalarFunction::SHA256,
"sha384" => BuiltinScalarFunction::SHA384,
"sha512" => BuiltinScalarFunction::SHA512,

// other functions
"struct" => BuiltinScalarFunction::Struct,
"arrow_typeof" => BuiltinScalarFunction::ArrowTypeof,

// array functions
"make_array" => BuiltinScalarFunction::MakeArray,

_ => {
return Err(DataFusionError::Plan(format!(
"There is no built-in function named {name}"
Expand Down
25 changes: 24 additions & 1 deletion docs/source/user-guide/sql/aggregate_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Aggregate functions operate on a set of values to compute a single result.
- [count](#count)
- [max](#max)
- [mean](#mean)
- [median](#median)
- [min](#min)
- [sum](#sum)
- [array_agg](#array_agg)
Expand Down Expand Up @@ -82,6 +83,19 @@ max(expression)

_Alias of [avg](#avg)._

### `median`

Returns the median value in the specified column.

```
median(expression)
```

#### Arguments

- **expression**: Expression to operate on.
Can be a constant, column, or function, and any combination of arithmetic operators.

### `min`

Returns the minimum value in the specified column.
Expand Down Expand Up @@ -110,7 +124,16 @@ sum(expression)

### `array_agg`

<!-- TODO: Add array_agg documentation -->
Returns an array created from the expression elements.

```
array_agg(expression)
```

#### Arguments

- **expression**: Expression to operate on.
Can be a constant, column, or function, and any combination of arithmetic operators.

## Statistical

Expand Down
28 changes: 14 additions & 14 deletions docs/source/user-guide/sql/data_types.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,20 @@ For example, to cast the output of `now()` to a `Timestamp` with second precisio

## Numeric Types

| SQL DataType | Arrow DataType | Notes |
| ------------------------------------ | :---------------------------- | ----------------------------------------------------------------------------------------------------------- |
| `TINYINT` | `Int8` | |
| `SMALLINT` | `Int16` | |
| `INT` or `INTEGER` | `Int32` | |
| `BIGINT` | `Int64` | |
| `TINYINT UNSIGNED` | `UInt8` | |
| `SMALLINT UNSIGNED` | `UInt16` | |
| `INT UNSIGNED` or `INTEGER UNSIGNED` | `UInt32` | |
| `BIGINT UNSIGNED` | `UInt64` | |
| `FLOAT` | `Float32` | |
| `REAL` | `Float32` | |
| `DOUBLE` | `Float64` | |
| `DECIMAL(precision,scale)` | `Decimal128(precision,scale)` | Decimal support is currently experimental ([#3523](https://github.com/apache/arrow-datafusion/issues/3523)) |
| SQL DataType | Arrow DataType | Notes |
| ------------------------------------ | :----------------------------- | ----------------------------------------------------------------------------------------------------------- |
| `TINYINT` | `Int8` | |
| `SMALLINT` | `Int16` | |
| `INT` or `INTEGER` | `Int32` | |
| `BIGINT` | `Int64` | |
| `TINYINT UNSIGNED` | `UInt8` | |
| `SMALLINT UNSIGNED` | `UInt16` | |
| `INT UNSIGNED` or `INTEGER UNSIGNED` | `UInt32` | |
| `BIGINT UNSIGNED` | `UInt64` | |
| `FLOAT` | `Float32` | |
| `REAL` | `Float32` | |
| `DOUBLE` | `Float64` | |
| `DECIMAL(precision, scale)` | `Decimal128(precision, scale)` | Decimal support is currently experimental ([#3523](https://github.com/apache/arrow-datafusion/issues/3523)) |

## Date/Time Types

Expand Down
34 changes: 30 additions & 4 deletions docs/source/user-guide/sql/ddl.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,32 @@

# DDL

## CREATE DATABASE

Creates a catalog with the specified name.

<pre>
CREATE DATABASE [ IF NOT EXISTS ] <i><b>catalog</b></i>
</pre>

```sql
-- create catalog cat
CREATE DATABASE cat;
```

## CREATE SCHEMA

Creates a schema under the specified catalog, or under the default DataFusion catalog if none is specified.

<pre>
CREATE SCHEMA [ IF NOT EXISTS ] [ <i><b>catalog.</b></i> ] <b><i>schema_name</i></b>
</pre>

```sql
-- create schema emu under catalog cat
CREATE SCHEMA cat.emu;
```
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't know this worked ❤️ but I tried it out

8 rows in set. Query took 0.014 seconds.
❯ create table cat.emu.dog(x int);
0 rows in set. Query took 0.000 seconds.
❯ show tables;
+---------------+--------------------+-------------+------------+
| table_catalog | table_schema       | table_name  | table_type |
+---------------+--------------------+-------------+------------+
| cat           | emu                | dog         | BASE TABLE |
| cat           | information_schema | tables      | VIEW       |
| cat           | information_schema | views       | VIEW       |
| cat           | information_schema | columns     | VIEW       |
| cat           | information_schema | df_settings | VIEW       |
| datafusion    | information_schema | tables      | VIEW       |
| datafusion    | information_schema | views       | VIEW       |
| datafusion    | information_schema | columns     | VIEW       |
| datafusion    | information_schema | df_settings | VIEW       |
+---------------+--------------------+-------------+------------+
9 rows in set. Query took 0.001 seconds.


## CREATE EXTERNAL TABLE

Parquet data sources can be registered by executing a `CREATE EXTERNAL TABLE` SQL statement. It is not necessary
Expand Down Expand Up @@ -67,7 +93,7 @@ When creating an output from a data source that is already ordered by an express
the data using the `WITH ORDER` clause. This applies even if the expression used for sorting is complex,
allowing for greater flexibility.

Here's an example of how to use `WITH ORDER` query
Here's an example of how to use the `WITH ORDER` clause.

```sql
CREATE EXTERNAL TABLE test (
Expand All @@ -91,14 +117,14 @@ WITH ORDER (c2 ASC, c5 + c8 DESC NULLS FIRST)
LOCATION '/path/to/aggregate_test_100.csv';
```

where `WITH ORDER` clause specifies the sort order:
Where the `WITH ORDER` clause specifies the sort order:

```sql
WITH ORDER (sort_expression1 [ASC | DESC] [NULLS { FIRST | LAST }]
[, sort_expression2 [ASC | DESC] [NULLS { FIRST | LAST }] ...])
```

### Cautions When Using the WITH ORDER Clause
### Cautions when using the WITH ORDER Clause

- It's important to understand that using the `WITH ORDER` clause in the `CREATE EXTERNAL TABLE` statement only specifies the order in which the data should be read from the external file. If the data in the file is not already sorted according to the specified order, then the results may not be correct.

Expand Down Expand Up @@ -153,7 +179,7 @@ DROP TABLE IF EXISTS nonexistent_table;
A view is a virtual table based on the result of a SQL query. It can be created from an existing table or a list of values.

<pre>
CREATE VIEW <i><b>view_name</b></i> AS statement;
CREATE [ OR REPLACE ] VIEW <i><b>view_name</b></i> AS statement;
</pre>

```sql
Expand Down
4 changes: 2 additions & 2 deletions docs/source/user-guide/sql/explain.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ EXPLAIN [ANALYZE] [VERBOSE] statement
## EXPLAIN

Shows the execution plan of a statement.
If you need more details output, try to use `EXPLAIN VERBOSE`.
If you need more detailed output, use `EXPLAIN VERBOSE`.

```sql
EXPLAIN SELECT SUM(x) FROM table GROUP BY b;
Expand All @@ -52,7 +52,7 @@ EXPLAIN SELECT SUM(x) FROM table GROUP BY b;
## EXPLAIN ANALYZE

Shows the execution plan and metrics of a statement.
If you need more information output, try to use `EXPLAIN ANALYZE VERBOSE`.
If you need more detailed output, use `EXPLAIN ANALYZE VERBOSE`.

```sql
EXPLAIN ANALYZE SELECT SUM(x) FROM table GROUP BY b;
Expand Down
Loading