diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 40ae75cd7f80..9ebb2a490e8d 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -22,16 +22,15 @@ use arrow::{ util::display::array_value_to_string, }; -use datafusion::datasource::TableProvider; use datafusion::error::Result; use datafusion::logical_expr::{Aggregate, LogicalPlan, TableScan}; +use datafusion::physical_plan::collect; use datafusion::physical_plan::metrics::MetricValue; use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::ExecutionPlanVisitor; use datafusion::prelude::*; use datafusion::test_util; use datafusion::{assert_batches_eq, assert_batches_sorted_eq}; -use datafusion::{datasource::MemTable, physical_plan::collect}; use datafusion::{execution::context::SessionContext, physical_plan::displayable}; use datafusion_common::{assert_contains, assert_not_contains}; use object_store::path::Path; @@ -322,21 +321,6 @@ async fn register_alltypes_parquet(ctx: &SessionContext) { .unwrap(); } -/// Return a new table provider that has a single Int32 column with -/// values between `seq_start` and `seq_end` -pub fn table_with_sequence( - seq_start: i32, - seq_end: i32, -) -> Result> { - let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); - let arr = Arc::new(Int32Array::from((seq_start..=seq_end).collect::>())); - let partitions = vec![vec![RecordBatch::try_new( - schema.clone(), - vec![arr as ArrayRef], - )?]]; - Ok(Arc::new(MemTable::try_new(schema, partitions)?)) -} - pub struct ExplainNormalizer { replacements: Vec<(String, String)>, } diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 71369c73008c..667d3eeab31e 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -20,459 +20,81 @@ use datafusion_common::ScalarValue; use tempfile::TempDir; #[tokio::test] -async fn query_get_indexed_field() -> 
Result<()> { - let ctx = SessionContext::new(); - let schema = Arc::new(Schema::new(vec![Field::new_list( - "some_list", - Field::new("item", DataType::Int64, true), - false, - )])); - let builder = PrimitiveBuilder::::with_capacity(3); - let mut lb = ListBuilder::new(builder); - for int_vec in [[0, 1, 2], [4, 5, 6], [7, 8, 9]] { - let builder = lb.values(); - for int in int_vec { - builder.append_value(int); - } - lb.append(true); - } - - let data = RecordBatch::try_new(schema.clone(), vec![Arc::new(lb.finish())])?; - - ctx.register_batch("ints", data)?; - - // Original column is micros, convert to millis and check timestamp - let sql = "SELECT some_list[1] as i0 FROM ints LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - #[rustfmt::skip] - let expected = ["+----+", - "| i0 |", - "+----+", - "| 0 |", - "| 4 |", - "| 7 |", - "+----+"]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn query_nested_get_indexed_field() -> Result<()> { - let ctx = SessionContext::new(); - let nested_dt = DataType::List(Arc::new(Field::new("item", DataType::Int64, true))); - // Nested schema of { "some_list": [[i64]] } - let schema = Arc::new(Schema::new(vec![Field::new( - "some_list", - DataType::List(Arc::new(Field::new("item", nested_dt.clone(), true))), - false, - )])); - - let builder = PrimitiveBuilder::::with_capacity(3); - let nested_lb = ListBuilder::new(builder); - let mut lb = ListBuilder::new(nested_lb); - for int_vec_vec in [ - [[0, 1], [2, 3], [3, 4]], - [[5, 6], [7, 8], [9, 10]], - [[11, 12], [13, 14], [15, 16]], - ] { - let nested_builder = lb.values(); - for int_vec in int_vec_vec { - let builder = nested_builder.values(); - for int in int_vec { - builder.append_value(int); - } - nested_builder.append(true); - } - lb.append(true); - } - - let data = RecordBatch::try_new(schema.clone(), vec![Arc::new(lb.finish())])?; - - ctx.register_batch("ints", data)?; - - // Original column is micros, convert to millis and check timestamp 
- let sql = "SELECT some_list[1] as i0 FROM ints LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+----------+", - "| i0 |", - "+----------+", - "| [0, 1] |", - "| [5, 6] |", - "| [11, 12] |", - "+----------+", - ]; - assert_batches_eq!(expected, &actual); - let sql = "SELECT some_list[1][1] as i0 FROM ints LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - #[rustfmt::skip] - let expected = ["+----+", - "| i0 |", - "+----+", - "| 0 |", - "| 5 |", - "| 11 |", - "+----+"]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn query_nested_get_indexed_field_on_struct() -> Result<()> { - let ctx = SessionContext::new(); - let nested_dt = DataType::List(Arc::new(Field::new("item", DataType::Int64, true))); - // Nested schema of { "some_struct": { "bar": [i64] } } - let struct_fields = vec![Field::new("bar", nested_dt.clone(), true)]; - let schema = Arc::new(Schema::new(vec![Field::new( - "some_struct", - DataType::Struct(struct_fields.clone().into()), - false, - )])); - - let builder = PrimitiveBuilder::::with_capacity(3); - let nested_lb = ListBuilder::new(builder); - let mut sb = StructBuilder::new(struct_fields, vec![Box::new(nested_lb)]); - for int_vec in [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]] { - let lb = sb.field_builder::>(0).unwrap(); - for int in int_vec { - lb.values().append_value(int); - } - lb.append(true); - sb.append(true); - } - let s = sb.finish(); - let data = RecordBatch::try_new(schema.clone(), vec![Arc::new(s)])?; - - ctx.register_batch("structs", data)?; - - // Original column is micros, convert to millis and check timestamp - let sql = "SELECT some_struct['bar'] as l0 FROM structs LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+----------------+", - "| l0 |", - "+----------------+", - "| [0, 1, 2, 3] |", - "| [4, 5, 6, 7] |", - "| [8, 9, 10, 11] |", - "+----------------+", - ]; - assert_batches_eq!(expected, &actual); - - // Access 
to field of struct by CompoundIdentifier - let sql = "SELECT some_struct.bar as l0 FROM structs LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+----------------+", - "| l0 |", - "+----------------+", - "| [0, 1, 2, 3] |", - "| [4, 5, 6, 7] |", - "| [8, 9, 10, 11] |", - "+----------------+", - ]; - assert_batches_eq!(expected, &actual); - - let sql = "SELECT some_struct['bar'][1] as i0 FROM structs LIMIT 3"; - let actual = execute_to_batches(&ctx, sql).await; - #[rustfmt::skip] - let expected = ["+----+", - "| i0 |", - "+----+", - "| 0 |", - "| 4 |", - "| 8 |", - "+----+"]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn query_on_string_dictionary() -> Result<()> { - // Test to ensure DataFusion can operate on dictionary types - // Use StringDictionary (32 bit indexes = keys) - let d1: DictionaryArray = - vec![Some("one"), None, Some("three")].into_iter().collect(); - - let d2: DictionaryArray = vec![Some("blarg"), None, Some("three")] - .into_iter() - .collect(); - - let d3: StringArray = vec![Some("XYZ"), None, Some("three")].into_iter().collect(); - - let batch = RecordBatch::try_from_iter(vec![ - ("d1", Arc::new(d1) as ArrayRef), - ("d2", Arc::new(d2) as ArrayRef), - ("d3", Arc::new(d3) as ArrayRef), - ]) - .unwrap(); - - let ctx = SessionContext::new(); - ctx.register_batch("test", batch)?; - - // Basic SELECT - let sql = "SELECT d1 FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| one |", - "| |", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // basic filtering - let sql = "SELECT d1 FROM test WHERE d1 IS NOT NULL"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| one |", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // comparison with constant - let sql = "SELECT d1 FROM test 
WHERE d1 = 'three'"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // comparison with another dictionary column - let sql = "SELECT d1 FROM test WHERE d1 = d2"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // order comparison with another dictionary column - let sql = "SELECT d1 FROM test WHERE d1 <= d2"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // comparison with a non dictionary column - let sql = "SELECT d1 FROM test WHERE d1 = d3"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // filtering with constant - let sql = "SELECT d1 FROM test WHERE d1 = 'three'"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+", - "| d1 |", - "+-------+", - "| three |", - "+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // Expression evaluation - let sql = "SELECT concat(d1, '-foo') FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+------------------------------+", - "| concat(test.d1,Utf8(\"-foo\")) |", - "+------------------------------+", - "| one-foo |", - "| -foo |", - "| three-foo |", - "+------------------------------+", - ]; - assert_batches_eq!(expected, &actual); - - // Expression evaluation with two dictionaries - let sql = "SELECT concat(d1, d2) FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------------------------+", - "| concat(test.d1,test.d2) |", - 
"+-------------------------+", - "| oneblarg |", - "| |", - "| threethree |", - "+-------------------------+", - ]; - assert_batches_eq!(expected, &actual); - - // aggregation - let sql = "SELECT COUNT(d1) FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+----------------+", - "| COUNT(test.d1) |", - "+----------------+", - "| 2 |", - "+----------------+", - ]; - assert_batches_eq!(expected, &actual); - - // aggregation min - let sql = "SELECT MIN(d1) FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+--------------+", - "| MIN(test.d1) |", - "+--------------+", - "| one |", - "+--------------+", - ]; - assert_batches_eq!(expected, &actual); - - // aggregation max - let sql = "SELECT MAX(d1) FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+--------------+", - "| MAX(test.d1) |", - "+--------------+", - "| three |", - "+--------------+", - ]; - assert_batches_eq!(expected, &actual); - - // grouping - let sql = "SELECT d1, COUNT(*) FROM test group by d1"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+----------+", - "| d1 | COUNT(*) |", - "+-------+----------+", - "| | 1 |", - "| one | 1 |", - "| three | 1 |", - "+-------+----------+", - ]; - assert_batches_sorted_eq!(expected, &actual); +async fn test_list_query_parameters() -> Result<()> { + let tmp_dir = TempDir::new()?; + let partition_count = 4; + let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?; - // window functions - let sql = "SELECT d1, row_number() OVER (partition by d1) as rn1 FROM test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+-----+", - "| d1 | rn1 |", - "+-------+-----+", - "| | 1 |", - "| one | 1 |", - "| three | 1 |", - "+-------+-----+", + let results = ctx + .sql("SELECT * FROM test WHERE c1 = $1") + .await? + .with_param_values(vec![ScalarValue::from(3i32)])? 
+ .collect() + .await?; + let expected = vec![ + "+----+----+-------+", + "| c1 | c2 | c3 |", + "+----+----+-------+", + "| 3 | 1 | false |", + "| 3 | 10 | true |", + "| 3 | 2 | true |", + "| 3 | 3 | false |", + "| 3 | 4 | true |", + "| 3 | 5 | false |", + "| 3 | 6 | true |", + "| 3 | 7 | false |", + "| 3 | 8 | true |", + "| 3 | 9 | false |", + "+----+----+-------+", ]; - assert_batches_sorted_eq!(expected, &actual); - + assert_batches_sorted_eq!(expected, &results); Ok(()) } #[tokio::test] -async fn sort_on_window_null_string() -> Result<()> { - let d1: DictionaryArray = - vec![Some("one"), None, Some("three")].into_iter().collect(); - let d2: StringArray = vec![Some("ONE"), None, Some("THREE")].into_iter().collect(); - let d3: LargeStringArray = - vec![Some("One"), None, Some("Three")].into_iter().collect(); - - let batch = RecordBatch::try_from_iter(vec![ - ("d1", Arc::new(d1) as ArrayRef), - ("d2", Arc::new(d2) as ArrayRef), - ("d3", Arc::new(d3) as ArrayRef), - ]) - .unwrap(); - - let ctx = - SessionContext::new_with_config(SessionConfig::new().with_target_partitions(1)); - ctx.register_batch("test", batch)?; - - let sql = - "SELECT d1, row_number() OVER (partition by d1) as rn1 FROM test order by d1 asc"; - - let actual = execute_to_batches(&ctx, sql).await; - // NULLS LAST - let expected = [ - "+-------+-----+", - "| d1 | rn1 |", - "+-------+-----+", - "| one | 1 |", - "| three | 1 |", - "| | 1 |", - "+-------+-----+", - ]; - assert_batches_eq!(expected, &actual); - - let sql = - "SELECT d2, row_number() OVER (partition by d2) as rn1 FROM test ORDER BY d2 asc"; - let actual = execute_to_batches(&ctx, sql).await; - // NULLS LAST - let expected = [ - "+-------+-----+", - "| d2 | rn1 |", - "+-------+-----+", - "| ONE | 1 |", - "| THREE | 1 |", - "| | 1 |", - "+-------+-----+", - ]; - assert_batches_eq!(expected, &actual); - - let sql = - "SELECT d2, row_number() OVER (partition by d2 order by d2 desc) as rn1 FROM test ORDER BY d2 desc"; +async fn 
test_named_query_parameters() -> Result<()> { + let tmp_dir = TempDir::new()?; + let partition_count = 4; + let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?; - let actual = execute_to_batches(&ctx, sql).await; - // NULLS FIRST - let expected = [ - "+-------+-----+", - "| d2 | rn1 |", - "+-------+-----+", - "| | 1 |", - "| THREE | 1 |", - "| ONE | 1 |", - "+-------+-----+", + // sql to statement then to logical plan with parameters + // c1 defined as UINT32, c2 defined as UInt64 + let results = ctx + .sql("SELECT c1, c2 FROM test WHERE c1 > $coo AND c1 < $foo") + .await? + .with_param_values(vec![ + ("foo", ScalarValue::UInt32(Some(3))), + ("coo", ScalarValue::UInt32(Some(0))), + ])? + .collect() + .await?; + let expected = vec![ + "+----+----+", + "| c1 | c2 |", + "+----+----+", + "| 1 | 1 |", + "| 1 | 2 |", + "| 1 | 3 |", + "| 1 | 4 |", + "| 1 | 5 |", + "| 1 | 6 |", + "| 1 | 7 |", + "| 1 | 8 |", + "| 1 | 9 |", + "| 1 | 10 |", + "| 2 | 1 |", + "| 2 | 2 |", + "| 2 | 3 |", + "| 2 | 4 |", + "| 2 | 5 |", + "| 2 | 6 |", + "| 2 | 7 |", + "| 2 | 8 |", + "| 2 | 9 |", + "| 2 | 10 |", + "+----+----+", ]; - assert_batches_eq!(expected, &actual); - - // FIXME sort on LargeUtf8 String has bug. 
- // let sql = - // "SELECT d3, row_number() OVER (partition by d3) as rn1 FROM test"; - // let actual = execute_to_batches(&ctx, sql).await; - // let expected = vec![ - // "+-------+-----+", - // "| d3 | rn1 |", - // "+-------+-----+", - // "| | 1 |", - // "| One | 1 |", - // "| Three | 1 |", - // "+-------+-----+", - // ]; - // assert_batches_eq!(expected, &actual); - + assert_batches_sorted_eq!(expected, &results); Ok(()) } @@ -576,85 +198,6 @@ async fn prepared_statement_invalid_types() -> Result<()> { Ok(()) } -#[tokio::test] -async fn test_list_query_parameters() -> Result<()> { - let tmp_dir = TempDir::new()?; - let partition_count = 4; - let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?; - - let results = ctx - .sql("SELECT * FROM test WHERE c1 = $1") - .await? - .with_param_values(vec![ScalarValue::from(3i32)])? - .collect() - .await?; - let expected = vec![ - "+----+----+-------+", - "| c1 | c2 | c3 |", - "+----+----+-------+", - "| 3 | 1 | false |", - "| 3 | 10 | true |", - "| 3 | 2 | true |", - "| 3 | 3 | false |", - "| 3 | 4 | true |", - "| 3 | 5 | false |", - "| 3 | 6 | true |", - "| 3 | 7 | false |", - "| 3 | 8 | true |", - "| 3 | 9 | false |", - "+----+----+-------+", - ]; - assert_batches_sorted_eq!(expected, &results); - Ok(()) -} - -#[tokio::test] -async fn test_named_query_parameters() -> Result<()> { - let tmp_dir = TempDir::new()?; - let partition_count = 4; - let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?; - - // sql to statement then to logical plan with parameters - // c1 defined as UINT32, c2 defined as UInt64 - let results = ctx - .sql("SELECT c1, c2 FROM test WHERE c1 > $coo AND c1 < $foo") - .await? - .with_param_values(vec![ - ("foo", ScalarValue::UInt32(Some(3))), - ("coo", ScalarValue::UInt32(Some(0))), - ])? 
- .collect() - .await?; - let expected = vec![ - "+----+----+", - "| c1 | c2 |", - "+----+----+", - "| 1 | 1 |", - "| 1 | 2 |", - "| 1 | 3 |", - "| 1 | 4 |", - "| 1 | 5 |", - "| 1 | 6 |", - "| 1 | 7 |", - "| 1 | 8 |", - "| 1 | 9 |", - "| 1 | 10 |", - "| 2 | 1 |", - "| 2 | 2 |", - "| 2 | 3 |", - "| 2 | 4 |", - "| 2 | 5 |", - "| 2 | 6 |", - "| 2 | 7 |", - "| 2 | 8 |", - "| 2 | 9 |", - "| 2 | 10 |", - "+----+----+", - ]; - assert_batches_sorted_eq!(expected, &results); - Ok(()) -} - #[tokio::test] async fn test_parameter_type_coercion() -> Result<()> { let ctx = SessionContext::new(); @@ -708,93 +251,3 @@ async fn test_parameter_invalid_types() -> Result<()> { ); Ok(()) } - -#[tokio::test] -async fn parallel_query_with_filter() -> Result<()> { - let tmp_dir = TempDir::new()?; - let partition_count = 4; - let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?; - - let dataframe = ctx - .sql("SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3") - .await?; - let results = dataframe.collect().await.unwrap(); - let expected = vec![ - "+----+----+", - "| c1 | c2 |", - "+----+----+", - "| 1 | 1 |", - "| 1 | 10 |", - "| 1 | 2 |", - "| 1 | 3 |", - "| 1 | 4 |", - "| 1 | 5 |", - "| 1 | 6 |", - "| 1 | 7 |", - "| 1 | 8 |", - "| 1 | 9 |", - "| 2 | 1 |", - "| 2 | 10 |", - "| 2 | 2 |", - "| 2 | 3 |", - "| 2 | 4 |", - "| 2 | 5 |", - "| 2 | 6 |", - "| 2 | 7 |", - "| 2 | 8 |", - "| 2 | 9 |", - "+----+----+", - ]; - assert_batches_sorted_eq!(expected, &results); - - Ok(()) -} - -#[tokio::test] -async fn boolean_literal() -> Result<()> { - let results = - execute_with_partition("SELECT c1, c3 FROM test WHERE c1 > 2 AND c3 = true", 4) - .await?; - - let expected = [ - "+----+------+", - "| c1 | c3 |", - "+----+------+", - "| 3 | true |", - "| 3 | true |", - "| 3 | true |", - "| 3 | true |", - "| 3 | true |", - "+----+------+", - ]; - assert_batches_sorted_eq!(expected, &results); - - Ok(()) -} - -#[tokio::test] -async fn unprojected_filter() { - let config = 
SessionConfig::new(); - let ctx = SessionContext::new_with_config(config); - let df = ctx.read_table(table_with_sequence(1, 3).unwrap()).unwrap(); - - let df = df - .filter(col("i").gt(lit(2))) - .unwrap() - .select(vec![col("i") + col("i")]) - .unwrap(); - - let plan = df.clone().into_optimized_plan().unwrap(); - println!("{}", plan.display_indent()); - - let results = df.collect().await.unwrap(); - - let expected = [ - "+-----------------------+", - "| ?table?.i + ?table?.i |", - "+-----------------------+", - "| 6 |", - "+-----------------------+", - ]; - assert_batches_sorted_eq!(expected, &results); -} diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index faa5370c70ef..5216b14cb2d2 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -46,11 +46,294 @@ STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv' - ########## ## SELECT Tests ########## +########## +## window_null_string_table_test +########## + +statement ok +CREATE TABLE window_null_string_value_prepare(x string, y string, z string) +AS VALUES +('one', 'ONE', 'One'), +(NULL, NULL, NULL), +('three', 'THREE', 'Three'); + +statement ok +CREATE TABLE window_null_string_table +AS SELECT arrow_cast(x, 'Dictionary(Int32, Utf8)') as d1, +y as d2, +arrow_cast(z, 'LargeUtf8') as d3 FROM window_null_string_value_prepare; + +query ?I +SELECT d1, row_number() OVER (partition by d1) as rn1 FROM window_null_string_table order by d1 asc; +---- +one 1 +three 1 +NULL 1 + +query TI +SELECT d2, row_number() OVER (partition by d2) as rn1 FROM window_null_string_table ORDER BY d2 asc; +---- +ONE 1 +THREE 1 +NULL 1 + +query TI +SELECT d2, row_number() OVER (partition by d2 order by d2 desc) as rn1 FROM window_null_string_table ORDER BY d2 desc +---- +NULL 1 +THREE 1 +ONE 1 + +# Test large string as well +query TI rowsort +SELECT d3, row_number() OVER (partition by d3) as rn1 FROM 
window_null_string_table; +---- +NULL 1 +One 1 +Three 1 + + +statement ok +CREATE TABLE test ( + c1 BIGINT NOT NULL, + c2 BIGINT NOT NULL, + c3 BOOLEAN NOT NULL, +) AS VALUES (0, 1, false), +(0, 10, true), +(0, 2, true), +(0, 3, false), +(0, 4, true), +(0, 5, false), +(0, 6, true), +(0, 7, false), +(0, 8, true), +(0, 9, false), +(1, 1, false), +(1, 10, true), +(1, 2, true), +(1, 3, false), +(1, 4, true), +(1, 5, false), +(1, 6, true), +(1, 7, false), +(1, 8, true), +(1, 9, false), +(2, 1, false), +(2, 10, true), +(2, 2, true), +(2, 3, false), +(2, 4, true), +(2, 5, false), +(2, 6, true), +(2, 7, false), +(2, 8, true), +(2, 9, false), +(3, 1, false), +(3, 10, true), +(3, 2, true), +(3, 3, false), +(3, 4, true), +(3, 5, false), +(3, 6, true), +(3, 7, false), +(3, 8, true), +(3, 9, false); + + +# parallel_query_with_filter +query II +SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3; +---- +1 1 +1 10 +1 2 +1 3 +1 4 +1 5 +1 6 +1 7 +1 8 +1 9 +2 1 +2 10 +2 2 +2 3 +2 4 +2 5 +2 6 +2 7 +2 8 +2 9 + +###### +# Boolean literal +###### +query IB +SELECT c1, c3 FROM test WHERE c1 > 2 AND c3 = true; +---- +3 true +3 true +3 true +3 true +3 true + +statement ok +drop table test; + +###### +# struct test +###### +# Prepare the table with struct values for testing +statement ok +CREATE TABLE struct_value +AS VALUES +(make_array(0, 1, 2, 3)), +(make_array(4, 5, 6, 7)), +(make_array(8, 9, 10, 11)); + +statement ok +CREATE TABLE nested_get_indexed_field_on_struct_table +AS SELECT struct(column1) as some_struct from struct_value; + +# Access to field of struct by bracket (indexed field) syntax +query ? +SELECT some_struct['c0'] FROM nested_get_indexed_field_on_struct_table LIMIT 3; +---- +[0, 1, 2, 3] +[4, 5, 6, 7] +[8, 9, 10, 11] + +# Access to field of struct by CompoundIdentifier +query ? 
+SELECT some_struct.c0 as l0 FROM nested_get_indexed_field_on_struct_table LIMIT 3; +---- +[0, 1, 2, 3] +[4, 5, 6, 7] +[8, 9, 10, 11] + +query I +SELECT some_struct['c0'][1] as i0 FROM nested_get_indexed_field_on_struct_table LIMIT 3; +---- +0 +4 +8 + +# Basic SELECT +#### +# dictionary_test +#### + +# Prepare the table with dictionary values for testing +statement ok +CREATE TABLE value(x string, y string, z string) +AS VALUES +('one', 'blarg', 'XYZ'), +(NULL, NULL, NULL), +('three', 'three', 'three'); + +statement ok +CREATE TABLE string_dictionary_table +AS SELECT arrow_cast(x, 'Dictionary(Int32, Utf8)') as d1, +arrow_cast(y, 'Dictionary(Int32, Utf8)') as d2, +z as d3 FROM value; + +query ? +SELECT d1 FROM string_dictionary_table; +---- +one +NULL +three + +# basic filtering +query ? +SELECT d1 FROM string_dictionary_table WHERE d1 IS NOT NULL; +---- +one +three + +# comparison with constant +query ? +SELECT d1 FROM string_dictionary_table WHERE d1 = 'three'; +---- +three + +# comparison with another dictionary column +query ? +SELECT d1 FROM string_dictionary_table WHERE d1 = d2; +---- +three + +# order comparison with another dictionary column +query ? +SELECT d1 FROM string_dictionary_table WHERE d1 <= d2; +---- +three + +# comparison with a non dictionary column +query ? +SELECT d1 FROM string_dictionary_table WHERE d1 = d3; +---- +three + +# filtering with constant +query ? 
+SELECT d1 FROM string_dictionary_table WHERE d1 = 'three'; +---- +three + +# Expression evaluation +query T +SELECT concat(d1, '-foo') FROM string_dictionary_table; +---- +one-foo +-foo +three-foo + +# Expression evaluation with two dictionaries +query T +SELECT concat(d1, d2) FROM string_dictionary_table; +---- +oneblarg +(empty) +threethree + +# aggregation +query I +SELECT COUNT(d1) FROM string_dictionary_table; +---- +2 + +# aggregation min +query T +SELECT MIN(d1) FROM string_dictionary_table; +---- +one + +# aggregation max +query T +SELECT MAX(d1) FROM string_dictionary_table; +---- +three + +# grouping +query ?I +SELECT d1, COUNT(*) FROM string_dictionary_table group by d1 order by d1; +---- +one 1 +three 1 +NULL 1 + +# window functions +query ?I +SELECT d1, row_number() OVER (partition by d1) as rn1 FROM string_dictionary_table order by d1; +---- +one 1 +three 1 +NULL 1 # select_values_list statement error DataFusion error: SQL error: ParserError\("Expected \(, found: EOF"\) @@ -1176,11 +1459,69 @@ SELECT y = 0 or 1 / y < 1, x = 0 or y = 0 or 1 / y < 1 / x from t; statement ok DROP TABLE t; +########## +## indexed_field_test +########## +statement ok +CREATE TABLE indexed_field +AS VALUES (make_array(0, 1, 2)), +(make_array(4, 5, 6)), +(make_array(7, 8, 9)) + +# query_get_indexed_field +query I +SELECT column1[1] AS i0 +FROM indexed_field LIMIT 3; +---- +0 +4 +7 + +########## +## nested_indexed_field_test +########## +statement ok +CREATE TABLE nested_indexed_field +AS VALUES (make_array([0, 1], [2, 3], [3, 4])), +(make_array([5, 6], [7, 8], [9, 10])), +(make_array([11, 12], [13, 14], [15, 16])) + +# query nested_indexed_field +query ? 
+SELECT column1[1] AS i0 +FROM nested_indexed_field LIMIT 3; +---- +[0, 1] +[5, 6] +[11, 12] + +query I +SELECT column1[1][1] AS i0 +FROM nested_indexed_field LIMIT 3; +---- +0 +5 +11 + query I SELECT CASE 1 WHEN 2 THEN 4 / 0 END; ---- NULL + +###### +# Unprojected filter +###### + +statement ok +CREATE TABLE test(i INT) AS +VALUES (1), (2), (3); + +query I +SELECT i + i FROM test WHERE i > 2; +---- +6 + query error DataFusion error: Arrow error: Parser error: Error parsing timestamp from 'I AM NOT A TIMESTAMP': error parsing date SELECT to_timestamp('I AM NOT A TIMESTAMP');