Skip to content

Commit 49c91b5

Browse files
authored
Replace array_contains with SQL array functions: array_has, array_has_any, array_has_all (#6990)
* rename array_contains to array_has_all
  Signed-off-by: jayzhan211 <[email protected]>
* array_has_all with columns done
  Signed-off-by: jayzhan211 <[email protected]>
* add more tests
  Signed-off-by: jayzhan211 <[email protected]>
* prepare array has any and array has
  Signed-off-by: jayzhan211 <[email protected]>
* clippy
  Signed-off-by: jayzhan211 <[email protected]>
* add proto only
  Signed-off-by: jayzhan211 <[email protected]>
* add generated files
  Signed-off-by: jayzhan211 <[email protected]>
* basic array_has done
  Signed-off-by: jayzhan211 <[email protected]>
* add more tests
  Signed-off-by: jayzhan211 <[email protected]>
* basic array_has done
  Signed-off-by: jayzhan211 <[email protected]>
* array_has_all done
  Signed-off-by: jayzhan211 <[email protected]>
* any and all are done
  Signed-off-by: jayzhan211 <[email protected]>
* remove proto files
  Signed-off-by: jayzhan211 <[email protected]>
* remove test
  Signed-off-by: jayzhan211 <[email protected]>
* address comment
  Signed-off-by: jayzhan211 <[email protected]>
* run doc command
  Signed-off-by: jayzhan211 <[email protected]>
* rerun command
  Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
1 parent 466b6c3 commit 49c91b5

File tree

12 files changed

+578
-181
lines changed

12 files changed

+578
-181
lines changed

datafusion/core/tests/sqllogictests/test_files/array.slt

Lines changed: 183 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,60 @@ AS VALUES
8686
(NULL, NULL, NULL, NULL)
8787
;
8888

89+
statement ok
90+
CREATE TABLE array_has_table_1D
91+
AS VALUES
92+
(make_array(1, 2), 1, make_array(1,2,3), make_array(1,3), make_array(1,3,5), make_array(2,4,6,8,1,3,5)),
93+
(make_array(3, 4, 5), 2, make_array(1,2,3,4), make_array(2,5), make_array(2,4,6), make_array(1,3,5))
94+
;
95+
96+
statement ok
97+
CREATE TABLE array_has_table_1D_Float
98+
AS VALUES
99+
(make_array(1.0, 2.0), 1.0, make_array(1.0,2.0,3.0), make_array(1.0,3.0), make_array(1.11), make_array(2.22, 3.33)),
100+
(make_array(3.0, 4.0, 5.0), 2.0, make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33))
101+
;
102+
103+
statement ok
104+
CREATE TABLE array_has_table_1D_Boolean
105+
AS VALUES
106+
(make_array(true, true, true), false, make_array(true, true, false, true, false), make_array(true, false, true), make_array(false), make_array(true, false)),
107+
(make_array(false, false, false), false, make_array(true, false, true), make_array(true, true), make_array(true, true), make_array(false,false,true))
108+
;
109+
110+
statement ok
111+
CREATE TABLE array_has_table_1D_UTF8
112+
AS VALUES
113+
(make_array('a', 'bc', 'def'), 'bc', make_array('datafusion', 'rust', 'arrow'), make_array('rust', 'arrow'), make_array('rust', 'arrow', 'python'), make_array('data')),
114+
(make_array('a', 'bc', 'def'), 'defg', make_array('datafusion', 'rust', 'arrow'), make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow'))
115+
;
116+
117+
statement ok
118+
CREATE TABLE array_has_table_2D
119+
AS VALUES
120+
(make_array([1,2]), make_array(1,3), make_array([1,2,3], [4,5], [6,7]), make_array([4,5], [6,7])),
121+
(make_array([3,4], [5]), make_array(5), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10]))
122+
;
123+
124+
statement ok
125+
CREATE TABLE array_has_table_2D_float
126+
AS VALUES
127+
(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.1, 2.2], [3.3])),
128+
(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3]))
129+
;
130+
131+
statement ok
132+
CREATE TABLE array_has_table_3D
133+
AS VALUES
134+
(make_array([[1,2]]), make_array([1])),
135+
(make_array([[1,2]]), make_array([1,2])),
136+
(make_array([[1,2]]), make_array([1,2,3])),
137+
(make_array([[1], [2]]), make_array([2])),
138+
(make_array([[1], [2]]), make_array([1], [2])),
139+
(make_array([[1], [2]], [[2], [3]]), make_array([1], [2], [3])),
140+
(make_array([[1], [2]], [[2], [3]]), make_array([1], [2]))
141+
;
142+
89143
statement ok
90144
CREATE TABLE arrays_values_without_nulls
91145
AS VALUES
@@ -1164,48 +1218,129 @@ NULL 1 1
11641218
2 NULL 1
11651219
2 1 NULL
11661220

1167-
## array_contains
1221+
## array_has/array_has_all/array_has_any
1222+
1223+
query BBBBBBBBBBBB
1224+
select array_has(make_array(1,2), 1),
1225+
array_has(make_array(1,2,NULL), 1),
1226+
array_has(make_array([2,3], [3,4]), make_array(2,3)),
1227+
array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1], [2,3])),
1228+
array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([4,5], [6])),
1229+
array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1])),
1230+
array_has(make_array([[[1]]]), make_array([[1]])),
1231+
array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[2]])),
1232+
array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[1], [2]])),
1233+
list_has(make_array(1,2,3), 4),
1234+
array_contains(make_array(1,2,3), 3),
1235+
list_contains(make_array(1,2,3), 0)
1236+
;
1237+
----
1238+
true true true true true false true false true false true false
11681239

1169-
# array_contains scalar function #1
11701240
query BBB
1171-
select array_contains(make_array(1, 2, 3), make_array(1, 1, 2, 3)), array_contains([1, 2, 3], [1, 1, 2]), array_contains([1, 2, 3], [2, 1, 3, 1]);
1241+
select array_has(column1, column2),
1242+
array_has_all(column3, column4),
1243+
array_has_any(column5, column6)
1244+
from array_has_table_1D;
11721245
----
11731246
true true true
1247+
false false false
11741248

1175-
# array_contains scalar function #2
1176-
query BB
1177-
select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 3]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 4]);
1249+
query BBB
1250+
select array_has(column1, column2),
1251+
array_has_all(column3, column4),
1252+
array_has_any(column5, column6)
1253+
from array_has_table_1D_Float;
11781254
----
1179-
true true
1255+
true true false
1256+
false false true
11801257

1181-
# array_contains scalar function #3
11821258
query BBB
1183-
select array_contains(make_array(1, 2, 3), make_array(1, 2, 3, 4)), array_contains([1, 2, 3], [1, 1, 4]), array_contains([1, 2, 3], [2, 1, 3, 4]);
1259+
select array_has(column1, column2),
1260+
array_has_all(column3, column4),
1261+
array_has_any(column5, column6)
1262+
from array_has_table_1D_Boolean;
11841263
----
1185-
false false false
1264+
false true true
1265+
true true true
11861266

1187-
# array_contains scalar function #4
1188-
query BB
1189-
select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 5]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 5]);
1267+
query BBB
1268+
select array_has(column1, column2),
1269+
array_has_all(column3, column4),
1270+
array_has_any(column5, column6)
1271+
from array_has_table_1D_UTF8;
11901272
----
1191-
false false
1273+
true true false
1274+
false false true
11921275

1193-
# array_contains scalar function #5
11941276
query BB
1195-
select array_contains([true, true, false, true, false], [true, false, false]), array_contains([true, false, true], [true, true]);
1277+
select array_has(column1, column2),
1278+
array_has_all(column3, column4)
1279+
from array_has_table_2D;
1280+
----
1281+
false true
1282+
true false
1283+
1284+
query B
1285+
select array_has_all(column1, column2)
1286+
from array_has_table_2D_float;
1287+
----
1288+
true
1289+
false
1290+
1291+
query B
1292+
select array_has(column1, column2) from array_has_table_3D;
1293+
----
1294+
false
1295+
true
1296+
false
1297+
false
1298+
true
1299+
false
1300+
true
1301+
1302+
query BBBB
1303+
select array_has(column1, make_array(5, 6)),
1304+
array_has(column1, make_array(7, NULL)),
1305+
array_has(column2, 5.5),
1306+
array_has(column3, 'o')
1307+
from arrays;
1308+
----
1309+
false false false true
1310+
true false true false
1311+
true false false true
1312+
false true false false
1313+
false false false false
1314+
false false false false
1315+
1316+
query BBBBBBBBBBBBB
1317+
select array_has_all(make_array(1,2,3), make_array(1,3)),
1318+
array_has_all(make_array(1,2,3), make_array(1,4)),
1319+
array_has_all(make_array([1,2], [3,4]), make_array([1,2])),
1320+
array_has_all(make_array([1,2], [3,4]), make_array([1,3])),
1321+
array_has_all(make_array([1,2], [3,4]), make_array([1,2], [3,4], [5,6])),
1322+
array_has_all(make_array([[1,2,3]]), make_array([[1]])),
1323+
array_has_all(make_array([[1,2,3]]), make_array([[1,2,3]])),
1324+
array_has_any(make_array(1,2,3), make_array(1,10,100)),
1325+
array_has_any(make_array(1,2,3), make_array(10,100)),
1326+
array_has_any(make_array([1,2], [3,4]), make_array([1,10], [10,4])),
1327+
array_has_any(make_array([1,2], [3,4]), make_array([10,20], [3,4])),
1328+
array_has_any(make_array([[1,2,3]]), make_array([[1,2,3], [4,5,6]])),
1329+
array_has_any(make_array([[1,2,3]]), make_array([[1,2,3]], [[4,5,6]]))
1330+
;
11961331
----
1197-
true true
1332+
true false true false false false true true false false true false true
11981333

1199-
# array_contains scalar function #6
1200-
query BB
1201-
select array_contains(make_array(true, true, true), make_array(false, false)), array_contains([false, false, false], [true, true]);
1334+
query BBBB
1335+
select list_has_all(make_array(1,2,3), make_array(4,5,6)),
1336+
list_has_all(make_array(1,2,3), make_array(1,2)),
1337+
list_has_any(make_array(1,2,3), make_array(4,5,6)),
1338+
list_has_any(make_array(1,2,3), make_array(1,2,4))
1339+
;
12021340
----
1203-
false false
1204-
1341+
false true false true
12051342

12061343
### Array operators tests
1207-
1208-
12091344
## array concatenate operator
12101345

12111346
# array concatenate operator with scalars #1 (like array_concat scalar function)
@@ -1296,7 +1431,6 @@ select make_array(f0) from fixed_size_list_array
12961431
[[1, 2], [3, 4]]
12971432

12981433

1299-
13001434
### Delete tables
13011435

13021436

@@ -1312,5 +1446,29 @@ drop table arrays;
13121446
statement ok
13131447
drop table arrays_values;
13141448

1449+
statement ok
1450+
drop table arrays_values_v2;
1451+
1452+
statement ok
1453+
drop table array_has_table_1D;
1454+
1455+
statement ok
1456+
drop table array_has_table_1D_Float;
1457+
1458+
statement ok
1459+
drop table array_has_table_1D_Boolean;
1460+
1461+
statement ok
1462+
drop table array_has_table_1D_UTF8;
1463+
1464+
statement ok
1465+
drop table array_has_table_2D;
1466+
1467+
statement ok
1468+
drop table array_has_table_2D_float;
1469+
1470+
statement ok
1471+
drop table array_has_table_3D;
1472+
13151473
statement ok
13161474
drop table arrays_values_without_nulls;

datafusion/expr/src/built_in_function.rs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,12 @@ pub enum BuiltinScalarFunction {
119119
ArrayAppend,
120120
/// array_concat
121121
ArrayConcat,
122-
/// array_contains
123-
ArrayContains,
122+
/// array_has
123+
ArrayHas,
124+
/// array_has_all
125+
ArrayHasAll,
126+
/// array_has_any
127+
ArrayHasAny,
124128
/// array_dims
125129
ArrayDims,
126130
/// array_fill
@@ -330,7 +334,9 @@ impl BuiltinScalarFunction {
330334
BuiltinScalarFunction::Trunc => Volatility::Immutable,
331335
BuiltinScalarFunction::ArrayAppend => Volatility::Immutable,
332336
BuiltinScalarFunction::ArrayConcat => Volatility::Immutable,
333-
BuiltinScalarFunction::ArrayContains => Volatility::Immutable,
337+
BuiltinScalarFunction::ArrayHasAll => Volatility::Immutable,
338+
BuiltinScalarFunction::ArrayHasAny => Volatility::Immutable,
339+
BuiltinScalarFunction::ArrayHas => Volatility::Immutable,
334340
BuiltinScalarFunction::ArrayDims => Volatility::Immutable,
335341
BuiltinScalarFunction::ArrayFill => Volatility::Immutable,
336342
BuiltinScalarFunction::ArrayLength => Volatility::Immutable,
@@ -501,7 +507,9 @@ impl BuiltinScalarFunction {
501507

502508
Ok(expr_type)
503509
}
504-
BuiltinScalarFunction::ArrayContains => Ok(Boolean),
510+
BuiltinScalarFunction::ArrayHasAll
511+
| BuiltinScalarFunction::ArrayHasAny
512+
| BuiltinScalarFunction::ArrayHas => Ok(Boolean),
505513
BuiltinScalarFunction::ArrayDims => {
506514
Ok(List(Arc::new(Field::new("item", UInt64, true))))
507515
}
@@ -808,7 +816,9 @@ impl BuiltinScalarFunction {
808816
BuiltinScalarFunction::ArrayConcat => {
809817
Signature::variadic_any(self.volatility())
810818
}
811-
BuiltinScalarFunction::ArrayContains => Signature::any(2, self.volatility()),
819+
BuiltinScalarFunction::ArrayHasAll
820+
| BuiltinScalarFunction::ArrayHasAny
821+
| BuiltinScalarFunction::ArrayHas => Signature::any(2, self.volatility()),
812822
BuiltinScalarFunction::ArrayDims => Signature::any(1, self.volatility()),
813823
BuiltinScalarFunction::ArrayFill => Signature::any(2, self.volatility()),
814824
BuiltinScalarFunction::ArrayLength => {
@@ -1278,8 +1288,12 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
12781288
BuiltinScalarFunction::ArrayConcat => {
12791289
&["array_concat", "array_cat", "list_concat", "list_cat"]
12801290
}
1281-
BuiltinScalarFunction::ArrayContains => &["array_contains"],
12821291
BuiltinScalarFunction::ArrayDims => &["array_dims", "list_dims"],
1292+
BuiltinScalarFunction::ArrayHasAll => &["array_has_all", "list_has_all"],
1293+
BuiltinScalarFunction::ArrayHasAny => &["array_has_any", "list_has_any"],
1294+
BuiltinScalarFunction::ArrayHas => {
1295+
&["array_has", "list_has", "array_contains", "list_contains"]
1296+
}
12831297
BuiltinScalarFunction::ArrayFill => &["array_fill"],
12841298
BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"],
12851299
BuiltinScalarFunction::ArrayNdims => &["array_ndims", "list_ndims"],

datafusion/expr/src/expr_fn.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -536,10 +536,22 @@ scalar_expr!(
536536
);
537537
nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays.");
538538
scalar_expr!(
539-
ArrayContains,
540-
array_contains,
539+
ArrayHas,
540+
array_has,
541541
first_array second_array,
542-
"returns true, if each element of the second array appearing in the first array, otherwise false."
542+
"Returns true if the element appears in the first array; otherwise, it returns false."
543+
);
544+
scalar_expr!(
545+
ArrayHasAll,
546+
array_has_all,
547+
first_array second_array,
548+
"Returns true if each element of the second array appears in the first array; otherwise, it returns false."
549+
);
550+
scalar_expr!(
551+
ArrayHasAny,
552+
array_has_any,
553+
first_array second_array,
554+
"Returns true if at least one element of the second array appears in the first array; otherwise, it returns false."
543555
);
544556
scalar_expr!(
545557
ArrayDims,

0 commit comments

Comments
 (0)