@@ -1352,14 +1352,25 @@ macro_rules! contains {
13521352 ( $FIRST_ARRAY: expr, $SECOND_ARRAY: expr, $ARRAY_TYPE: ident) => { {
13531353 let first_array = downcast_arg!( $FIRST_ARRAY, $ARRAY_TYPE) ;
13541354 let second_array = downcast_arg!( $SECOND_ARRAY, $ARRAY_TYPE) ;
1355- let mut res = true ;
13561355 for x in second_array. values( ) . iter( ) . dedup( ) {
13571356 if !first_array. values( ) . contains( x) {
1358- res = false ;
1359- break ;
1357+ return Ok ( false ) ;
13601358 }
13611359 }
1362- res
1360+ Ok ( true )
1361+ } } ;
1362+ }
1363+
/// Tests whether two arrays of the same concrete type share at least one value.
///
/// Both arguments are downcast to `$ARRAY_TYPE` via `downcast_arg!`. Values are
/// read through `.values()`, and consecutive duplicates of the second array are
/// collapsed with `dedup()` before probing the first array.
///
/// NOTE: on a match this macro performs an early `return Ok(true)` from the
/// *enclosing function*; otherwise it evaluates to `Ok(false)`. It is therefore
/// only usable inside functions whose return type accepts `Ok(bool)`.
// NOTE(review): `.values()` presumably ignores the validity bitmap, so null
// slots would be compared by their underlying placeholder value — confirm.
macro_rules! overlap {
    ($FIRST_ARRAY:expr, $SECOND_ARRAY:expr, $ARRAY_TYPE:ident) => {{
        let haystack = downcast_arg!($FIRST_ARRAY, $ARRAY_TYPE);
        let needles = downcast_arg!($SECOND_ARRAY, $ARRAY_TYPE);
        let shares_value = needles
            .values()
            .iter()
            .dedup()
            .any(|needle| haystack.values().contains(needle));
        if shares_value {
            return Ok(true);
        }
        Ok(false)
    }};
}
13651376
@@ -1374,7 +1385,7 @@ fn flatten_list_array<OffsetSize: OffsetSizeTrait>(
13741385 let ( _, offsets, values, _) = list_array. clone ( ) . into_parts ( ) ;
13751386 let arr_offsets = offsets. to_vec ( ) ;
13761387 let inner_arr = flatten_list_array :: < OffsetSize > ( values) ?;
1377- let ( field, offsets, values, nulls) = inner_arr. clone ( ) . into_parts ( ) ;
1388+ let ( field, offsets, values, nulls) = inner_arr. into_parts ( ) ;
13781389
13791390 let inner_arr_offsets = offsets. to_vec ( ) ;
13801391 let flatten_offsets: Vec < OffsetSize > = arr_offsets
@@ -1394,41 +1405,59 @@ fn flatten_list_array<OffsetSize: OffsetSizeTrait>(
13941405 }
13951406 _ => Ok ( list_array. clone ( ) ) ,
13961407 } ,
1397- _ => Err ( DataFusionError :: Internal ( format ! ( "array should be list" ) ) ) ,
1408+ _ => Err ( DataFusionError :: Internal (
1409+ "array should be list" . to_string ( ) ,
1410+ ) ) ,
13981411 }
13991412}
14001413
1401- /// Array_has_any SQL function
1402- pub fn array_has_any ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
1403- assert_eq ! ( args. len( ) , 2 ) ;
1404- let array = flatten_list_array :: < i32 > ( args[ 0 ] . clone ( ) ) ?;
1405- Ok ( Arc :: new ( array) as ArrayRef )
1406- }
1407-
1408- /// Array_has SQL function
1409- pub fn array_has ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
1410- assert_eq ! ( args. len( ) , 2 ) ;
1411- let array = flatten_list_array :: < i32 > ( args[ 0 ] . clone ( ) ) ?;
1412- Ok ( Arc :: new ( array) as ArrayRef )
1414+ fn overlap_internal ( arr : ArrayRef , sub_arr : ArrayRef ) -> Result < bool > {
1415+ match ( arr. data_type ( ) , sub_arr. data_type ( ) ) {
1416+ ( DataType :: Utf8 , DataType :: Utf8 ) => overlap ! ( arr, sub_arr, StringArray ) ,
1417+ ( DataType :: LargeUtf8 , DataType :: LargeUtf8 ) => overlap ! ( arr, sub_arr, LargeStringArray ) ,
1418+ ( DataType :: Boolean , DataType :: Boolean ) => {
1419+ let first_array = downcast_arg ! ( arr, BooleanArray ) ;
1420+ let second_array = downcast_arg ! ( sub_arr, BooleanArray ) ;
1421+ if second_array. true_count ( ) > 0 && first_array. true_count ( ) > 0 {
1422+ return Ok ( true ) ;
1423+ }
1424+ if second_array. false_count ( ) > 0 &&first_array. false_count ( ) > 0 {
1425+ return Ok ( true ) ;
1426+ }
1427+ Ok ( false )
1428+ }
1429+ ( DataType :: Float32 , DataType :: Float32 ) => overlap ! ( arr, sub_arr, Float32Array ) ,
1430+ ( DataType :: Float64 , DataType :: Float64 ) => overlap ! ( arr, sub_arr, Float64Array ) ,
1431+ ( DataType :: Int8 , DataType :: Int8 ) => overlap ! ( arr, sub_arr, Int8Array ) ,
1432+ ( DataType :: Int16 , DataType :: Int16 ) => overlap ! ( arr, sub_arr, Int16Array ) ,
1433+ ( DataType :: Int32 , DataType :: Int32 ) => overlap ! ( arr, sub_arr, Int32Array ) ,
1434+ ( DataType :: Int64 , DataType :: Int64 ) => overlap ! ( arr, sub_arr, Int64Array ) ,
1435+ ( DataType :: UInt8 , DataType :: UInt8 ) => overlap ! ( arr, sub_arr, UInt8Array ) ,
1436+ ( DataType :: UInt16 , DataType :: UInt16 ) => overlap ! ( arr, sub_arr, UInt16Array ) ,
1437+ ( DataType :: UInt32 , DataType :: UInt32 ) => overlap ! ( arr, sub_arr, UInt32Array ) ,
1438+ ( DataType :: UInt64 , DataType :: UInt64 ) => overlap ! ( arr, sub_arr, UInt64Array ) ,
1439+ ( first_array_data_type, second_array_data_type) => {
1440+ Err ( DataFusionError :: NotImplemented ( format ! (
1441+ "Array_has_all is not implemented for types '{first_array_data_type:?}' and '{second_array_data_type:?}'."
1442+ ) ) )
1443+ }
1444+ }
14131445}
14141446
1415- /// Array_has_all SQL function
1416- pub fn array_has_all ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
1417- assert_eq ! ( args. len( ) , 2 ) ;
1418- let array = flatten_list_array :: < i32 > ( args[ 0 ] . clone ( ) ) ?;
1419- // TODO: Dont need to flatten rhs array
1420- let sub_array = flatten_list_array :: < i32 > ( args[ 1 ] . clone ( ) ) ?;
1421- let mut boolean_array = Vec :: with_capacity ( array. len ( ) ) ;
1422-
1423- for ( arr, sub_arr) in array. iter ( ) . zip ( sub_array. iter ( ) ) {
1424- if let ( Some ( arr) , Some ( sub_arr) ) = ( arr, sub_arr) {
1425- let res = match ( arr. data_type ( ) , sub_arr. data_type ( ) ) {
1447+ fn contains_internal ( arr : ArrayRef , sub_arr : ArrayRef ) -> Result < bool > {
1448+ match ( arr. data_type ( ) , sub_arr. data_type ( ) ) {
14261449 ( DataType :: Utf8 , DataType :: Utf8 ) => contains ! ( arr, sub_arr, StringArray ) ,
14271450 ( DataType :: LargeUtf8 , DataType :: LargeUtf8 ) => contains ! ( arr, sub_arr, LargeStringArray ) ,
14281451 ( DataType :: Boolean , DataType :: Boolean ) => {
14291452 let first_array = downcast_arg ! ( arr, BooleanArray ) ;
14301453 let second_array = downcast_arg ! ( sub_arr, BooleanArray ) ;
1431- compute:: bool_or ( first_array) == compute:: bool_or ( second_array)
1454+ if second_array. true_count ( ) > 0 && first_array. true_count ( ) == 0 {
1455+ return Ok ( false ) ;
1456+ }
1457+ if second_array. false_count ( ) > 0 && first_array. false_count ( ) == 0 {
1458+ return Ok ( false ) ;
1459+ }
1460+ Ok ( true )
14321461 }
14331462 ( DataType :: Float32 , DataType :: Float32 ) => contains ! ( arr, sub_arr, Float32Array ) ,
14341463 ( DataType :: Float64 , DataType :: Float64 ) => contains ! ( arr, sub_arr, Float64Array ) ,
@@ -1441,11 +1470,56 @@ pub fn array_has_all(args: &[ArrayRef]) -> Result<ArrayRef> {
14411470 ( DataType :: UInt32 , DataType :: UInt32 ) => contains ! ( arr, sub_arr, UInt32Array ) ,
14421471 ( DataType :: UInt64 , DataType :: UInt64 ) => contains ! ( arr, sub_arr, UInt64Array ) ,
14431472 ( first_array_data_type, second_array_data_type) => {
1444- return Err ( DataFusionError :: NotImplemented ( format ! (
1473+ Err ( DataFusionError :: NotImplemented ( format ! (
14451474 "Array_has_all is not implemented for types '{first_array_data_type:?}' and '{second_array_data_type:?}'."
14461475 ) ) )
14471476 }
1448- } ;
1477+ }
1478+ }
1479+
1480+ /// Array_has_any SQL function
1481+ pub fn array_has_any ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
1482+ assert_eq ! ( args. len( ) , 2 ) ;
1483+ let array = flatten_list_array :: < i32 > ( args[ 0 ] . clone ( ) ) ?;
1484+ // TODO: Dont need to flatten rhs array
1485+ let sub_array = flatten_list_array :: < i32 > ( args[ 1 ] . clone ( ) ) ?;
1486+ let mut boolean_array = Vec :: with_capacity ( array. len ( ) ) ;
1487+
1488+ for ( arr, sub_arr) in array. iter ( ) . zip ( sub_array. iter ( ) ) {
1489+ if let ( Some ( arr) , Some ( sub_arr) ) = ( arr, sub_arr) {
1490+ let res = overlap_internal ( arr. clone ( ) , sub_arr. clone ( ) ) ?;
1491+ boolean_array. push ( res) ;
1492+ }
1493+ }
1494+ Ok ( Arc :: new ( BooleanArray :: from ( boolean_array) ) )
1495+ }
1496+
1497+ /// Array_has SQL function
1498+ pub fn array_has ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
1499+ assert_eq ! ( args. len( ) , 2 ) ;
1500+ let array = flatten_list_array :: < i32 > ( args[ 0 ] . clone ( ) ) ?;
1501+ let sub_array = args[ 1 ] . clone ( ) ;
1502+ let mut boolean_array = Vec :: with_capacity ( array. len ( ) ) ;
1503+
1504+ for arr in array. iter ( ) . flatten ( ) {
1505+ let res = contains_internal ( arr. clone ( ) , sub_array. clone ( ) ) ?;
1506+ boolean_array. push ( res) ;
1507+ }
1508+
1509+ Ok ( Arc :: new ( BooleanArray :: from ( boolean_array) ) )
1510+ }
1511+
1512+ /// Array_has_all SQL function
1513+ pub fn array_has_all ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
1514+ assert_eq ! ( args. len( ) , 2 ) ;
1515+ let array = flatten_list_array :: < i32 > ( args[ 0 ] . clone ( ) ) ?;
1516+ // TODO: Dont need to flatten rhs array
1517+ let sub_array = flatten_list_array :: < i32 > ( args[ 1 ] . clone ( ) ) ?;
1518+ let mut boolean_array = Vec :: with_capacity ( array. len ( ) ) ;
1519+
1520+ for ( arr, sub_arr) in array. iter ( ) . zip ( sub_array. iter ( ) ) {
1521+ if let ( Some ( arr) , Some ( sub_arr) ) = ( arr, sub_arr) {
1522+ let res = contains_internal ( arr. clone ( ) , sub_arr. clone ( ) ) ?;
14491523 boolean_array. push ( res) ;
14501524 }
14511525 }
0 commit comments