@@ -23,10 +23,10 @@ use std::sync::Arc;
23
23
use arrow_array:: builder:: BooleanBufferBuilder ;
24
24
use arrow_array:: cast:: AsArray ;
25
25
use arrow_array:: types:: {
26
- ArrowDictionaryKeyType , ArrowPrimitiveType , ByteArrayType , RunEndIndexType ,
26
+ ArrowDictionaryKeyType , ArrowPrimitiveType , ByteArrayType , ByteViewType , RunEndIndexType ,
27
27
} ;
28
28
use arrow_array:: * ;
29
- use arrow_buffer:: { bit_util, BooleanBuffer , NullBuffer , RunEndBuffer } ;
29
+ use arrow_buffer:: { bit_util, ArrowNativeType , BooleanBuffer , NullBuffer , RunEndBuffer } ;
30
30
use arrow_buffer:: { Buffer , MutableBuffer } ;
31
31
use arrow_data:: bit_iterator:: { BitIndexIterator , BitSliceIterator } ;
32
32
use arrow_data:: transform:: MutableArrayData ;
@@ -333,12 +333,18 @@ fn filter_array(values: &dyn Array, predicate: &FilterPredicate) -> Result<Array
333
333
DataType :: LargeUtf8 => {
334
334
Ok ( Arc :: new( filter_bytes( values. as_string:: <i64 >( ) , predicate) ) )
335
335
}
336
+ DataType :: Utf8View => {
337
+ Ok ( Arc :: new( filter_byte_view( values. as_string_view( ) , predicate) ) )
338
+ }
336
339
DataType :: Binary => {
337
340
Ok ( Arc :: new( filter_bytes( values. as_binary:: <i32 >( ) , predicate) ) )
338
341
}
339
342
DataType :: LargeBinary => {
340
343
Ok ( Arc :: new( filter_bytes( values. as_binary:: <i64 >( ) , predicate) ) )
341
344
}
345
+ DataType :: BinaryView => {
346
+ Ok ( Arc :: new( filter_byte_view( values. as_binary_view( ) , predicate) ) )
347
+ }
342
348
DataType :: RunEndEncoded ( _, _) => {
343
349
downcast_run_array!{
344
350
values => Ok ( Arc :: new( filter_run_end_array( values, predicate) ?) ) ,
@@ -508,12 +514,8 @@ fn filter_boolean(array: &BooleanArray, predicate: &FilterPredicate) -> BooleanA
508
514
BooleanArray :: from ( data)
509
515
}
510
516
511
- /// `filter` implementation for primitive arrays
512
- fn filter_primitive < T > ( array : & PrimitiveArray < T > , predicate : & FilterPredicate ) -> PrimitiveArray < T >
513
- where
514
- T : ArrowPrimitiveType ,
515
- {
516
- let values = array. values ( ) ;
517
+ #[ inline( never) ]
518
+ fn filter_native < T : ArrowNativeType > ( values : & [ T ] , predicate : & FilterPredicate ) -> Buffer {
517
519
assert ! ( values. len( ) >= predicate. filter. len( ) ) ;
518
520
519
521
let buffer = match & predicate. strategy {
@@ -546,9 +548,19 @@ where
546
548
IterationStrategy :: All | IterationStrategy :: None => unreachable ! ( ) ,
547
549
} ;
548
550
551
+ buffer. into ( )
552
+ }
553
+
554
+ /// `filter` implementation for primitive arrays
555
+ fn filter_primitive < T > ( array : & PrimitiveArray < T > , predicate : & FilterPredicate ) -> PrimitiveArray < T >
556
+ where
557
+ T : ArrowPrimitiveType ,
558
+ {
559
+ let values = array. values ( ) ;
560
+ let buffer = filter_native ( values, predicate) ;
549
561
let mut builder = ArrayDataBuilder :: new ( array. data_type ( ) . clone ( ) )
550
562
. len ( predicate. count )
551
- . add_buffer ( buffer. into ( ) ) ;
563
+ . add_buffer ( buffer) ;
552
564
553
565
if let Some ( ( null_count, nulls) ) = filter_null_mask ( array. nulls ( ) , predicate) {
554
566
builder = builder. null_count ( null_count) . null_bit_buffer ( Some ( nulls) ) ;
@@ -673,6 +685,25 @@ where
673
685
GenericByteArray :: from ( data)
674
686
}
675
687
688
+ /// `filter` implementation for byte view arrays.
689
+ fn filter_byte_view < T : ByteViewType > (
690
+ array : & GenericByteViewArray < T > ,
691
+ predicate : & FilterPredicate ,
692
+ ) -> GenericByteViewArray < T > {
693
+ let new_view_buffer = filter_native ( array. views ( ) , predicate) ;
694
+
695
+ let mut builder = ArrayDataBuilder :: new ( T :: DATA_TYPE )
696
+ . len ( predicate. count )
697
+ . add_buffer ( new_view_buffer)
698
+ . add_buffers ( array. data_buffers ( ) . to_vec ( ) ) ;
699
+
700
+ if let Some ( ( null_count, nulls) ) = filter_null_mask ( array. nulls ( ) , predicate) {
701
+ builder = builder. null_count ( null_count) . null_bit_buffer ( Some ( nulls) ) ;
702
+ }
703
+
704
+ GenericByteViewArray :: from ( unsafe { builder. build_unchecked ( ) } )
705
+ }
706
+
676
707
/// `filter` implementation for dictionaries
677
708
fn filter_dict < T > ( array : & DictionaryArray < T > , predicate : & FilterPredicate ) -> DictionaryArray < T >
678
709
where
@@ -888,6 +919,69 @@ mod tests {
888
919
assert ! ( d. is_null( 1 ) ) ;
889
920
}
890
921
922
+ fn _test_filter_byte_view < T > ( )
923
+ where
924
+ T : ByteViewType ,
925
+ str : AsRef < T :: Native > ,
926
+ T :: Native : PartialEq ,
927
+ {
928
+ let array = {
929
+ // ["hello", "world", null, "large payload over 12 bytes", "lulu"]
930
+ let mut builder = GenericByteViewBuilder :: < T > :: new ( ) ;
931
+ builder. append_value ( "hello" ) ;
932
+ builder. append_value ( "world" ) ;
933
+ builder. append_null ( ) ;
934
+ builder. append_value ( "large payload over 12 bytes" ) ;
935
+ builder. append_value ( "lulu" ) ;
936
+ builder. finish ( )
937
+ } ;
938
+
939
+ {
940
+ let predicate = BooleanArray :: from ( vec ! [ true , false , true , true , false ] ) ;
941
+ let actual = filter ( & array, & predicate) . unwrap ( ) ;
942
+
943
+ assert_eq ! ( actual. len( ) , 3 ) ;
944
+
945
+ let expected = {
946
+ // ["hello", null, "large payload over 12 bytes"]
947
+ let mut builder = GenericByteViewBuilder :: < T > :: new ( ) ;
948
+ builder. append_value ( "hello" ) ;
949
+ builder. append_null ( ) ;
950
+ builder. append_value ( "large payload over 12 bytes" ) ;
951
+ builder. finish ( )
952
+ } ;
953
+
954
+ assert_eq ! ( actual. as_ref( ) , & expected) ;
955
+ }
956
+
957
+ {
958
+ let predicate = BooleanArray :: from ( vec ! [ true , false , false , false , true ] ) ;
959
+ let actual = filter ( & array, & predicate) . unwrap ( ) ;
960
+
961
+ assert_eq ! ( actual. len( ) , 2 ) ;
962
+
963
+ let expected = {
964
+ // ["hello", "lulu"]
965
+ let mut builder = GenericByteViewBuilder :: < T > :: new ( ) ;
966
+ builder. append_value ( "hello" ) ;
967
+ builder. append_value ( "lulu" ) ;
968
+ builder. finish ( )
969
+ } ;
970
+
971
+ assert_eq ! ( actual. as_ref( ) , & expected) ;
972
+ }
973
+ }
974
+
975
/// Runs the shared byte view filter checks with `StringViewType`.
#[test]
fn test_filter_string_view() {
    _test_filter_byte_view::<StringViewType>();
}
979
+
980
/// Runs the shared byte view filter checks with `BinaryViewType`.
#[test]
fn test_filter_binary_view() {
    _test_filter_byte_view::<BinaryViewType>();
}
984
+
891
985
#[ test]
892
986
fn test_filter_array_slice_with_null ( ) {
893
987
let a = Int32Array :: from ( vec ! [ Some ( 5 ) , None , Some ( 7 ) , Some ( 8 ) , Some ( 9 ) ] ) . slice ( 1 , 4 ) ;
0 commit comments