@@ -221,10 +221,14 @@ pub enum ScalarValue {
221221 UInt64 ( Option < u64 > ) ,
222222 /// utf-8 encoded string.
223223 Utf8 ( Option < String > ) ,
224+ /// utf-8 encoded string whose arrow type is `Utf8View` (string view).
225+ Utf8View ( Option < String > ) ,
224226 /// utf-8 encoded string representing a LargeString's arrow type.
225227 LargeUtf8 ( Option < String > ) ,
226228 /// binary
227229 Binary ( Option < Vec < u8 > > ) ,
230+ /// binary value whose arrow type is `BinaryView` (binary view).
231+ BinaryView ( Option < Vec < u8 > > ) ,
228232 /// fixed size binary
229233 FixedSizeBinary ( i32 , Option < Vec < u8 > > ) ,
230234 /// large binary
@@ -345,10 +349,14 @@ impl PartialEq for ScalarValue {
345349 ( UInt64 ( _) , _) => false ,
346350 ( Utf8 ( v1) , Utf8 ( v2) ) => v1. eq ( v2) ,
347351 ( Utf8 ( _) , _) => false ,
352+ ( Utf8View ( v1) , Utf8View ( v2) ) => v1. eq ( v2) ,
353+ ( Utf8View ( _) , _) => false ,
348354 ( LargeUtf8 ( v1) , LargeUtf8 ( v2) ) => v1. eq ( v2) ,
349355 ( LargeUtf8 ( _) , _) => false ,
350356 ( Binary ( v1) , Binary ( v2) ) => v1. eq ( v2) ,
351357 ( Binary ( _) , _) => false ,
358+ ( BinaryView ( v1) , BinaryView ( v2) ) => v1. eq ( v2) ,
359+ ( BinaryView ( _) , _) => false ,
352360 ( FixedSizeBinary ( _, v1) , FixedSizeBinary ( _, v2) ) => v1. eq ( v2) ,
353361 ( FixedSizeBinary ( _, _) , _) => false ,
354362 ( LargeBinary ( v1) , LargeBinary ( v2) ) => v1. eq ( v2) ,
@@ -470,8 +478,12 @@ impl PartialOrd for ScalarValue {
470478 ( Utf8 ( _) , _) => None ,
471479 ( LargeUtf8 ( v1) , LargeUtf8 ( v2) ) => v1. partial_cmp ( v2) ,
472480 ( LargeUtf8 ( _) , _) => None ,
481+ ( Utf8View ( v1) , Utf8View ( v2) ) => v1. partial_cmp ( v2) ,
482+ ( Utf8View ( _) , _) => None ,
473483 ( Binary ( v1) , Binary ( v2) ) => v1. partial_cmp ( v2) ,
474484 ( Binary ( _) , _) => None ,
485+ ( BinaryView ( v1) , BinaryView ( v2) ) => v1. partial_cmp ( v2) ,
486+ ( BinaryView ( _) , _) => None ,
475487 ( FixedSizeBinary ( _, v1) , FixedSizeBinary ( _, v2) ) => v1. partial_cmp ( v2) ,
476488 ( FixedSizeBinary ( _, _) , _) => None ,
477489 ( LargeBinary ( v1) , LargeBinary ( v2) ) => v1. partial_cmp ( v2) ,
@@ -667,11 +679,10 @@ impl std::hash::Hash for ScalarValue {
667679 UInt16 ( v) => v. hash ( state) ,
668680 UInt32 ( v) => v. hash ( state) ,
669681 UInt64 ( v) => v. hash ( state) ,
670- Utf8 ( v) => v. hash ( state) ,
671- LargeUtf8 ( v) => v. hash ( state) ,
672- Binary ( v) => v. hash ( state) ,
673- FixedSizeBinary ( _, v) => v. hash ( state) ,
674- LargeBinary ( v) => v. hash ( state) ,
682+ Utf8 ( v) | LargeUtf8 ( v) | Utf8View ( v) => v. hash ( state) ,
683+ Binary ( v) | FixedSizeBinary ( _, v) | LargeBinary ( v) | BinaryView ( v) => {
684+ v. hash ( state)
685+ }
675686 List ( arr) => {
676687 hash_nested_array ( arr. to_owned ( ) as ArrayRef , state) ;
677688 }
@@ -1107,7 +1118,9 @@ impl ScalarValue {
11071118 ScalarValue :: Float64 ( _) => DataType :: Float64 ,
11081119 ScalarValue :: Utf8 ( _) => DataType :: Utf8 ,
11091120 ScalarValue :: LargeUtf8 ( _) => DataType :: LargeUtf8 ,
1121+ ScalarValue :: Utf8View ( _) => DataType :: Utf8View ,
11101122 ScalarValue :: Binary ( _) => DataType :: Binary ,
1123+ ScalarValue :: BinaryView ( _) => DataType :: BinaryView ,
11111124 ScalarValue :: FixedSizeBinary ( sz, _) => DataType :: FixedSizeBinary ( * sz) ,
11121125 ScalarValue :: LargeBinary ( _) => DataType :: LargeBinary ,
11131126 ScalarValue :: List ( arr) => arr. data_type ( ) . to_owned ( ) ,
@@ -1310,11 +1323,13 @@ impl ScalarValue {
13101323 ScalarValue :: UInt16 ( v) => v. is_none ( ) ,
13111324 ScalarValue :: UInt32 ( v) => v. is_none ( ) ,
13121325 ScalarValue :: UInt64 ( v) => v. is_none ( ) ,
1313- ScalarValue :: Utf8 ( v) => v. is_none ( ) ,
1314- ScalarValue :: LargeUtf8 ( v) => v. is_none ( ) ,
1315- ScalarValue :: Binary ( v) => v. is_none ( ) ,
1316- ScalarValue :: FixedSizeBinary ( _, v) => v. is_none ( ) ,
1317- ScalarValue :: LargeBinary ( v) => v. is_none ( ) ,
1326+ ScalarValue :: Utf8 ( v)
1327+ | ScalarValue :: Utf8View ( v)
1328+ | ScalarValue :: LargeUtf8 ( v) => v. is_none ( ) ,
1329+ ScalarValue :: Binary ( v)
1330+ | ScalarValue :: BinaryView ( v)
1331+ | ScalarValue :: FixedSizeBinary ( _, v)
1332+ | ScalarValue :: LargeBinary ( v) => v. is_none ( ) ,
13181333 // arr.len() should be 1 for a list scalar, but we don't seem to
13191334 // enforce that anywhere, so we still check against array length.
13201335 ScalarValue :: List ( arr) => arr. len ( ) == arr. null_count ( ) ,
@@ -2002,6 +2017,12 @@ impl ScalarValue {
20022017 }
20032018 None => new_null_array ( & DataType :: Utf8 , size) ,
20042019 } ,
2020+ ScalarValue :: Utf8View ( e) => match e {
2021+ Some ( value) => {
2022+ Arc :: new ( StringViewArray :: from_iter_values ( repeat ( value) . take ( size) ) )
2023+ }
2024+ None => new_null_array ( & DataType :: Utf8View , size) ,
2025+ } ,
20052026 ScalarValue :: LargeUtf8 ( e) => match e {
20062027 Some ( value) => {
20072028 Arc :: new ( LargeStringArray :: from_iter_values ( repeat ( value) . take ( size) ) )
@@ -2018,6 +2039,16 @@ impl ScalarValue {
20182039 Arc :: new ( repeat ( None :: < & str > ) . take ( size) . collect :: < BinaryArray > ( ) )
20192040 }
20202041 } ,
2042+ ScalarValue :: BinaryView ( e) => match e {
2043+ Some ( value) => Arc :: new (
2044+ repeat ( Some ( value. as_slice ( ) ) )
2045+ . take ( size)
2046+ . collect :: < BinaryViewArray > ( ) ,
2047+ ) ,
2048+ None => {
2049+ Arc :: new ( repeat ( None :: < & str > ) . take ( size) . collect :: < BinaryViewArray > ( ) )
2050+ }
2051+ } ,
20212052 ScalarValue :: FixedSizeBinary ( s, e) => match e {
20222053 Some ( value) => Arc :: new (
20232054 FixedSizeBinaryArray :: try_from_sparse_iter_with_size (
@@ -2361,10 +2392,14 @@ impl ScalarValue {
23612392 DataType :: LargeBinary => {
23622393 typed_cast ! ( array, index, LargeBinaryArray , LargeBinary ) ?
23632394 }
2395+ DataType :: BinaryView => {
2396+ typed_cast ! ( array, index, BinaryViewArray , BinaryView ) ?
2397+ }
23642398 DataType :: Utf8 => typed_cast ! ( array, index, StringArray , Utf8 ) ?,
23652399 DataType :: LargeUtf8 => {
23662400 typed_cast ! ( array, index, LargeStringArray , LargeUtf8 ) ?
23672401 }
2402+ DataType :: Utf8View => typed_cast ! ( array, index, StringViewArray , Utf8View ) ?,
23682403 DataType :: List ( _) => {
23692404 let list_array = array. as_list :: < i32 > ( ) ;
23702405 let nested_array = list_array. value ( index) ;
@@ -2652,12 +2687,18 @@ impl ScalarValue {
26522687 ScalarValue :: Utf8 ( val) => {
26532688 eq_array_primitive ! ( array, index, StringArray , val) ?
26542689 }
2690+ ScalarValue :: Utf8View ( val) => {
2691+ eq_array_primitive ! ( array, index, StringViewArray , val) ?
2692+ }
26552693 ScalarValue :: LargeUtf8 ( val) => {
26562694 eq_array_primitive ! ( array, index, LargeStringArray , val) ?
26572695 }
26582696 ScalarValue :: Binary ( val) => {
26592697 eq_array_primitive ! ( array, index, BinaryArray , val) ?
26602698 }
2699+ ScalarValue :: BinaryView ( val) => {
2700+ eq_array_primitive ! ( array, index, BinaryViewArray , val) ?
2701+ }
26612702 ScalarValue :: FixedSizeBinary ( _, val) => {
26622703 eq_array_primitive ! ( array, index, FixedSizeBinaryArray , val) ?
26632704 }
@@ -2790,7 +2831,9 @@ impl ScalarValue {
27902831 | ScalarValue :: DurationMillisecond ( _)
27912832 | ScalarValue :: DurationMicrosecond ( _)
27922833 | ScalarValue :: DurationNanosecond ( _) => 0 ,
2793- ScalarValue :: Utf8 ( s) | ScalarValue :: LargeUtf8 ( s) => {
2834+ ScalarValue :: Utf8 ( s)
2835+ | ScalarValue :: LargeUtf8 ( s)
2836+ | ScalarValue :: Utf8View ( s) => {
27942837 s. as_ref ( ) . map ( |s| s. capacity ( ) ) . unwrap_or_default ( )
27952838 }
27962839 ScalarValue :: TimestampSecond ( _, s)
@@ -2801,7 +2844,8 @@ impl ScalarValue {
28012844 }
28022845 ScalarValue :: Binary ( b)
28032846 | ScalarValue :: FixedSizeBinary ( _, b)
2804- | ScalarValue :: LargeBinary ( b) => {
2847+ | ScalarValue :: LargeBinary ( b)
2848+ | ScalarValue :: BinaryView ( b) => {
28052849 b. as_ref ( ) . map ( |b| b. capacity ( ) ) . unwrap_or_default ( )
28062850 }
28072851 ScalarValue :: List ( arr) => arr. get_array_memory_size ( ) ,
@@ -3068,7 +3112,9 @@ impl TryFrom<&DataType> for ScalarValue {
30683112 }
30693113 DataType :: Utf8 => ScalarValue :: Utf8 ( None ) ,
30703114 DataType :: LargeUtf8 => ScalarValue :: LargeUtf8 ( None ) ,
3115+ DataType :: Utf8View => ScalarValue :: Utf8View ( None ) ,
30713116 DataType :: Binary => ScalarValue :: Binary ( None ) ,
3117+ DataType :: BinaryView => ScalarValue :: BinaryView ( None ) ,
30723118 DataType :: FixedSizeBinary ( len) => ScalarValue :: FixedSizeBinary ( * len, None ) ,
30733119 DataType :: LargeBinary => ScalarValue :: LargeBinary ( None ) ,
30743120 DataType :: Date32 => ScalarValue :: Date32 ( None ) ,
@@ -3190,11 +3236,13 @@ impl fmt::Display for ScalarValue {
31903236 ScalarValue :: TimestampMillisecond ( e, _) => format_option ! ( f, e) ?,
31913237 ScalarValue :: TimestampMicrosecond ( e, _) => format_option ! ( f, e) ?,
31923238 ScalarValue :: TimestampNanosecond ( e, _) => format_option ! ( f, e) ?,
3193- ScalarValue :: Utf8 ( e) => format_option ! ( f, e) ?,
3194- ScalarValue :: LargeUtf8 ( e) => format_option ! ( f, e) ?,
3239+ ScalarValue :: Utf8 ( e)
3240+ | ScalarValue :: LargeUtf8 ( e)
3241+ | ScalarValue :: Utf8View ( e) => format_option ! ( f, e) ?,
31953242 ScalarValue :: Binary ( e)
31963243 | ScalarValue :: FixedSizeBinary ( _, e)
3197- | ScalarValue :: LargeBinary ( e) => match e {
3244+ | ScalarValue :: LargeBinary ( e)
3245+ | ScalarValue :: BinaryView ( e) => match e {
31983246 Some ( l) => write ! (
31993247 f,
32003248 "{}" ,
@@ -3318,10 +3366,14 @@ impl fmt::Debug for ScalarValue {
33183366 }
33193367 ScalarValue :: Utf8 ( None ) => write ! ( f, "Utf8({self})" ) ,
33203368 ScalarValue :: Utf8 ( Some ( _) ) => write ! ( f, "Utf8(\" {self}\" )" ) ,
3369+ ScalarValue :: Utf8View ( None ) => write ! ( f, "Utf8View({self})" ) ,
3370+ ScalarValue :: Utf8View ( Some ( _) ) => write ! ( f, "Utf8View(\" {self}\" )" ) ,
33213371 ScalarValue :: LargeUtf8 ( None ) => write ! ( f, "LargeUtf8({self})" ) ,
33223372 ScalarValue :: LargeUtf8 ( Some ( _) ) => write ! ( f, "LargeUtf8(\" {self}\" )" ) ,
33233373 ScalarValue :: Binary ( None ) => write ! ( f, "Binary({self})" ) ,
33243374 ScalarValue :: Binary ( Some ( _) ) => write ! ( f, "Binary(\" {self}\" )" ) ,
3375+ ScalarValue :: BinaryView ( None ) => write ! ( f, "BinaryView({self})" ) ,
3376+ ScalarValue :: BinaryView ( Some ( _) ) => write ! ( f, "BinaryView(\" {self}\" )" ) ,
33253377 ScalarValue :: FixedSizeBinary ( size, None ) => {
33263378 write ! ( f, "FixedSizeBinary({size}, {self})" )
33273379 }
@@ -5393,6 +5445,17 @@ mod tests {
53935445 ScalarValue :: Utf8 ( None ) ,
53945446 DataType :: Dictionary ( Box :: new ( DataType :: Int32 ) , Box :: new ( DataType :: Utf8 ) ) ,
53955447 ) ;
5448+
5449+ // TODO: enable the checks below once https://github.com/apache/arrow-rs/issues/5893 is resolved
5450+ /*
5451+ check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
5452+ check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
5453+ check_scalar_cast(
5454+ ScalarValue::from("larger than 12 bytes string"),
5455+ DataType::Utf8View,
5456+ );
5457+
5458+ */
53965459 }
53975460
53985461 // mimics how casting works on scalar values by `casting` `scalar` to `desired_type`
0 commit comments