@@ -105,11 +105,26 @@ impl VariantArray {
105105 ) ) ) ;
106106 } ;
107107
108+ // Extract value and typed_value fields
109+ let value = if let Some ( value_col) = inner. column_by_name ( "value" ) {
110+ if let Some ( binary_view) = value_col. as_binary_view_opt ( ) {
111+ Some ( binary_view. clone ( ) )
112+ } else {
113+ return Err ( ArrowError :: NotYetImplemented ( format ! (
114+ "VariantArray 'value' field must be BinaryView, got {}" ,
115+ value_col. data_type( )
116+ ) ) ) ;
117+ }
118+ } else {
119+ None
120+ } ;
121+ let typed_value = inner. column_by_name ( "typed_value" ) . cloned ( ) ;
122+
108123 // Note these clones are cheap, they just bump the ref count
109124 Ok ( Self {
110125 inner : inner. clone ( ) ,
111126 metadata : metadata. clone ( ) ,
112- shredding_state : ShreddingState :: try_new ( inner ) ?,
127+ shredding_state : ShreddingState :: try_new ( metadata . clone ( ) , value , typed_value ) ?,
113128 } )
114129 }
115130
@@ -135,7 +150,7 @@ impl VariantArray {
135150 // This would be a lot simpler if ShreddingState were just a pair of Option... we already
136151 // have everything we need.
137152 let inner = builder. build ( ) ;
138- let shredding_state = ShreddingState :: try_new ( & inner ) . unwrap ( ) ; // valid by construction
153+ let shredding_state = ShreddingState :: try_new ( metadata . clone ( ) , value , typed_value ) . unwrap ( ) ; // valid by construction
139154 Self {
140155 inner,
141156 metadata,
@@ -200,7 +215,8 @@ impl VariantArray {
200215 typed_value_to_variant ( typed_value, index)
201216 }
202217 }
203- ShreddingState :: AllNull { .. } => {
218+ ( None , None ) => {
219+ // AllNull case: neither value nor typed_value fields exist
204220 // NOTE: This handles the case where neither value nor typed_value fields exist.
205221 // For top-level variants, this returns Variant::Null (JSON null).
206222 // For shredded object fields, this technically should indicate SQL NULL,
@@ -296,11 +312,18 @@ impl ShreddedVariantFieldArray {
296312 ) ) ;
297313 } ;
298314
315+ // Extract value and typed_value fields (metadata is not expected in ShreddedVariantFieldArray)
316+ let value = inner_struct. column_by_name ( "value" ) . and_then ( |col| col. as_binary_view_opt ( ) . cloned ( ) ) ;
317+ let typed_value = inner_struct. column_by_name ( "typed_value" ) . cloned ( ) ;
318+
319+ // Use a dummy metadata for the constructor (ShreddedVariantFieldArray doesn't have metadata)
320+ let dummy_metadata = arrow:: array:: BinaryViewArray :: new_null ( inner_struct. len ( ) ) ;
321+
299322 // Note this clone is cheap, it just bumps the ref count
300323 let inner = inner_struct. clone ( ) ;
301324 Ok ( Self {
302325 inner : inner. clone ( ) ,
303- shredding_state : ShreddingState :: try_new ( & inner ) ?,
326+ shredding_state : ShreddingState :: try_new ( dummy_metadata , value , typed_value ) ?,
304327 } )
305328 }
306329
@@ -398,114 +421,36 @@ impl Array for ShreddedVariantFieldArray {
398421///
399422/// [Parquet Variant Shredding Spec]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md#value-shredding
400423#[ derive( Debug ) ]
401- pub enum ShreddingState {
402- /// This variant has no typed_value field
403- Unshredded { value : BinaryViewArray } ,
404- /// This variant has a typed_value field and no value field
405- /// meaning it is the shredded type
406- PerfectlyShredded { typed_value : ArrayRef } ,
407- /// Imperfectly shredded: Shredded values reside in `typed_value` while those that failed to
408- /// shred reside in `value`. Missing field values are NULL in both columns, while NULL primitive
409- /// values have NULL `typed_value` and `Variant::Null` in `value`.
410- ///
411- /// NOTE: A partially shredded struct is a special kind of imperfect shredding, where
412- /// `typed_value` and `value` are both non-NULL. The `typed_value` is a struct containing the
413- /// subset of fields for which shredding was attempted (each field will then have its own value
414- /// and/or typed_value sub-fields that indicate how shredding actually turned out). Meanwhile,
415- /// the `value` is a variant object containing the subset of fields for which shredding was
416- /// not even attempted.
417- ImperfectlyShredded {
418- value : BinaryViewArray ,
419- typed_value : ArrayRef ,
420- } ,
421- /// All values are null, only metadata is present.
422- ///
423- /// This state occurs when neither `value` nor `typed_value` fields exist in the schema.
424- /// Note: By strict spec interpretation, this should only be valid for shredded object fields,
425- /// not top-level variants. However, we allow it and treat as Variant::Null for pragmatic
426- /// handling of missing data.
427- AllNull { metadata : BinaryViewArray } ,
424+ pub struct ShreddingState {
425+ pub value : Option < BinaryViewArray > ,
426+ pub typed_value : Option < ArrayRef > ,
428427}
429428
430429impl ShreddingState {
431430 /// try to create a new `ShreddingState` from the given fields
432431 pub fn try_new (
433- metadata : BinaryViewArray ,
432+ _metadata : BinaryViewArray ,
434433 value : Option < BinaryViewArray > ,
435434 typed_value : Option < ArrayRef > ,
436435 ) -> Result < Self , ArrowError > {
437- match ( metadata, value, typed_value) {
438- ( metadata, Some ( value) , Some ( typed_value) ) => Ok ( Self :: PartiallyShredded {
439- metadata,
440- value,
441- typed_value,
442- } ) ,
443- ( metadata, Some ( value) , None ) => Ok ( Self :: Unshredded { metadata, value } ) ,
444- ( metadata, None , Some ( typed_value) ) => Ok ( Self :: Typed {
445- metadata,
446- typed_value,
447- } ) ,
448- ( metadata, None , None ) => Ok ( Self :: AllNull { metadata } ) ,
449- }
450- }
451-
452- /// Return a reference to the metadata field
453- pub fn metadata_field ( & self ) -> & BinaryViewArray {
454- match self {
455- ShreddingState :: Unshredded { metadata, .. } => metadata,
456- ShreddingState :: Typed { metadata, .. } => metadata,
457- ShreddingState :: PartiallyShredded { metadata, .. } => metadata,
458- ShreddingState :: AllNull { metadata } => metadata,
459- }
460-
461436 Ok ( Self { value, typed_value } )
462437 }
463438
464439 /// Return a reference to the value field, if present
465440 pub fn value_field ( & self ) -> Option < & BinaryViewArray > {
466- match self {
467- ShreddingState :: Unshredded { value, .. } => Some ( value) ,
468- ShreddingState :: Typed { .. } => None ,
469- ShreddingState :: PartiallyShredded { value, .. } => Some ( value) ,
470- ShreddingState :: AllNull { .. } => None ,
471- }
441+ self . value . as_ref ( )
472442 }
473443
474444 /// Return a reference to the typed_value field, if present
475445 pub fn typed_value_field ( & self ) -> Option < & ArrayRef > {
476- match self {
477- ShreddingState :: Unshredded { .. } => None ,
478- ShreddingState :: Typed { typed_value, .. } => Some ( typed_value) ,
479- ShreddingState :: PartiallyShredded { typed_value, .. } => Some ( typed_value) ,
480- ShreddingState :: AllNull { .. } => None ,
481- }
446+ self . typed_value . as_ref ( )
482447 }
483448
484449 /// Slice all the underlying arrays
485450 pub fn slice ( & self , offset : usize , length : usize ) -> Self {
486- match self {
487- ShreddingState :: Unshredded { value } => ShreddingState :: Unshredded {
488- value : value. slice ( offset, length) ,
489- } ,
490- ShreddingState :: Typed {
491- metadata,
492- typed_value,
493- } => ShreddingState :: Typed {
494- metadata : metadata. slice ( offset, length) ,
495- typed_value : typed_value. slice ( offset, length) ,
496- } ,
497- ShreddingState :: PartiallyShredded {
498- metadata,
499- value,
500- typed_value,
501- } => ShreddingState :: PartiallyShredded {
502- metadata : metadata. slice ( offset, length) ,
503- value : value. slice ( offset, length) ,
504- typed_value : typed_value. slice ( offset, length) ,
505- } ,
506- ShreddingState :: AllNull { metadata } => ShreddingState :: AllNull {
507- metadata : metadata. slice ( offset, length) ,
508- } ,
451+ Self {
452+ value : self . value . as_ref ( ) . map ( |v| v. slice ( offset, length) ) ,
453+ typed_value : self . typed_value . as_ref ( ) . map ( |tv| tv. slice ( offset, length) ) ,
509454 }
510455 }
511456}
@@ -664,11 +609,10 @@ mod test {
664609 // This is a pragmatic decision to handle missing data gracefully.
665610 let variant_array = VariantArray :: try_new ( Arc :: new ( array) ) . unwrap ( ) ;
666611
667- // Verify the shredding state is AllNull
668- assert ! ( matches!(
669- variant_array. shredding_state( ) ,
670- ShreddingState :: AllNull { .. }
671- ) ) ;
612+ // Verify the shredding state is AllNull (both value and typed_value are None)
613+ let shredding_state = variant_array. shredding_state ( ) ;
614+ assert ! ( shredding_state. value_field( ) . is_none( ) ) ;
615+ assert ! ( shredding_state. typed_value_field( ) . is_none( ) ) ;
672616
673617 // Verify that value() returns Variant::Null (compensating for spec violation)
674618 for i in 0 ..variant_array. len ( ) {
@@ -727,13 +671,11 @@ mod test {
727671 let metadata = BinaryViewArray :: from ( vec ! [ b"test" as & [ u8 ] ] ) ;
728672 let shredding_state = ShreddingState :: try_new ( metadata. clone ( ) , None , None ) . unwrap ( ) ;
729673
730- assert ! ( matches!( shredding_state, ShreddingState :: AllNull { .. } ) ) ;
674+ // Verify the shredding state is AllNull (both value and typed_value are None)
675+ assert ! ( shredding_state. value_field( ) . is_none( ) ) ;
676+ assert ! ( shredding_state. typed_value_field( ) . is_none( ) ) ;
731677
732- // Verify metadata is preserved correctly
733- if let ShreddingState :: AllNull { metadata : m } = shredding_state {
734- assert_eq ! ( m. len( ) , metadata. len( ) ) ;
735- assert_eq ! ( m. value( 0 ) , metadata. value( 0 ) ) ;
736- }
678+ // Note: metadata is no longer stored in the struct, it's passed to try_new but not stored
737679 }
738680
739681 #[ test]
@@ -746,11 +688,10 @@ mod test {
746688
747689 let variant_array = VariantArray :: try_new ( Arc :: new ( struct_array) ) . unwrap ( ) ;
748690
749- // Verify the shredding state is AllNull
750- assert ! ( matches!(
751- variant_array. shredding_state( ) ,
752- ShreddingState :: AllNull { .. }
753- ) ) ;
691+ // Verify the shredding state is AllNull (both value and typed_value are None)
692+ let shredding_state = variant_array. shredding_state ( ) ;
693+ assert ! ( shredding_state. value_field( ) . is_none( ) ) ;
694+ assert ! ( shredding_state. typed_value_field( ) . is_none( ) ) ;
754695
755696 // Verify all values are null
756697 assert_eq ! ( variant_array. len( ) , 3 ) ;
@@ -797,9 +738,8 @@ mod test {
797738 let variant_array = VariantArray :: try_new ( Arc :: new ( struct_array) ) . unwrap ( ) ;
798739
799740 // This should be Unshredded, not AllNull, because value field exists in schema
800- assert ! ( matches!(
801- variant_array. shredding_state( ) ,
802- ShreddingState :: Unshredded { .. }
803- ) ) ;
741+ let shredding_state = variant_array. shredding_state ( ) ;
742+ assert ! ( shredding_state. value_field( ) . is_some( ) ) ;
743+ assert ! ( shredding_state. typed_value_field( ) . is_none( ) ) ;
804744 }
805745}
0 commit comments