@@ -105,11 +105,26 @@ impl VariantArray {
105105 ) ) ) ;
106106 } ;
107107
108+ // Extract value and typed_value fields
109+ let value = if let Some ( value_col) = inner. column_by_name ( "value" ) {
110+ if let Some ( binary_view) = value_col. as_binary_view_opt ( ) {
111+ Some ( binary_view. clone ( ) )
112+ } else {
113+ return Err ( ArrowError :: NotYetImplemented ( format ! (
114+ "VariantArray 'value' field must be BinaryView, got {}" ,
115+ value_col. data_type( )
116+ ) ) ) ;
117+ }
118+ } else {
119+ None
120+ } ;
121+ let typed_value = inner. column_by_name ( "typed_value" ) . cloned ( ) ;
122+
108123 // Note these clones are cheap, they just bump the ref count
109124 Ok ( Self {
110125 inner : inner. clone ( ) ,
111126 metadata : metadata. clone ( ) ,
112- shredding_state : ShreddingState :: try_new ( inner ) ?,
127+ shredding_state : ShreddingState :: try_new ( metadata . clone ( ) , value , typed_value ) ?,
113128 } )
114129 }
115130
@@ -135,7 +150,7 @@ impl VariantArray {
135150 // This would be a lot simpler if ShreddingState were just a pair of Option... we already
136151 // have everything we need.
137152 let inner = builder. build ( ) ;
138- let shredding_state = ShreddingState :: try_new ( & inner ) . unwrap ( ) ; // valid by construction
153+ let shredding_state = ShreddingState :: try_new ( metadata . clone ( ) , value , typed_value ) . unwrap ( ) ; // valid by construction
139154 Self {
140155 inner,
141156 metadata,
@@ -180,24 +195,28 @@ impl VariantArray {
180195 /// caller to ensure that the metadata and value were constructed correctly.
181196 pub fn value ( & self , index : usize ) -> Variant < ' _ , ' _ > {
182197 match & self . shredding_state {
183- ShreddingState :: Unshredded { value } => {
198+ ShreddingState :: Unshredded { value, .. } => {
199+ // Unshredded case
184200 Variant :: new ( self . metadata . value ( index) , value. value ( index) )
185201 }
186- ShreddingState :: PerfectlyShredded { typed_value, .. } => {
202+ ShreddingState :: Typed { typed_value, .. } => {
203+ // Typed case (formerly PerfectlyShredded)
187204 if typed_value. is_null ( index) {
188205 Variant :: Null
189206 } else {
190207 typed_value_to_variant ( typed_value, index)
191208 }
192209 }
193- ShreddingState :: ImperfectlyShredded { value, typed_value } => {
210+ ShreddingState :: PartiallyShredded { value, typed_value, .. } => {
211+ // PartiallyShredded case (formerly ImperfectlyShredded)
194212 if typed_value. is_null ( index) {
195213 Variant :: new ( self . metadata . value ( index) , value. value ( index) )
196214 } else {
197215 typed_value_to_variant ( typed_value, index)
198216 }
199217 }
200218 ShreddingState :: AllNull { .. } => {
219+ // AllNull case: neither value nor typed_value fields exist
201220 // NOTE: This handles the case where neither value nor typed_value fields exist.
202221 // For top-level variants, this returns Variant::Null (JSON null).
203222 // For shredded object fields, this technically should indicate SQL NULL,
@@ -256,8 +275,11 @@ impl VariantArray {
256275/// additional fields), or NULL (`v:a` was an object containing only the single expected field `b`).
257276///
258277/// Finally, `v.typed_value.a.typed_value.b.value` is either NULL (`v:a.b` was an integer) or else a
259- /// variant value.
278+ /// variant value (which could be `Variant::Null`).
279+ #[ derive( Debug ) ]
260280pub struct ShreddedVariantFieldArray {
281+ /// Reference to the underlying StructArray
282+ inner : StructArray ,
261283 shredding_state : ShreddingState ,
262284}
263285
@@ -284,15 +306,24 @@ impl ShreddedVariantFieldArray {
284306 ///
285307 /// Currently, only `value` columns of type [`BinaryViewArray`] are supported.
286308 pub fn try_new ( inner : ArrayRef ) -> Result < Self , ArrowError > {
287- let Some ( inner ) = inner. as_struct_opt ( ) else {
309+ let Some ( inner_struct ) = inner. as_struct_opt ( ) else {
288310 return Err ( ArrowError :: InvalidArgumentError (
289- "Invalid VariantArray : requires StructArray as input" . to_string ( ) ,
311+ "Invalid ShreddedVariantFieldArray : requires StructArray as input" . to_string ( ) ,
290312 ) ) ;
291313 } ;
292314
315+ // Extract value and typed_value fields (metadata is not expected in ShreddedVariantFieldArray)
316+ let value = inner_struct. column_by_name ( "value" ) . and_then ( |col| col. as_binary_view_opt ( ) . cloned ( ) ) ;
317+ let typed_value = inner_struct. column_by_name ( "typed_value" ) . cloned ( ) ;
318+
319+ // Use a dummy metadata for the constructor (ShreddedVariantFieldArray doesn't have metadata)
320+ let dummy_metadata = arrow:: array:: BinaryViewArray :: new_null ( inner_struct. len ( ) ) ;
321+
293322 // Note this clone is cheap, it just bumps the ref count
323+ let inner = inner_struct. clone ( ) ;
294324 Ok ( Self {
295- shredding_state : ShreddingState :: try_new ( inner) ?,
325+ inner : inner. clone ( ) ,
326+ shredding_state : ShreddingState :: try_new ( dummy_metadata, value, typed_value) ?,
296327 } )
297328 }
298329
@@ -310,6 +341,65 @@ impl ShreddedVariantFieldArray {
310341 pub fn typed_value_field ( & self ) -> Option < & ArrayRef > {
311342 self . shredding_state . typed_value_field ( )
312343 }
344+
345+ /// Returns a reference to the underlying [`StructArray`].
346+ pub fn inner ( & self ) -> & StructArray {
347+ & self . inner
348+ }
349+ }
350+
351+ impl Array for ShreddedVariantFieldArray {
352+ fn as_any ( & self ) -> & dyn Any {
353+ self
354+ }
355+
356+ fn to_data ( & self ) -> ArrayData {
357+ self . inner . to_data ( )
358+ }
359+
360+ fn into_data ( self ) -> ArrayData {
361+ self . inner . into_data ( )
362+ }
363+
364+ fn data_type ( & self ) -> & DataType {
365+ self . inner . data_type ( )
366+ }
367+
368+ fn slice ( & self , offset : usize , length : usize ) -> ArrayRef {
369+ let inner = self . inner . slice ( offset, length) ;
370+ let shredding_state = self . shredding_state . slice ( offset, length) ;
371+ Arc :: new ( Self {
372+ inner,
373+ shredding_state,
374+ } )
375+ }
376+
377+ fn len ( & self ) -> usize {
378+ self . inner . len ( )
379+ }
380+
381+ fn is_empty ( & self ) -> bool {
382+ self . inner . is_empty ( )
383+ }
384+
385+ fn offset ( & self ) -> usize {
386+ self . inner . offset ( )
387+ }
388+
389+ fn nulls ( & self ) -> Option < & NullBuffer > {
390+ // According to the shredding spec, ShreddedVariantFieldArray should be
391+ // physically non-nullable - SQL NULL is inferred by both value and
392+ // typed_value being physically NULL
393+ None
394+ }
395+
396+ fn get_buffer_memory_size ( & self ) -> usize {
397+ self . inner . get_buffer_memory_size ( )
398+ }
399+
400+ fn get_array_memory_size ( & self ) -> usize {
401+ self . inner . get_array_memory_size ( )
402+ }
313403}
314404
315405/// Represents the shredding state of a [`VariantArray`]
@@ -333,10 +423,16 @@ impl ShreddedVariantFieldArray {
333423#[ derive( Debug ) ]
334424pub enum ShreddingState {
335425 /// This variant has no typed_value field
336- Unshredded { value : BinaryViewArray } ,
426+ Unshredded {
427+ metadata : BinaryViewArray ,
428+ value : BinaryViewArray ,
429+ } ,
337430 /// This variant has a typed_value field and no value field
338431 /// meaning it is the shredded type
339- PerfectlyShredded { typed_value : ArrayRef } ,
432+ Typed {
433+ metadata : BinaryViewArray ,
434+ typed_value : ArrayRef ,
435+ } ,
340436 /// Partially shredded: Shredded values reside in `typed_value` while those that failed to
341437 /// shred reside in `value`. Missing field values are NULL in both columns, while NULL primitive
342438 /// values have NULL `typed_value` and `Variant::Null` in `value`.
@@ -347,7 +443,8 @@ pub enum ShreddingState {
347443 /// and/or typed_value sub-fields that indicate how shredding actually turned out). Meanwhile,
348444 /// the `value` is a variant object containing the subset of fields for which shredding was
349445 /// not even attempted.
350- ImperfectlyShredded {
446+ PartiallyShredded {
447+ metadata : BinaryViewArray ,
351448 value : BinaryViewArray ,
352449 typed_value : ArrayRef ,
353450 } ,
@@ -357,7 +454,9 @@ pub enum ShreddingState {
357454 /// Note: By strict spec interpretation, this should only be valid for shredded object fields,
358455 /// not top-level variants. However, we allow it and treat as Variant::Null for pragmatic
359456 /// handling of missing data.
360- AllNull { metadata : BinaryViewArray } ,
457+ AllNull {
458+ metadata : BinaryViewArray ,
459+ } ,
361460}
362461
363462impl ShreddingState {
@@ -415,7 +514,8 @@ impl ShreddingState {
415514 /// Slice all the underlying arrays
416515 pub fn slice ( & self , offset : usize , length : usize ) -> Self {
417516 match self {
418- ShreddingState :: Unshredded { value } => ShreddingState :: Unshredded {
517+ ShreddingState :: Unshredded { metadata, value } => ShreddingState :: Unshredded {
518+ metadata : metadata. slice ( offset, length) ,
419519 value : value. slice ( offset, length) ,
420520 } ,
421521 ShreddingState :: Typed {
@@ -445,7 +545,7 @@ impl ShreddingState {
445545///
446546/// TODO: move to arrow crate
447547#[ derive( Debug , Default , Clone ) ]
448- pub struct StructArrayBuilder {
548+ pub ( crate ) struct StructArrayBuilder {
449549 fields : Vec < FieldRef > ,
450550 arrays : Vec < ArrayRef > ,
451551 nulls : Option < NullBuffer > ,
@@ -658,6 +758,7 @@ mod test {
658758 let metadata = BinaryViewArray :: from ( vec ! [ b"test" as & [ u8 ] ] ) ;
659759 let shredding_state = ShreddingState :: try_new ( metadata. clone ( ) , None , None ) . unwrap ( ) ;
660760
761+ // Verify the shredding state is AllNull
661762 assert ! ( matches!( shredding_state, ShreddingState :: AllNull { .. } ) ) ;
662763
663764 // Verify metadata is preserved correctly
0 commit comments