1919
2020use crate :: { BorrowedShreddingState , VariantArray , VariantValueArrayBuilder } ;
2121use arrow:: array:: {
22- Array , AsArray as _, BinaryViewArray , BooleanArray , FixedSizeBinaryArray , NullBufferBuilder ,
23- PrimitiveArray , StringArray , StructArray ,
22+ Array , AsArray as _, BinaryViewArray , BooleanArray , FixedSizeBinaryArray , PrimitiveArray ,
23+ StringArray , StructArray ,
2424} ;
2525use arrow:: buffer:: NullBuffer ;
2626use arrow:: datatypes:: {
@@ -58,21 +58,19 @@ pub fn unshred_variant(array: &VariantArray) -> Result<VariantArray> {
5858
5959 // NOTE: None/None at top-level is technically invalid, but the shredding spec requires us to
6060 // emit `Variant::Null` when a required value is missing.
61- let mut row_builder = make_unshred_variant_row_builder ( array. shredding_state ( ) . borrow ( ) ) ?
62- . unwrap_or_else ( || UnshredVariantRowBuilder :: null ( array. nulls ( ) ) ) ;
61+ let nulls = array. nulls ( ) ;
62+ let mut row_builder = UnshredVariantRowBuilder :: try_new_opt ( array. shredding_state ( ) . borrow ( ) ) ?
63+ . unwrap_or_else ( || UnshredVariantRowBuilder :: null ( nulls) ) ;
6364
6465 let metadata = array. metadata_field ( ) ;
6566 let mut value_builder = VariantValueArrayBuilder :: new ( array. len ( ) ) ;
66- let mut null_builder = NullBufferBuilder :: new ( array. len ( ) ) ;
6767 for i in 0 ..array. len ( ) {
6868 if array. is_null ( i) {
6969 value_builder. append_null ( ) ;
70- null_builder. append_null ( ) ;
7170 } else {
7271 let metadata = VariantMetadata :: new ( metadata. value ( i) ) ;
7372 let mut value_builder = value_builder. builder_ext ( & metadata) ;
7473 row_builder. append_row ( & mut value_builder, & metadata, i) ?;
75- null_builder. append_non_null ( ) ;
7674 }
7775 }
7876
@@ -81,7 +79,7 @@ pub fn unshred_variant(array: &VariantArray) -> Result<VariantArray> {
8179 metadata. clone ( ) ,
8280 Some ( value) ,
8381 None ,
84- null_builder . finish ( ) ,
82+ nulls . cloned ( ) ,
8583 ) )
8684}
8785
@@ -139,86 +137,86 @@ impl<'a> UnshredVariantRowBuilder<'a> {
139137 Self :: Null ( b) => b. append_row ( builder, metadata, index) ,
140138 }
141139 }
142- }
143-
144- /// Factory function to create the appropriate row builder for given field components
145- /// Returns None for None/None case - caller decides how to handle based on context
146- fn make_unshred_variant_row_builder < ' a > (
147- shredding_state : BorrowedShreddingState < ' a > ,
148- ) -> Result < Option < UnshredVariantRowBuilder < ' a > > > {
149- let value = shredding_state. value_field ( ) ;
150- let typed_value = shredding_state. typed_value_field ( ) ;
151- let Some ( typed_value) = typed_value else {
152- // Copy the value across directly, if present. Else caller decides what to do.
153- return Ok ( value
154- . map ( |v| UnshredVariantRowBuilder :: ValueOnly ( ValueOnlyUnshredVariantBuilder :: new ( v) ) ) ) ;
155- } ;
156140
157- // Has typed_value -> determine type and create appropriate builder
158- macro_rules! primitive_builder {
159- ( $enum_variant: ident, $cast_fn: ident) => {
160- UnshredVariantRowBuilder :: $enum_variant( UnshredPrimitiveRowBuilder :: new(
161- value,
162- typed_value. $cast_fn( ) ,
163- ) )
141+ /// Creates a new UnshredVariantRowBuilder from shredding state
142+ /// Returns None for None/None case - caller decides how to handle based on context
143+ fn try_new_opt ( shredding_state : BorrowedShreddingState < ' a > ) -> Result < Option < Self > > {
144+ let value = shredding_state. value_field ( ) ;
145+ let typed_value = shredding_state. typed_value_field ( ) ;
146+ let Some ( typed_value) = typed_value else {
147+ // Copy the value across directly, if present. Else caller decides what to do.
148+ return Ok ( value. map ( |v| Self :: ValueOnly ( ValueOnlyUnshredVariantBuilder :: new ( v) ) ) ) ;
164149 } ;
165- }
166150
167- let builder = match typed_value. data_type ( ) {
168- DataType :: Int8 => primitive_builder ! ( PrimitiveInt8 , as_primitive) ,
169- DataType :: Int16 => primitive_builder ! ( PrimitiveInt16 , as_primitive) ,
170- DataType :: Int32 => primitive_builder ! ( PrimitiveInt32 , as_primitive) ,
171- DataType :: Int64 => primitive_builder ! ( PrimitiveInt64 , as_primitive) ,
172- DataType :: Float32 => primitive_builder ! ( PrimitiveFloat32 , as_primitive) ,
173- DataType :: Float64 => primitive_builder ! ( PrimitiveFloat64 , as_primitive) ,
174- DataType :: Date32 => primitive_builder ! ( PrimitiveDate32 , as_primitive) ,
175- DataType :: Time64 ( TimeUnit :: Microsecond ) => {
176- primitive_builder ! ( PrimitiveTime64 , as_primitive)
177- }
178- DataType :: Time64 ( time_unit) => {
179- return Err ( ArrowError :: InvalidArgumentError ( format ! (
180- "Time64({time_unit}) is not a valid variant shredding type" ,
181- ) ) ) ;
182- }
183- DataType :: Timestamp ( TimeUnit :: Microsecond , timezone) => {
184- UnshredVariantRowBuilder :: TimestampMicrosecond ( TimestampUnshredRowBuilder :: new (
185- value,
186- typed_value. as_primitive ( ) ,
187- timezone. is_some ( ) ,
188- ) )
151+ // Has typed_value -> determine type and create appropriate builder
152+ macro_rules! primitive_builder {
153+ ( $enum_variant: ident, $cast_fn: ident) => {
154+ Self :: $enum_variant( UnshredPrimitiveRowBuilder :: new(
155+ value,
156+ typed_value. $cast_fn( ) ,
157+ ) )
158+ } ;
189159 }
190- DataType :: Timestamp ( TimeUnit :: Nanosecond , timezone) => {
191- UnshredVariantRowBuilder :: TimestampNanosecond ( TimestampUnshredRowBuilder :: new (
160+
161+ let builder = match typed_value. data_type ( ) {
162+ DataType :: Int8 => primitive_builder ! ( PrimitiveInt8 , as_primitive) ,
163+ DataType :: Int16 => primitive_builder ! ( PrimitiveInt16 , as_primitive) ,
164+ DataType :: Int32 => primitive_builder ! ( PrimitiveInt32 , as_primitive) ,
165+ DataType :: Int64 => primitive_builder ! ( PrimitiveInt64 , as_primitive) ,
166+ DataType :: Float32 => primitive_builder ! ( PrimitiveFloat32 , as_primitive) ,
167+ DataType :: Float64 => primitive_builder ! ( PrimitiveFloat64 , as_primitive) ,
168+ DataType :: Date32 => primitive_builder ! ( PrimitiveDate32 , as_primitive) ,
169+ DataType :: Time64 ( TimeUnit :: Microsecond ) => {
170+ primitive_builder ! ( PrimitiveTime64 , as_primitive)
171+ }
172+ DataType :: Time64 ( time_unit) => {
173+ return Err ( ArrowError :: InvalidArgumentError ( format ! (
174+ "Time64({time_unit}) is not a valid variant shredding type" ,
175+ ) ) ) ;
176+ }
177+ DataType :: Timestamp ( TimeUnit :: Microsecond , timezone) => {
178+ Self :: TimestampMicrosecond ( TimestampUnshredRowBuilder :: new (
179+ value,
180+ typed_value. as_primitive ( ) ,
181+ timezone. is_some ( ) ,
182+ ) )
183+ }
184+ DataType :: Timestamp ( TimeUnit :: Nanosecond , timezone) => {
185+ Self :: TimestampNanosecond ( TimestampUnshredRowBuilder :: new (
186+ value,
187+ typed_value. as_primitive ( ) ,
188+ timezone. is_some ( ) ,
189+ ) )
190+ }
191+ DataType :: Timestamp ( time_unit, _) => {
192+ return Err ( ArrowError :: InvalidArgumentError ( format ! (
193+ "Timestamp({time_unit}) is not a valid variant shredding type" ,
194+ ) ) ) ;
195+ }
196+ DataType :: Boolean => primitive_builder ! ( PrimitiveBoolean , as_boolean) ,
197+ DataType :: Utf8 => primitive_builder ! ( PrimitiveString , as_string) ,
198+ DataType :: BinaryView => primitive_builder ! ( PrimitiveBinaryView , as_binary_view) ,
199+ DataType :: FixedSizeBinary ( 16 ) => {
200+ primitive_builder ! ( PrimitiveUuid , as_fixed_size_binary)
201+ }
202+ DataType :: FixedSizeBinary ( size) => {
203+ return Err ( ArrowError :: InvalidArgumentError ( format ! (
204+ "FixedSizeBinary({size}) is not a valid variant shredding type" ,
205+ ) ) ) ;
206+ }
207+ DataType :: Struct ( _) => Self :: Struct ( StructUnshredVariantBuilder :: try_new (
192208 value,
193- typed_value. as_primitive ( ) ,
194- timezone. is_some ( ) ,
195- ) )
196- }
197- DataType :: Timestamp ( time_unit, _) => {
198- return Err ( ArrowError :: InvalidArgumentError ( format ! (
199- "Timestamp({time_unit}) is not a valid variant shredding type" ,
200- ) ) ) ;
201- }
202- DataType :: Boolean => primitive_builder ! ( PrimitiveBoolean , as_boolean) ,
203- DataType :: Utf8 => primitive_builder ! ( PrimitiveString , as_string) ,
204- DataType :: BinaryView => primitive_builder ! ( PrimitiveBinaryView , as_binary_view) ,
205- DataType :: FixedSizeBinary ( 16 ) => primitive_builder ! ( PrimitiveUuid , as_fixed_size_binary) ,
206- DataType :: FixedSizeBinary ( size) => {
207- return Err ( ArrowError :: InvalidArgumentError ( format ! (
208- "FixedSizeBinary({size}) is not a valid variant shredding type" ,
209- ) ) ) ;
210- }
211- DataType :: Struct ( _) => UnshredVariantRowBuilder :: Struct (
212- StructUnshredVariantBuilder :: try_new ( value, typed_value. as_struct ( ) ) ?,
213- ) ,
214- _ => {
215- return Err ( ArrowError :: NotYetImplemented ( format ! (
216- "Unshredding not yet supported for type: {}" ,
217- typed_value. data_type( )
218- ) ) ) ;
219- }
220- } ;
221- Ok ( Some ( builder) )
209+ typed_value. as_struct ( ) ,
210+ ) ?) ,
211+ _ => {
212+ return Err ( ArrowError :: NotYetImplemented ( format ! (
213+ "Unshredding not yet supported for type: {}" ,
214+ typed_value. data_type( )
215+ ) ) ) ;
216+ }
217+ } ;
218+ Ok ( Some ( builder) )
219+ }
222220}
223221
224222/// Builder for arrays with neither typed_value nor value (all NULL/Variant::Null)
@@ -466,7 +464,7 @@ impl<'a> StructUnshredVariantBuilder<'a> {
466464 field_array. data_type( )
467465 ) ) ) ;
468466 } ;
469- let field_unshredder = make_unshred_variant_row_builder ( field_array. try_into ( ) ?) ?;
467+ let field_unshredder = UnshredVariantRowBuilder :: try_new_opt ( field_array. try_into ( ) ?) ?;
470468 field_unshredders. insert ( field. name ( ) . as_ref ( ) , field_unshredder) ;
471469 }
472470
@@ -518,3 +516,6 @@ impl<'a> StructUnshredVariantBuilder<'a> {
518516 Ok ( ( ) )
519517 }
520518}
519+
520+ // TODO: This code is covered by tests in `parquet/tests/variant_integration.rs`. Does that suffice?
521+ // Or do we also need targeted stand-alone unit tests for full coverage?
0 commit comments