1717
1818use std:: sync:: Arc ;
1919
20+ use crate :: type_conversion:: {
21+ decimal_to_variant_decimal, generic_conversion_array, non_generic_conversion_array,
22+ primitive_conversion_array,
23+ } ;
2024use crate :: { VariantArray , VariantArrayBuilder } ;
2125use arrow:: array:: {
2226 Array , AsArray , TimestampMicrosecondArray , TimestampMillisecondArray , TimestampNanosecondArray ,
@@ -37,60 +41,10 @@ use arrow::temporal_conversions::{
3741} ;
3842use arrow_schema:: { ArrowError , DataType , TimeUnit } ;
3943use chrono:: { DateTime , NaiveDate , NaiveDateTime , NaiveTime , TimeZone , Utc } ;
40- use half:: f16;
4144use parquet_variant:: {
4245 Variant , VariantBuilder , VariantDecimal16 , VariantDecimal4 , VariantDecimal8 ,
4346} ;
4447
45- /// Appends every row of `$input`, viewed through `as_primitive::<$t>()`, to `$builder`: null rows are
46- /// appended with `append_null`, all other rows with `append_variant(Variant::from(value))`.
47- macro_rules! primitive_conversion {
48- ( $t: ty, $input: expr, $builder: expr) => { { // $t: type argument for `as_primitive`; $input: array expression; $builder: receives one entry per row
49- let array = $input. as_primitive:: <$t>( ) ; // typed view of the input array
50- for i in 0 ..array. len( ) {
51- if array. is_null( i) {
52- $builder. append_null( ) ; // keep the output row-aligned with the input: a null row stays null
53- continue ;
54- }
55- $builder. append_variant( Variant :: from( array. value( i) ) ) ; // relies on `Variant: From<value type>`
56- }
57- } } ;
58- }
59-
60- /// Appends every row of `$input` to `$builder`, row by row. The array is first downcast with
61- /// `$input.$method::<$t>()` (a downcast method that takes one generic type argument); each non-null
62- /// value is passed through `$cast_fn` and the result is wrapped with `Variant::from`. Null rows are appended as nulls.
63- macro_rules! generic_conversion {
64- ( $t: ty, $method: ident, $cast_fn: expr, $input: expr, $builder: expr) => { { // $t: type argument for `$method`; $cast_fn: callable applied to each value
65- let array = $input. $method:: <$t>( ) ; // typed view of the input array
66- for i in 0 ..array. len( ) {
67- if array. is_null( i) {
68- $builder. append_null( ) ; // null rows bypass `$cast_fn` entirely
69- continue ;
70- }
71- let cast_value = $cast_fn( array. value( i) ) ; // `$cast_fn` is expanded here, inside the loop body
72- $builder. append_variant( Variant :: from( cast_value) ) ; // result of `$cast_fn` must convert into `Variant`
73- }
74- } } ;
75- }
76-
77- /// Appends every row of `$input` to `$builder`, row by row. The array is first downcast with
78- /// `$input.$method()` (a downcast method that takes no generic type argument); each non-null
79- /// value is passed through `$cast_fn` and the result is wrapped with `Variant::from`. Null rows are appended as nulls.
80- macro_rules! non_generic_conversion {
81- ( $method: ident, $cast_fn: expr, $input: expr, $builder: expr) => { { // $method: downcast method name; $cast_fn: callable applied to each value
82- let array = $input. $method( ) ; // typed view of the input array
83- for i in 0 ..array. len( ) {
84- if array. is_null( i) {
85- $builder. append_null( ) ; // null rows bypass `$cast_fn` entirely
86- continue ;
87- }
88- let cast_value = $cast_fn( array. value( i) ) ; // `$cast_fn` is expanded here, inside the loop body
89- $builder. append_variant( Variant :: from( cast_value) ) ; // result of `$cast_fn` must convert into `Variant`
90- }
91- } } ;
92- }
93-
9448fn convert_timestamp (
9549 time_unit : & TimeUnit ,
9650 time_zone : & Option < Arc < str > > ,
@@ -159,61 +113,6 @@ fn convert_timestamp(
159113 }
160114}
161115
162- /// Expands to an expression turning the integer `$v` (of type `$value_type`) with scale `*$scale` into `$variant_type` converted via `.into()`, or `Variant::Null` when `try_new` fails. On overflow in the negative-scale branch it executes `return Variant::Null`, which returns from the enclosing function or closure.
163- macro_rules! decimal_to_variant_decimal {
164- ( $v: ident, $scale: expr, $value_type: ty, $variant_type: ty) => { // $scale is dereferenced below, so it must be a reference-like expression
165- if * $scale < 0 {
166- // For negative scale, we need to multiply the value by 10^|scale|
167- // For example: 123 with scale -2 becomes 12300
168- let multiplier = ( 10 as $value_type) . pow( ( -* $scale) as u32 ) ; // NOTE(review): neither the negation nor `pow` is overflow-checked here; presumably the caller's scale range keeps both in bounds - confirm
169- // Bail out with `Variant::Null` when `$v * multiplier` would not fit in `$value_type`
170- if $v > 0 && $v > <$value_type>:: MAX / multiplier {
171- return Variant :: Null ;
172- }
173- if $v < 0 && $v < <$value_type>:: MIN / multiplier {
174- return Variant :: Null ;
175- }
176- <$variant_type>:: try_new( $v * multiplier, 0 ) // value has been rescaled, so the resulting scale is 0
177- . map( |v| v. into( ) )
178- . unwrap_or( Variant :: Null ) // a `try_new` error is mapped to `Variant::Null`, not propagated
179- } else {
180- <$variant_type>:: try_new( $v, * $scale as u8 ) // scale is non-negative in this branch
181- . map( |v| v. into( ) )
182- . unwrap_or( Variant :: Null ) // a `try_new` error is mapped to `Variant::Null`, not propagated
183- }
184- } ;
185- }
186-
187- /// Appends every row of `$input` to `$builder`, downcasting with `$input.$method()` (no generic type argument); non-null values go through `$cast_fn` and then `Variant::from`, null rows are appended as nulls.
188- macro_rules! cast_conversion_nongeneric {
189- ( $method: ident, $cast_fn: expr, $input: expr, $builder: expr) => { { // $method: downcast method name; $cast_fn: callable applied to each value
190- let array = $input. $method( ) ; // typed view of the input array
191- for i in 0 ..array. len( ) {
192- if array. is_null( i) {
193- $builder. append_null( ) ; // null rows bypass `$cast_fn` entirely
194- continue ;
195- }
196- let cast_value = $cast_fn( array. value( i) ) ; // `$cast_fn` is expanded here, inside the loop body
197- $builder. append_variant( Variant :: from( cast_value) ) ; // result of `$cast_fn` must convert into `Variant`
198- }
199- } } ;
200- }
201-
202- /// Appends every row of `$input` to `$builder`, downcasting with `$input.$method::<$offset_type>()` (the offset type is the generic argument); non-null values go through `$cast_fn` and then `Variant::from`, null rows are appended as nulls.
203- macro_rules! cast_conversion_string {
204- ( $offset_type: ty, $method: ident, $cast_fn: expr, $input: expr, $builder: expr) => { { // $offset_type: type argument for `$method`; $cast_fn: callable applied to each value
205- let array = $input. $method:: <$offset_type>( ) ; // typed view of the input array
206- for i in 0 ..array. len( ) {
207- if array. is_null( i) {
208- $builder. append_null( ) ; // null rows bypass `$cast_fn` entirely
209- continue ;
210- }
211- let cast_value = $cast_fn( array. value( i) ) ; // `$cast_fn` is expanded here, inside the loop body
212- $builder. append_variant( Variant :: from( cast_value) ) ; // result of `$cast_fn` must convert into `Variant`
213- }
214- } } ;
215- }
216-
217116/// Casts a typed arrow [`Array`] to a [`VariantArray`]. This is useful when you
218117/// need to convert a specific data type
219118///
@@ -250,58 +149,52 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
250149 // todo: handle other types like Boolean, Date, Timestamp, etc.
251150 match input_type {
252151 DataType :: Boolean => {
253- non_generic_conversion ! ( as_boolean, |v| v, input , builder) ;
152+ non_generic_conversion_array ! ( input . as_boolean( ) , |v| v, builder) ;
254153 }
255154 DataType :: Binary => {
256- generic_conversion ! ( BinaryType , as_bytes, |v| v, input, builder) ;
155+ generic_conversion_array ! ( BinaryType , as_bytes, |v| v, input, builder) ;
257156 }
258157 DataType :: LargeBinary => {
259- generic_conversion ! ( LargeBinaryType , as_bytes, |v| v, input, builder) ;
158+ generic_conversion_array ! ( LargeBinaryType , as_bytes, |v| v, input, builder) ;
260159 }
261160 DataType :: BinaryView => {
262- generic_conversion ! ( BinaryViewType , as_byte_view, |v| v, input, builder) ;
161+ generic_conversion_array ! ( BinaryViewType , as_byte_view, |v| v, input, builder) ;
263162 }
264163 DataType :: Int8 => {
265- primitive_conversion ! ( Int8Type , input, builder) ;
164+ primitive_conversion_array ! ( Int8Type , input, builder) ;
266165 }
267166 DataType :: Int16 => {
268- primitive_conversion ! ( Int16Type , input, builder) ;
167+ primitive_conversion_array ! ( Int16Type , input, builder) ;
269168 }
270169 DataType :: Int32 => {
271- primitive_conversion ! ( Int32Type , input, builder) ;
170+ primitive_conversion_array ! ( Int32Type , input, builder) ;
272171 }
273172 DataType :: Int64 => {
274- primitive_conversion ! ( Int64Type , input, builder) ;
173+ primitive_conversion_array ! ( Int64Type , input, builder) ;
275174 }
276175 DataType :: UInt8 => {
277- primitive_conversion ! ( UInt8Type , input, builder) ;
176+ primitive_conversion_array ! ( UInt8Type , input, builder) ;
278177 }
279178 DataType :: UInt16 => {
280- primitive_conversion ! ( UInt16Type , input, builder) ;
179+ primitive_conversion_array ! ( UInt16Type , input, builder) ;
281180 }
282181 DataType :: UInt32 => {
283- primitive_conversion ! ( UInt32Type , input, builder) ;
182+ primitive_conversion_array ! ( UInt32Type , input, builder) ;
284183 }
285184 DataType :: UInt64 => {
286- primitive_conversion ! ( UInt64Type , input, builder) ;
185+ primitive_conversion_array ! ( UInt64Type , input, builder) ;
287186 }
288187 DataType :: Float16 => {
289- generic_conversion ! (
290- Float16Type ,
291- as_primitive,
292- |v: f16| -> f32 { v. into( ) } ,
293- input,
294- builder
295- ) ;
188+ generic_conversion_array ! ( Float16Type , as_primitive, f32 :: from, input, builder) ;
296189 }
297190 DataType :: Float32 => {
298- primitive_conversion ! ( Float32Type , input, builder) ;
191+ primitive_conversion_array ! ( Float32Type , input, builder) ;
299192 }
300193 DataType :: Float64 => {
301- primitive_conversion ! ( Float64Type , input, builder) ;
194+ primitive_conversion_array ! ( Float64Type , input, builder) ;
302195 }
303196 DataType :: Decimal32 ( _, scale) => {
304- generic_conversion ! (
197+ generic_conversion_array ! (
305198 Decimal32Type ,
306199 as_primitive,
307200 |v| decimal_to_variant_decimal!( v, scale, i32 , VariantDecimal4 ) ,
@@ -310,7 +203,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
310203 ) ;
311204 }
312205 DataType :: Decimal64 ( _, scale) => {
313- generic_conversion ! (
206+ generic_conversion_array ! (
314207 Decimal64Type ,
315208 as_primitive,
316209 |v| decimal_to_variant_decimal!( v, scale, i64 , VariantDecimal8 ) ,
@@ -319,7 +212,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
319212 ) ;
320213 }
321214 DataType :: Decimal128 ( _, scale) => {
322- generic_conversion ! (
215+ generic_conversion_array ! (
323216 Decimal128Type ,
324217 as_primitive,
325218 |v| decimal_to_variant_decimal!( v, scale, i128 , VariantDecimal16 ) ,
@@ -328,7 +221,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
328221 ) ;
329222 }
330223 DataType :: Decimal256 ( _, scale) => {
331- generic_conversion ! (
224+ generic_conversion_array ! (
332225 Decimal256Type ,
333226 as_primitive,
334227 |v: i256| {
@@ -346,7 +239,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
346239 ) ;
347240 }
348241 DataType :: FixedSizeBinary ( _) => {
349- non_generic_conversion ! ( as_fixed_size_binary, |v| v, input , builder) ;
242+ non_generic_conversion_array ! ( input . as_fixed_size_binary( ) , |v| v, builder) ;
350243 }
351244 DataType :: Null => {
352245 for _ in 0 ..input. len ( ) {
@@ -359,7 +252,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
359252 DataType :: Time32 ( unit) => {
360253 match * unit {
361254 TimeUnit :: Second => {
362- generic_conversion ! (
255+ generic_conversion_array ! (
363256 Time32SecondType ,
364257 as_primitive,
365258 // nano second are always 0
@@ -369,7 +262,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
369262 ) ;
370263 }
371264 TimeUnit :: Millisecond => {
372- generic_conversion ! (
265+ generic_conversion_array ! (
373266 Time32MillisecondType ,
374267 as_primitive,
375268 |v| NaiveTime :: from_num_seconds_from_midnight_opt(
@@ -392,7 +285,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
392285 DataType :: Time64 ( unit) => {
393286 match * unit {
394287 TimeUnit :: Microsecond => {
395- generic_conversion ! (
288+ generic_conversion_array ! (
396289 Time64MicrosecondType ,
397290 as_primitive,
398291 |v| NaiveTime :: from_num_seconds_from_midnight_opt(
@@ -405,7 +298,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
405298 ) ;
406299 }
407300 TimeUnit :: Nanosecond => {
408- generic_conversion ! (
301+ generic_conversion_array ! (
409302 Time64NanosecondType ,
410303 as_primitive,
411304 |v| NaiveTime :: from_num_seconds_from_midnight_opt(
@@ -433,13 +326,13 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
433326 ) ) ;
434327 }
435328 DataType :: Utf8 => {
436- cast_conversion_string ! ( i32 , as_string, |v| v, input, builder) ;
329+ generic_conversion_array ! ( i32 , as_string, |v| v, input, builder) ;
437330 }
438331 DataType :: LargeUtf8 => {
439- cast_conversion_string ! ( i64 , as_string, |v| v, input, builder) ;
332+ generic_conversion_array ! ( i64 , as_string, |v| v, input, builder) ;
440333 }
441334 DataType :: Utf8View => {
442- cast_conversion_nongeneric ! ( as_string_view, |v| v, input , builder) ;
335+ non_generic_conversion_array ! ( input . as_string_view( ) , |v| v, builder) ;
443336 }
444337 DataType :: Struct ( _) => {
445338 let struct_array = input. as_struct ( ) ;
@@ -487,7 +380,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
487380 }
488381 }
489382 DataType :: Date32 => {
490- generic_conversion ! (
383+ generic_conversion_array ! (
491384 Date32Type ,
492385 as_primitive,
493386 |v: i32 | -> NaiveDate { Date32Type :: to_naive_date( v) } ,
@@ -496,7 +389,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
496389 ) ;
497390 }
498391 DataType :: Date64 => {
499- generic_conversion ! (
392+ generic_conversion_array ! (
500393 Date64Type ,
501394 as_primitive,
502395 |v: i64 | { Date64Type :: to_naive_date_opt( v) . unwrap( ) } ,
@@ -723,6 +616,7 @@ mod tests {
723616 use arrow_schema:: {
724617 DECIMAL128_MAX_PRECISION , DECIMAL32_MAX_PRECISION , DECIMAL64_MAX_PRECISION ,
725618 } ;
619+ use half:: f16;
726620 use parquet_variant:: { Variant , VariantDecimal16 } ;
727621 use std:: { sync:: Arc , vec} ;
728622
0 commit comments