@@ -29,8 +29,9 @@ use arrow_array::{
29
29
TimestampNanosecondBufferBuilder , TimestampSecondBufferBuilder ,
30
30
} ,
31
31
ArrayRef , BooleanArray , Decimal128Array , Decimal256Array , Float32Array , Float64Array ,
32
- Int32Array , Int64Array , TimestampMicrosecondArray , TimestampMillisecondArray ,
33
- TimestampNanosecondArray , TimestampSecondArray , UInt32Array , UInt64Array ,
32
+ Int16Array , Int32Array , Int64Array , Int8Array , TimestampMicrosecondArray ,
33
+ TimestampMillisecondArray , TimestampNanosecondArray , TimestampSecondArray , UInt16Array ,
34
+ UInt32Array , UInt64Array , UInt8Array ,
34
35
} ;
35
36
use arrow_buffer:: { i256, BooleanBuffer , Buffer } ;
36
37
use arrow_data:: ArrayDataBuilder ;
@@ -261,6 +262,45 @@ where
261
262
// - date64: cast int32 to date32, then date32 to date64.
262
263
// - decimal: cast int32 to decimal, int64 to decimal
263
264
let array = match target_type {
265
+ // Using `arrow_cast::cast` has been found to be very slow for converting
266
+ // INT32 physical type to lower bitwidth logical types. Since Rust `as` casts
267
+ // are infallible, instead use `unary` which is much faster (by up to 40%).
268
+ // One consequence of this approach is that some malformed integer columns
269
+ // will return an (arguably correct) result rather than null.
270
+ // See https://github.com/apache/arrow-rs/issues/7040 for a discussion of this
271
+ // issue.
272
+ ArrowType :: UInt8 if * ( array. data_type ( ) ) == ArrowType :: Int32 => {
273
+ let array = array
274
+ . as_any ( )
275
+ . downcast_ref :: < Int32Array > ( )
276
+ . unwrap ( )
277
+ . unary ( |i| i as u8 ) as UInt8Array ;
278
+ Arc :: new ( array) as ArrayRef
279
+ }
280
+ ArrowType :: Int8 if * ( array. data_type ( ) ) == ArrowType :: Int32 => {
281
+ let array = array
282
+ . as_any ( )
283
+ . downcast_ref :: < Int32Array > ( )
284
+ . unwrap ( )
285
+ . unary ( |i| i as i8 ) as Int8Array ;
286
+ Arc :: new ( array) as ArrayRef
287
+ }
288
+ ArrowType :: UInt16 if * ( array. data_type ( ) ) == ArrowType :: Int32 => {
289
+ let array = array
290
+ . as_any ( )
291
+ . downcast_ref :: < Int32Array > ( )
292
+ . unwrap ( )
293
+ . unary ( |i| i as u16 ) as UInt16Array ;
294
+ Arc :: new ( array) as ArrayRef
295
+ }
296
+ ArrowType :: Int16 if * ( array. data_type ( ) ) == ArrowType :: Int32 => {
297
+ let array = array
298
+ . as_any ( )
299
+ . downcast_ref :: < Int32Array > ( )
300
+ . unwrap ( )
301
+ . unary ( |i| i as i16 ) as Int16Array ;
302
+ Arc :: new ( array) as ArrayRef
303
+ }
264
304
ArrowType :: Date64 if * ( array. data_type ( ) ) == ArrowType :: Int32 => {
265
305
// this is cheap as it internally reinterprets the data
266
306
let a = arrow_cast:: cast ( & array, & ArrowType :: Date32 ) ?;
0 commit comments