Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 49 additions & 99 deletions parquet/src/arrow/array_reader/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ use arrow_array::{
TimestampMicrosecondBufferBuilder, TimestampMillisecondBufferBuilder,
TimestampNanosecondBufferBuilder, TimestampSecondBufferBuilder,
},
cast::AsArray,
types::*,
};
use arrow_buffer::{BooleanBuffer, Buffer, i256};
use arrow_data::ArrayDataBuilder;
Expand Down Expand Up @@ -274,53 +276,36 @@ where
// See https://github.com/apache/arrow-rs/issues/7040 for a discussion of this
// issue.
ArrowType::UInt8 if *(array.data_type()) == ArrowType::Int32 => {
let array = array
.as_any()
.downcast_ref::<Int32Array>()
.unwrap()
.unary(|i| i as u8) as UInt8Array;
let array = array.as_primitive::<Int32Type>().unary(|i| i as u8) as UInt8Array;
Arc::new(array) as ArrayRef
}
ArrowType::Int8 if *(array.data_type()) == ArrowType::Int32 => {
let array = array
.as_any()
.downcast_ref::<Int32Array>()
.unwrap()
.unary(|i| i as i8) as Int8Array;
let array = array.as_primitive::<Int32Type>().unary(|i| i as i8) as Int8Array;
Arc::new(array) as ArrayRef
}
ArrowType::UInt16 if *(array.data_type()) == ArrowType::Int32 => {
let array = array
.as_any()
.downcast_ref::<Int32Array>()
.unwrap()
.unary(|i| i as u16) as UInt16Array;
let array = array.as_primitive::<Int32Type>().unary(|i| i as u16) as UInt16Array;
Arc::new(array) as ArrayRef
}
ArrowType::Int16 if *(array.data_type()) == ArrowType::Int32 => {
let array = array
.as_any()
.downcast_ref::<Int32Array>()
.unwrap()
.unary(|i| i as i16) as Int16Array;
let array = array.as_primitive::<Int32Type>().unary(|i| i as i16) as Int16Array;
Arc::new(array) as ArrayRef
}
ArrowType::Date64 if *(array.data_type()) == ArrowType::Int32 => {
// this is cheap as it internally reinterprets the data
let a = arrow_cast::cast(&array, &ArrowType::Date32)?;
let a = array
.as_primitive::<Int32Type>()
.reinterpret_cast::<Date32Type>();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the conversion neither widens nor truncates the values, `reinterpret_cast` is faster than going through `arrow_cast::cast` or an identity `unary(|i| i)`.

arrow_cast::cast(&a, target_type)?
}
ArrowType::Decimal64(p, s) if *(array.data_type()) == ArrowType::Int32 => {
// Apply conversion to all elements regardless of null slots as the conversion
// to `i64` is infallible. This improves performance by avoiding a branch in
// the inner loop (see docs for `PrimitiveArray::unary`).
let array = match array.data_type() {
ArrowType::Int32 => array
.as_any()
.downcast_ref::<Int32Array>()
.unwrap()
.unary(|i| i as i64)
as Decimal64Array,
ArrowType::Int32 => {
array.as_primitive::<Int32Type>().unary(|i| i as i64) as Decimal64Array
}
_ => {
return Err(arrow_err!(
"Cannot convert {:?} to decimal",
Expand All @@ -334,43 +319,32 @@ where
}
ArrowType::Decimal128(p, s) => {
// See above comment. Conversion to `i128` is likewise infallible.
let array = match array.data_type() {
ArrowType::Int32 => array
.as_any()
.downcast_ref::<Int32Array>()
.unwrap()
.unary(|i| i as i128)
as Decimal128Array,
ArrowType::Int64 => array
.as_any()
.downcast_ref::<Int64Array>()
.unwrap()
.unary(|i| i as i128)
as Decimal128Array,
_ => {
return Err(arrow_err!(
"Cannot convert {:?} to decimal",
array.data_type()
));
let array =
match array.data_type() {
ArrowType::Int32 => array.as_primitive::<Int32Type>().unary(|i| i as i128)
as Decimal128Array,
ArrowType::Int64 => array.as_primitive::<Int64Type>().unary(|i| i as i128)
as Decimal128Array,
_ => {
return Err(arrow_err!(
"Cannot convert {:?} to decimal",
array.data_type()
));
}
}
}
.with_precision_and_scale(*p, *s)?;
.with_precision_and_scale(*p, *s)?;

Arc::new(array) as ArrayRef
}
ArrowType::Decimal256(p, s) => {
// See above comment. Conversion to `i256` is likewise infallible.
let array = match array.data_type() {
ArrowType::Int32 => array
.as_any()
.downcast_ref::<Int32Array>()
.unwrap()
.as_primitive::<Int32Type>()
.unary(|i| i256::from_i128(i as i128))
as Decimal256Array,
ArrowType::Int64 => array
.as_any()
.downcast_ref::<Int64Array>()
.unwrap()
.as_primitive::<Int64Type>()
.unary(|i| i256::from_i128(i as i128))
as Decimal256Array,
_ => {
Expand All @@ -388,11 +362,8 @@ where
ArrowType::Decimal32(p, s) => {
let array = match array.data_type() {
ArrowType::Int32 => array
.as_any()
.downcast_ref::<Int32Array>()
.unwrap()
.unary(|i| i)
as Decimal32Array,
.as_primitive::<Int32Type>()
.reinterpret_cast::<Decimal32Type>(),
_ => {
return Err(arrow_err!(
"Cannot convert {:?} to decimal dictionary",
Expand All @@ -406,18 +377,12 @@ where
}
ArrowType::Decimal64(p, s) => {
let array = match array.data_type() {
ArrowType::Int32 => array
.as_any()
.downcast_ref::<Int32Array>()
.unwrap()
.unary(|i| i as i64)
as Decimal64Array,
ArrowType::Int32 => {
array.as_primitive::<Int32Type>().unary(|i| i as i64) as Decimal64Array
}
ArrowType::Int64 => array
.as_any()
.downcast_ref::<Int64Array>()
.unwrap()
.unary(|i| i)
as Decimal64Array,
.as_primitive::<Int64Type>()
.reinterpret_cast::<Decimal64Type>(),
_ => {
return Err(arrow_err!(
"Cannot convert {:?} to decimal dictionary",
Expand All @@ -431,17 +396,9 @@ where
}
ArrowType::Decimal128(p, s) => {
let array = match array.data_type() {
ArrowType::Int32 => array
.as_any()
.downcast_ref::<Int32Array>()
.unwrap()
.unary(|i| i as i128)
ArrowType::Int32 => array.as_primitive::<Int32Type>().unary(|i| i as i128)
as Decimal128Array,
ArrowType::Int64 => array
.as_any()
.downcast_ref::<Int64Array>()
.unwrap()
.unary(|i| i as i128)
ArrowType::Int64 => array.as_primitive::<Int64Type>().unary(|i| i as i128)
as Decimal128Array,
_ => {
return Err(arrow_err!(
Expand All @@ -455,27 +412,20 @@ where
arrow_cast::cast(&array, target_type)?
}
ArrowType::Decimal256(p, s) => {
let array = match array.data_type() {
ArrowType::Int32 => array
.as_any()
.downcast_ref::<Int32Array>()
.unwrap()
.unary(i256::from)
as Decimal256Array,
ArrowType::Int64 => array
.as_any()
.downcast_ref::<Int64Array>()
.unwrap()
.unary(i256::from)
as Decimal256Array,
_ => {
return Err(arrow_err!(
"Cannot convert {:?} to decimal dictionary",
array.data_type()
));
let array =
match array.data_type() {
ArrowType::Int32 => array.as_primitive::<Int32Type>().unary(i256::from)
as Decimal256Array,
ArrowType::Int64 => array.as_primitive::<Int64Type>().unary(i256::from)
as Decimal256Array,
_ => {
return Err(arrow_err!(
"Cannot convert {:?} to decimal dictionary",
array.data_type()
));
}
}
}
.with_precision_and_scale(*p, *s)?;
.with_precision_and_scale(*p, *s)?;

arrow_cast::cast(&array, target_type)?
}
Expand Down
34 changes: 12 additions & 22 deletions parquet/src/arrow/arrow_writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1141,9 +1141,10 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
ArrowDataType::Date64 => {
// If the column is a Date64, we cast it to a Date32, and then interpret that as Int32
let array = arrow_cast::cast(column, &ArrowDataType::Date32)?;
let array = arrow_cast::cast(&array, &ArrowDataType::Int32)?;
let array = array
.as_primitive::<Date32Type>()
.reinterpret_cast::<Int32Type>();

let array = array.as_primitive::<Int32Type>();
write_primitive(typed, array.values(), levels)
}
ArrowDataType::UInt32 => {
Expand Down Expand Up @@ -1233,9 +1234,10 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
ColumnWriter::Int64ColumnWriter(typed) => {
match column.data_type() {
ArrowDataType::Date64 => {
let array = arrow_cast::cast(column, &ArrowDataType::Int64)?;
let array = column
.as_primitive::<Date64Type>()
.reinterpret_cast::<Int64Type>();

let array = array.as_primitive::<Int64Type>();
write_primitive(typed, array.values(), levels)
}
ArrowDataType::Int64 => {
Expand All @@ -1252,7 +1254,7 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
ArrowDataType::Decimal64(_, _) => {
let array = column
.as_primitive::<Decimal64Type>()
.unary::<_, Int64Type>(|v| v);
.reinterpret_cast::<Int64Type>();
write_primitive(typed, array.values(), levels)
}
ArrowDataType::Decimal128(_, _) => {
Expand All @@ -1274,7 +1276,7 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
let array = arrow_cast::cast(column, value_type)?;
let array = array
.as_primitive::<Decimal64Type>()
.unary::<_, Int64Type>(|v| v);
.reinterpret_cast::<Int64Type>();
write_primitive(typed, array.values(), levels)
}
ArrowDataType::Decimal128(_, _) => {
Expand Down Expand Up @@ -1322,17 +1324,11 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
let bytes = match column.data_type() {
ArrowDataType::Interval(interval_unit) => match interval_unit {
IntervalUnit::YearMonth => {
let array = column
.as_any()
.downcast_ref::<arrow_array::IntervalYearMonthArray>()
.unwrap();
let array = column.as_primitive::<IntervalYearMonthType>();
get_interval_ym_array_slice(array, indices)
}
IntervalUnit::DayTime => {
let array = column
.as_any()
.downcast_ref::<arrow_array::IntervalDayTimeArray>()
.unwrap();
let array = column.as_primitive::<IntervalDayTimeType>();
get_interval_dt_array_slice(array, indices)
}
_ => {
Expand All @@ -1342,10 +1338,7 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
}
},
ArrowDataType::FixedSizeBinary(_) => {
let array = column
.as_any()
.downcast_ref::<arrow_array::FixedSizeBinaryArray>()
.unwrap();
let array = column.as_fixed_size_binary();
get_fsb_array_slice(array, indices)
}
ArrowDataType::Decimal32(_, _) => {
Expand All @@ -1361,10 +1354,7 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
get_decimal_128_array_slice(array, indices)
}
ArrowDataType::Decimal256(_, _) => {
let array = column
.as_any()
.downcast_ref::<arrow_array::Decimal256Array>()
.unwrap();
let array = column.as_primitive::<Decimal256Type>();
get_decimal_256_array_slice(array, indices)
}
ArrowDataType::Float16 => {
Expand Down
3 changes: 1 addition & 2 deletions parquet/src/arrow/buffer/dictionary_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,7 @@ impl<K: ArrowNativeType + Ord, V: OffsetSizeTrait> DictionaryBuffer<K, V> {

// This will compute a new dictionary
let array =
arrow_cast::cast(&values.into_array(null_buffer, value_type), data_type)
.expect("cast should be infallible");
arrow_cast::cast(&values.into_array(null_buffer, value_type), data_type)?;

Ok(array)
}
Expand Down
Loading