From 251d693e28ccf4a847f051ffeacf433d2c05dc0a Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Wed, 3 Aug 2022 09:52:23 +0800 Subject: [PATCH 1/3] const fn get_data_type for binary and string Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/array/array_binary.rs | 4 +--- arrow/src/array/array_string.rs | 4 +--- arrow/src/array/builder/generic_binary_builder.rs | 15 ++++----------- 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index 3e49f60318ce..eb03e3e037ee 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -38,9 +38,7 @@ pub struct GenericBinaryArray { impl GenericBinaryArray { /// Get the data type of the array. - // Declare this function as `pub const fn` after - // https://github.com/rust-lang/rust/issues/93706 is merged. - pub fn get_data_type() -> DataType { + pub const fn get_data_type() -> DataType { if OffsetSize::IS_LARGE { DataType::LargeBinary } else { diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs index c332aa197688..d52841b3e068 100644 --- a/arrow/src/array/array_string.rs +++ b/arrow/src/array/array_string.rs @@ -40,9 +40,7 @@ pub struct GenericStringArray { impl GenericStringArray { /// Get the data type of the array. - // Declare this function as `pub const fn` after - // https://github.com/rust-lang/rust/issues/93706 is merged. - pub fn get_data_type() -> DataType { + pub const fn get_data_type() -> DataType { if OffsetSize::IS_LARGE { DataType::LargeUtf8 } else { diff --git a/arrow/src/array/builder/generic_binary_builder.rs b/arrow/src/array/builder/generic_binary_builder.rs index 8f242243cd7c..8333f275cf07 100644 --- a/arrow/src/array/builder/generic_binary_builder.rs +++ b/arrow/src/array/builder/generic_binary_builder.rs @@ -15,12 +15,9 @@ // specific language governing permissions and limitations // under the License. -use crate::{ - array::{ - ArrayBuilder, ArrayDataBuilder, ArrayRef, GenericBinaryArray, OffsetSizeTrait, - UInt8BufferBuilder, - }, - datatypes::DataType, +use crate::array::{ + ArrayBuilder, ArrayDataBuilder, ArrayRef, GenericBinaryArray, OffsetSizeTrait, + UInt8BufferBuilder, }; use std::any::Any; use std::sync::Arc; @@ -80,11 +77,7 @@ impl GenericBinaryBuilder { /// Builds the [`GenericBinaryArray`] and reset this builder. pub fn finish(&mut self) -> GenericBinaryArray { - let array_type = if OffsetSize::IS_LARGE { - DataType::LargeBinary - } else { - DataType::Binary - }; + let array_type = GenericBinaryArray::::get_data_type(); let array_builder = ArrayDataBuilder::new(array_type) .len(self.len()) .add_buffer(self.offsets_builder.finish()) From b0d7818d5e84fa5ecd6356d08622e2891daa31fc Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Wed, 3 Aug 2022 12:09:14 +0800 Subject: [PATCH 2/3] const value instead of const fn Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/array/array_binary.rs | 24 ++++++++--------- arrow/src/array/array_string.rs | 27 ++++++++----------- .../array/builder/generic_binary_builder.rs | 2 +- arrow/src/compute/kernels/concat_elements.rs | 4 +-- arrow/src/compute/kernels/substring.rs | 12 ++++----- arrow/src/compute/kernels/take.rs | 11 ++++---- 6 files changed, 36 insertions(+), 44 deletions(-) diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index eb03e3e037ee..5d5e24db3af2 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -37,14 +37,12 @@ pub struct GenericBinaryArray { } impl GenericBinaryArray { - /// Get the data type of the array. - pub const fn get_data_type() -> DataType { - if OffsetSize::IS_LARGE { - DataType::LargeBinary - } else { - DataType::Binary - } - } + /// Data type of the array. + pub const DATA_TYPE: DataType = if OffsetSize::IS_LARGE { + DataType::LargeBinary + } else { + DataType::Binary + }; /// Returns the length for value at index `i`. #[inline] @@ -156,7 +154,7 @@ impl GenericBinaryArray { "The child array cannot contain null values." ); - let builder = ArrayData::builder(Self::get_data_type()) + let builder = ArrayData::builder(Self::DATA_TYPE) .len(v.len()) .offset(v.offset()) .add_buffer(v.data_ref().buffers()[0].clone()) @@ -195,7 +193,7 @@ impl GenericBinaryArray { assert!(!offsets.is_empty()); // wrote at least one let actual_len = (offsets.len() / std::mem::size_of::()) - 1; - let array_data = ArrayData::builder(Self::get_data_type()) + let array_data = ArrayData::builder(Self::DATA_TYPE) .len(actual_len) .add_buffer(offsets.into()) .add_buffer(values.into()); @@ -274,7 +272,7 @@ impl From for GenericBinaryArray Self { assert_eq!( data.data_type(), - &Self::get_data_type(), + &Self::DATA_TYPE, "[Large]BinaryArray expects Datatype::[Large]Binary" ); assert_eq!( @@ -351,7 +349,7 @@ where // calculate actual data_len, which may be different from the iterator's upper bound let data_len = offsets.len() - 1; - let array_data = ArrayData::builder(Self::get_data_type()) + let array_data = ArrayData::builder(Self::DATA_TYPE) .len(data_len) .add_buffer(Buffer::from_slice_ref(&offsets)) .add_buffer(Buffer::from_slice_ref(&values)) @@ -596,7 +594,7 @@ mod tests { let offsets = [0, 5, 5, 12].map(|n| O::from_usize(n).unwrap()); // Array data: ["hello", "", "parquet"] - let array_data1 = ArrayData::builder(GenericBinaryArray::::get_data_type()) + let array_data1 = ArrayData::builder(GenericBinaryArray::::DATA_TYPE) .len(3) .add_buffer(Buffer::from_slice_ref(&offsets)) .add_buffer(Buffer::from_slice_ref(&values)) diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs index d52841b3e068..a01ccd1f916e 100644 --- a/arrow/src/array/array_string.rs +++ b/arrow/src/array/array_string.rs @@ -39,14 +39,12 @@ pub struct GenericStringArray { } impl GenericStringArray { - /// Get the data type of the array. - pub const fn get_data_type() -> DataType { - if OffsetSize::IS_LARGE { - DataType::LargeUtf8 - } else { - DataType::Utf8 - } - } + /// Data type of the array. + pub const DATA_TYPE: DataType = if OffsetSize::IS_LARGE { + DataType::LargeUtf8 + } else { + DataType::Utf8 + }; /// Returns the length for the element at index `i`. #[inline] @@ -146,7 +144,7 @@ impl GenericStringArray { "The child array cannot contain null values." ); - let builder = ArrayData::builder(Self::get_data_type()) + let builder = ArrayData::builder(Self::DATA_TYPE) .len(v.len()) .offset(v.offset()) .add_buffer(v.data().buffers()[0].clone()) @@ -185,7 +183,7 @@ impl GenericStringArray { assert!(!offsets.is_empty()); // wrote at least one let actual_len = (offsets.len() / std::mem::size_of::()) - 1; - let array_data = ArrayData::builder(Self::get_data_type()) + let array_data = ArrayData::builder(Self::DATA_TYPE) .len(actual_len) .add_buffer(offsets.into()) .add_buffer(values.into()); @@ -262,7 +260,7 @@ where // calculate actual data_len, which may be different from the iterator's upper bound let data_len = (offsets.len() / offset_size) - 1; - let array_data = ArrayData::builder(Self::get_data_type()) + let array_data = ArrayData::builder(Self::DATA_TYPE) .len(data_len) .add_buffer(offsets.into()) .add_buffer(values.into()) @@ -340,10 +338,7 @@ impl From> for GenericStringArray { fn from(v: GenericBinaryArray) -> Self { - let builder = v - .into_data() - .into_builder() - .data_type(Self::get_data_type()); + let builder = v.into_data().into_builder().data_type(Self::DATA_TYPE); let data = unsafe { builder.build_unchecked() }; Self::from(data) } @@ -353,7 +348,7 @@ impl From for GenericStringArray Self { assert_eq!( data.data_type(), - &Self::get_data_type(), + &Self::DATA_TYPE, "[Large]StringArray expects Datatype::[Large]Utf8" ); assert_eq!( diff --git a/arrow/src/array/builder/generic_binary_builder.rs b/arrow/src/array/builder/generic_binary_builder.rs index 8333f275cf07..aca2e1d9694e 100644 --- a/arrow/src/array/builder/generic_binary_builder.rs +++ b/arrow/src/array/builder/generic_binary_builder.rs @@ -77,7 +77,7 @@ impl GenericBinaryBuilder { /// Builds the [`GenericBinaryArray`] and reset this builder. pub fn finish(&mut self) -> GenericBinaryArray { - let array_type = GenericBinaryArray::::get_data_type(); + let array_type = GenericBinaryArray::::DATA_TYPE; let array_builder = ArrayDataBuilder::new(array_type) .len(self.len()) .add_buffer(self.offsets_builder.finish()) diff --git a/arrow/src/compute/kernels/concat_elements.rs b/arrow/src/compute/kernels/concat_elements.rs index 7d460b21cb0d..ac365a0968ec 100644 --- a/arrow/src/compute/kernels/concat_elements.rs +++ b/arrow/src/compute/kernels/concat_elements.rs @@ -75,7 +75,7 @@ pub fn concat_elements_utf8( output_offsets.append(Offset::from_usize(output_values.len()).unwrap()); } - let builder = ArrayDataBuilder::new(GenericStringArray::::get_data_type()) + let builder = ArrayDataBuilder::new(GenericStringArray::::DATA_TYPE) .len(left.len()) .add_buffer(output_offsets.finish()) .add_buffer(output_values.finish()) @@ -155,7 +155,7 @@ pub fn concat_elements_utf8_many( output_offsets.append(Offset::from_usize(output_values.len()).unwrap()); } - let builder = ArrayDataBuilder::new(GenericStringArray::::get_data_type()) + let builder = ArrayDataBuilder::new(GenericStringArray::::DATA_TYPE) .len(size) .add_buffer(output_offsets.finish()) .add_buffer(output_values.finish()) diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs index 024f5633fef4..5190d0bf0b67 100644 --- a/arrow/src/compute/kernels/substring.rs +++ b/arrow/src/compute/kernels/substring.rs @@ -205,7 +205,7 @@ pub fn substring_by_char( }); let data = unsafe { ArrayData::new_unchecked( - GenericStringArray::::get_data_type(), + GenericStringArray::::DATA_TYPE, array.len(), None, array @@ -294,7 +294,7 @@ fn binary_substring( let data = unsafe { ArrayData::new_unchecked( - GenericBinaryArray::::get_data_type(), + GenericBinaryArray::::DATA_TYPE, array.len(), None, array @@ -425,7 +425,7 @@ fn utf8_substring( let data = unsafe { ArrayData::new_unchecked( - GenericStringArray::::get_data_type(), + GenericStringArray::::DATA_TYPE, array.len(), None, array @@ -587,7 +587,7 @@ mod tests { // set the first and third element to be valid let bitmap = [0b101_u8]; - let data = ArrayData::builder(GenericBinaryArray::::get_data_type()) + let data = ArrayData::builder(GenericBinaryArray::::DATA_TYPE) .len(2) .add_buffer(Buffer::from_slice_ref(offsets)) .add_buffer(Buffer::from_iter(values)) @@ -814,7 +814,7 @@ mod tests { // set the first and third element to be valid let bitmap = [0b101_u8]; - let data = ArrayData::builder(GenericStringArray::::get_data_type()) + let data = ArrayData::builder(GenericStringArray::::DATA_TYPE) .len(2) .add_buffer(Buffer::from_slice_ref(offsets)) .add_buffer(Buffer::from(values)) @@ -939,7 +939,7 @@ mod tests { // set the first and third element to be valid let bitmap = [0b101_u8]; - let data = ArrayData::builder(GenericStringArray::::get_data_type()) + let data = ArrayData::builder(GenericStringArray::::DATA_TYPE) .len(2) .add_buffer(Buffer::from_slice_ref(offsets)) .add_buffer(Buffer::from(values)) diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs index 5bfd257fcf46..ab99acd2c04b 100644 --- a/arrow/src/compute/kernels/take.rs +++ b/arrow/src/compute/kernels/take.rs @@ -771,12 +771,11 @@ where }; } - let array_data = - ArrayData::builder(GenericStringArray::::get_data_type()) - .len(data_len) - .add_buffer(offsets_buffer.into()) - .add_buffer(values.into()) - .null_bit_buffer(nulls); + let array_data = ArrayData::builder(GenericStringArray::::DATA_TYPE) + .len(data_len) + .add_buffer(offsets_buffer.into()) + .add_buffer(values.into()) + .null_bit_buffer(nulls); let array_data = unsafe { array_data.build_unchecked() }; From 661d42cc28b10d9854186d80275fbc8926457e1e Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Wed, 3 Aug 2022 17:26:25 +0800 Subject: [PATCH 3/3] deprecate Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/array/array_binary.rs | 6 ++++++ arrow/src/array/array_string.rs | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index 5d5e24db3af2..dd21e0d51763 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -44,6 +44,12 @@ impl GenericBinaryArray { DataType::Binary }; + /// Get the data type of the array. + #[deprecated(note = "please use `Self::DATA_TYPE` instead")] + pub const fn get_data_type() -> DataType { + Self::DATA_TYPE + } + /// Returns the length for value at index `i`. #[inline] pub fn value_length(&self, i: usize) -> OffsetSize { diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs index a01ccd1f916e..1bb99fce7eda 100644 --- a/arrow/src/array/array_string.rs +++ b/arrow/src/array/array_string.rs @@ -46,6 +46,12 @@ impl GenericStringArray { DataType::Utf8 }; + /// Get the data type of the array. + #[deprecated(note = "please use `Self::DATA_TYPE` instead")] + pub const fn get_data_type() -> DataType { + Self::DATA_TYPE + } + /// Returns the length for the element at index `i`. #[inline] pub fn value_length(&self, i: usize) -> OffsetSize {