Skip to content

Commit cbfa8ec

Browse files
committed
add from_slice trait to ease arrow2 migration
1 parent 6f7b2d2 commit cbfa8ec

38 files changed

+353
-253
lines changed

datafusion-cli/src/print_format.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ mod tests {
120120
use super::*;
121121
use arrow::array::Int32Array;
122122
use arrow::datatypes::{DataType, Field, Schema};
123+
use datafusion::from_slice::FromSlice;
123124
use std::sync::Arc;
124125

125126
#[test]
@@ -168,9 +169,9 @@ mod tests {
168169
let batch = RecordBatch::try_new(
169170
schema,
170171
vec![
171-
Arc::new(Int32Array::from(vec![1, 2, 3])),
172-
Arc::new(Int32Array::from(vec![4, 5, 6])),
173-
Arc::new(Int32Array::from(vec![7, 8, 9])),
172+
Arc::new(Int32Array::from_slice(&[1, 2, 3])),
173+
Arc::new(Int32Array::from_slice(&[4, 5, 6])),
174+
Arc::new(Int32Array::from_slice(&[7, 8, 9])),
174175
],
175176
)
176177
.unwrap();
@@ -198,9 +199,9 @@ mod tests {
198199
let batch = RecordBatch::try_new(
199200
schema,
200201
vec![
201-
Arc::new(Int32Array::from(vec![1, 2, 3])),
202-
Arc::new(Int32Array::from(vec![4, 5, 6])),
203-
Arc::new(Int32Array::from(vec![7, 8, 9])),
202+
Arc::new(Int32Array::from_slice(&[1, 2, 3])),
203+
Arc::new(Int32Array::from_slice(&[4, 5, 6])),
204+
Arc::new(Int32Array::from_slice(&[7, 8, 9])),
204205
],
205206
)
206207
.unwrap();

datafusion-examples/examples/dataframe_in_memory.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ use std::sync::Arc;
2020
use datafusion::arrow::array::{Int32Array, StringArray};
2121
use datafusion::arrow::datatypes::{DataType, Field, Schema};
2222
use datafusion::arrow::record_batch::RecordBatch;
23-
2423
use datafusion::datasource::MemTable;
2524
use datafusion::error::Result;
25+
use datafusion::from_slice::FromSlice;
2626
use datafusion::prelude::*;
2727

2828
/// This example demonstrates how to use the DataFrame API against in-memory data.
@@ -39,7 +39,7 @@ async fn main() -> Result<()> {
3939
schema.clone(),
4040
vec![
4141
Arc::new(StringArray::from(vec!["a", "b", "c", "d"])),
42-
Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
42+
Arc::new(Int32Array::from_slice(&[1, 10, 10, 100])),
4343
],
4444
)?;
4545

datafusion-examples/examples/simple_udaf.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use datafusion::arrow::{
2222
record_batch::RecordBatch,
2323
};
2424

25+
use datafusion::from_slice::FromSlice;
2526
use datafusion::physical_plan::functions::Volatility;
2627
use datafusion::{error::Result, logical_plan::create_udaf, physical_plan::Accumulator};
2728
use datafusion::{prelude::*, scalar::ScalarValue};
@@ -37,11 +38,11 @@ fn create_context() -> Result<ExecutionContext> {
3738
// define data in two partitions
3839
let batch1 = RecordBatch::try_new(
3940
schema.clone(),
40-
vec![Arc::new(Float32Array::from(vec![2.0, 4.0, 8.0]))],
41+
vec![Arc::new(Float32Array::from_slice(&[2.0, 4.0, 8.0]))],
4142
)?;
4243
let batch2 = RecordBatch::try_new(
4344
schema.clone(),
44-
vec![Arc::new(Float32Array::from(vec![64.0]))],
45+
vec![Arc::new(Float32Array::from_slice(&[64.0]))],
4546
)?;
4647

4748
// declare a new context. In spark API, this corresponds to a new spark SQLsession

datafusion-examples/examples/simple_udf.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use datafusion::{
2424
physical_plan::functions::Volatility,
2525
};
2626

27+
use datafusion::from_slice::FromSlice;
2728
use datafusion::prelude::*;
2829
use datafusion::{error::Result, physical_plan::functions::make_scalar_function};
2930
use std::sync::Arc;
@@ -42,8 +43,8 @@ fn create_context() -> Result<ExecutionContext> {
4243
let batch = RecordBatch::try_new(
4344
schema.clone(),
4445
vec![
45-
Arc::new(Float32Array::from(vec![2.1, 3.1, 4.1, 5.1])),
46-
Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])),
46+
Arc::new(Float32Array::from_slice(&[2.1, 3.1, 4.1, 5.1])),
47+
Arc::new(Float64Array::from_slice(&[1.0, 2.0, 3.0, 4.0])),
4748
],
4849
)?;
4950

datafusion/src/datasource/memory.rs

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ impl TableProvider for MemTable {
146146
#[cfg(test)]
147147
mod tests {
148148
use super::*;
149+
use crate::from_slice::FromSlice;
149150
use arrow::array::Int32Array;
150151
use arrow::datatypes::{DataType, Field, Schema};
151152
use futures::StreamExt;
@@ -164,9 +165,9 @@ mod tests {
164165
let batch = RecordBatch::try_new(
165166
schema.clone(),
166167
vec![
167-
Arc::new(Int32Array::from(vec![1, 2, 3])),
168-
Arc::new(Int32Array::from(vec![4, 5, 6])),
169-
Arc::new(Int32Array::from(vec![7, 8, 9])),
168+
Arc::new(Int32Array::from_slice(&[1, 2, 3])),
169+
Arc::new(Int32Array::from_slice(&[4, 5, 6])),
170+
Arc::new(Int32Array::from_slice(&[7, 8, 9])),
170171
Arc::new(Int32Array::from(vec![None, None, Some(9)])),
171172
],
172173
)?;
@@ -197,9 +198,9 @@ mod tests {
197198
let batch = RecordBatch::try_new(
198199
schema.clone(),
199200
vec![
200-
Arc::new(Int32Array::from(vec![1, 2, 3])),
201-
Arc::new(Int32Array::from(vec![4, 5, 6])),
202-
Arc::new(Int32Array::from(vec![7, 8, 9])),
201+
Arc::new(Int32Array::from_slice(&[1, 2, 3])),
202+
Arc::new(Int32Array::from_slice(&[4, 5, 6])),
203+
Arc::new(Int32Array::from_slice(&[7, 8, 9])),
203204
],
204205
)?;
205206

@@ -225,9 +226,9 @@ mod tests {
225226
let batch = RecordBatch::try_new(
226227
schema.clone(),
227228
vec![
228-
Arc::new(Int32Array::from(vec![1, 2, 3])),
229-
Arc::new(Int32Array::from(vec![4, 5, 6])),
230-
Arc::new(Int32Array::from(vec![7, 8, 9])),
229+
Arc::new(Int32Array::from_slice(&[1, 2, 3])),
230+
Arc::new(Int32Array::from_slice(&[4, 5, 6])),
231+
Arc::new(Int32Array::from_slice(&[7, 8, 9])),
231232
],
232233
)?;
233234

@@ -262,9 +263,9 @@ mod tests {
262263
let batch = RecordBatch::try_new(
263264
schema1,
264265
vec![
265-
Arc::new(Int32Array::from(vec![1, 2, 3])),
266-
Arc::new(Int32Array::from(vec![4, 5, 6])),
267-
Arc::new(Int32Array::from(vec![7, 8, 9])),
266+
Arc::new(Int32Array::from_slice(&[1, 2, 3])),
267+
Arc::new(Int32Array::from_slice(&[4, 5, 6])),
268+
Arc::new(Int32Array::from_slice(&[7, 8, 9])),
268269
],
269270
)?;
270271

@@ -295,8 +296,8 @@ mod tests {
295296
let batch = RecordBatch::try_new(
296297
schema1,
297298
vec![
298-
Arc::new(Int32Array::from(vec![1, 2, 3])),
299-
Arc::new(Int32Array::from(vec![7, 5, 9])),
299+
Arc::new(Int32Array::from_slice(&[1, 2, 3])),
300+
Arc::new(Int32Array::from_slice(&[7, 5, 9])),
300301
],
301302
)?;
302303

@@ -339,18 +340,18 @@ mod tests {
339340
let batch1 = RecordBatch::try_new(
340341
Arc::new(schema1),
341342
vec![
342-
Arc::new(Int32Array::from(vec![1, 2, 3])),
343-
Arc::new(Int32Array::from(vec![4, 5, 6])),
344-
Arc::new(Int32Array::from(vec![7, 8, 9])),
343+
Arc::new(Int32Array::from_slice(&[1, 2, 3])),
344+
Arc::new(Int32Array::from_slice(&[4, 5, 6])),
345+
Arc::new(Int32Array::from_slice(&[7, 8, 9])),
345346
],
346347
)?;
347348

348349
let batch2 = RecordBatch::try_new(
349350
Arc::new(schema2),
350351
vec![
351-
Arc::new(Int32Array::from(vec![1, 2, 3])),
352-
Arc::new(Int32Array::from(vec![4, 5, 6])),
353-
Arc::new(Int32Array::from(vec![7, 8, 9])),
352+
Arc::new(Int32Array::from_slice(&[1, 2, 3])),
353+
Arc::new(Int32Array::from_slice(&[4, 5, 6])),
354+
Arc::new(Int32Array::from_slice(&[7, 8, 9])),
354355
],
355356
)?;
356357

datafusion/src/execution/context.rs

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,7 @@ impl FunctionRegistry for ExecutionContextState {
12141214
#[cfg(test)]
12151215
mod tests {
12161216
use super::*;
1217+
use crate::from_slice::FromSlice;
12171218
use crate::logical_plan::plan::Projection;
12181219
use crate::logical_plan::TableScan;
12191220
use crate::logical_plan::{binary_expr, lit, Operator};
@@ -1514,9 +1515,9 @@ mod tests {
15141515
let partitions = vec![vec![RecordBatch::try_new(
15151516
schema.clone(),
15161517
vec![
1517-
Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
1518-
Arc::new(Int32Array::from(vec![2, 12, 12, 120])),
1519-
Arc::new(Int32Array::from(vec![3, 12, 12, 120])),
1518+
Arc::new(Int32Array::from_slice(&[1, 10, 10, 100])),
1519+
Arc::new(Int32Array::from_slice(&[2, 12, 12, 120])),
1520+
Arc::new(Int32Array::from_slice(&[3, 12, 12, 120])),
15201521
],
15211522
)?]];
15221523

@@ -2936,11 +2937,11 @@ mod tests {
29362937
),
29372938
(
29382939
DataType::Int32,
2939-
Arc::new(Int32Array::from(vec![1])) as ArrayRef,
2940+
Arc::new(Int32Array::from_slice(&[1])) as ArrayRef,
29402941
),
29412942
(
29422943
DataType::Int64,
2943-
Arc::new(Int64Array::from(vec![1])) as ArrayRef,
2944+
Arc::new(Int64Array::from_slice(&[1])) as ArrayRef,
29442945
),
29452946
(
29462947
DataType::UInt8,
@@ -2952,19 +2953,19 @@ mod tests {
29522953
),
29532954
(
29542955
DataType::UInt32,
2955-
Arc::new(UInt32Array::from(vec![1])) as ArrayRef,
2956+
Arc::new(UInt32Array::from_slice(&[1])) as ArrayRef,
29562957
),
29572958
(
29582959
DataType::UInt64,
2959-
Arc::new(UInt64Array::from(vec![1])) as ArrayRef,
2960+
Arc::new(UInt64Array::from_slice(&[1])) as ArrayRef,
29602961
),
29612962
(
29622963
DataType::Float32,
2963-
Arc::new(Float32Array::from(vec![1.0_f32])) as ArrayRef,
2964+
Arc::new(Float32Array::from_slice(&[1.0_f32])) as ArrayRef,
29642965
),
29652966
(
29662967
DataType::Float64,
2967-
Arc::new(Float64Array::from(vec![1.0_f64])) as ArrayRef,
2968+
Arc::new(Float64Array::from_slice(&[1.0_f64])) as ArrayRef,
29682969
),
29692970
];
29702971

@@ -3278,8 +3279,8 @@ mod tests {
32783279
let batch = RecordBatch::try_new(
32793280
Arc::new(schema.clone()),
32803281
vec![
3281-
Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
3282-
Arc::new(Int32Array::from(vec![2, 12, 12, 120])),
3282+
Arc::new(Int32Array::from_slice(&[1, 10, 10, 100])),
3283+
Arc::new(Int32Array::from_slice(&[2, 12, 12, 120])),
32833284
],
32843285
)?;
32853286

@@ -3379,11 +3380,11 @@ mod tests {
33793380

33803381
let batch1 = RecordBatch::try_new(
33813382
Arc::new(schema.clone()),
3382-
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
3383+
vec![Arc::new(Int32Array::from_slice(&[1, 2, 3]))],
33833384
)?;
33843385
let batch2 = RecordBatch::try_new(
33853386
Arc::new(schema.clone()),
3386-
vec![Arc::new(Int32Array::from(vec![4, 5]))],
3387+
vec![Arc::new(Int32Array::from_slice(&[4, 5]))],
33873388
)?;
33883389

33893390
let mut ctx = ExecutionContext::new();
@@ -3416,11 +3417,11 @@ mod tests {
34163417

34173418
let batch1 = RecordBatch::try_new(
34183419
Arc::new(schema.clone()),
3419-
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
3420+
vec![Arc::new(Int32Array::from_slice(&[1, 2, 3]))],
34203421
)?;
34213422
let batch2 = RecordBatch::try_new(
34223423
Arc::new(schema.clone()),
3423-
vec![Arc::new(Int32Array::from(vec![4, 5]))],
3424+
vec![Arc::new(Int32Array::from_slice(&[4, 5]))],
34243425
)?;
34253426

34263427
let mut ctx = ExecutionContext::new();
@@ -3880,8 +3881,8 @@ mod tests {
38803881
let batch = RecordBatch::try_new(
38813882
Arc::new(schema.clone()),
38823883
vec![
3883-
Arc::new(Int32Array::from(vec![1])),
3884-
Arc::new(Float64Array::from(vec![1.0])),
3884+
Arc::new(Int32Array::from_slice(&[1])),
3885+
Arc::new(Float64Array::from_slice(&[1.0])),
38853886
Arc::new(StringArray::from(vec![Some("foo")])),
38863887
Arc::new(LargeStringArray::from(vec![Some("bar")])),
38873888
Arc::new(BinaryArray::from(vec![b"foo" as &[u8]])),

datafusion/src/from_slice.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! A trait to define from_slice functions for arrow types
19+
//!
20+
//! This file essentially exists to ease the transition onto arrow2
21+
22+
use arrow::array::{ArrayData, PrimitiveArray};
23+
use arrow::buffer::Buffer;
24+
use arrow::datatypes::ArrowPrimitiveType;
25+
26+
/// A trait to define from_slice functions for arrow primitive array types
27+
pub trait FromSlice<T>
28+
where
29+
T: ArrowPrimitiveType,
30+
{
31+
/// convert a slice of native types into a primitive array (without nulls)
32+
fn from_slice(slice: &[T::Native]) -> PrimitiveArray<T>;
33+
}
34+
35+
/// default implementation for primitive types
36+
// #[cfg(test)]
37+
impl<T: ArrowPrimitiveType> FromSlice<T> for PrimitiveArray<T> {
38+
fn from_slice(slice: &[T::Native]) -> PrimitiveArray<T> {
39+
let array_data = ArrayData::builder(T::DATA_TYPE)
40+
.len(slice.len())
41+
.add_buffer(Buffer::from_slice_ref(&slice));
42+
let array_data = unsafe { array_data.build_unchecked() };
43+
PrimitiveArray::<T>::from(array_data)
44+
}
45+
}

datafusion/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,8 @@ pub(crate) mod field_util;
227227
#[cfg(feature = "pyarrow")]
228228
mod pyarrow;
229229

230+
pub mod from_slice;
231+
230232
#[cfg(test)]
231233
pub mod test;
232234
pub mod test_util;

datafusion/src/physical_plan/coalesce_batches.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ pub fn concat_batches(
295295
#[cfg(test)]
296296
mod tests {
297297
use super::*;
298+
use crate::from_slice::FromSlice;
298299
use crate::physical_plan::{memory::MemoryExec, repartition::RepartitionExec};
299300
use arrow::array::UInt32Array;
300301
use arrow::datatypes::{DataType, Field, Schema};
@@ -336,7 +337,7 @@ mod tests {
336337
fn create_batch(schema: &Arc<Schema>) -> RecordBatch {
337338
RecordBatch::try_new(
338339
schema.clone(),
339-
vec![Arc::new(UInt32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8]))],
340+
vec![Arc::new(UInt32Array::from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]))],
340341
)
341342
.unwrap()
342343
}

datafusion/src/physical_plan/common.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ impl<T> Drop for AbortOnDropMany<T> {
274274
#[cfg(test)]
275275
mod tests {
276276
use super::*;
277+
use crate::from_slice::FromSlice;
277278
use arrow::{
278279
array::{Float32Array, Float64Array},
279280
datatypes::{DataType, Field, Schema},
@@ -343,8 +344,8 @@ mod tests {
343344
let batch = RecordBatch::try_new(
344345
Arc::clone(&schema),
345346
vec![
346-
Arc::new(Float32Array::from(vec![1., 2., 3.])),
347-
Arc::new(Float64Array::from(vec![9., 8., 7.])),
347+
Arc::new(Float32Array::from_slice(&[1., 2., 3.])),
348+
Arc::new(Float64Array::from_slice(&[9., 8., 7.])),
348349
],
349350
)?;
350351
let result =

0 commit comments

Comments
 (0)