Skip to content

Commit 8f1fd12

Browse files
authored
Add ScalarBuffer abstraction (#1811) (#1820)
* Add ScalarBuffer abstraction (#1811) * Lint fixes
1 parent ba38ebe commit 8f1fd12

File tree

2 files changed

+152
-0
lines changed

2 files changed

+152
-0
lines changed

arrow/src/buffer/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ pub use immutable::*;
2323
mod mutable;
2424
pub use mutable::*;
2525
mod ops;
26+
mod scalar;
27+
pub use scalar::*;
28+
2629
pub use ops::*;
2730

2831
use crate::error::{ArrowError, Result};

arrow/src/buffer/scalar.rs

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::buffer::Buffer;
19+
use crate::datatypes::ArrowNativeType;
20+
use std::ops::Deref;
21+
22+
/// Provides a safe API for interpreting a [`Buffer`] as a slice of [`ArrowNativeType`]
23+
///
24+
/// # Safety
25+
///
26+
/// All [`ArrowNativeType`] are valid for all possible backing byte representations, and as
27+
/// a result they are "trivially safely transmutable".
28+
#[derive(Debug)]
29+
pub struct ScalarBuffer<T: ArrowNativeType> {
30+
#[allow(unused)]
31+
buffer: Buffer,
32+
// Borrows from `buffer` and is valid for the lifetime of `buffer`
33+
ptr: *const T,
34+
// The length of this slice
35+
len: usize,
36+
}
37+
38+
impl<T: ArrowNativeType> ScalarBuffer<T> {
39+
/// Create a new [`ScalarBuffer`] from a [`Buffer`], and an `offset`
40+
/// and `length` in units of `T`
41+
///
42+
/// # Panics
43+
///
44+
/// This method will panic if
45+
///
46+
/// * `offset` or `len` would result in overflow
47+
/// * `buffer` is not aligned to a multiple of `std::mem::size_of::<T>`
48+
/// * `bytes` is not large enough for the requested slice
49+
pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
50+
let size = std::mem::size_of::<T>();
51+
let offset_len = offset.checked_add(len).expect("length overflow");
52+
let start_bytes = offset.checked_mul(size).expect("start bytes overflow");
53+
let end_bytes = offset_len.checked_mul(size).expect("end bytes overflow");
54+
55+
let bytes = &buffer.as_slice()[start_bytes..end_bytes];
56+
57+
// SAFETY: all byte sequences correspond to a valid instance of T
58+
let (prefix, offsets, suffix) = unsafe { bytes.align_to::<T>() };
59+
assert!(
60+
prefix.is_empty() && suffix.is_empty(),
61+
"buffer is not aligned to {} byte boundary",
62+
size
63+
);
64+
65+
let ptr = offsets.as_ptr();
66+
Self { buffer, ptr, len }
67+
}
68+
}
69+
70+
impl<T: ArrowNativeType> Deref for ScalarBuffer<T> {
    type Target = [T];

    /// Views the stored region as a slice of `len` elements of `T` starting at `ptr`
    fn deref(&self) -> &[T] {
        // Both fields are `Copy`, so this reads them without moving out of `self`
        let Self { ptr, len, .. } = *self;

        // SAFETY: the constructor bounds checked the region against `buffer` and verified
        // its alignment, and `self` owns `buffer`, so the memory outlives the returned borrow
        unsafe { std::slice::from_raw_parts(ptr, len) }
    }
}
78+
79+
impl<T: ArrowNativeType> AsRef<[T]> for ScalarBuffer<T> {
    /// Borrows the contents as a plain slice by going through the `Deref` implementation
    fn as_ref(&self) -> &[T] {
        &**self
    }
}
84+
85+
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the three element `i32` buffer (12 bytes) used by the panic tests
    fn sample_buffer() -> Buffer {
        Buffer::from_iter([0_i32, 1, 2])
    }

    #[test]
    fn test_basic() {
        let values = [0_i32, 1, 2];
        let buffer = Buffer::from_iter(values);

        // (offset, len, expected contents): the full range, a tail, and empty views
        // both inside the buffer and exactly at its end
        let cases: [(usize, usize, &[i32]); 4] = [
            (0, 3, &values[..]),
            (1, 2, &values[1..]),
            (1, 0, &values[1..1]),
            (3, 0, &values[3..]),
        ];

        for (offset, len, want) in cases {
            let typed = ScalarBuffer::<i32>::new(buffer.clone(), offset, len);
            assert_eq!(&*typed, want);
        }
    }

    #[test]
    #[should_panic(expected = "buffer is not aligned to 4 byte boundary")]
    fn test_unaligned() {
        // `slice(1)` moves the start of the buffer off the boundary required by `i32`
        let shifted = sample_buffer().slice(1);
        let _ = ScalarBuffer::<i32>::new(shifted, 0, 2);
    }

    #[test]
    #[should_panic(expected = "range end index 16 out of range for slice of length 12")]
    fn test_length_out_of_bounds() {
        // Elements 1..4 end at byte 16, past the 12 bytes available
        let _ = ScalarBuffer::<i32>::new(sample_buffer(), 1, 3);
    }

    #[test]
    #[should_panic(expected = "range end index 16 out of range for slice of length 12")]
    fn test_offset_out_of_bounds() {
        // Even an empty view is rejected when it starts beyond the end of the buffer
        let _ = ScalarBuffer::<i32>::new(sample_buffer(), 4, 0);
    }

    #[test]
    #[should_panic(expected = "length overflow")]
    fn test_length_overflow() {
        // `offset + len` itself does not fit in a `usize`
        let _ = ScalarBuffer::<i32>::new(sample_buffer(), usize::MAX, 1);
    }

    #[test]
    #[should_panic(expected = "start bytes overflow")]
    fn test_start_overflow() {
        // The element offset fits, but multiplying by the 4 byte element size does not
        let _ = ScalarBuffer::<i32>::new(sample_buffer(), usize::MAX / 4 + 1, 0);
    }

    #[test]
    #[should_panic(expected = "end bytes overflow")]
    fn test_end_overflow() {
        // The start is zero, so only the end of the range overflows in bytes
        let _ = ScalarBuffer::<i32>::new(sample_buffer(), 0, usize::MAX / 4 + 1);
    }
}

0 commit comments

Comments
 (0)