Skip to content

Commit 77fafb9

Browse files
tustvold and alamb authored
Specialize single column primitive group values (#7043)
* Specialize primitive group values * Split module * RawTable * Support all primitives * Add docs * Update datafusion-cli cargo lock * Make Cargo.toml order 'just so' * Review feedback --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 368f6e6 commit 77fafb9

File tree

7 files changed

+487
-232
lines changed

7 files changed

+487
-232
lines changed

datafusion-cli/Cargo.lock

Lines changed: 24 additions & 49 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/core/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ datafusion-sql = { path = "../sql", version = "27.0.0" }
7171
flate2 = { version = "1.0.24", optional = true }
7272
futures = "0.3"
7373
glob = "0.3.0"
74+
half = { version = "2.1", default-features = false }
7475
hashbrown = { version = "0.14", features = ["raw"] }
7576
indexmap = "2.0.0"
7677
itertools = "0.11"
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow_array::{downcast_primitive, ArrayRef};
19+
use arrow_schema::SchemaRef;
20+
use datafusion_common::Result;
21+
use datafusion_physical_expr::EmitTo;
22+
23+
mod primitive;
24+
use primitive::GroupValuesPrimitive;
25+
26+
mod row;
27+
use row::GroupValuesRows;
28+
29+
/// An interning store for group keys
30+
pub trait GroupValues: Send {
31+
/// Calculates the `groups` for each input row of `cols`
32+
fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> Result<()>;
33+
34+
/// Returns the number of bytes used by this [`GroupValues`]
35+
fn size(&self) -> usize;
36+
37+
/// Returns true if this [`GroupValues`] is empty
38+
fn is_empty(&self) -> bool;
39+
40+
/// The number of values stored in this [`GroupValues`]
41+
fn len(&self) -> usize;
42+
43+
/// Emits the group values
44+
fn emit(&mut self, emit_to: EmitTo) -> Result<Vec<ArrayRef>>;
45+
}
46+
47+
pub fn new_group_values(schema: SchemaRef) -> Result<Box<dyn GroupValues>> {
48+
if schema.fields.len() == 1 {
49+
let d = schema.fields[0].data_type();
50+
51+
macro_rules! downcast_helper {
52+
($t:ty, $d:ident) => {
53+
return Ok(Box::new(GroupValuesPrimitive::<$t>::new($d.clone())))
54+
};
55+
}
56+
57+
downcast_primitive! {
58+
d => (downcast_helper, d),
59+
_ => {}
60+
}
61+
}
62+
63+
Ok(Box::new(GroupValuesRows::try_new(schema)?))
64+
}

0 commit comments

Comments
 (0)