Skip to content

Commit 24fc99c

Browse files
authored
Optimization: concat function (#9732)
* optimization: concat function
  fix: concat_ws
  chore: add license header
  add arrow feature
  update concat
* change Cargo.toml
* pass cargo clippy
* chore: add annotation
1 parent 4bd7c13 commit 24fc99c

File tree

4 files changed

+390
-79
lines changed

4 files changed

+390
-79
lines changed

datafusion/physical-expr/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ regex = { version = "1.8", optional = true }
7373
sha2 = { version = "^0.10.1", optional = true }
7474

7575
[dev-dependencies]
76+
arrow = { workspace = true, features = ["test_utils"] }
7677
criterion = "0.5"
7778
rand = { workspace = true }
7879
rstest = { workspace = true }
@@ -81,3 +82,7 @@ tokio = { workspace = true, features = ["rt-multi-thread"] }
8182
[[bench]]
8283
harness = false
8384
name = "in_list"
85+
86+
[[bench]]
87+
harness = false
88+
name = "concat"
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow::util::bench_util::create_string_array_with_len;
19+
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
20+
use datafusion_common::ScalarValue;
21+
use datafusion_expr::ColumnarValue;
22+
use datafusion_physical_expr::string_expressions::concat;
23+
use std::sync::Arc;
24+
25+
fn create_args(size: usize, str_len: usize) -> Vec<ColumnarValue> {
26+
let array = Arc::new(create_string_array_with_len::<i32>(size, 0.2, str_len));
27+
let scalar = ScalarValue::Utf8(Some(", ".to_string()));
28+
vec![
29+
ColumnarValue::Array(array.clone()),
30+
ColumnarValue::Scalar(scalar),
31+
ColumnarValue::Array(array),
32+
]
33+
}
34+
35+
fn criterion_benchmark(c: &mut Criterion) {
36+
for size in [1024, 4096, 8192] {
37+
let args = create_args(size, 32);
38+
let mut group = c.benchmark_group("concat function");
39+
group.bench_function(BenchmarkId::new("concat", size), |b| {
40+
b.iter(|| criterion::black_box(concat(&args).unwrap()))
41+
});
42+
group.finish();
43+
}
44+
}
45+
46+
criterion_group!(benches, criterion_benchmark);
47+
criterion_main!(benches);

datafusion/physical-expr/src/functions.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,9 +221,9 @@ pub fn create_physical_fun(
221221
// string functions
222222
BuiltinScalarFunction::Coalesce => Arc::new(conditional_expressions::coalesce),
223223
BuiltinScalarFunction::Concat => Arc::new(string_expressions::concat),
224-
BuiltinScalarFunction::ConcatWithSeparator => Arc::new(|args| {
225-
make_scalar_function_inner(string_expressions::concat_ws)(args)
226-
}),
224+
BuiltinScalarFunction::ConcatWithSeparator => {
225+
Arc::new(string_expressions::concat_ws)
226+
}
227227
BuiltinScalarFunction::InitCap => Arc::new(|args| match args[0].data_type() {
228228
DataType::Utf8 => {
229229
make_scalar_function_inner(string_expressions::initcap::<i32>)(args)

0 commit comments

Comments
 (0)