Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,6 @@ name = "coalesce_kernels"
harness = false
required-features = ["test_utils"]


[[bench]]
name = "take_kernels"
harness = false
Expand Down Expand Up @@ -311,6 +310,11 @@ name = "lexsort"
harness = false
required-features = ["test_utils"]

[[bench]]
name = "nullif_kernel"
harness = false
required-features = ["test_utils"]

[[test]]
name = "csv"
required-features = ["csv", "chrono-tz"]
Expand Down
66 changes: 66 additions & 0 deletions arrow/benches/nullif_kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#[macro_use]
extern crate criterion;
use criterion::Criterion;

use arrow::util::bench_util::{create_boolean_array, create_primitive_array};

use arrow::array::*;
use arrow_array::types::Int64Type;
use arrow_select::nullif::nullif;
use std::hint;

/// Runs the `nullif` kernel once over `left` using `right` as the mask.
///
/// The result is handed to [`hint::black_box`] so the optimizer cannot
/// discard the kernel invocation being measured.
///
/// # Panics
///
/// Panics if `nullif` returns an error.
fn bench_nullif(left: &dyn Array, right: &BooleanArray) {
    let result = nullif(left, right).unwrap();
    hint::black_box(result);
}

/// Registers the `nullif` benchmarks with Criterion.
///
/// Every combination of two `Int64` inputs (created with null densities of
/// `0.0` and `0.1`) and three boolean masks (true densities of `0.1`, `0.1`
/// taken from a slice starting at offset 7, and `0.01`) is benchmarked.
fn add_benchmark(c: &mut Criterion) {
    let size = 8192usize;

    // Build every input up front so that allocations are consistent and are
    // not part of the measured closures.
    let int64_no_nulls = create_primitive_array::<Int64Type>(size, 0.0);
    let int64_nulls = create_primitive_array::<Int64Type>(size, 0.1);

    let mask_10 = create_boolean_array(size, 0.0, 0.1);
    // Generate 7 extra values, then slice them off the front so the mask
    // has a non-zero offset while keeping the same length as the inputs.
    let mask_10_sliced = create_boolean_array(size + 7, 0.0, 0.1).slice(7, size);
    let mask_1 = create_boolean_array(size, 0.0, 0.01);

    // (label, array) tables; the nested iteration order below determines the
    // order in which the benchmarks are registered.
    let inputs = [("no-nulls", &int64_no_nulls), ("nulls", &int64_nulls)];
    let masks = [
        ("10%", &mask_10),
        ("10%, sliced", &mask_10_sliced),
        ("1%", &mask_1),
    ];

    for &(input_label, input) in &inputs {
        for &(mask_label, mask) in &masks {
            let id = format!("nullif {input_label} mask({mask_label})");
            c.bench_function(&id, |b| b.iter(|| bench_nullif(input, mask)));
        }
    }
}

// Collect `add_benchmark` into the `benches` group and hand that group to
// Criterion's entry-point macro (the bench target sets `harness = false`).
criterion_group!(benches, add_benchmark);
criterion_main!(benches);
7 changes: 6 additions & 1 deletion parquet/src/arrow/arrow_reader/selection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ pub enum RowSelectionPolicy {

impl Default for RowSelectionPolicy {
fn default() -> Self {
Self::Auto { threshold: 32 }
Self::Auto { threshold: 16 }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a great call @Weijun-H -- this is leftover code that was not intended for this PR. Sorry about that; I will remove it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed in 7266d10

}
}

Expand Down Expand Up @@ -146,6 +146,7 @@ impl RowSelection {
/// # Panic
///
/// Panics if any of the [`BooleanArray`] contain nulls
#[inline(never)]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these inline tags for profiling or performance?

pub fn from_filters(filters: &[BooleanArray]) -> Self {
let mut next_offset = 0;
let total_rows = filters.iter().map(|x| x.len()).sum();
Expand All @@ -161,6 +162,7 @@ impl RowSelection {
}

/// Creates a [`RowSelection`] from an iterator of consecutive ranges to keep
#[inline(never)]
pub fn from_consecutive_ranges<I: Iterator<Item = Range<usize>>>(
ranges: I,
total_rows: usize,
Expand Down Expand Up @@ -201,6 +203,7 @@ impl RowSelection {
/// Note: this method does not make any effort to combine consecutive ranges, nor coalesce
/// ranges that are close together. This is instead delegated to the IO subsystem to optimise,
/// e.g. [`ObjectStore::get_ranges`](object_store::ObjectStore::get_ranges)
#[inline(never)]
pub fn scan_ranges(&self, page_locations: &[PageLocation]) -> Vec<Range<u64>> {
let mut ranges: Vec<Range<u64>> = vec![];
let mut row_offset = 0;
Expand Down Expand Up @@ -342,6 +345,7 @@ impl RowSelection {
/// Panics if `other` does not have a length equal to the number of rows selected
/// by this RowSelection
///
#[inline(never)]
pub fn and_then(&self, other: &Self) -> Self {
let mut selectors = vec![];
let mut first = self.selectors.iter().cloned().peekable();
Expand Down Expand Up @@ -923,6 +927,7 @@ impl RowSelectionCursor {
}
}

#[inline(never)]
fn boolean_mask_from_selectors(selectors: &[RowSelector]) -> BooleanBuffer {
let total_rows: usize = selectors.iter().map(|s| s.row_count).sum();
let mut builder = BooleanBufferBuilder::new(total_rows);
Expand Down
Loading