Skip to content

Commit 3415659

Browse files
committed
impl vectorized_append.
1 parent 1a7c2eb commit 3415659

File tree

2 files changed

+135
-14
lines changed

2 files changed

+135
-14
lines changed

datafusion/physical-plan/src/aggregates/group_values/column.rs

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -319,16 +319,23 @@ impl GroupValuesColumn {
319319
next_group_index = self.group_index_lists[current_group_index];
320320
}
321321
}
322-
323-
self.vectorized_equal_to_results
324-
.resize(self.vectorized_equal_to_group_indices.len(), true);
325322
}
326323

327324
/// Perform `vectorized_equal_to`
328325
///
329-
///
330326
fn vectorized_equal_to(&mut self, cols: &[ArrayRef]) {
327+
debug_assert_eq!(
328+
self.vectorized_equal_to_group_indices.len(),
329+
self.vectorized_equal_to_row_indices.len()
330+
);
331+
332+
if self.vectorized_equal_to_group_indices.is_empty() {
333+
return;
334+
}
335+
336+
// Vectorized equal to `cols` and `group columns`
331337
let mut equal_to_results = mem::take(&mut self.vectorized_equal_to_results);
338+
equal_to_results.resize(self.vectorized_equal_to_group_indices.len(), true);
332339
for (col_idx, group_col) in self.group_values.iter().enumerate() {
333340
group_col.vectorized_equal_to(
334341
&self.vectorized_equal_to_group_indices,
@@ -337,8 +344,40 @@ impl GroupValuesColumn {
337344
&mut equal_to_results,
338345
);
339346
}
347+
348+
let mut current_row_equal_to_result = false;
349+
let mut current_row = *self.vectorized_equal_to_row_indices.first().unwrap();
350+
for (idx, &row) in self.vectorized_equal_to_row_indices.iter().enumerate() {
351+
// If found next row, according to the equal to result of `current_row`
352+
if current_row != row {
353+
if !current_row_equal_to_result {
354+
self.vectorized_append_row_indices.push(row);
355+
}
356+
current_row = row;
357+
current_row_equal_to_result = equal_to_results[idx];
358+
continue;
359+
}
360+
current_row_equal_to_result |= equal_to_results[idx];
361+
}
362+
363+
if !current_row_equal_to_result {
364+
self.vectorized_append_row_indices.push(current_row);
365+
}
366+
340367
self.vectorized_equal_to_results = equal_to_results;
341368
}
369+
370+
/// Perform `vectorized_append`
371+
///
372+
/// 1. Vectorized append new values into `group_values`
373+
/// 2. Update `map` and `group_index_lists`
374+
fn vectorized_append(&mut self, cols: &[ArrayRef], batch_hashes: &[u64]) {
375+
if self.vectorized_append_row_indices.is_empty() {
376+
return;
377+
}
378+
379+
// 1. Vectorized append new values into `group_values`
380+
}
342381
}
343382

344383
/// instantiates a [`PrimitiveGroupValueBuilder`] and pushes it into $v

datafusion/physical-plan/src/aggregates/group_values/group_column.rs

Lines changed: 92 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ impl<T: ArrowPrimitiveType, const NULLABLE: bool> GroupColumn
180180
all_non_null: bool,
181181
) {
182182
let arr = array.as_primitive::<T>();
183+
183184
match (NULLABLE, all_non_null) {
184185
(true, true) => {
185186
self.nulls.append_n(rows.len(), false);
@@ -280,7 +281,7 @@ where
280281
}
281282
}
282283

283-
fn append_batch_inner<B>(
284+
fn vectorized_append_inner<B>(
284285
&mut self,
285286
array: &ArrayRef,
286287
rows: &[usize],
@@ -293,7 +294,7 @@ where
293294
if all_non_null {
294295
self.nulls.append_n(rows.len(), false);
295296
for &row in rows {
296-
self.append_value(arr, row);
297+
self.do_append_val_inner(arr, row);
297298
}
298299
} else {
299300
for &row in rows {
@@ -304,7 +305,7 @@ where
304305
self.offsets.push(O::usize_as(offset));
305306
} else {
306307
self.nulls.append(false);
307-
self.append_value(arr, row);
308+
self.do_append_val_inner(arr, row);
308309
}
309310
}
310311
}
@@ -322,11 +323,11 @@ where
322323
self.offsets.push(O::usize_as(offset));
323324
} else {
324325
self.nulls.append(false);
325-
self.append_value(arr, row);
326+
self.do_append_val_inner(arr, row);
326327
}
327328
}
328329

329-
fn append_value<B>(&mut self, array: &GenericByteArray<B>, row: usize)
330+
fn do_append_val_inner<B>(&mut self, array: &GenericByteArray<B>, row: usize)
330331
where
331332
B: ByteArrayType,
332333
{
@@ -340,6 +341,40 @@ where
340341
B: ByteArrayType,
341342
{
342343
let array = array.as_bytes::<B>();
344+
self.do_equal_to_inner(lhs_row, array, rhs_row)
345+
}
346+
347+
fn vectorized_equal_to_inner<B>(
348+
&self,
349+
group_indices: &[usize],
350+
array: &ArrayRef,
351+
rows: &[usize],
352+
equal_to_results: &mut [bool],
353+
) where
354+
B: ByteArrayType,
355+
{
356+
let array = array.as_bytes::<B>();
357+
358+
for (idx, &lhs_row) in group_indices.iter().enumerate() {
359+
// Has found not equal to, don't need to check
360+
if !equal_to_results[idx] {
361+
continue;
362+
}
363+
364+
let rhs_row = rows[idx];
365+
equal_to_results[idx] = self.do_equal_to_inner(lhs_row, array, rhs_row);
366+
}
367+
}
368+
369+
fn do_equal_to_inner<B>(
370+
&self,
371+
lhs_row: usize,
372+
array: &GenericByteArray<B>,
373+
rhs_row: usize,
374+
) -> bool
375+
where
376+
B: ByteArrayType,
377+
{
343378
let exist_null = self.nulls.is_null(lhs_row);
344379
let input_null = array.is_null(rhs_row);
345380
if let Some(result) = nulls_equal_to(exist_null, input_null) {
@@ -411,7 +446,34 @@ where
411446
rows: &[usize],
412447
equal_to_results: &mut [bool],
413448
) {
414-
todo!()
449+
// Sanity array type
450+
match self.output_type {
451+
OutputType::Binary => {
452+
debug_assert!(matches!(
453+
array.data_type(),
454+
DataType::Binary | DataType::LargeBinary
455+
));
456+
self.vectorized_equal_to_inner::<GenericBinaryType<O>>(
457+
group_indices,
458+
array,
459+
rows,
460+
equal_to_results,
461+
);
462+
}
463+
OutputType::Utf8 => {
464+
debug_assert!(matches!(
465+
array.data_type(),
466+
DataType::Utf8 | DataType::LargeUtf8
467+
));
468+
self.vectorized_equal_to_inner::<GenericStringType<O>>(
469+
group_indices,
470+
array,
471+
rows,
472+
equal_to_results,
473+
);
474+
}
475+
_ => unreachable!("View types should use `ArrowBytesViewMap`"),
476+
}
415477
}
416478

417479
fn vectorized_append(
@@ -426,7 +488,7 @@ where
426488
column.data_type(),
427489
DataType::Binary | DataType::LargeBinary
428490
));
429-
self.append_batch_inner::<GenericBinaryType<O>>(
491+
self.vectorized_append_inner::<GenericBinaryType<O>>(
430492
column,
431493
rows,
432494
all_non_null,
@@ -437,7 +499,7 @@ where
437499
column.data_type(),
438500
DataType::Utf8 | DataType::LargeUtf8
439501
));
440-
self.append_batch_inner::<GenericStringType<O>>(
502+
self.vectorized_append_inner::<GenericStringType<O>>(
441503
column,
442504
rows,
443505
all_non_null,
@@ -606,7 +668,7 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
606668
self
607669
}
608670

609-
fn append_batch_inner(
671+
fn vectorized_append_inner(
610672
&mut self,
611673
array: &ArrayRef,
612674
rows: &[usize],
@@ -693,6 +755,26 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
693755
self.do_equal_to_inner(lhs_row, array, rhs_row)
694756
}
695757

758+
fn vectorized_equal_to_inner(
759+
&self,
760+
group_indices: &[usize],
761+
array: &ArrayRef,
762+
rows: &[usize],
763+
equal_to_results: &mut [bool],
764+
) {
765+
let array = array.as_byte_view::<B>();
766+
767+
for (idx, &lhs_row) in group_indices.iter().enumerate() {
768+
// Has found not equal to, don't need to check
769+
if !equal_to_results[idx] {
770+
continue;
771+
}
772+
773+
let rhs_row = rows[idx];
774+
equal_to_results[idx] = self.do_equal_to_inner(lhs_row, array, rhs_row);
775+
}
776+
}
777+
696778
fn do_equal_to_inner(
697779
&self,
698780
lhs_row: usize,
@@ -992,7 +1074,7 @@ impl<B: ByteViewType> GroupColumn for ByteViewGroupValueBuilder<B> {
9921074
rows: &[usize],
9931075
all_non_null: bool,
9941076
) {
995-
self.append_batch_inner(array, rows, all_non_null);
1077+
self.vectorized_append_inner(array, rows, all_non_null);
9961078
}
9971079

9981080
fn len(&self) -> usize {

0 commit comments

Comments
 (0)