Skip to content

Commit 815a85e

Browse files
committed
feat: add ListView equal
1 parent 7aecc3f commit 815a85e

File tree

3 files changed

+213
-6
lines changed

3 files changed

+213
-6
lines changed

arrow-data/src/equal/list_view.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::ArrayData;
19+
use arrow_buffer::ArrowNativeType;
20+
use num::Integer;
21+
22+
use super::equal_range;
23+
24+
pub(super) fn list_view_equal<T: ArrowNativeType + Integer>(
25+
lhs: &ArrayData,
26+
rhs: &ArrayData,
27+
lhs_start: usize,
28+
rhs_start: usize,
29+
len: usize,
30+
) -> bool {
31+
let lhs_offsets = lhs.buffer::<T>(0);
32+
let rhs_offsets = rhs.buffer::<T>(0);
33+
let lhs_sizes = lhs.buffer::<T>(1);
34+
let rhs_sizes = rhs.buffer::<T>(1);
35+
let lhs_nulls = lhs.nulls();
36+
let rhs_nulls = rhs.nulls();
37+
for i in 0..len {
38+
let lhs_pos = lhs_start + i;
39+
let rhs_pos = rhs_start + i;
40+
41+
// get offset and size
42+
let lhs_offset_start = lhs_offsets[lhs_pos].to_usize().unwrap();
43+
let rhs_offset_start = rhs_offsets[rhs_pos].to_usize().unwrap();
44+
let lhs_size = lhs_sizes[lhs_pos].to_usize().unwrap();
45+
let rhs_size = rhs_sizes[rhs_pos].to_usize().unwrap();
46+
47+
if lhs_size != rhs_size {
48+
return false;
49+
}
50+
51+
// check if null
52+
if let (Some(lhs_null), Some(rhs_null)) = (lhs_nulls, rhs_nulls) {
53+
if lhs_null.is_null(lhs_pos) != rhs_null.is_null(rhs_pos) {
54+
return false;
55+
}
56+
if lhs_null.is_null(lhs_pos) {
57+
continue;
58+
}
59+
}
60+
61+
// compare values
62+
if !equal_range(
63+
&lhs.child_data()[0],
64+
&rhs.child_data()[0],
65+
lhs_offset_start,
66+
rhs_offset_start,
67+
lhs_size,
68+
) {
69+
return false;
70+
}
71+
}
72+
true
73+
}

arrow-data/src/equal/mod.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ mod dictionary;
3030
mod fixed_binary;
3131
mod fixed_list;
3232
mod list;
33+
mod list_view;
3334
mod null;
3435
mod primitive;
3536
mod run;
@@ -47,6 +48,7 @@ use dictionary::dictionary_equal;
4748
use fixed_binary::fixed_binary_equal;
4849
use fixed_list::fixed_list_equal;
4950
use list::list_equal;
51+
use list_view::list_view_equal;
5052
use null::null_equal;
5153
use primitive::primitive_equal;
5254
use structure::struct_equal;
@@ -102,9 +104,8 @@ fn equal_values(
102104
byte_view_equal(lhs, rhs, lhs_start, rhs_start, len)
103105
}
104106
DataType::List(_) => list_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
105-
DataType::ListView(_) | DataType::LargeListView(_) => {
106-
unimplemented!("ListView/LargeListView not yet implemented")
107-
}
107+
DataType::ListView(_) => list_view_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
108+
DataType::LargeListView(_) => list_view_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
108109
DataType::LargeList(_) => list_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
109110
DataType::FixedSizeList(_, _) => fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len),
110111
DataType::Struct(_) => struct_equal(lhs, rhs, lhs_start, rhs_start, len),

arrow/tests/array_equal.rs

Lines changed: 136 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@
1818
use arrow::array::{
1919
make_array, Array, ArrayRef, BooleanArray, Decimal128Array, FixedSizeBinaryArray,
2020
FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray, GenericStringArray,
21-
Int32Array, Int32Builder, Int64Builder, ListArray, ListBuilder, NullArray, OffsetSizeTrait,
22-
StringArray, StringDictionaryBuilder, StructArray, UnionBuilder,
21+
Int32Array, Int32Builder, Int64Builder, ListArray, ListBuilder, ListViewBuilder, NullArray,
22+
OffsetSizeTrait, StringArray, StringDictionaryBuilder, StructArray, UnionBuilder,
2323
};
2424
use arrow::datatypes::{Int16Type, Int32Type};
2525
use arrow_array::builder::{StringBuilder, StringViewBuilder, StructBuilder};
26-
use arrow_array::{DictionaryArray, FixedSizeListArray, StringViewArray};
26+
use arrow_array::{DictionaryArray, FixedSizeListArray, ListViewArray, StringViewArray};
2727
use arrow_buffer::{Buffer, ToByteSlice};
2828
use arrow_data::{ArrayData, ArrayDataBuilder};
2929
use arrow_schema::{DataType, Field, Fields};
@@ -1292,3 +1292,136 @@ fn test_list_excess_children_equal() {
12921292
assert_eq!(b.value_offsets(), &[0, 0, 2]);
12931293
assert_eq!(a, b);
12941294
}
1295+
1296+
/// Builds an `i32` `ListViewArray` from a sequence of optional slices.
///
/// Each `Some(values)` entry appends `values` to the child builder and marks the
/// list slot as valid; each `None` entry appends a null list slot.
fn create_list_view_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(data: T) -> ListViewArray {
    let mut builder = ListViewBuilder::new(Int32Builder::with_capacity(10));
    for entry in data.as_ref() {
        match entry {
            Some(items) => {
                builder.values().append_slice(items.as_ref());
                builder.append(true);
            }
            None => {
                builder.append(false);
            }
        }
    }
    builder.finish()
}
1308+
1309+
/// Two list views with identical contents are equal; changing a single element
/// makes them unequal.
#[test]
fn test_list_view_equal() {
    let base = create_list_view_array([Some(&[1, 2, 3]), Some(&[4, 5, 6])]);

    let identical = create_list_view_array([Some(&[1, 2, 3]), Some(&[4, 5, 6])]);
    test_equal(&base, &identical, true);

    // Only the last element of the second list differs.
    let last_value_differs = create_list_view_array([Some(&[1, 2, 3]), Some(&[4, 5, 7])]);
    test_equal(&base, &last_value_differs, false);
}
1318+
1319+
/// Two zero-length list views built from empty offset, size and validity buffers
/// compare equal.
#[test]
fn test_empty_offsets_list_view_equal() {
    let values = Int32Array::from(Vec::<i32>::new());
    let empty_offsets: [u8; 0] = [];
    let empty_sizes: [u8; 0] = [];

    // Both sides are constructed the same way, so share the builder chain.
    let build_empty = || -> ListViewArray {
        let item_field = Field::new_list_field(DataType::Int32, true);
        ArrayDataBuilder::new(DataType::ListView(Arc::new(item_field)))
            .len(0)
            .add_buffer(Buffer::from(&empty_offsets))
            .add_buffer(Buffer::from(&empty_sizes))
            .add_child_data(values.to_data())
            .null_bit_buffer(Some(Buffer::from(&empty_offsets)))
            .build()
            .unwrap()
            .into()
    };

    let a = build_empty();
    let b = build_empty();

    test_equal(&a, &b, true);
}
1351+
1352+
/// Exercises equality when the arrays contain null list slots (null_count > 0).
#[test]
fn test_list_view_null() {
    let base = create_list_view_array([Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);

    let identical = create_list_view_array([Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);
    test_equal(&base, &identical, true);

    // Slot 2 is valid here but null in `base`.
    let extra_valid_slot = create_list_view_array([
        Some(&[1, 2]),
        None,
        Some(&[5, 6]),
        Some(&[3, 4]),
        None,
        None,
    ]);
    test_equal(&base, &extra_valid_slot, false);

    // Same validity, but one element of a valid slot differs.
    let value_differs =
        create_list_view_array([Some(&[1, 2]), None, None, Some(&[3, 5]), None, None]);
    test_equal(&base, &value_differs, false);

    // a list where the nullness of values is determined by the list's bitmap:
    // both arrays share offsets, sizes and validity and differ only in their child values.
    let with_shared_layout = |child: Int32Array| -> ListViewArray {
        let item_field = Field::new_list_field(DataType::Int32, true);
        ArrayDataBuilder::new(DataType::ListView(Arc::new(item_field)))
            .len(8)
            .add_buffer(Buffer::from([0i32, 2, 3, 4, 4, 1, 4, 4].to_byte_slice()))
            .add_buffer(Buffer::from([3i32, 2, 1, 2, 1, 1, 1, 1].to_byte_slice()))
            .add_child_data(child.into_data())
            .null_bit_buffer(Some(Buffer::from([0b0001001])))
            .build()
            .unwrap()
            .into()
    };

    let c = with_shared_layout(Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]));
    let d = with_shared_layout(Int32Array::from(vec![
        Some(1),
        Some(2),
        Some(-1),
        None,
        Some(3),
        Some(4),
        None,
        None,
    ]));
    test_equal(&c, &d, true);
}
1409+
1410+
/// Exercises equality on sliced list views, including a slice whose offset != 0.
#[test]
fn test_list_view_offsets() {
    let a = create_list_view_array([Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);
    let b = create_list_view_array([Some(&[1, 2]), None, None, Some(&[3, 5]), None, None]);

    // (slice offset, slice length, expected equality); the arrays differ only in slot 3.
    let windows = [(0, 3, true), (0, 5, false), (4, 1, true)];
    for (offset, length, expected) in windows {
        let a_slice = a.slice(offset, length);
        let b_slice = b.slice(offset, length);
        test_equal(&a_slice, &b_slice, expected);
    }
}

0 commit comments

Comments
 (0)