4 changes: 2 additions & 2 deletions parquet/src/arrow/arrow_writer/levels.rs
@@ -550,8 +550,8 @@ impl LevelInfoBuilder {
     /// and the other is a native array, the dictionary values must have the same type as the
     /// native array
     fn types_compatible(a: &DataType, b: &DataType) -> bool {
-        // if the Arrow data types are the same, the types are clearly compatible
-        if a == b {
+        // if the Arrow data types are equal, the types are deemed compatible
+        if a.equals_datatype(b) {
             return true;
         }

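For context on the levels.rs change above: `==` on `DataType` compares nested `Field`s in full, including their metadata, whereas `equals_datatype` compares only the logical types, ignoring nested field names and metadata. Below is a minimal standalone sketch of that difference (not part of this PR; the "PARQUET:field_id" literal is the value behind PARQUET_FIELD_ID_META_KEY):

use std::collections::HashMap;
use std::sync::Arc;
use arrow_schema::{DataType, Field};

fn main() {
    // Same child type; only one side carries the Parquet field-id metadata.
    let with_meta = Field::new("item", DataType::Int32, false)
        .with_metadata(HashMap::from([("PARQUET:field_id".to_string(), "1".to_string())]));
    let without_meta = Field::new("item", DataType::Int32, false);

    let a = DataType::List(Arc::new(with_meta));
    let b = DataType::List(Arc::new(without_meta));

    assert!(a != b); // PartialEq also compares the child field's metadata
    assert!(a.equals_datatype(&b)); // equals_datatype ignores it, so the types are compatible
}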
51 changes: 49 additions & 2 deletions parquet/src/arrow/arrow_writer/mod.rs
@@ -1522,11 +1522,12 @@ fn get_fsb_array_slice(
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::collections::HashMap;
 
     use std::fs::File;
 
-    use crate::arrow::ARROW_SCHEMA_META_KEY;
     use crate::arrow::arrow_reader::{ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder};
+    use crate::arrow::{ARROW_SCHEMA_META_KEY, PARQUET_FIELD_ID_META_KEY};
     use crate::column::page::{Page, PageReader};
     use crate::file::metadata::thrift::PageHeader;
     use crate::file::page_index::column_index::ColumnIndexMetaData;
@@ -1539,7 +1540,7 @@ mod tests {
     use arrow::util::data_gen::create_random_array;
     use arrow::util::pretty::pretty_format_batches;
     use arrow::{array::*, buffer::Buffer};
-    use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, NullBuffer, i256};
+    use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, NullBuffer, OffsetBuffer, i256};
     use arrow_schema::Fields;
     use half::f16;
     use num_traits::{FromPrimitive, ToPrimitive};
@@ -3323,6 +3324,52 @@ mod tests {
             BinaryViewArray::from_iter_values(vec![b"barquet"]),
             LargeBinaryArray::from_iter_values(vec![b"parquet", b"barquet"]),
         );
+
+        // check compatibility for list types
+
+        let list_field_metadata = HashMap::from_iter(vec![(
+            PARQUET_FIELD_ID_META_KEY.to_string(),
+            "1".to_string(),
+        )]);
+        let list_field = Field::new_list_field(DataType::Int32, false);
+
+        let values1 = Arc::new(Int32Array::from(vec![0, 1, 2, 3, 4]));
+        let offsets1 = OffsetBuffer::new(vec![0, 2, 5].into());
+
+        let values2 = Arc::new(Int32Array::from(vec![5, 6, 7, 8, 9]));
+        let offsets2 = OffsetBuffer::new(vec![0, 3, 5].into());
+
+        let values_expected = Arc::new(Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]));
+        let offsets_expected = OffsetBuffer::new(vec![0, 2, 5, 8, 10].into());
+
+        ensure_compatible_write(
+            // when the initial schema has the metadata ...
+            ListArray::try_new(
+                Arc::new(
+                    list_field
+                        .clone()
+                        .with_metadata(list_field_metadata.clone()),
+                ),
+                offsets1,
+                values1,
+                None,
+            )
+            .unwrap(),
+            // ... and some intermediate schema doesn't have the metadata
+            ListArray::try_new(Arc::new(list_field.clone()), offsets2, values2, None).unwrap(),
+            // ... the write will still go through, and the resulting schema will inherit the initial metadata
+            ListArray::try_new(
+                Arc::new(
+                    list_field
+                        .clone()
+                        .with_metadata(list_field_metadata.clone()),
+                ),
+                offsets_expected,
+                values_expected,
+                None,
+            )
+            .unwrap(),
+        );
     }
 
     #[test]
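The ensure_compatible_write helper used by the new test is not shown in this diff. As a rough, hypothetical sketch (names and details assumed, not taken from the PR), such a helper could write the first two arrays as single-column batches, relying on the relaxed schema-compatibility check, then read the file back and compare against the expected array:

use std::sync::Arc;
use arrow_array::{ArrayRef, RecordBatch};
use arrow_schema::{Field, Schema};
use bytes::Bytes;
use parquet::arrow::ArrowWriter;
use parquet::arrow::arrow_reader::ParquetRecordBatchReader;

// Hypothetical sketch; the real ensure_compatible_write helper is not part of
// this diff. Write two single-column batches whose schemas are compatible but
// not byte-identical, then read the file back and compare against `expected`.
fn ensure_compatible_write_sketch(first: ArrayRef, second: ArrayRef, expected: ArrayRef) {
    // Build a one-column schema from an array; any field metadata travels
    // inside the array's DataType (e.g. on a List's child field).
    let schema_of = |a: &ArrayRef| {
        Arc::new(Schema::new(vec![Field::new("col", a.data_type().clone(), true)]))
    };

    let mut buf = Vec::new();
    let mut writer = ArrowWriter::try_new(&mut buf, schema_of(&first), None).unwrap();
    let batch1 = RecordBatch::try_new(schema_of(&first), vec![first]).unwrap();
    let batch2 = RecordBatch::try_new(schema_of(&second), vec![second]).unwrap();
    writer.write(&batch1).unwrap();
    writer.write(&batch2).unwrap(); // relies on the relaxed compatibility check
    writer.close().unwrap();

    let mut reader = ParquetRecordBatchReader::try_new(Bytes::from(buf), 1024).unwrap();
    let read = reader.next().unwrap().unwrap();
    assert_eq!(read.column(0), &expected);
}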