Skip to content

Commit 6859c87

Browse files
zeevm (Ze'ev Maor) and Ze'ev Maor authored
Handle primitive REPEATED field not contained in LIST annotated group (apache#6649)
* Handle primitive REPEATED field not contained in LIST annotated group
* cargo fmt
* Add UT
* cargo fmt
* comment
* clippy
* clippy
* update parquet-testing module
* cargo fmt

---------

Co-authored-by: Ze'ev Maor <[email protected]>
1 parent 37cd34d commit 6859c87

File tree

2 files changed

+137
-2
lines changed

2 files changed

+137
-2
lines changed

parquet/src/record/reader.rs

+136-1
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,17 @@ impl TreeBuilder {
138138
.column_descr_ptr();
139139
let col_reader = row_group_reader.get_column_reader(orig_index)?;
140140
let column = TripletIter::new(col_descr, col_reader, self.batch_size);
141-
Reader::PrimitiveReader(field, Box::new(column))
141+
let reader = Reader::PrimitiveReader(field.clone(), Box::new(column));
142+
if repetition == Repetition::REPEATED {
143+
Reader::RepeatedReader(
144+
field,
145+
curr_def_level - 1,
146+
curr_rep_level - 1,
147+
Box::new(reader),
148+
)
149+
} else {
150+
reader
151+
}
142152
} else {
143153
match field.get_basic_info().converted_type() {
144154
// List types
@@ -1688,6 +1698,131 @@ mod tests {
16881698
assert_eq!(rows, expected_rows);
16891699
}
16901700

1701+
/// Reads `repeated_primitive_no_list.parquet`, in which the REPEATED fields are
/// primitives, and checks that each row comes back with those fields
/// materialised as lists, both at the top level and inside `group_of_lists`.
#[test]
fn test_tree_reader_handle_primitive_repeated_fields_with_no_annotation() {
    /// Builds the expected list field for a repeated integer column.
    fn int_list(values: &[i32]) -> Field {
        let elements = values
            .iter()
            .copied()
            .map(Field::Int)
            .collect::<Vec<Field>>();
        Field::ListInternal(make_list(elements))
    }

    /// Builds the expected list field for a repeated string column.
    fn str_list(values: &[&str]) -> Field {
        let elements = values
            .iter()
            .map(|s| Field::Str(s.to_string()))
            .collect::<Vec<Field>>();
        Field::ListInternal(make_list(elements))
    }

    // In this test the REPEATED fields are primitives
    let rows = test_file_reader_rows("repeated_primitive_no_list.parquet", None).unwrap();

    // In every expected row the nested group carries the same values as the
    // top-level columns, so a single builder covers both levels.
    let expected_row = |ints: &[i32], strs: &[&str]| {
        row![
            ("Int32_list".to_string(), int_list(ints)),
            ("String_list".to_string(), str_list(strs)),
            (
                "group_of_lists".to_string(),
                group![
                    ("Int32_list_in_group".to_string(), int_list(ints)),
                    ("String_list_in_group".to_string(), str_list(strs))
                ]
            )
        ]
    };

    let expected_rows = vec![
        expected_row(&[0, 1, 2, 3], &["foo", "zero", "one", "two"]),
        // Second row: the integer lists are empty.
        expected_row(&[], &["three"]),
        expected_row(&[4], &["four"]),
        expected_row(&[5, 6, 7, 8], &["five", "six", "seven", "eight"]),
    ];
    assert_eq!(rows, expected_rows);
}
1825+
16911826
fn test_file_reader_rows(file_name: &str, schema: Option<Type>) -> Result<Vec<Row>> {
16921827
let file = get_test_file(file_name);
16931828
let file_reader: Box<dyn FileReader> = Box::new(SerializedFileReader::new(file)?);

0 commit comments

Comments
 (0)