Skip to content

Commit 551ddec

Browse files
committed
fast-path parquet root projection without nested fields
1 parent 88902fa commit 551ddec

File tree

2 files changed

+26
-4
lines changed

2 files changed

+26
-4
lines changed

src/mito2/src/sst/parquet/read_columns.rs

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ pub type ParquetNestedPath = Vec<String>;
2323
#[derive(Debug, Clone, PartialEq, Eq)]
2424
pub struct ParquetReadColumns {
2525
cols: Vec<ParquetReadColumn>,
26+
has_nested: bool,
2627
}
2728

2829
impl ParquetReadColumns {
@@ -35,12 +36,23 @@ impl ParquetReadColumns {
3536
.into_iter()
3637
.map(ParquetReadColumn::new)
3738
.collect();
38-
Self { cols }
39+
Self {
40+
cols,
41+
has_nested: false,
42+
}
3943
}
4044

4145
pub fn columns(&self) -> &[ParquetReadColumn] {
4246
&self.cols
4347
}
48+
49+
pub fn has_nested(&self) -> bool {
50+
self.has_nested
51+
}
52+
53+
pub fn root_indices_iter(&self) -> impl Iterator<Item = usize> + '_ {
54+
self.cols.iter().map(|col| col.root_index)
55+
}
4456
}
4557

4658
/// Read requirement for a single parquet root column.
@@ -141,6 +153,7 @@ mod tests {
141153
root_index: 0,
142154
nested_paths: vec![],
143155
}],
156+
has_nested: false,
144157
};
145158

146159
assert_eq!(
@@ -164,6 +177,7 @@ mod tests {
164177
nested_paths: vec![],
165178
},
166179
],
180+
has_nested: true,
167181
};
168182

169183
assert_eq!(
@@ -181,6 +195,7 @@ mod tests {
181195
root_index: 0,
182196
nested_paths: vec![vec!["j".to_string(), "b".to_string()]],
183197
}],
198+
has_nested: true,
184199
};
185200

186201
assert_eq!(
@@ -198,6 +213,7 @@ mod tests {
198213
root_index: 0,
199214
nested_paths: vec![vec!["j".to_string(), "b".to_string(), "c".to_string()]],
200215
}],
216+
has_nested: true,
201217
};
202218

203219
assert_eq!(
@@ -218,6 +234,7 @@ mod tests {
218234
vec!["j".to_string(), "b".to_string(), "d".to_string()],
219235
],
220236
}],
237+
has_nested: true,
221238
};
222239

223240
assert_eq!(

src/mito2/src/sst/parquet/reader.rs

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -424,9 +424,14 @@ impl ParquetReaderBuilder {
424424
let parquet_projection = ParquetReadColumns::from_deduped_root_indices(
425425
read_format.projection_indices().iter().copied(),
426426
);
427-
let leaf_indices = build_parquet_leaves_indices(parquet_schema_desc, &parquet_projection);
428-
let projection_mask =
429-
ProjectionMask::leaves(parquet_schema_desc, leaf_indices.iter().copied());
427+
428+
let projection_mask = if parquet_projection.has_nested() {
429+
let leaf_indices =
430+
build_parquet_leaves_indices(parquet_schema_desc, &parquet_projection);
431+
ProjectionMask::leaves(parquet_schema_desc, leaf_indices.iter().copied())
432+
} else {
433+
ProjectionMask::roots(parquet_schema_desc, parquet_projection.root_indices_iter())
434+
};
430435
let selection = self
431436
.row_groups_to_read(&read_format, &parquet_meta, &mut metrics.filter_metrics)
432437
.await;

0 commit comments

Comments
 (0)