Skip to content

Commit 21a9e98

Browse files
github-actions[bot] and liutang123
authored
branch-4.0: [External](parquet) pass non predicates column's offset index to RowGroupReader #55795 (#57270)
Cherry-picked from #55795
Co-authored-by: Lijia Liu <[email protected]>
Co-authored-by: liutang123 <[email protected]>
1 parent e231bba commit 21a9e98

File tree

1 file changed

+13
-7
lines changed

1 file changed

+13
-7
lines changed

be/src/vec/exec/format/parquet/vparquet_reader.cpp

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1081,10 +1081,6 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
10811081
continue;
10821082
}
10831083
auto slot_id = _colname_to_slot_id->at(read_table_col);
1084-
if (!_push_down_simple_expr.contains(slot_id)) {
1085-
continue;
1086-
}
1087-
const auto& push_down_expr = _push_down_simple_expr[slot_id];
10881084

10891085
int parquet_col_id =
10901086
_file_metadata->schema().get_column(read_file_col)->physical_column_index;
@@ -1093,6 +1089,19 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
10931089
continue;
10941090
}
10951091
auto& chunk = row_group.columns[parquet_col_id];
1092+
1093+
if (chunk.offset_index_length == 0) {
1094+
continue;
1095+
}
1096+
tparquet::OffsetIndex offset_index;
1097+
RETURN_IF_ERROR(page_index.parse_offset_index(chunk, off_index_buff.data(), &offset_index));
1098+
_col_offsets[parquet_col_id] = offset_index;
1099+
1100+
if (!_push_down_simple_expr.contains(slot_id)) {
1101+
continue;
1102+
}
1103+
const auto& push_down_expr = _push_down_simple_expr[slot_id];
1104+
10961105
if (chunk.column_index_offset == 0 && chunk.column_index_length == 0) {
10971106
continue;
10981107
}
@@ -1132,16 +1141,13 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
11321141
if (skipped_page_range.empty()) {
11331142
continue;
11341143
}
1135-
tparquet::OffsetIndex offset_index;
1136-
RETURN_IF_ERROR(page_index.parse_offset_index(chunk, off_index_buff.data(), &offset_index));
11371144
for (int page_id : skipped_page_range) {
11381145
RowRange skipped_row_range;
11391146
RETURN_IF_ERROR(page_index.create_skipped_row_range(offset_index, row_group.num_rows,
11401147
page_id, &skipped_row_range));
11411148
// use the union row range
11421149
skipped_row_ranges.emplace_back(skipped_row_range);
11431150
}
1144-
_col_offsets[parquet_col_id] = offset_index;
11451151
}
11461152
if (skipped_row_ranges.empty()) {
11471153
read_whole_row_group();

0 commit comments

Comments
 (0)