Skip to content

Commit 2ae7474

Browse files
authored
Add NVTX ranges to public APIs of the experimental Parquet reader (#19618)
Contributes to #19469. This PR moves the NVTX ranges from the detail (impl) functions to the public APIs of the experimental Parquet reader. Authors: Muhammad Haseeb (https://github.com/mhaseeb123). Approvers: Shruti Shivakumar (https://github.com/shrshi), Nghia Truong (https://github.com/ttnghia). URL: #19618
1 parent 37ea851 commit 2ae7474

File tree

3 files changed

+31
-19
lines changed

3 files changed

+31
-19
lines changed

cpp/src/io/parquet/experimental/hybrid_scan.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "hybrid_scan_impl.hpp"
1818

19+
#include <cudf/detail/nvtx/ranges.hpp>
1920
#include <cudf/io/experimental/hybrid_scan.hpp>
2021
#include <cudf/utilities/error.hpp>
2122

@@ -33,22 +34,30 @@ hybrid_scan_reader::~hybrid_scan_reader() = default;
3334

3435
[[nodiscard]] text::byte_range_info hybrid_scan_reader::page_index_byte_range() const
3536
{
37+
CUDF_FUNC_RANGE();
38+
3639
return _impl->page_index_byte_range();
3740
}
3841

3942
[[nodiscard]] FileMetaData hybrid_scan_reader::parquet_metadata() const
4043
{
44+
CUDF_FUNC_RANGE();
45+
4146
return _impl->parquet_metadata();
4247
}
4348

4449
void hybrid_scan_reader::setup_page_index(cudf::host_span<uint8_t const> page_index_bytes) const
4550
{
51+
CUDF_FUNC_RANGE();
52+
4653
return _impl->setup_page_index(page_index_bytes);
4754
}
4855

4956
std::vector<cudf::size_type> hybrid_scan_reader::all_row_groups(
5057
parquet_reader_options const& options) const
5158
{
59+
CUDF_FUNC_RANGE();
60+
5261
CUDF_EXPECTS(options.get_row_groups().size() <= 1,
5362
"Encountered invalid size of row group indices in parquet reader options");
5463

@@ -61,6 +70,8 @@ std::vector<cudf::size_type> hybrid_scan_reader::all_row_groups(
6170
size_type hybrid_scan_reader::total_rows_in_row_groups(
6271
cudf::host_span<size_type const> row_group_indices) const
6372
{
73+
CUDF_FUNC_RANGE();
74+
6475
if (row_group_indices.empty()) { return 0; }
6576

6677
auto const input_row_group_indices =
@@ -73,6 +84,8 @@ std::vector<cudf::size_type> hybrid_scan_reader::filter_row_groups_with_stats(
7384
parquet_reader_options const& options,
7485
rmm::cuda_stream_view stream) const
7586
{
87+
CUDF_FUNC_RANGE();
88+
7689
// Temporary vector with row group indices from the first source
7790
auto const input_row_group_indices =
7891
std::vector<std::vector<size_type>>{{row_group_indices.begin(), row_group_indices.end()}};
@@ -84,6 +97,7 @@ std::pair<std::vector<text::byte_range_info>, std::vector<text::byte_range_info>
8497
hybrid_scan_reader::secondary_filters_byte_ranges(
8598
cudf::host_span<size_type const> row_group_indices, parquet_reader_options const& options) const
8699
{
100+
CUDF_FUNC_RANGE();
87101
// Temporary vector with row group indices from the first source
88102
auto const input_row_group_indices =
89103
std::vector<std::vector<size_type>>{{row_group_indices.begin(), row_group_indices.end()}};
@@ -97,6 +111,8 @@ std::vector<cudf::size_type> hybrid_scan_reader::filter_row_groups_with_dictiona
97111
parquet_reader_options const& options,
98112
rmm::cuda_stream_view stream) const
99113
{
114+
CUDF_FUNC_RANGE();
115+
100116
// Temporary vector with row group indices from the first source
101117
auto const input_row_group_indices =
102118
std::vector<std::vector<size_type>>{{row_group_indices.begin(), row_group_indices.end()}};
@@ -113,6 +129,8 @@ std::vector<cudf::size_type> hybrid_scan_reader::filter_row_groups_with_bloom_fi
113129
parquet_reader_options const& options,
114130
rmm::cuda_stream_view stream) const
115131
{
132+
CUDF_FUNC_RANGE();
133+
116134
// Temporary vector with row group indices from the first source
117135
auto const input_row_group_indices =
118136
std::vector<std::vector<size_type>>{{row_group_indices.begin(), row_group_indices.end()}};
@@ -129,6 +147,8 @@ std::unique_ptr<cudf::column> hybrid_scan_reader::build_row_mask_with_page_index
129147
rmm::cuda_stream_view stream,
130148
rmm::device_async_resource_ref mr) const
131149
{
150+
CUDF_FUNC_RANGE();
151+
132152
// Temporary vector with row group indices from the first source
133153
auto const input_row_group_indices =
134154
std::vector<std::vector<size_type>>{{row_group_indices.begin(), row_group_indices.end()}};
@@ -140,6 +160,8 @@ std::unique_ptr<cudf::column> hybrid_scan_reader::build_row_mask_with_page_index
140160
hybrid_scan_reader::filter_column_chunks_byte_ranges(
141161
cudf::host_span<size_type const> row_group_indices, parquet_reader_options const& options) const
142162
{
163+
CUDF_FUNC_RANGE();
164+
143165
// Temporary vector with row group indices from the first source
144166
auto const input_row_group_indices =
145167
std::vector<std::vector<size_type>>{{row_group_indices.begin(), row_group_indices.end()}};
@@ -155,6 +177,8 @@ table_with_metadata hybrid_scan_reader::materialize_filter_columns(
155177
parquet_reader_options const& options,
156178
rmm::cuda_stream_view stream) const
157179
{
180+
CUDF_FUNC_RANGE();
181+
158182
// Temporary vector with row group indices from the first source
159183
auto const input_row_group_indices =
160184
std::vector<std::vector<size_type>>{{row_group_indices.begin(), row_group_indices.end()}};
@@ -171,6 +195,8 @@ table_with_metadata hybrid_scan_reader::materialize_filter_columns(
171195
hybrid_scan_reader::payload_column_chunks_byte_ranges(
172196
cudf::host_span<size_type const> row_group_indices, parquet_reader_options const& options) const
173197
{
198+
CUDF_FUNC_RANGE();
199+
174200
auto const input_row_group_indices =
175201
std::vector<std::vector<size_type>>{{row_group_indices.begin(), row_group_indices.end()}};
176202

@@ -185,6 +211,8 @@ table_with_metadata hybrid_scan_reader::materialize_payload_columns(
185211
parquet_reader_options const& options,
186212
rmm::cuda_stream_view stream) const
187213
{
214+
CUDF_FUNC_RANGE();
215+
188216
// Temporary vector with row group indices from the first source
189217
auto const input_row_group_indices =
190218
std::vector<std::vector<size_type>>{{row_group_indices.begin(), row_group_indices.end()}};

cpp/src/io/parquet/experimental/hybrid_scan_helpers.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "io/parquet/reader_impl_helpers.hpp"
2121
#include "io/utilities/row_selection.hpp"
2222

23+
#include <cudf/detail/nvtx/ranges.hpp>
2324
#include <cudf/logger.hpp>
2425

2526
#include <thrust/iterator/counting_iterator.h>
@@ -72,6 +73,8 @@ namespace {
7273

7374
metadata::metadata(cudf::host_span<uint8_t const> footer_bytes)
7475
{
76+
CUDF_FUNC_RANGE();
77+
7578
CompactProtocolReader cp(footer_bytes.data(), footer_bytes.size());
7679
cp.read(this);
7780
CUDF_EXPECTS(cp.InitSchema(this), "Cannot initialize schema");

cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "hybrid_scan_helpers.hpp"
2121
#include "io/parquet/reader_impl_chunking_utils.cuh"
2222

23-
#include <cudf/detail/nvtx/ranges.hpp>
2423
#include <cudf/detail/stream_compaction.hpp>
2524
#include <cudf/detail/structs/utilities.hpp>
2625
#include <cudf/detail/transform.hpp>
@@ -169,8 +168,6 @@ std::vector<std::vector<size_type>> hybrid_scan_reader_impl::filter_row_groups_w
169168
parquet_reader_options const& options,
170169
rmm::cuda_stream_view stream)
171170
{
172-
CUDF_FUNC_RANGE();
173-
174171
CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered");
175172
CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression");
176173

@@ -195,8 +192,6 @@ hybrid_scan_reader_impl::secondary_filters_byte_ranges(
195192
cudf::host_span<std::vector<size_type> const> row_group_indices,
196193
parquet_reader_options const& options)
197194
{
198-
CUDF_FUNC_RANGE();
199-
200195
CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered");
201196
CUDF_EXPECTS(options.get_filter().has_value(), "Filter expression must not be empty");
202197

@@ -230,8 +225,6 @@ hybrid_scan_reader_impl::filter_row_groups_with_dictionary_pages(
230225
parquet_reader_options const& options,
231226
rmm::cuda_stream_view stream)
232227
{
233-
CUDF_FUNC_RANGE();
234-
235228
CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered");
236229
CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression");
237230

@@ -302,8 +295,6 @@ std::vector<std::vector<size_type>> hybrid_scan_reader_impl::filter_row_groups_w
302295
CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered");
303296
CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression");
304297

305-
CUDF_FUNC_RANGE();
306-
307298
select_columns(read_columns_mode::FILTER_COLUMNS, options);
308299

309300
table_metadata metadata;
@@ -331,8 +322,6 @@ std::unique_ptr<cudf::column> hybrid_scan_reader_impl::build_row_mask_with_page_
331322
CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered");
332323
CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression");
333324

334-
CUDF_FUNC_RANGE();
335-
336325
select_columns(read_columns_mode::FILTER_COLUMNS, options);
337326

338327
table_metadata metadata;
@@ -355,8 +344,6 @@ std::pair<std::vector<byte_range_info>, std::vector<cudf::size_type>>
355344
hybrid_scan_reader_impl::get_input_column_chunk_byte_ranges(
356345
cudf::host_span<std::vector<size_type> const> row_group_indices) const
357346
{
358-
CUDF_FUNC_RANGE();
359-
360347
// Descriptors for all the chunks that make up the selected columns
361348
auto const num_input_columns = _input_columns.size();
362349
auto const num_row_groups =
@@ -438,8 +425,6 @@ table_with_metadata hybrid_scan_reader_impl::materialize_filter_columns(
438425
CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered");
439426
CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression");
440427

441-
CUDF_FUNC_RANGE();
442-
443428
reset_internal_state();
444429

445430
table_metadata metadata;
@@ -477,8 +462,6 @@ table_with_metadata hybrid_scan_reader_impl::materialize_payload_columns(
477462
CUDF_EXPECTS(row_mask.null_count() == 0,
478463
"Row mask must not have any nulls when materializing payload column");
479464

480-
CUDF_FUNC_RANGE();
481-
482465
reset_internal_state();
483466

484467
initialize_options(row_group_indices, options, stream);
@@ -728,8 +711,6 @@ table_with_metadata hybrid_scan_reader_impl::finalize_output(
728711
void hybrid_scan_reader_impl::set_pass_page_mask(
729712
cudf::host_span<std::vector<bool> const> data_page_mask)
730713
{
731-
CUDF_FUNC_RANGE();
732-
733714
auto const& pass = _pass_itm_data;
734715
auto const& chunks = pass->chunks;
735716

0 commit comments

Comments
 (0)