Skip to content

Commit 4003e52

Browse files
authored
Remove hostdevice_vector::element due to unnecessary synchronization (#19092)
Contributes to #18967; part of #18968. This PR removes `hostdevice_vector::element` because it internally performs a `cudaMemcpy` into pageable host memory. The only call to it is replaced with a hand-written equivalent at the call site. Authors: - Jigao Luo (https://github.com/JigaoLuo) - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - Vukasin Milovanovic (https://github.com/vuule) URL: #19092
1 parent a7650d3 commit 4003e52

File tree

2 files changed

+12
-18
lines changed

2 files changed

+12
-18
lines changed

cpp/src/io/orc/writer_impl.cu

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1149,7 +1149,18 @@ cudf::detail::hostdevice_vector<uint8_t> allocate_and_encode_blobs(
11491149
// figure out the buffer size needed for protobuf format
11501150
orc_init_statistics_buffersize(
11511151
stats_merge_groups.device_ptr(), stat_chunks.data(), num_stat_blobs, stream);
1152-
auto max_blobs = stats_merge_groups.element(num_stat_blobs - 1, stream);
1152+
1153+
// get stats_merge_groups[num_stat_blobs - 1] via a host pinned bounce buffer
1154+
auto const max_blobs = [&]() {
1155+
auto max_blobs_element =
1156+
cudf::detail::make_pinned_vector_async<statistics_merge_group>(1, stream);
1157+
cudf::detail::cuda_memcpy<statistics_merge_group>(
1158+
max_blobs_element,
1159+
cudf::device_span<statistics_merge_group>{stats_merge_groups.device_ptr(num_stat_blobs - 1),
1160+
1},
1161+
stream);
1162+
return max_blobs_element.front();
1163+
}();
11531164

11541165
cudf::detail::hostdevice_vector<uint8_t> blobs(max_blobs.start_chunk + max_blobs.num_chunks,
11551166
stream);

cpp/src/io/utilities/hostdevice_vector.hpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -100,23 +100,6 @@ class hostdevice_vector {
100100
[[nodiscard]] T* d_end() { return device_ptr(size()); }
101101
[[nodiscard]] T const* d_end() const { return device_ptr(size()); }
102102

103-
/**
104-
* @brief Returns the specified element from device memory
105-
*
106-
* @note This function incurs a device to host memcpy and should be used sparingly.
107-
* @note This function synchronizes `stream`.
108-
*
109-
* @throws rmm::out_of_range exception if `element_index >= size()`
110-
*
111-
* @param element_index Index of the desired element
112-
* @param stream The stream on which to perform the copy
113-
* @return The value of the specified element
114-
*/
115-
[[nodiscard]] T element(std::size_t element_index, rmm::cuda_stream_view stream) const
116-
{
117-
return d_data.element(element_index, stream);
118-
}
119-
120103
operator cudf::host_span<T>() { return host_span<T>{h_data}.subspan(0, size()); }
121104
operator cudf::host_span<T const>() const
122105
{

0 commit comments

Comments
 (0)