Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
8ee9f16
Add CMake options for making legacy MR interface configurable
bdice Oct 14, 2025
5bb56db
Implement more legacy blocks, require CCCL 3.1+
bdice Oct 14, 2025
52b5368
Refactor to ifdef
bdice Oct 14, 2025
324dd0e
Refactor call sites to new CCCL MR interface
bdice Oct 14, 2025
c6c5268
Refactor host, pinned, and pinned_host MRs
bdice Oct 14, 2025
b197014
Refactoring to use CCCL MR interface
bdice Oct 16, 2025
4014ad8
Refactoring to use CCCL MR interface
bdice Oct 16, 2025
2cc5caa
Continue refactoring
bdice Oct 16, 2025
8bff193
Continue refactoring
bdice Oct 16, 2025
ab39683
Continue refactoring
bdice Oct 16, 2025
f4f25be
Complete internal refactoring to use CCCL MR interface
bdice Oct 16, 2025
390e2c5
Fix README error
bdice Oct 16, 2025
606b414
Use CCCL MR interface in Cython
bdice Oct 16, 2025
406a990
Hardcode alignment
bdice Oct 16, 2025
cd2145d
Use _stream member
bdice Oct 17, 2025
d7b39b6
Merge branch 'main' into make-legacy-interface-configurable
trxcllnt Oct 22, 2025
879f6aa
Merge remote-tracking branch 'upstream/main' into make-legacy-interfa…
bdice Oct 25, 2025
66c58db
Don't overallocate for alignment
bdice Oct 25, 2025
244fc6c
Remove using declaration
bdice Oct 25, 2025
378f69a
Merge branch 'main' into make-legacy-interface-configurable
bdice Oct 27, 2025
827af4b
Use CCCL MR interface in benchmarks
bdice Oct 28, 2025
c72c067
Merge branch 'make-legacy-interface-configurable' of github.com:bdice…
bdice Oct 28, 2025
69d4161
Merge remote-tracking branch 'upstream/main' into make-legacy-interfa…
bdice Oct 28, 2025
2e62232
Merge remote-tracking branch 'upstream/main' into make-legacy-interfa…
bdice Oct 29, 2025
8b18b8e
Skip pytorch tests if not using a CUDA build
bdice Oct 29, 2025
ceb08ef
Stop aligning up sizes
bdice Oct 29, 2025
bb41214
Fix statistics tests
bdice Oct 29, 2025
df57fb3
Fix tracking MR sizes
bdice Oct 29, 2025
b118aa7
Add Cython overload with no alignment, since Cython doesn't seem to p…
bdice Oct 29, 2025
a7b15fc
Remove alignment overloads
bdice Oct 29, 2025
780b025
Merge remote-tracking branch 'upstream/main' into make-legacy-interfa…
bdice Oct 30, 2025
6b70843
Merge branch 'main' into make-legacy-interface-configurable
bdice Oct 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF)
# This is mostly so that dependent libraries are configured in shared mode for downstream dependents
# of RMM that get their common dependencies transitively.
option(BUILD_SHARED_LIBS "Build RMM shared libraries" ON)
option(RMM_ENABLE_LEGACY_MR_INTERFACE "Enable legacy memory resource interface" ON)
set(RMM_LOGGING_LEVEL
"INFO"
CACHE STRING "Choose the logging level.")
Expand All @@ -54,6 +55,7 @@ message(VERBOSE "RMM: Build with NVTX support: ${RMM_NVTX}")
# Set logging level. Must go before including gtests and benchmarks. Set the possible values of
# build type for cmake-gui.
message(STATUS "RMM: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'")
message(STATUS "RMM: Legacy MR interface enabled: ${RMM_ENABLE_LEGACY_MR_INTERFACE}")

# cudart can be linked statically or dynamically
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
Expand Down Expand Up @@ -132,6 +134,11 @@ if(RMM_NVTX)
target_compile_definitions(rmm PUBLIC RMM_NVTX)
endif()

# Control legacy MR interface visibility: when the RMM_ENABLE_LEGACY_MR_INTERFACE option is set,
# define the preprocessor symbol of the same name on the rmm target. PUBLIC propagates the
# definition to targets that link against rmm.
if(RMM_ENABLE_LEGACY_MR_INTERFACE)
target_compile_definitions(rmm PUBLIC RMM_ENABLE_LEGACY_MR_INTERFACE)
endif()

# ##################################################################################################
# * tests and benchmarks ---------------------------------------------------------------------------

Expand Down
68 changes: 4 additions & 64 deletions cpp/include/rmm/detail/cccl_adaptors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class cccl_resource_ref : public ResourceType {

// Move-construct the `base` subobject from an rvalue `base`.
cccl_resource_ref(base&& other) : base(std::move(other)) {}

#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE
// Legacy overload (compiled only when RMM_ENABLE_LEGACY_MR_INTERFACE is defined):
// forwards to allocate_sync(bytes).
void* allocate(std::size_t bytes) { return this->allocate_sync(bytes); }

void* allocate(std::size_t bytes, std::size_t alignment)
Expand All @@ -50,8 +51,8 @@ class cccl_resource_ref : public ResourceType {
{
return this->deallocate_sync(ptr, bytes, alignment);
}
#endif // RMM_ENABLE_LEGACY_MR_INTERFACE

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK so the idea is that the cccl_resource_ref models our old setup where allocate -> synchronous and allocate_async -> stream-ordered.

Whereas cccl_async_resource_ref models the new setup, where allocate_sync -> synchronous and allocate -> stream-ordered.

Since we currently assume allocate is synchronous, we need to adapt everyone to that first, and the way to do that is to migrate everyone using allocate to allocate_sync. Then we can move them onto the cccl_async_resource_ref concept, and finally move sync allocations that could be async back to allocate?

Copy link
Contributor Author

@bdice bdice Oct 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generally that's the right direction.

The migration here is actually a little easier than stated above, because the signature for allocate changed too (in addition to changing sync-to-async). Disabling the "legacy" interface will cause a compile error anywhere the old interface was being used, allowing us to migrate to the new API names and new parameter order in each RAPIDS repository. I am starting that migration now. :)

Once that migration is complete, I will deprecate the "legacy" interface (at which point RAPIDS should not be using the legacy interface at all), then remove it in the subsequent release.

#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
// Forwards to base::allocate_sync(bytes) and returns its result.
void* allocate_sync(std::size_t bytes) { return base::allocate_sync(bytes); }

void* allocate_sync(std::size_t bytes, std::size_t alignment)
Expand All @@ -68,24 +69,6 @@ class cccl_resource_ref : public ResourceType {
{
return base::deallocate_sync(ptr, bytes, alignment);
}
#else
void* allocate_sync(std::size_t bytes) { return base::allocate(bytes); }

void* allocate_sync(std::size_t bytes, std::size_t alignment)
{
return base::allocate(bytes, alignment);
}

void deallocate_sync(void* ptr, std::size_t bytes) noexcept
{
return base::deallocate(ptr, bytes);
}

void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) noexcept
{
return base::deallocate(ptr, bytes, alignment);
}
#endif
};

template <typename ResourceType>
Expand All @@ -98,8 +81,7 @@ class cccl_async_resource_ref : public ResourceType {
// Copy-construct the `base` subobject from an existing `base`.
cccl_async_resource_ref(base const& other) : base(other) {}
// Move-construct the `base` subobject from an rvalue `base`.
cccl_async_resource_ref(base&& other) : base(std::move(other)) {}

// BEGINNING OF LEGACY MR METHODS

#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE
// Legacy overload (compiled only when RMM_ENABLE_LEGACY_MR_INTERFACE is defined):
// forwards to allocate_sync(bytes).
void* allocate(std::size_t bytes) { return this->allocate_sync(bytes); }

void* allocate(std::size_t bytes, std::size_t alignment)
Expand Down Expand Up @@ -140,9 +122,8 @@ class cccl_async_resource_ref : public ResourceType {
return this->deallocate(stream, ptr, bytes, alignment);
}

// END OF LEGACY MR METHODS
#endif // RMM_ENABLE_LEGACY_MR_INTERFACE

#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
// Forwards to base::allocate_sync(bytes) and returns its result.
void* allocate_sync(std::size_t bytes) { return base::allocate_sync(bytes); }

void* allocate_sync(std::size_t bytes, std::size_t alignment)
Expand Down Expand Up @@ -182,47 +163,6 @@ class cccl_async_resource_ref : public ResourceType {
{
return base::deallocate(stream, ptr, bytes, alignment);
}
#else
void* allocate_sync(std::size_t bytes) { return base::allocate(bytes); }

void* allocate_sync(std::size_t bytes, std::size_t alignment)
{
return base::allocate(bytes, alignment);
}

void deallocate_sync(void* ptr, std::size_t bytes) noexcept
{
return base::deallocate(ptr, bytes);
}

void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) noexcept
{
return base::deallocate(ptr, bytes, alignment);
}

void* allocate(cuda_stream_view stream, std::size_t bytes)
{
return base::allocate_async(bytes, stream);
}

void* allocate(cuda_stream_view stream, std::size_t bytes, std::size_t alignment)
{
return base::allocate_async(bytes, alignment, stream);
}

void deallocate(cuda_stream_view stream, void* ptr, std::size_t bytes) noexcept
{
return base::deallocate_async(ptr, bytes, stream);
}

void deallocate(cuda_stream_view stream,
void* ptr,
std::size_t bytes,
std::size_t alignment) noexcept
{
return base::deallocate_async(ptr, bytes, alignment, stream);
}
#endif
};

} // namespace detail
Expand Down
11 changes: 0 additions & 11 deletions cpp/include/rmm/detail/cuda_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ namespace RMM_NAMESPACE {
namespace detail {
namespace polyfill {

#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
// `resource<Resource>` is true when `Resource` satisfies `cuda::mr::synchronous_resource`.
template <class Resource>
inline constexpr bool resource = cuda::mr::synchronous_resource<Resource>;
template <class Resource, class... Properties>
Expand All @@ -37,16 +36,6 @@ template <class Resource>
inline constexpr bool async_resource = cuda::mr::resource<Resource>;
// `async_resource_with<Resource, Properties...>` is true when `Resource` satisfies
// `cuda::mr::resource_with` for the given `Properties`.
template <class Resource, class... Properties>
inline constexpr bool async_resource_with = cuda::mr::resource_with<Resource, Properties...>;
#else // ^^^ CCCL >= 3.1 ^^^ / vvv CCCL < 3.1 vvv
template <class Resource>
inline constexpr bool resource = cuda::mr::resource<Resource>;
template <class Resource, class... Properties>
inline constexpr bool resource_with = cuda::mr::resource_with<Resource, Properties...>;
template <class Resource>
inline constexpr bool async_resource = cuda::mr::async_resource<Resource>;
template <class Resource, class... Properties>
inline constexpr bool async_resource_with = cuda::mr::async_resource_with<Resource, Properties...>;
#endif // CCCL < 3.1

} // namespace polyfill
} // namespace detail
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/rmm/mr/device/cuda_async_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
{
void* ptr{nullptr};
ptr = pool_.allocate(bytes, stream);
ptr = pool_.allocate(stream, bytes);
return ptr;
}

Expand All @@ -194,7 +194,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
*/
void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override
{
pool_.deallocate(ptr, bytes, stream);
pool_.deallocate(stream, ptr, bytes);
}

/**
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/rmm/mr/device/detail/arena.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ class global_arena final {
~global_arena()
{
std::lock_guard lock(mtx_);
upstream_mr_.deallocate(upstream_block_.pointer(), upstream_block_.size());
upstream_mr_.deallocate_sync(upstream_block_.pointer(), upstream_block_.size());
}

/**
Expand Down Expand Up @@ -701,7 +701,7 @@ class global_arena final {
*/
void initialize(std::size_t size)
{
upstream_block_ = {upstream_mr_.allocate(size), size};
upstream_block_ = {upstream_mr_.allocate_sync(size), size};
superblocks_.emplace(upstream_block_.pointer(), size);
}

Expand Down
42 changes: 20 additions & 22 deletions cpp/include/rmm/mr/device/device_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ class device_memory_resource {
device_memory_resource& operator=(device_memory_resource&&) noexcept =
default; ///< @default_move_assignment{device_memory_resource}

#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE
/**
* @brief Allocates memory of size at least \p bytes.
*
Expand Down Expand Up @@ -151,24 +152,6 @@ class device_memory_resource {
do_deallocate(ptr, bytes, stream);
}

/**
* @brief Compare this resource to another.
*
* Two device_memory_resources compare equal if and only if memory allocated
* from one device_memory_resource can be deallocated from the other and vice
* versa.
*
* By default, simply checks if \p *this and \p other refer to the same
* object, i.e., does not check if they are two objects of the same class.
*
* @param other The other resource to compare to
* @returns If the two resources are equivalent
*/
[[nodiscard]] bool is_equal(device_memory_resource const& other) const noexcept
{
return do_is_equal(other);
}

/**
* @brief Allocates memory of size at least \p bytes.
*
Expand Down Expand Up @@ -293,9 +276,7 @@ class device_memory_resource {
RMM_FUNC_RANGE();
do_deallocate(ptr, bytes, stream);
}

#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
// CCCL >= 3.1 needs a different set of methods to satisfy the memory resource concepts
#endif // RMM_ENABLE_LEGACY_MR_INTERFACE

/**
* @brief Allocates memory of size at least \p bytes.
Expand Down Expand Up @@ -369,7 +350,24 @@ class device_memory_resource {
{
do_deallocate(ptr, rmm::align_up(bytes, alignment), stream);
}
#endif // CCCL >= 3.1

/**
* @brief Compare this resource to another.
*
* Two device_memory_resources compare equal if and only if memory allocated
* from one device_memory_resource can be deallocated from the other and vice
* versa.
*
* By default, simply checks if \p *this and \p other refer to the same
* object, i.e., does not check if they are two objects of the same class.
*
* This function forwards to `do_is_equal(other)`.
*
* @param other The other resource to compare to
* @returns true if the two resources are equivalent, false otherwise
*/
[[nodiscard]] bool is_equal(device_memory_resource const& other) const noexcept
{
return do_is_equal(other);
}

/**
* @brief Comparison operator with another device_memory_resource
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/rmm/mr/device/owning_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ class owning_wrapper : public device_memory_resource {
*/
void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
{
return wrapped().allocate(bytes, stream);
return wrapped().allocate(stream, bytes);
}

/**
Expand All @@ -184,7 +184,7 @@ class owning_wrapper : public device_memory_resource {
*/
void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
{
wrapped().deallocate(ptr, bytes, stream);
wrapped().deallocate(stream, ptr, bytes);
}

/**
Expand Down
83 changes: 41 additions & 42 deletions cpp/include/rmm/mr/host/host_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class host_memory_resource {
/**
* @brief Allocates memory on the host of size at least `bytes` bytes.
*
* The returned storage is aligned to the specified `alignment` if supported, and to
* The returned storage is aligned to the specified `alignment` if provided, and to
* `alignof(std::max_align_t)` otherwise.
*
* @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated.
Expand All @@ -86,7 +86,7 @@ class host_memory_resource {
/**
* @brief Deallocate memory pointed to by `ptr`.
*
* `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a
* `ptr` must have been returned by a prior call to `allocate(bytes, alignment)` on a
* `host_memory_resource` that compares equal to `*this`, and the storage it points to must not
* yet have been deallocated, otherwise behavior is undefined.
*
Expand All @@ -104,6 +104,45 @@ class host_memory_resource {
do_deallocate(ptr, bytes, alignment);
}

/**
* @brief Allocates memory on the host of size at least `bytes` bytes.
*
* The returned storage is aligned to the specified `alignment` if provided, and to
* `alignof(std::max_align_t)` otherwise.
*
* This function forwards to `do_allocate(bytes, alignment)`.
*
* @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated.
*
* @param bytes The size of the allocation
* @param alignment Alignment of the allocation. Defaults to `alignof(std::max_align_t)`.
* @return void* Pointer to the newly allocated memory
*/
void* allocate_sync(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
{
RMM_FUNC_RANGE();
return do_allocate(bytes, alignment);
}

/**
* @brief Deallocate memory pointed to by `ptr`.
*
* `ptr` must have been returned by a prior call to `allocate_sync(bytes, alignment)` (or
* `allocate(bytes, alignment)`) on a `host_memory_resource` that compares equal to `*this`, and
* the storage it points to must not yet have been deallocated, otherwise behavior is undefined.
*
* This function forwards to `do_deallocate(ptr, bytes, alignment)`.
*
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes`
* that was passed to the `allocate_sync` (or `allocate`) call that returned `ptr`.
* @param alignment Alignment of the allocation. This must be equal to the value of `alignment`
* that was passed to the `allocate_sync` (or `allocate`) call that returned `ptr`. Defaults to
* `alignof(std::max_align_t)`.
*/
void deallocate_sync(void* ptr,
std::size_t bytes,
std::size_t alignment = alignof(std::max_align_t)) noexcept
{
RMM_FUNC_RANGE();
do_deallocate(ptr, bytes, alignment);
}

/**
* @brief Compare this resource to another.
*
Expand Down Expand Up @@ -201,46 +240,6 @@ class host_memory_resource {
{
return this == &other;
}

#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)

public:
/**
* @brief Allocates memory on the host of size at least `bytes` bytes.
*
* The returned storage is aligned to the specified `alignment` if supported, and to
* `alignof(std::max_align_t)` otherwise.
*
* @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated.
*
* @param bytes The size of the allocation
* @param alignment Alignment of the allocation
* @return void* Pointer to the newly allocated memory
*/
void* allocate_sync(std::size_t bytes, std::size_t alignment)
{
return allocate(bytes, alignment);
}

/**
* @brief Deallocate memory pointed to by `ptr`.
*
* `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a
* `host_memory_resource` that compares equal to `*this`, and the storage it points to must not
* yet have been deallocated, otherwise behavior is undefined.
*
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes`
* that was passed to the `allocate` call that returned `ptr`.
* @param alignment Alignment of the allocation. This must be equal to the value of `alignment`
* that was passed to the `allocate` call that returned `ptr`.
*/
void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment)
{
return deallocate(ptr, bytes, alignment);
}

#endif
};

// static property checks
Expand Down
Loading