diff --git a/README.md b/README.md index 6c00e6c9a..69f3783ea 100644 --- a/README.md +++ b/README.md @@ -482,7 +482,7 @@ rmm::mr::polymorphic_allocator stream_alloc; // Constructs an adaptor that forwards all (de)allocations to `stream_alloc` on `stream`. auto adapted = rmm::mr::stream_allocator_adaptor(stream_alloc, stream); -// Allocates 100 bytes using `stream_alloc` on `stream` +// Allocates storage for 100 ints using `stream_alloc` on `stream` auto p = adapted.allocate(100); ... // Deallocates using `stream_alloc` on `stream` diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index fb3f6455d..0f31cfe41 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -44,6 +44,7 @@ option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF) # This is mostly so that dependent libraries are configured in shared mode for downstream dependents # of RMM that get their common dependencies transitively. option(BUILD_SHARED_LIBS "Build RMM shared libraries" ON) +option(RMM_ENABLE_LEGACY_MR_INTERFACE "Enable legacy memory resource interface" ON) set(RMM_LOGGING_LEVEL "INFO" CACHE STRING "Choose the logging level.") @@ -54,6 +55,7 @@ message(VERBOSE "RMM: Build with NVTX support: ${RMM_NVTX}") # Set logging level. Must go before including gtests and benchmarks. Set the possible values of # build type for cmake-gui. message(STATUS "RMM: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'") +message(STATUS "RMM: Legacy MR interface enabled: ${RMM_ENABLE_LEGACY_MR_INTERFACE}") # cudart can be linked statically or dynamically option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) @@ -132,6 +134,11 @@ if(RMM_NVTX) target_compile_definitions(rmm PUBLIC RMM_NVTX) endif() +# Control legacy MR interface visibility +if(RMM_ENABLE_LEGACY_MR_INTERFACE) + target_compile_definitions(rmm PUBLIC RMM_ENABLE_LEGACY_MR_INTERFACE) +endif() + # ################################################################################################## # * tests and benchmarks --------------------------------------------------------------------------- diff --git a/cpp/benchmarks/async_priming/async_priming_bench.cpp b/cpp/benchmarks/async_priming/async_priming_bench.cpp index e391c77c6..c2fc27bfb 100644 --- a/cpp/benchmarks/async_priming/async_priming_bench.cpp +++ b/cpp/benchmarks/async_priming/async_priming_bench.cpp @@ -83,7 +83,7 @@ void BM_AsyncPrimingImpact(benchmark::State& state, MRFactoryFunc factory) // Deallocate all for (auto* ptr : allocations) { - mr->deallocate(ptr, allocation_size); + mr->deallocate_sync(ptr, allocation_size); } allocations.clear(); @@ -118,7 +118,7 @@ void BM_AsyncPrimingImpact(benchmark::State& state, MRFactoryFunc factory) // Clean up for next iteration for (auto* ptr : allocations) { - mr->deallocate(ptr, allocation_size); + mr->deallocate_sync(ptr, allocation_size); } allocations.clear(); } diff --git a/cpp/benchmarks/random_allocations/random_allocations.cpp b/cpp/benchmarks/random_allocations/random_allocations.cpp index 2971f7e40..d44966b22 100644 --- a/cpp/benchmarks/random_allocations/random_allocations.cpp +++ b/cpp/benchmarks/random_allocations/random_allocations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -94,7 +94,7 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, void* ptr = nullptr; if (do_alloc) { // try to allocate try { - ptr = mr.allocate(size, stream); + ptr = mr.allocate(stream, size); } catch (rmm::bad_alloc const&) { do_alloc = false; #if VERBOSE @@ -118,7 +118,7 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr, std::size_t index = index_distribution(generator) % active_allocations; active_allocations--; allocation to_free = remove_at(allocations, index); - mr.deallocate(to_free.ptr, to_free.size, stream); + mr.deallocate(stream, to_free.ptr, to_free.size); allocation_size -= to_free.size; #if VERBOSE diff --git a/cpp/include/rmm/aligned.hpp b/cpp/include/rmm/aligned.hpp index 57e91c217..f58a66ebb 100644 --- a/cpp/include/rmm/aligned.hpp +++ b/cpp/include/rmm/aligned.hpp @@ -20,7 +20,6 @@ #include #include -#include namespace RMM_EXPORT rmm { diff --git a/cpp/include/rmm/detail/cccl_adaptors.hpp b/cpp/include/rmm/detail/cccl_adaptors.hpp index 77614848d..6803c4cbd 100644 --- a/cpp/include/rmm/detail/cccl_adaptors.hpp +++ b/cpp/include/rmm/detail/cccl_adaptors.hpp @@ -34,6 +34,7 @@ class cccl_resource_ref : public ResourceType { cccl_resource_ref(base&& other) : base(std::move(other)) {} +#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE void* allocate(std::size_t bytes) { return this->allocate_sync(bytes); } void* allocate(std::size_t bytes, std::size_t alignment) @@ -50,8 +51,8 @@ class cccl_resource_ref : public ResourceType { { return this->deallocate_sync(ptr, bytes, alignment); } +#endif // RMM_ENABLE_LEGACY_MR_INTERFACE -#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1) void* allocate_sync(std::size_t bytes) { return base::allocate_sync(bytes); } void* allocate_sync(std::size_t bytes, std::size_t alignment) @@ -68,24 +69,6 @@ class cccl_resource_ref : public ResourceType { { return base::deallocate_sync(ptr, bytes, alignment); } -#else - void* allocate_sync(std::size_t bytes) { return base::allocate(bytes); } - - void* allocate_sync(std::size_t bytes, std::size_t alignment) - { - return base::allocate(bytes, alignment); - } - - void deallocate_sync(void* ptr, std::size_t bytes) noexcept - { - return base::deallocate(ptr, bytes); - } - - void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) noexcept - { - return base::deallocate(ptr, bytes, alignment); - } -#endif }; template @@ -98,8 +81,7 @@ class cccl_async_resource_ref : public ResourceType { cccl_async_resource_ref(base const& other) : base(other) {} cccl_async_resource_ref(base&& other) : base(std::move(other)) {} - // BEGINNING OF LEGACY MR METHODS - +#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE void* allocate(std::size_t bytes) { return this->allocate_sync(bytes); } void* allocate(std::size_t bytes, std::size_t alignment) @@ -140,9 +122,8 @@ class cccl_async_resource_ref : public ResourceType { return this->deallocate(stream, ptr, bytes, alignment); } - // END OF LEGACY MR METHODS +#endif // RMM_ENABLE_LEGACY_MR_INTERFACE -#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1) void* allocate_sync(std::size_t bytes) { return base::allocate_sync(bytes); } void* allocate_sync(std::size_t bytes, std::size_t alignment) @@ -182,47 +163,6 @@ class cccl_async_resource_ref : public ResourceType { { return base::deallocate(stream, ptr, bytes, alignment); } -#else - void* allocate_sync(std::size_t bytes) { return base::allocate(bytes); } - - void* allocate_sync(std::size_t bytes, std::size_t alignment) - { - return base::allocate(bytes, alignment); - } - - void deallocate_sync(void* ptr, std::size_t bytes) noexcept - { - return base::deallocate(ptr, bytes); - } - - void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) noexcept - { - return base::deallocate(ptr, bytes, alignment); - } - - void* allocate(cuda_stream_view stream, std::size_t bytes) - { - return base::allocate_async(bytes, stream); - } - - void* allocate(cuda_stream_view stream, std::size_t bytes, std::size_t alignment) - { - return base::allocate_async(bytes, alignment, stream); - } - - void deallocate(cuda_stream_view stream, void* ptr, std::size_t bytes) noexcept - { - return base::deallocate_async(ptr, bytes, stream); - } - - void deallocate(cuda_stream_view stream, - void* ptr, - std::size_t bytes, - std::size_t alignment) noexcept - { - return base::deallocate_async(ptr, bytes, alignment, stream); - } -#endif }; } // namespace detail diff --git a/cpp/include/rmm/detail/cuda_memory_resource.hpp b/cpp/include/rmm/detail/cuda_memory_resource.hpp index fedce42fe..a4319742b 100644 --- a/cpp/include/rmm/detail/cuda_memory_resource.hpp +++ b/cpp/include/rmm/detail/cuda_memory_resource.hpp @@ -18,7 +18,7 @@ #ifndef LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE #error \ "RMM requires LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE to be defined. Please add -DLIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE to the compiler flags (this is done automatically when using RMM via CMake)." -#endif +#endif // LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE #include @@ -28,7 +28,6 @@ namespace RMM_NAMESPACE { namespace detail { namespace polyfill { -#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1) template inline constexpr bool resource = cuda::mr::synchronous_resource; template @@ -37,16 +36,6 @@ template inline constexpr bool async_resource = cuda::mr::resource; template inline constexpr bool async_resource_with = cuda::mr::resource_with; -#else // ^^^ CCCL >= 3.1 ^^^ / vvv CCCL < 3.1 vvv -template -inline constexpr bool resource = cuda::mr::resource; -template -inline constexpr bool resource_with = cuda::mr::resource_with; -template -inline constexpr bool async_resource = cuda::mr::async_resource; -template -inline constexpr bool async_resource_with = cuda::mr::async_resource_with; -#endif // CCCL < 3.1 } // namespace polyfill } // namespace detail diff --git a/cpp/include/rmm/mr/device/aligned_resource_adaptor.hpp b/cpp/include/rmm/mr/device/aligned_resource_adaptor.hpp index 091e17804..5fbea1c7e 100644 --- a/cpp/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/cpp/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -143,10 +143,10 @@ class aligned_resource_adaptor final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { - return get_upstream_resource().allocate_async(bytes, 1, stream); + return get_upstream_resource().allocate(stream, bytes, 1); } auto const size = upstream_allocation_size(bytes); - void* pointer = get_upstream_resource().allocate_async(size, 1, stream); + void* pointer = get_upstream_resource().allocate(stream, size, 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto const address = reinterpret_cast(pointer); auto const aligned_address = rmm::align_up(address, alignment_); @@ -169,7 +169,7 @@ class aligned_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override { if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { - get_upstream_resource().deallocate_async(ptr, bytes, 1, stream); + get_upstream_resource().deallocate(stream, ptr, bytes, 1); } else { { lock_guard lock(mtx_); @@ -179,7 +179,7 @@ class aligned_resource_adaptor final : public device_memory_resource { pointers_.erase(iter); } } - get_upstream_resource().deallocate_async(ptr, upstream_allocation_size(bytes), 1, stream); + get_upstream_resource().deallocate(stream, ptr, upstream_allocation_size(bytes), 1); } } diff --git a/cpp/include/rmm/mr/device/arena_memory_resource.hpp b/cpp/include/rmm/mr/device/arena_memory_resource.hpp index b9c1591dc..64c196954 100644 --- a/cpp/include/rmm/mr/device/arena_memory_resource.hpp +++ b/cpp/include/rmm/mr/device/arena_memory_resource.hpp @@ -156,14 +156,14 @@ class arena_memory_resource final : public device_memory_resource { { std::shared_lock lock(mtx_); - void* pointer = arena.allocate(bytes); + void* pointer = arena.allocate_sync(bytes); if (pointer != nullptr) { return pointer; } } { std::unique_lock lock(mtx_); defragment(); - void* pointer = arena.allocate(bytes); + void* pointer = arena.allocate_sync(bytes); if (pointer == nullptr) { if (dump_log_on_failure_) { dump_memory_log(bytes); } auto const msg = std::string("Maximum pool size exceeded (failed to allocate ") + @@ -209,7 +209,7 @@ class arena_memory_resource final : public device_memory_resource { { std::shared_lock lock(mtx_); // If the memory being freed does not belong to the arena, the following will return false. - if (arena.deallocate(ptr, bytes, stream)) { return; } + if (arena.deallocate(stream, ptr, bytes)) { return; } } { @@ -218,31 +218,31 @@ class arena_memory_resource final : public device_memory_resource { stream.synchronize_no_throw(); std::unique_lock lock(mtx_); - deallocate_from_other_arena(ptr, bytes, stream); + deallocate_from_other_arena(stream, ptr, bytes); } } /** * @brief Deallocate memory pointed to by `ptr` that was allocated in a different arena. * + * @param stream Stream on which to perform deallocation. * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. This must be equal to the * value of `bytes` that was passed to the `allocate` call that returned `ptr`. - * @param stream Stream on which to perform deallocation. */ - void deallocate_from_other_arena(void* ptr, std::size_t bytes, cuda_stream_view stream) + void deallocate_from_other_arena(cuda_stream_view stream, void* ptr, std::size_t bytes) { if (use_per_thread_arena(stream)) { for (auto const& thread_arena : thread_arenas_) { - if (thread_arena.second->deallocate(ptr, bytes)) { return; } + if (thread_arena.second->deallocate_sync(ptr, bytes)) { return; } } } else { for (auto& stream_arena : stream_arenas_) { - if (stream_arena.second.deallocate(ptr, bytes)) { return; } + if (stream_arena.second.deallocate_sync(ptr, bytes)) { return; } } } - if (!global_arena_.deallocate(ptr, bytes)) { + if (!global_arena_.deallocate_sync(ptr, bytes)) { // It's possible to use per thread default streams along with another pool of streams. // This means that it's possible for an allocation to move from a thread or stream arena // back into the global arena during a defragmentation and then move down into another arena @@ -253,11 +253,11 @@ class arena_memory_resource final : public device_memory_resource { // arenas all the time. if (use_per_thread_arena(stream)) { for (auto& stream_arena : stream_arenas_) { - if (stream_arena.second.deallocate(ptr, bytes)) { return; } + if (stream_arena.second.deallocate_sync(ptr, bytes)) { return; } } } else { for (auto const& thread_arena : thread_arenas_) { - if (thread_arena.second->deallocate(ptr, bytes)) { return; } + if (thread_arena.second->deallocate_sync(ptr, bytes)) { return; } } } RMM_FAIL("allocation not found"); diff --git a/cpp/include/rmm/mr/device/binning_memory_resource.hpp b/cpp/include/rmm/mr/device/binning_memory_resource.hpp index fe427aee3..81f514a8b 100644 --- a/cpp/include/rmm/mr/device/binning_memory_resource.hpp +++ b/cpp/include/rmm/mr/device/binning_memory_resource.hpp @@ -196,7 +196,7 @@ class binning_memory_resource final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { if (bytes <= 0) { return nullptr; } - return get_resource_ref(bytes).allocate_async(bytes, stream); + return get_resource_ref(bytes).allocate(stream, bytes); } /** @@ -209,7 +209,7 @@ class binning_memory_resource final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override { - get_resource_ref(bytes).deallocate_async(ptr, bytes, stream); + get_resource_ref(bytes).deallocate(stream, ptr, bytes); } device_async_resource_ref diff --git a/cpp/include/rmm/mr/device/cuda_async_managed_memory_resource.hpp b/cpp/include/rmm/mr/device/cuda_async_managed_memory_resource.hpp index 8c64883ca..c4401dd08 100644 --- a/cpp/include/rmm/mr/device/cuda_async_managed_memory_resource.hpp +++ b/cpp/include/rmm/mr/device/cuda_async_managed_memory_resource.hpp @@ -99,7 +99,7 @@ class cuda_async_managed_memory_resource final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { - return pool_.allocate(bytes, stream); + return pool_.allocate(stream, bytes); } /** @@ -112,7 +112,7 @@ class cuda_async_managed_memory_resource final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override { - pool_.deallocate(ptr, bytes, stream); + pool_.deallocate(stream, ptr, bytes); } /** diff --git a/cpp/include/rmm/mr/device/cuda_async_memory_resource.hpp b/cpp/include/rmm/mr/device/cuda_async_memory_resource.hpp index eb7e6b4d2..e81013edd 100644 --- a/cpp/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/cpp/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -180,7 +180,7 @@ class cuda_async_memory_resource final : public device_memory_resource { void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { void* ptr{nullptr}; - ptr = pool_.allocate(bytes, stream); + ptr = pool_.allocate(stream, bytes); return ptr; } @@ -194,7 +194,7 @@ class cuda_async_memory_resource final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override { - pool_.deallocate(ptr, bytes, stream); + pool_.deallocate(stream, ptr, bytes); } /** diff --git a/cpp/include/rmm/mr/device/detail/arena.hpp b/cpp/include/rmm/mr/device/detail/arena.hpp index 1b52387a4..4fd524165 100644 --- a/cpp/include/rmm/mr/device/detail/arena.hpp +++ b/cpp/include/rmm/mr/device/detail/arena.hpp @@ -525,7 +525,7 @@ class global_arena final { ~global_arena() { std::lock_guard lock(mtx_); - upstream_mr_.deallocate(upstream_block_.pointer(), upstream_block_.size()); + upstream_mr_.deallocate_sync(upstream_block_.pointer(), upstream_block_.size()); } /** @@ -583,7 +583,7 @@ class global_arena final { * @param size The size in bytes of the allocation. * @return void* Pointer to the newly allocated memory. */ - void* allocate(std::size_t size) + void* allocate_sync(std::size_t size) { RMM_LOGGING_ASSERT(handles(size)); std::lock_guard lock(mtx_); @@ -599,17 +599,17 @@ class global_arena final { /** * @brief Deallocate memory pointed to by `ptr`. * + * @param stream Stream on which to perform deallocation. * @param ptr Pointer to be deallocated. * @param size The size in bytes of the allocation. This must be equal to the value of `size` * that was passed to the `allocate` call that returned `p`. - * @param stream Stream on which to perform deallocation. * @return bool true if the allocation is found, false otherwise. */ - bool deallocate_async(void* ptr, std::size_t size, cuda_stream_view stream) + bool deallocate(cuda_stream_view stream, void* ptr, std::size_t size) { RMM_LOGGING_ASSERT(handles(size)); stream.synchronize_no_throw(); - return deallocate(ptr, size); + return deallocate_sync(ptr, size); } /** @@ -620,7 +620,7 @@ class global_arena final { * value of `bytes` that was passed to the `allocate` call that returned `ptr`. * @return bool true if the allocation is found, false otherwise. */ - bool deallocate(void* ptr, std::size_t bytes) + bool deallocate_sync(void* ptr, std::size_t bytes) { std::lock_guard lock(mtx_); @@ -701,7 +701,7 @@ class global_arena final { */ void initialize(std::size_t size) { - upstream_block_ = {upstream_mr_.allocate(size), size}; + upstream_block_ = {upstream_mr_.allocate_sync(size), size}; superblocks_.emplace(upstream_block_.pointer(), size); } @@ -814,9 +814,9 @@ class arena { * @param size The size in bytes of the allocation. * @return void* Pointer to the newly allocated memory. */ - void* allocate(std::size_t size) + void* allocate_sync(std::size_t size) { - if (global_arena_.handles(size)) { return global_arena_.allocate(size); } + if (global_arena_.handles(size)) { return global_arena_.allocate_sync(size); } std::lock_guard lock(mtx_); return get_block(size).pointer(); } @@ -824,18 +824,16 @@ class arena { /** * @brief Deallocate memory pointed to by `ptr`, and possibly return superblocks to upstream. * + * @param stream Stream on which to perform deallocation. * @param ptr Pointer to be deallocated. * @param size The size in bytes of the allocation. This must be equal to the value of `size` * that was passed to the `allocate` call that returned `p`. - * @param stream Stream on which to perform deallocation. * @return bool true if the allocation is found, false otherwise. */ - bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) + bool deallocate(cuda_stream_view stream, void* ptr, std::size_t size) { - if (global_arena::handles(size) && global_arena_.deallocate_async(ptr, size, stream)) { - return true; - } - return deallocate(ptr, size); + if (global_arena::handles(size) && global_arena_.deallocate(stream, ptr, size)) { return true; } + return deallocate_sync(ptr, size); } /** @@ -846,10 +844,10 @@ class arena { * that was passed to the `allocate` call that returned `p`. * @return bool true if the allocation is found, false otherwise. */ - bool deallocate(void* ptr, std::size_t size) + bool deallocate_sync(void* ptr, std::size_t size) { std::lock_guard lock(mtx_); - return deallocate_from_superblock({ptr, size}); + return deallocate_from_superblock_sync({ptr, size}); } /** @@ -925,7 +923,7 @@ class arena { * @param blk The block to deallocate. * @return true if the block is found. */ - bool deallocate_from_superblock(block const& blk) + bool deallocate_from_superblock_sync(block const& blk) { auto const iter = std::find_if(superblocks_.cbegin(), superblocks_.cend(), diff --git a/cpp/include/rmm/mr/device/device_memory_resource.hpp b/cpp/include/rmm/mr/device/device_memory_resource.hpp index 7b2e0acfb..0418cdda4 100644 --- a/cpp/include/rmm/mr/device/device_memory_resource.hpp +++ b/cpp/include/rmm/mr/device/device_memory_resource.hpp @@ -103,6 +103,7 @@ class device_memory_resource { device_memory_resource& operator=(device_memory_resource&&) noexcept = default; ///< @default_move_assignment{device_memory_resource} +#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE /** * @brief Allocates memory of size at least \p bytes. * @@ -151,24 +152,6 @@ class device_memory_resource { do_deallocate(ptr, bytes, stream); } - /** - * @brief Compare this resource to another. - * - * Two device_memory_resources compare equal if and only if memory allocated - * from one device_memory_resource can be deallocated from the other and vice - * versa. - * - * By default, simply checks if \p *this and \p other refer to the same - * object, i.e., does not check if they are two objects of the same class. - * - * @param other The other resource to compare to - * @returns If the two resources are equivalent - */ - [[nodiscard]] bool is_equal(device_memory_resource const& other) const noexcept - { - return do_is_equal(other); - } - /** * @brief Allocates memory of size at least \p bytes. * @@ -293,9 +276,7 @@ class device_memory_resource { RMM_FUNC_RANGE(); do_deallocate(ptr, bytes, stream); } - -#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1) - // CCCL >= 3.1 needs a different set of methods to satisfy the memory resource concepts +#endif // RMM_ENABLE_LEGACY_MR_INTERFACE /** * @brief Allocates memory of size at least \p bytes. @@ -369,7 +350,24 @@ class device_memory_resource { { do_deallocate(ptr, rmm::align_up(bytes, alignment), stream); } -#endif // CCCL >= 3.1 + + /** + * @brief Compare this resource to another. + * + * Two device_memory_resources compare equal if and only if memory allocated + * from one device_memory_resource can be deallocated from the other and vice + * versa. + * + * By default, simply checks if \p *this and \p other refer to the same + * object, i.e., does not check if they are two objects of the same class. + * + * @param other The other resource to compare to + * @returns If the two resources are equivalent + */ + [[nodiscard]] bool is_equal(device_memory_resource const& other) const noexcept + { + return do_is_equal(other); + } /** * @brief Comparison operator with another device_memory_resource diff --git a/cpp/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/cpp/include/rmm/mr/device/failure_callback_resource_adaptor.hpp index f1948daca..70dbeab29 100644 --- a/cpp/include/rmm/mr/device/failure_callback_resource_adaptor.hpp +++ b/cpp/include/rmm/mr/device/failure_callback_resource_adaptor.hpp @@ -165,7 +165,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource { while (true) { try { - ret = get_upstream_resource().allocate_async(bytes, stream); + ret = get_upstream_resource().allocate(stream, bytes); break; } catch (exception_type const& e) { if (!callback_(bytes, callback_arg_)) { throw; } @@ -183,7 +183,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override { - get_upstream_resource().deallocate_async(ptr, bytes, stream); + get_upstream_resource().deallocate(stream, ptr, bytes); } /** diff --git a/cpp/include/rmm/mr/device/fixed_size_memory_resource.hpp b/cpp/include/rmm/mr/device/fixed_size_memory_resource.hpp index b59e5c312..2d845d5b1 100644 --- a/cpp/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/cpp/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -176,7 +176,7 @@ class fixed_size_memory_resource */ free_list blocks_from_upstream(cuda_stream_view stream) { - void* ptr = get_upstream_resource().allocate_async(upstream_chunk_size_, stream); + void* ptr = get_upstream_resource().allocate(stream, upstream_chunk_size_); block_type block{ptr}; upstream_blocks_.push_back(block); @@ -231,7 +231,7 @@ class fixed_size_memory_resource lock_guard lock(this->get_mutex()); for (auto block : upstream_blocks_) { - get_upstream_resource().deallocate(block.pointer(), upstream_chunk_size_); + get_upstream_resource().deallocate_sync(block.pointer(), upstream_chunk_size_); } upstream_blocks_.clear(); } diff --git a/cpp/include/rmm/mr/device/limiting_resource_adaptor.hpp b/cpp/include/rmm/mr/device/limiting_resource_adaptor.hpp index 891fcd048..02e924dda 100644 --- a/cpp/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/cpp/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -143,7 +143,7 @@ class limiting_resource_adaptor final : public device_memory_resource { auto const old = allocated_bytes_.fetch_add(proposed_size); if (old + proposed_size <= allocation_limit_) { try { - return get_upstream_resource().allocate_async(bytes, stream); + return get_upstream_resource().allocate(stream, bytes); } catch (...) { allocated_bytes_ -= proposed_size; throw; @@ -166,7 +166,7 @@ class limiting_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override { std::size_t allocated_size = align_up(bytes, alignment_); - get_upstream_resource().deallocate_async(ptr, bytes, stream); + get_upstream_resource().deallocate(stream, ptr, bytes); allocated_bytes_ -= allocated_size; } diff --git a/cpp/include/rmm/mr/device/logging_resource_adaptor.hpp b/cpp/include/rmm/mr/device/logging_resource_adaptor.hpp index 9723631e9..6d675fdd5 100644 --- a/cpp/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/cpp/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -287,7 +287,7 @@ class logging_resource_adaptor final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { try { - auto const ptr = get_upstream_resource().allocate_async(bytes, stream); + auto const ptr = get_upstream_resource().allocate(stream, bytes); logger_->info("allocate,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream)); return ptr; } catch (...) { @@ -314,7 +314,7 @@ class logging_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override { logger_->info("free,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream)); - get_upstream_resource().deallocate_async(ptr, bytes, stream); + get_upstream_resource().deallocate(stream, ptr, bytes); } /** diff --git a/cpp/include/rmm/mr/device/owning_wrapper.hpp b/cpp/include/rmm/mr/device/owning_wrapper.hpp index 5c4737415..41fcb5ec5 100644 --- a/cpp/include/rmm/mr/device/owning_wrapper.hpp +++ b/cpp/include/rmm/mr/device/owning_wrapper.hpp @@ -170,7 +170,7 @@ class owning_wrapper : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - return wrapped().allocate(bytes, stream); + return wrapped().allocate(stream, bytes); } /** @@ -184,7 +184,7 @@ class owning_wrapper : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override { - wrapped().deallocate(ptr, bytes, stream); + wrapped().deallocate(stream, ptr, bytes); } /** diff --git a/cpp/include/rmm/mr/device/polymorphic_allocator.hpp b/cpp/include/rmm/mr/device/polymorphic_allocator.hpp index 6f5e8caf6..ea5b65461 100644 --- a/cpp/include/rmm/mr/device/polymorphic_allocator.hpp +++ b/cpp/include/rmm/mr/device/polymorphic_allocator.hpp @@ -86,8 +86,7 @@ class polymorphic_allocator { */ value_type* allocate(std::size_t num, cuda_stream_view stream) { - return static_cast( - get_upstream_resource().allocate_async(num * sizeof(T), stream)); + return static_cast(get_upstream_resource().allocate(stream, num * sizeof(T))); } /** @@ -102,7 +101,7 @@ class polymorphic_allocator { */ void deallocate(value_type* ptr, std::size_t num, cuda_stream_view stream) noexcept { - get_upstream_resource().deallocate_async(ptr, num * sizeof(T), stream); + get_upstream_resource().deallocate(stream, ptr, num * sizeof(T)); } /** diff --git a/cpp/include/rmm/mr/device/pool_memory_resource.hpp b/cpp/include/rmm/mr/device/pool_memory_resource.hpp index e06e1eb7d..d3edc05e0 100644 --- a/cpp/include/rmm/mr/device/pool_memory_resource.hpp +++ b/cpp/include/rmm/mr/device/pool_memory_resource.hpp @@ -352,7 +352,7 @@ class pool_memory_resource final * * @param size The size in bytes to allocate from the upstream resource * @param stream The stream on which the memory is to be used. - * @throws if call to allocate_async() throws + * @throws if call to allocate() throws * @return block_type The allocated block */ block_type block_from_upstream(std::size_t size, cuda_stream_view stream) @@ -361,7 +361,7 @@ class pool_memory_resource final if (size == 0) { return {}; } - void* ptr = get_upstream_resource().allocate_async(size, stream); + void* ptr = get_upstream_resource().allocate(stream, size); return *upstream_blocks_.emplace(static_cast(ptr), size, true).first; } @@ -424,7 +424,7 @@ class pool_memory_resource final lock_guard lock(this->get_mutex()); for (auto block : upstream_blocks_) { - get_upstream_resource().deallocate(block.pointer(), block.size()); + get_upstream_resource().deallocate_sync(block.pointer(), block.size()); } upstream_blocks_.clear(); #ifdef RMM_POOL_TRACK_ALLOCATIONS diff --git a/cpp/include/rmm/mr/device/prefetch_resource_adaptor.hpp b/cpp/include/rmm/mr/device/prefetch_resource_adaptor.hpp index bee009fa2..bf6cb3204 100644 --- a/cpp/include/rmm/mr/device/prefetch_resource_adaptor.hpp +++ b/cpp/include/rmm/mr/device/prefetch_resource_adaptor.hpp @@ -92,7 +92,7 @@ class prefetch_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = get_upstream_resource().allocate_async(bytes, stream); + void* ptr = get_upstream_resource().allocate(stream, bytes); rmm::prefetch(ptr, bytes, rmm::get_current_cuda_device(), stream); return ptr; } @@ -106,7 +106,7 @@ class prefetch_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override { - get_upstream_resource().deallocate_async(ptr, bytes, stream); + get_upstream_resource().deallocate(stream, ptr, bytes); } /** diff --git a/cpp/include/rmm/mr/device/sam_headroom_memory_resource.hpp b/cpp/include/rmm/mr/device/sam_headroom_memory_resource.hpp index f554b22ae..46241d625 100644 --- a/cpp/include/rmm/mr/device/sam_headroom_memory_resource.hpp +++ b/cpp/include/rmm/mr/device/sam_headroom_memory_resource.hpp @@ -75,7 +75,7 @@ class sam_headroom_memory_resource final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) override { - void* pointer = system_mr_.allocate_async(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream); + void* pointer = system_mr_.allocate(stream, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT); auto const free = rmm::available_device_memory().first; auto const allocatable = free > headroom_ ? free - headroom_ : 0UL; @@ -126,7 +126,7 @@ class sam_headroom_memory_resource final : public device_memory_resource { [[maybe_unused]] std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) noexcept override { - system_mr_.deallocate_async(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT, stream); + system_mr_.deallocate(stream, ptr, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT); } /** diff --git a/cpp/include/rmm/mr/device/statistics_resource_adaptor.hpp b/cpp/include/rmm/mr/device/statistics_resource_adaptor.hpp index acededa82..a37df9a8e 100644 --- a/cpp/include/rmm/mr/device/statistics_resource_adaptor.hpp +++ b/cpp/include/rmm/mr/device/statistics_resource_adaptor.hpp @@ -230,7 +230,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = get_upstream_resource().allocate_async(bytes, stream); + void* ptr = get_upstream_resource().allocate(stream, bytes); // increment the stats { @@ -253,7 +253,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override { - get_upstream_resource().deallocate_async(ptr, bytes, stream); + get_upstream_resource().deallocate(stream, ptr, bytes); { write_lock_t lock(mtx_); diff --git a/cpp/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/cpp/include/rmm/mr/device/thread_safe_resource_adaptor.hpp index 5176e3169..3bf0ac077 100644 --- a/cpp/include/rmm/mr/device/thread_safe_resource_adaptor.hpp +++ b/cpp/include/rmm/mr/device/thread_safe_resource_adaptor.hpp @@ -100,7 +100,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { lock_t lock(mtx); - return get_upstream_resource().allocate_async(bytes, stream); + return get_upstream_resource().allocate(stream, bytes); } /** @@ -113,7 +113,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override { lock_t lock(mtx); - get_upstream_resource().deallocate_async(ptr, bytes, stream); + get_upstream_resource().deallocate(stream, ptr, bytes); } /** diff --git a/cpp/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/cpp/include/rmm/mr/device/thrust_allocator_adaptor.hpp index 197941270..4778daf44 100644 --- a/cpp/include/rmm/mr/device/thrust_allocator_adaptor.hpp +++ b/cpp/include/rmm/mr/device/thrust_allocator_adaptor.hpp @@ -111,8 +111,7 @@ class thrust_allocator : public thrust::device_malloc_allocator { pointer allocate(size_type num) { cuda_set_device_raii dev{_device}; - return thrust::device_pointer_cast( - static_cast(_mr.allocate_async(num * sizeof(T), _stream))); + return thrust::device_pointer_cast(static_cast(_mr.allocate(_stream, num * sizeof(T)))); } /** @@ -125,7 +124,7 @@ class thrust_allocator : public thrust::device_malloc_allocator { void deallocate(pointer ptr, size_type num) noexcept { cuda_set_device_raii dev{_device}; - return _mr.deallocate_async(thrust::raw_pointer_cast(ptr), num * sizeof(T), _stream); + return _mr.deallocate(_stream, thrust::raw_pointer_cast(ptr), num * sizeof(T)); } /** diff --git a/cpp/include/rmm/mr/device/tracking_resource_adaptor.hpp b/cpp/include/rmm/mr/device/tracking_resource_adaptor.hpp index 37e17e2d9..86fac81d6 100644 --- a/cpp/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/cpp/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -207,7 +207,7 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = get_upstream_resource().allocate_async(bytes, stream); + void* ptr = get_upstream_resource().allocate(stream, bytes); // track it. { write_lock_t lock(mtx_); @@ -227,7 +227,7 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override { - get_upstream_resource().deallocate_async(ptr, bytes, stream); + get_upstream_resource().deallocate(stream, ptr, bytes); { write_lock_t lock(mtx_); diff --git a/cpp/include/rmm/mr/host/host_memory_resource.hpp b/cpp/include/rmm/mr/host/host_memory_resource.hpp index 30741fd07..bd50fd5f0 100644 --- a/cpp/include/rmm/mr/host/host_memory_resource.hpp +++ b/cpp/include/rmm/mr/host/host_memory_resource.hpp @@ -65,10 +65,11 @@ class host_memory_resource { host_memory_resource& operator=(host_memory_resource&&) noexcept = default; ///< @default_move_assignment{host_memory_resource} +#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE /** * @brief Allocates memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, and to + * The returned storage is aligned to the specified `alignment` if provided, and to * `alignof(std::max_align_t)` otherwise. * * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. @@ -86,7 +87,7 @@ class host_memory_resource { /** * @brief Deallocate memory pointed to by `ptr`. * - * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `ptr` must have been returned by a prior call to `allocate(bytes, alignment)` on a * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not * yet have been deallocated, otherwise behavior is undefined. * @@ -103,6 +104,46 @@ class host_memory_resource { RMM_FUNC_RANGE(); do_deallocate(ptr, bytes, alignment); } +#endif // RMM_ENABLE_LEGACY_MR_INTERFACE + + /** + * @brief Allocates memory on the host of size at least `bytes` bytes. + * + * The returned storage is aligned to the specified `alignment` if provided, and to + * `alignof(std::max_align_t)` otherwise. + * + * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. + * + * @param bytes The size of the allocation + * @param alignment Alignment of the allocation + * @return void* Pointer to the newly allocated memory + */ + void* allocate_sync(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) + { + RMM_FUNC_RANGE(); + return do_allocate(bytes, alignment); + } + + /** + * @brief Deallocate memory pointed to by `ptr`. + * + * `ptr` must have been returned by a prior call to `allocate(bytes, alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. + * + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. + */ + void deallocate_sync(void* ptr, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) noexcept + { + RMM_FUNC_RANGE(); + do_deallocate(ptr, bytes, alignment); + } /** * @brief Compare this resource to another. @@ -201,46 +242,6 @@ class host_memory_resource { { return this == &other; } - -#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1) - - public: - /** - * @brief Allocates memory on the host of size at least `bytes` bytes. - * - * The returned storage is aligned to the specified `alignment` if supported, and to - * `alignof(std::max_align_t)` otherwise. - * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. - * - * @param bytes The size of the allocation - * @param alignment Alignment of the allocation - * @return void* Pointer to the newly allocated memory - */ - void* allocate_sync(std::size_t bytes, std::size_t alignment) - { - return allocate(bytes, alignment); - } - - /** - * @brief Deallocate memory pointed to by `ptr`. - * - * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a - * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not - * yet have been deallocated, otherwise behavior is undefined. - * - * @param ptr Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` - * that was passed to the `allocate` call that returned `ptr`. - * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` - * that was passed to the `allocate` call that returned `ptr`. - */ - void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) - { - return deallocate(ptr, bytes, alignment); - } - -#endif }; // static property checks diff --git a/cpp/include/rmm/mr/host/pinned_memory_resource.hpp b/cpp/include/rmm/mr/host/pinned_memory_resource.hpp index 62699da92..19a12cfe8 100644 --- a/cpp/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/cpp/include/rmm/mr/host/pinned_memory_resource.hpp @@ -49,6 +49,7 @@ class pinned_memory_resource final : public host_memory_resource { pinned_memory_resource& operator=(pinned_memory_resource&&) = default; ///< @default_move_assignment{pinned_memory_resource} +#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE /** * @brief Pretend to support the allocate_async interface, falling back to stream 0 * @@ -94,6 +95,48 @@ class pinned_memory_resource final : public host_memory_resource { do_deallocate(ptr, rmm::align_up(bytes, alignment)); } + // Explicitly inherit the allocate and deallocate functions from the host_memory_resource class. + // Due to inheritance and name hiding rules, we need to declare these with "using" when we + // override allocate and deallocate for CCCL 3.1.0+ compatibility. + using host_memory_resource::allocate; + using host_memory_resource::deallocate; +#endif // RMM_ENABLE_LEGACY_MR_INTERFACE + + /** + * @brief Pretend to support the allocate_async interface, falling back to stream 0 + * + * @throws rmm::bad_alloc When the requested `bytes` cannot be allocated on + * the specified `stream`. + * + * @param stream CUDA stream on which to perform the deallocation (ignored). + * @param bytes The size of the allocation + * @param alignment The expected alignment of the allocation + * @return void* Pointer to the newly allocated memory + */ + void* allocate(cuda_stream_view stream, + std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) + { + return do_allocate(bytes, alignment); + } + + /** + * @brief Pretend to support the deallocate_async interface, falling back to stream 0 + * + * @param stream CUDA stream on which to perform the deallocation (ignored). + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the + * value of `bytes` that was passed to the `allocate` call that returned `p`. + * @param alignment The alignment that was passed to the `allocate` call that returned `p` + */ + void deallocate(cuda_stream_view stream, + void* ptr, + std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept + { + return do_deallocate(ptr, bytes, alignment); + } + /** * @brief Enables the `cuda::mr::device_accessible` property * @@ -106,7 +149,7 @@ class pinned_memory_resource final : public host_memory_resource { * @brief Allocates pinned memory on the host of size at least `bytes` bytes. * * The returned storage is aligned to the specified `alignment` if supported, and to - * `alignof(std::max_align_t)` otherwise. + * `rmm::RMM_DEFAULT_HOST_ALIGNMENT` otherwise. * * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * @@ -114,7 +157,8 @@ class pinned_memory_resource final : public host_memory_resource { * @param alignment Alignment of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) override + void* do_allocate(std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { // don't allocate anything if the user requested zero bytes if (0 == bytes) { return nullptr; } @@ -145,56 +189,12 @@ class pinned_memory_resource final : public host_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, - std::size_t alignment = alignof(std::max_align_t)) noexcept override + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept override { if (nullptr == ptr) { return; } rmm::detail::aligned_host_deallocate( ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); } - -#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1) - - public: - // Explicitly inherit the allocate and deallocate functions from the host_memory_resource class. - // Due to inheritance and name hiding rules, we need to declare these with "using" when we - // override allocate and deallocate for CCCL 3.1.0+ compatibility. - using host_memory_resource::allocate; - using host_memory_resource::deallocate; - - /** - * @brief Pretend to support the allocate_async interface, falling back to stream 0 - * - * @throws rmm::bad_alloc When the requested `bytes` cannot be allocated on - * the specified `stream`. - * - * @param stream CUDA stream on which to perform the deallocation (ignored). - * @param bytes The size of the allocation - * @param alignment The expected alignment of the allocation - * @return void* Pointer to the newly allocated memory - */ - void* allocate(cuda_stream_view stream, std::size_t bytes, std::size_t alignment) - { - return this->allocate_async(bytes, alignment, stream); - } - - /** - * @brief Pretend to support the deallocate_async interface, falling back to stream 0 - * - * @param stream CUDA stream on which to perform the deallocation (ignored). - * @param ptr Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment The alignment that was passed to the `allocate` call that returned `p` - */ - void deallocate(cuda_stream_view stream, - void* ptr, - std::size_t bytes, - std::size_t alignment) noexcept - { - return this->deallocate_async(ptr, bytes, alignment, stream); - } - -#endif }; // static property checks diff --git a/cpp/include/rmm/mr/pinned_host_memory_resource.hpp b/cpp/include/rmm/mr/pinned_host_memory_resource.hpp index 60f943054..fa3d67c82 100644 --- a/cpp/include/rmm/mr/pinned_host_memory_resource.hpp +++ b/cpp/include/rmm/mr/pinned_host_memory_resource.hpp @@ -46,6 +46,7 @@ namespace mr { */ class pinned_host_memory_resource { public: +#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE // Disable clang-tidy complaining about the easily swappable size and alignment parameters // of allocate and deallocate // NOLINTBEGIN(bugprone-easily-swappable-parameters) @@ -66,16 +67,7 @@ class pinned_host_memory_resource { static void* allocate(std::size_t bytes, [[maybe_unused]] std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) { - RMM_FUNC_RANGE(); - - // don't allocate anything if the user requested zero bytes - if (0 == bytes) { return nullptr; } - - return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) { - void* ptr{nullptr}; - RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault), size); - return ptr; - }); + return allocate_sync(bytes, alignment); } /** @@ -89,10 +81,7 @@ class pinned_host_memory_resource { std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept { - RMM_FUNC_RANGE(); - - rmm::detail::aligned_host_deallocate( - ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); + return deallocate_sync(ptr, bytes, alignment); } /** @@ -111,9 +100,7 @@ class pinned_host_memory_resource { */ static void* allocate_async(std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream) { - RMM_FUNC_RANGE(); - - return allocate(bytes); + return allocate(stream, bytes); } /** @@ -135,9 +122,7 @@ class pinned_host_memory_resource { std::size_t alignment, [[maybe_unused]] cuda::stream_ref stream) { - RMM_FUNC_RANGE(); - - return allocate(bytes, alignment); + return allocate(stream, bytes, alignment); } /** @@ -153,9 +138,7 @@ class pinned_host_memory_resource { std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream) noexcept { - RMM_FUNC_RANGE(); - - return deallocate(ptr, bytes); + return deallocate(stream, ptr, bytes); } /** @@ -174,44 +157,11 @@ class pinned_host_memory_resource { std::size_t alignment, [[maybe_unused]] cuda::stream_ref stream) noexcept { - RMM_FUNC_RANGE(); - - return deallocate(ptr, bytes, alignment); + return deallocate(stream, ptr, bytes, alignment); } // NOLINTEND(bugprone-easily-swappable-parameters) +#endif // RMM_ENABLE_LEGACY_MR_INTERFACE - /** - * @briefreturn{true if the specified resource is the same type as this resource.} - */ - bool operator==(const pinned_host_memory_resource&) const { return true; } - - /** - * @briefreturn{true if the specified resource is not the same type as this resource, otherwise - * false.} - */ - bool operator!=(const pinned_host_memory_resource&) const { return false; } - - /** - * @brief Enables the `cuda::mr::device_accessible` property - * - * This property declares that a `pinned_host_memory_resource` provides device accessible memory - */ - friend void get_property(pinned_host_memory_resource const&, cuda::mr::device_accessible) noexcept - { - } - - /** - * @brief Enables the `cuda::mr::host_accessible` property - * - * This property declares that a `pinned_host_memory_resource` provides host accessible memory - */ - friend void get_property(pinned_host_memory_resource const&, cuda::mr::host_accessible) noexcept - { - } - -#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1) - - public: /** * @brief Allocates pinned host memory of size at least \p bytes bytes. * @@ -225,9 +175,19 @@ class pinned_host_memory_resource { * * @return Pointer to the newly allocated memory. */ - static void* allocate_sync(std::size_t bytes, std::size_t alignment) + static void* allocate_sync(std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) { - return allocate(bytes, alignment); + RMM_FUNC_RANGE(); + + // don't allocate anything if the user requested zero bytes + if (0 == bytes) { return nullptr; } + + return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) { + void* ptr{nullptr}; + RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault), size); + return ptr; + }); } /** @@ -237,15 +197,19 @@ class pinned_host_memory_resource { * @param bytes Size of the allocation. * @param alignment Alignment in bytes. Default alignment is used if unspecified. */ - static void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) + static void deallocate_sync(void* ptr, + std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) { - return deallocate(ptr, bytes, alignment); + RMM_FUNC_RANGE(); + rmm::detail::aligned_host_deallocate( + ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); } /** * @brief Allocates pinned host memory of size at least \p bytes bytes and alignment \p alignment. * - * @note Stream argument is ignored and behavior is identical to allocate. + * @note Stream argument is ignored and behavior is identical to allocate_sync. * * @throws rmm::out_of_memory if the requested allocation could not be fulfilled due to to a * CUDA out of memory error. @@ -257,9 +221,12 @@ class pinned_host_memory_resource { * @param alignment Alignment in bytes. * @return Pointer to the newly allocated memory. */ - static void* allocate(cuda_stream_view stream, std::size_t bytes, std::size_t alignment) + static void* allocate(cuda_stream_view stream, + std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) { - return allocate_async(bytes, alignment, stream); + RMM_FUNC_RANGE(); + return allocate_sync(bytes, alignment); } /** @@ -276,12 +243,40 @@ class pinned_host_memory_resource { static void deallocate(cuda_stream_view stream, void* ptr, std::size_t bytes, - std::size_t alignment) noexcept + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept { - return deallocate_async(ptr, bytes, alignment, stream); + RMM_FUNC_RANGE(); + return deallocate_sync(ptr, bytes, alignment); } -#endif + /** + * @briefreturn{true if the specified resource is the same type as this resource.} + */ + bool operator==(const pinned_host_memory_resource&) const { return true; } + + /** + * @briefreturn{true if the specified resource is not the same type as this resource, otherwise + * false.} + */ + bool operator!=(const pinned_host_memory_resource&) const { return false; } + + /** + * @brief Enables the `cuda::mr::device_accessible` property + * + * This property declares that a `pinned_host_memory_resource` provides device accessible memory + */ + friend void get_property(pinned_host_memory_resource const&, cuda::mr::device_accessible) noexcept + { + } + + /** + * @brief Enables the `cuda::mr::host_accessible` property + * + * This property declares that a `pinned_host_memory_resource` provides host accessible memory + */ + friend void get_property(pinned_host_memory_resource const&, cuda::mr::host_accessible) noexcept + { + } }; static_assert(rmm::detail::polyfill::async_resource_with 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1) - /** * @brief Alias for a `cuda::mr::resource_ref` with the property * `cuda::mr::device_accessible`. @@ -72,52 +70,6 @@ using host_device_resource_ref = detail::cccl_resource_ref< using host_device_async_resource_ref = detail::cccl_async_resource_ref< cuda::mr::resource_ref>; -#else - -/** - * @brief Alias for a `cuda::mr::resource_ref` with the property - * `cuda::mr::device_accessible`. - */ -using device_resource_ref = - detail::cccl_resource_ref>; - -/** - * @brief Alias for a `cuda::mr::async_resource_ref` with the property - * `cuda::mr::device_accessible`. - */ -using device_async_resource_ref = - detail::cccl_async_resource_ref>; - -/** - * @brief Alias for a `cuda::mr::resource_ref` with the property - * `cuda::mr::host_accessible`. - */ -using host_resource_ref = - detail::cccl_resource_ref>; - -/** - * @brief Alias for a `cuda::mr::async_resource_ref` with the property - * `cuda::mr::host_accessible`. - */ -using host_async_resource_ref = - detail::cccl_async_resource_ref>; - -/** - * @brief Alias for a `cuda::mr::resource_ref` with the properties - * `cuda::mr::host_accessible` and `cuda::mr::device_accessible`. - */ -using host_device_resource_ref = detail::cccl_resource_ref< - cuda::mr::resource_ref>; - -/** - * @brief Alias for a `cuda::mr::async_resource_ref` with the properties - * `cuda::mr::host_accessible` and `cuda::mr::device_accessible`. - */ -using host_device_async_resource_ref = detail::cccl_async_resource_ref< - cuda::mr::async_resource_ref>; - -#endif - /** * @brief Convert pointer to memory resource into `device_async_resource_ref`, checking for * `nullptr` diff --git a/cpp/src/device_buffer.cpp b/cpp/src/device_buffer.cpp index c2857b756..1d6659259 100644 --- a/cpp/src/device_buffer.cpp +++ b/cpp/src/device_buffer.cpp @@ -98,12 +98,12 @@ void device_buffer::allocate_async(std::size_t bytes) { _size = bytes; _capacity = bytes; - _data = (bytes > 0) ? _mr.allocate_async(bytes, stream()) : nullptr; + _data = (bytes > 0) ? _mr.allocate(stream(), bytes) : nullptr; } void device_buffer::deallocate_async() noexcept { - if (capacity() > 0) { _mr.deallocate_async(data(), capacity(), stream()); } + if (capacity() > 0) { _mr.deallocate(stream(), data(), capacity()); } _size = 0; _capacity = 0; _data = nullptr; diff --git a/cpp/tests/device_check_resource_adaptor.hpp b/cpp/tests/device_check_resource_adaptor.hpp index 5bbbb9915..dcc8b898e 100644 --- a/cpp/tests/device_check_resource_adaptor.hpp +++ b/cpp/tests/device_check_resource_adaptor.hpp @@ -44,7 +44,7 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour { bool const is_correct_device = check_device_id(); EXPECT_TRUE(is_correct_device); - if (is_correct_device) { return get_upstream_resource().allocate_async(bytes, stream); } + if (is_correct_device) { return get_upstream_resource().allocate(stream, bytes); } return nullptr; } @@ -52,7 +52,7 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour { bool const is_correct_device = check_device_id(); EXPECT_TRUE(is_correct_device); - if (is_correct_device) { get_upstream_resource().deallocate_async(ptr, bytes, stream); } + if (is_correct_device) { get_upstream_resource().deallocate(stream, ptr, bytes); } } [[nodiscard]] bool do_is_equal( diff --git a/cpp/tests/logger_tests.cpp b/cpp/tests/logger_tests.cpp index 2b38792d6..34bde54f3 100644 --- a/cpp/tests/logger_tests.cpp +++ b/cpp/tests/logger_tests.cpp @@ -134,10 +134,10 @@ TEST(Adaptor, FilenameConstructor) auto const size0{100}; auto const size1{42}; - auto* ptr0 = log_mr.allocate(size0); - auto* ptr1 = log_mr.allocate(size1); - log_mr.deallocate(ptr0, size0); - log_mr.deallocate(ptr1, size1); + auto* ptr0 = log_mr.allocate_sync(size0); + auto* ptr1 = log_mr.allocate_sync(size1); + log_mr.deallocate_sync(ptr0, size0); + log_mr.deallocate_sync(ptr1, size1); log_mr.flush(); using rmm::detail::action; @@ -167,10 +167,10 @@ TEST(Adaptor, MultiSinkConstructor) auto const size0{100}; auto const size1{42}; - auto* ptr0 = log_mr.allocate(size0); - auto* ptr1 = log_mr.allocate(size1); - log_mr.deallocate(ptr0, size0); - log_mr.deallocate(ptr1, size1); + auto* ptr0 = log_mr.allocate_sync(size0); + auto* ptr1 = log_mr.allocate_sync(size1); + log_mr.deallocate_sync(ptr0, size0); + log_mr.deallocate_sync(ptr1, size1); log_mr.flush(); using rmm::detail::action; @@ -196,10 +196,10 @@ TEST(Adaptor, Factory) auto const size0{99}; auto const size1{42}; - auto* ptr0 = log_mr.allocate(size0); - log_mr.deallocate(ptr0, size0); - auto* ptr1 = log_mr.allocate(size1); - log_mr.deallocate(ptr1, size1); + auto* ptr0 = log_mr.allocate_sync(size0); + log_mr.deallocate_sync(ptr0, size0); + auto* ptr1 = log_mr.allocate_sync(size1); + log_mr.deallocate_sync(ptr1, size1); log_mr.flush(); using rmm::detail::action; @@ -235,8 +235,8 @@ TEST(Adaptor, EnvironmentPath) auto const size{100}; - auto* ptr = log_mr.allocate(size); - log_mr.deallocate(ptr, size); + auto* ptr = log_mr.allocate_sync(size); + log_mr.deallocate_sync(ptr, size); log_mr.flush(); @@ -262,10 +262,10 @@ TEST(Adaptor, AllocateFailure) auto const size0{99}; auto const size1{1_TiB}; - auto* ptr0 = log_mr.allocate(size0); - log_mr.deallocate(ptr0, size0); + auto* ptr0 = log_mr.allocate_sync(size0); + log_mr.deallocate_sync(ptr0, size0); try { - log_mr.allocate(size1); + log_mr.allocate_sync(size1); } catch (...) { } log_mr.flush(); @@ -290,8 +290,8 @@ TEST(Adaptor, STDOUT) auto const size{100}; - auto* ptr = log_mr.allocate(size); - log_mr.deallocate(ptr, size); + auto* ptr = log_mr.allocate_sync(size); + log_mr.deallocate_sync(ptr, size); std::string output = testing::internal::GetCapturedStdout(); std::string header = output.substr(0, output.find('\n')); @@ -308,8 +308,8 @@ TEST(Adaptor, STDERR) auto const size{100}; - auto* ptr = log_mr.allocate(size); - log_mr.deallocate(ptr, size); + auto* ptr = log_mr.allocate_sync(size); + log_mr.deallocate_sync(ptr, size); std::string output = testing::internal::GetCapturedStderr(); std::string header = output.substr(0, output.find('\n')); diff --git a/cpp/tests/mr/device/adaptor_tests.cpp b/cpp/tests/mr/device/adaptor_tests.cpp index a7739f0cc..b3ff1dc08 100644 --- a/cpp/tests/mr/device/adaptor_tests.cpp +++ b/cpp/tests/mr/device/adaptor_tests.cpp @@ -150,9 +150,9 @@ TYPED_TEST(AdaptorTest, GetUpstreamResource) TYPED_TEST(AdaptorTest, AllocFree) { void* ptr{nullptr}; - EXPECT_NO_THROW(ptr = this->mr->allocate(1024)); + EXPECT_NO_THROW(ptr = this->mr->allocate_sync(1024)); EXPECT_NE(ptr, nullptr); - EXPECT_NO_THROW(this->mr->deallocate(ptr, 1024)); + EXPECT_NO_THROW(this->mr->deallocate_sync(ptr, 1024)); } } // namespace rmm::test diff --git a/cpp/tests/mr/device/aligned_mr_tests.cpp b/cpp/tests/mr/device/aligned_mr_tests.cpp index e0fb5ef0b..9c6930b89 100644 --- a/cpp/tests/mr/device/aligned_mr_tests.cpp +++ b/cpp/tests/mr/device/aligned_mr_tests.cpp @@ -80,8 +80,8 @@ TEST(AlignedTest, DefaultAllocationAlignmentPassthrough) { auto const size{5}; - EXPECT_EQ(mr.allocate(size, stream), pointer); - mr.deallocate(pointer, size, stream); + EXPECT_EQ(mr.allocate(stream, size), pointer); + mr.deallocate(stream, pointer, size); } } @@ -102,8 +102,8 @@ TEST(AlignedTest, BelowAlignmentThresholdPassthrough) { auto const size{3}; - EXPECT_EQ(mr.allocate(size, stream), pointer); - mr.deallocate(pointer, size, stream); + EXPECT_EQ(mr.allocate(stream, size), pointer); + mr.deallocate(stream, pointer, size); } { @@ -111,8 +111,8 @@ TEST(AlignedTest, BelowAlignmentThresholdPassthrough) void* const pointer1 = int_to_address(456); EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer1)); EXPECT_CALL(mock, do_deallocate(pointer1, size, stream)).Times(1); - EXPECT_EQ(mr.allocate(size, stream), pointer1); - mr.deallocate(pointer1, size, stream); + EXPECT_EQ(mr.allocate(stream, size), pointer1); + mr.deallocate(stream, pointer1, size); } } @@ -134,8 +134,8 @@ TEST(AlignedTest, UpstreamAddressAlreadyAligned) { auto const size{65536}; - EXPECT_EQ(mr.allocate(size, stream), pointer); - mr.deallocate(pointer, size, stream); + EXPECT_EQ(mr.allocate(stream, size), pointer); + mr.deallocate(stream, pointer, size); } } @@ -157,8 +157,8 @@ TEST(AlignedTest, AlignUpstreamAddress) { void* const expected_pointer = int_to_address(4096); auto const size{65536}; - EXPECT_EQ(mr.allocate(size, stream), expected_pointer); - mr.deallocate(expected_pointer, size, stream); + EXPECT_EQ(mr.allocate(stream, size), expected_pointer); + mr.deallocate(stream, expected_pointer, size); } } @@ -193,12 +193,12 @@ TEST(AlignedTest, AlignMultiple) auto const size1{65536}; auto const size2{73728}; auto const size3{77800}; - EXPECT_EQ(mr.allocate(size1, stream), expected_pointer1); - EXPECT_EQ(mr.allocate(size2, stream), expected_pointer2); - EXPECT_EQ(mr.allocate(size3, stream), expected_pointer3); - mr.deallocate(expected_pointer1, size1, stream); - mr.deallocate(expected_pointer2, size2, stream); - mr.deallocate(expected_pointer3, size3, stream); + EXPECT_EQ(mr.allocate(stream, size1), expected_pointer1); + EXPECT_EQ(mr.allocate(stream, size2), expected_pointer2); + EXPECT_EQ(mr.allocate(stream, size3), expected_pointer3); + mr.deallocate(stream, expected_pointer1, size1); + mr.deallocate(stream, expected_pointer2, size2); + mr.deallocate(stream, expected_pointer3, size3); } } @@ -207,9 +207,9 @@ TEST(AlignedTest, AlignRealPointer) auto const alignment{4096}; auto const threshold{65536}; aligned_real mr{rmm::mr::get_current_device_resource_ref(), alignment, threshold}; - void* alloc = mr.allocate(threshold); + void* alloc = mr.allocate_sync(threshold); EXPECT_TRUE(rmm::is_pointer_aligned(alloc, alignment)); - mr.deallocate(alloc, threshold); + mr.deallocate_sync(alloc, threshold); } TEST(AlignedTest, SmallAlignmentsBumpedTo256Bytes) @@ -218,7 +218,7 @@ TEST(AlignedTest, SmallAlignmentsBumpedTo256Bytes) for (auto requested_alignment : {32UL, 64UL, 128UL}) { aligned_real mr{rmm::mr::get_current_device_resource_ref(), requested_alignment}; - void* ptr = mr.allocate(requested_alignment); + void* ptr = mr.allocate_sync(requested_alignment); // Even though we requested smaller alignment, pointer should be 256-byte // aligned for CUDA requirements @@ -226,7 +226,7 @@ TEST(AlignedTest, SmallAlignmentsBumpedTo256Bytes) // And also aligned to the originally requested alignment EXPECT_TRUE(rmm::is_pointer_aligned(ptr, requested_alignment)); - mr.deallocate(ptr, requested_alignment); + mr.deallocate_sync(ptr, requested_alignment); } } diff --git a/cpp/tests/mr/device/arena_mr_tests.cpp b/cpp/tests/mr/device/arena_mr_tests.cpp index 4a45eb01b..448f19f20 100644 --- a/cpp/tests/mr/device/arena_mr_tests.cpp +++ b/cpp/tests/mr/device/arena_mr_tests.cpp @@ -16,6 +16,7 @@ #include "../../byte_literals.hpp" +#include #include #include #include @@ -40,8 +41,23 @@ namespace rmm::test { namespace { -class mock_memory_resource { +class mock_memory_resource_interface { public: + // We must define an interface class so that we can mock methods with default arguments. + virtual void* allocate_sync(std::size_t, std::size_t) = 0; + virtual void deallocate_sync(void*, std::size_t, std::size_t) noexcept = 0; + virtual void* allocate(cuda_stream_view, + std::size_t, + std::size_t = CUDA_ALLOCATION_ALIGNMENT) = 0; + virtual void deallocate(cuda_stream_view, + void*, + std::size_t, + std::size_t = CUDA_ALLOCATION_ALIGNMENT) noexcept = 0; +}; + +class mock_memory_resource : public mock_memory_resource_interface { + public: +#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE MOCK_METHOD(void*, allocate, (std::size_t, std::size_t)); MOCK_METHOD(void, deallocate, (void*, std::size_t, std::size_t), (noexcept)); MOCK_METHOD(void*, allocate_async, (std::size_t, std::size_t, cuda::stream_ref)); @@ -49,29 +65,12 @@ class mock_memory_resource { deallocate_async, (void*, std::size_t, std::size_t, cuda::stream_ref), (noexcept)); +#endif // RMM_ENABLE_LEGACY_MR_INTERFACE - void* allocate_sync(std::size_t bytes, std::size_t alignment) - { - return allocate(bytes, alignment); - } - - void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) noexcept - { - deallocate(ptr, bytes, alignment); - } - - void* allocate(cuda_stream_view stream, std::size_t bytes, std::size_t alignment) - { - return allocate_async(bytes, alignment, stream); - } - - void deallocate(cuda_stream_view stream, - void* ptr, - std::size_t bytes, - std::size_t alignment) noexcept - { - return deallocate_async(ptr, bytes, alignment, stream); - } + MOCK_METHOD(void*, allocate_sync, (std::size_t, std::size_t)); + MOCK_METHOD(void, deallocate_sync, (void*, std::size_t, std::size_t), (noexcept)); + MOCK_METHOD(void*, allocate, (cuda_stream_view, std::size_t, std::size_t)); + MOCK_METHOD(void, deallocate, (cuda_stream_view, void*, std::size_t, std::size_t), (noexcept)); bool operator==(mock_memory_resource const&) const noexcept { return true; } bool operator!=(mock_memory_resource const&) const { return false; } @@ -100,8 +99,8 @@ auto const fake_address4 = reinterpret_cast(superblock::minimum_size * 2) struct ArenaTest : public ::testing::Test { void SetUp() override { - EXPECT_CALL(mock_mr, allocate(arena_size, ::testing::_)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock_mr, deallocate(fake_address3, arena_size, ::testing::_)); + EXPECT_CALL(mock_mr, allocate_sync(arena_size, ::testing::_)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock_mr, deallocate_sync(fake_address3, arena_size, ::testing::_)); global = std::make_unique(mock_mr, arena_size); per_thread = std::make_unique(*global); @@ -356,7 +355,7 @@ TEST_F(ArenaTest, GlobalArenaReleaseMergeNext) // NOLINT { auto sblk = global->acquire(256); global->release(std::move(sblk)); - auto* ptr = global->allocate(arena_size); + auto* ptr = global->allocate_sync(arena_size); EXPECT_EQ(ptr, fake_address3); } @@ -367,7 +366,7 @@ TEST_F(ArenaTest, GlobalArenaReleaseMergePrevious) // NOLINT global->acquire(512); global->release(std::move(sblk)); global->release(std::move(sb2)); - auto* ptr = global->allocate(superblock::minimum_size * 2); + auto* ptr = global->allocate_sync(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } @@ -379,7 +378,7 @@ TEST_F(ArenaTest, GlobalArenaReleaseMergePreviousAndNext) // NOLINT global->release(std::move(sblk)); global->release(std::move(sb3)); global->release(std::move(sb2)); - auto* ptr = global->allocate(arena_size); + auto* ptr = global->allocate_sync(arena_size); EXPECT_EQ(ptr, fake_address3); } @@ -393,38 +392,38 @@ TEST_F(ArenaTest, GlobalArenaReleaseMultiple) // NOLINT auto sb3 = global->acquire(512); superblocks.insert(std::move(sb3)); global->release(superblocks); - auto* ptr = global->allocate(arena_size); + auto* ptr = global->allocate_sync(arena_size); EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaAllocate) // NOLINT { - auto* ptr = global->allocate(superblock::minimum_size * 2); + auto* ptr = global->allocate_sync(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaAllocateExtraLarge) // NOLINT { - EXPECT_EQ(global->allocate(1_PiB), nullptr); - EXPECT_EQ(global->allocate(1_PiB), nullptr); + EXPECT_EQ(global->allocate_sync(1_PiB), nullptr); + EXPECT_EQ(global->allocate_sync(1_PiB), nullptr); } TEST_F(ArenaTest, GlobalArenaDeallocate) // NOLINT { - auto* ptr = global->allocate(superblock::minimum_size * 2); + auto* ptr = global->allocate_sync(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); - global->deallocate_async(ptr, superblock::minimum_size * 2, {}); - ptr = global->allocate(superblock::minimum_size * 2); + global->deallocate_sync(ptr, superblock::minimum_size * 2); + ptr = global->allocate_sync(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaDeallocateAlignUp) // NOLINT { - auto* ptr = global->allocate(superblock::minimum_size + 256); - auto* ptr2 = global->allocate(superblock::minimum_size + 512); - global->deallocate_async(ptr, superblock::minimum_size + 256, {}); - global->deallocate_async(ptr2, superblock::minimum_size + 512, {}); - EXPECT_EQ(global->allocate(arena_size), fake_address3); + auto* ptr = global->allocate_sync(superblock::minimum_size + 256); + auto* ptr2 = global->allocate_sync(superblock::minimum_size + 512); + global->deallocate_sync(ptr, superblock::minimum_size + 256); + global->deallocate_sync(ptr2, superblock::minimum_size + 512); + EXPECT_EQ(global->allocate_sync(arena_size), fake_address3); } TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT @@ -433,9 +432,9 @@ TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT auto const blk = sblk.first_fit(512); auto const blk2 = sblk.first_fit(1024); global->release(std::move(sblk)); - global->deallocate(blk.pointer(), blk.size()); - global->deallocate(blk2.pointer(), blk2.size()); - EXPECT_EQ(global->allocate(arena_size), fake_address3); + global->deallocate_sync(blk.pointer(), blk.size()); + global->deallocate_sync(blk2.pointer(), blk2.size()); + EXPECT_EQ(global->allocate_sync(arena_size), fake_address3); } /** @@ -444,46 +443,46 @@ TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT TEST_F(ArenaTest, ArenaAllocate) // NOLINT { - EXPECT_EQ(per_thread->allocate(superblock::minimum_size), fake_address3); - EXPECT_EQ(per_thread->allocate(256), fake_address4); + EXPECT_EQ(per_thread->allocate_sync(superblock::minimum_size), fake_address3); + EXPECT_EQ(per_thread->allocate_sync(256), fake_address4); } TEST_F(ArenaTest, ArenaDeallocate) // NOLINT { - auto* ptr = per_thread->allocate(superblock::minimum_size); - per_thread->deallocate(ptr, superblock::minimum_size, {}); - auto* ptr2 = per_thread->allocate(256); - per_thread->deallocate(ptr2, 256, {}); - EXPECT_EQ(per_thread->allocate(superblock::minimum_size), fake_address3); + auto* ptr = per_thread->allocate_sync(superblock::minimum_size); + per_thread->deallocate_sync(ptr, superblock::minimum_size); + auto* ptr2 = per_thread->allocate_sync(256); + per_thread->deallocate_sync(ptr2, 256); + EXPECT_EQ(per_thread->allocate_sync(superblock::minimum_size), fake_address3); } TEST_F(ArenaTest, ArenaDeallocateMergePrevious) // NOLINT { - auto* ptr = per_thread->allocate(256); - auto* ptr2 = per_thread->allocate(256); - per_thread->allocate(256); - per_thread->deallocate(ptr, 256, {}); - per_thread->deallocate(ptr2, 256, {}); - EXPECT_EQ(per_thread->allocate(512), fake_address3); + auto* ptr = per_thread->allocate_sync(256); + auto* ptr2 = per_thread->allocate_sync(256); + per_thread->allocate_sync(256); + per_thread->deallocate_sync(ptr, 256); + per_thread->deallocate_sync(ptr2, 256); + EXPECT_EQ(per_thread->allocate_sync(512), fake_address3); } TEST_F(ArenaTest, ArenaDeallocateMergeNext) // NOLINT { - auto* ptr = per_thread->allocate(256); - auto* ptr2 = per_thread->allocate(256); - per_thread->allocate(256); - per_thread->deallocate(ptr2, 256, {}); - per_thread->deallocate(ptr, 256, {}); - EXPECT_EQ(per_thread->allocate(512), fake_address3); + auto* ptr = per_thread->allocate_sync(256); + auto* ptr2 = per_thread->allocate_sync(256); + per_thread->allocate_sync(256); + per_thread->deallocate_sync(ptr2, 256); + per_thread->deallocate_sync(ptr, 256); + EXPECT_EQ(per_thread->allocate_sync(512), fake_address3); } TEST_F(ArenaTest, ArenaDeallocateMergePreviousAndNext) // NOLINT { - auto* ptr = per_thread->allocate(256); - auto* ptr2 = per_thread->allocate(256); - per_thread->deallocate(ptr, 256, {}); - per_thread->deallocate(ptr2, 256, {}); - EXPECT_EQ(per_thread->allocate(2_KiB), fake_address3); + auto* ptr = per_thread->allocate_sync(256); + auto* ptr2 = per_thread->allocate_sync(256); + per_thread->deallocate_sync(ptr, 256); + per_thread->deallocate_sync(ptr2, 256); + EXPECT_EQ(per_thread->allocate_sync(2_KiB), fake_address3); } TEST_F(ArenaTest, ArenaDefragment) // NOLINT @@ -491,14 +490,14 @@ TEST_F(ArenaTest, ArenaDefragment) // NOLINT std::vector pointers; std::size_t num_pointers{4}; for (std::size_t i = 0; i < num_pointers; i++) { - pointers.push_back(per_thread->allocate(superblock::minimum_size)); + pointers.push_back(per_thread->allocate_sync(superblock::minimum_size)); } for (auto* ptr : pointers) { - per_thread->deallocate(ptr, superblock::minimum_size, {}); + per_thread->deallocate_sync(ptr, superblock::minimum_size); } - EXPECT_EQ(global->allocate(arena_size), nullptr); + EXPECT_EQ(global->allocate_sync(arena_size), nullptr); per_thread->defragment(); - EXPECT_EQ(global->allocate(arena_size), fake_address3); + EXPECT_EQ(global->allocate_sync(arena_size), fake_address3); } /** @@ -531,13 +530,13 @@ TEST_F(ArenaTest, SmallMediumLarge) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) arena_mr mr(rmm::mr::get_current_device_resource_ref()); - auto* small = mr.allocate(256); - auto* medium = mr.allocate(64_MiB); + auto* small = mr.allocate_sync(256); + auto* medium = mr.allocate_sync(64_MiB); auto const free = rmm::available_device_memory().first; - auto* large = mr.allocate(free / 3); - mr.deallocate(small, 256); - mr.deallocate(medium, 64_MiB); - mr.deallocate(large, free / 3); + auto* large = mr.allocate_sync(free / 3); + mr.deallocate_sync(small, 256); + mr.deallocate_sync(medium, 64_MiB); + mr.deallocate_sync(large, free / 3); }()); } @@ -552,16 +551,16 @@ TEST_F(ArenaTest, Defragment) // NOLINT for (std::size_t i = 0; i < num_threads; ++i) { threads.emplace_back(std::thread([&] { cuda_stream stream{}; - void* ptr = mr.allocate(32_KiB, stream); - mr.deallocate(ptr, 32_KiB, stream); + void* ptr = mr.allocate(stream, 32_KiB); + mr.deallocate(stream, ptr, 32_KiB); })); } for (auto& thread : threads) { thread.join(); } - auto* ptr = mr.allocate(arena_size); - mr.deallocate(ptr, arena_size); + auto* ptr = mr.allocate_sync(arena_size); + mr.deallocate_sync(ptr, arena_size); }()); } @@ -574,26 +573,26 @@ TEST_F(ArenaTest, PerThreadToStreamDealloc) // NOLINT auto const arena_size = superblock::minimum_size * 2; arena_mr mr(rmm::mr::get_current_device_resource_ref(), arena_size); // Create an allocation from a per thread arena - void* thread_ptr = mr.allocate(256, rmm::cuda_stream_per_thread); + void* thread_ptr = mr.allocate(rmm::cuda_stream_per_thread, 256); // Create an allocation in a stream arena to force global arena // to be empty cuda_stream stream{}; - void* ptr = mr.allocate(32_KiB, stream); - mr.deallocate(ptr, 32_KiB, stream); + void* ptr = mr.allocate(stream, 32_KiB); + mr.deallocate(stream, ptr, 32_KiB); // at this point the global arena doesn't have any superblocks so // the next allocation causes defrag. Defrag causes all superblocks // from the thread and stream arena allocated above to go back to // global arena and it allocates one superblock to the stream arena. - auto* ptr1 = mr.allocate(superblock::minimum_size, rmm::cuda_stream_view{}); + auto* ptr1 = mr.allocate(rmm::cuda_stream_view{}, superblock::minimum_size); // Allocate again to make sure all superblocks from // global arena are owned by a stream arena instead of a thread arena // or the global arena. - auto* ptr2 = mr.allocate(32_KiB, rmm::cuda_stream_view{}); + auto* ptr2 = mr.allocate(rmm::cuda_stream_view{}, 32_KiB); // The original thread ptr is now owned by a stream arena so make // sure deallocation works. - mr.deallocate(thread_ptr, 256, rmm::cuda_stream_per_thread); - mr.deallocate(ptr1, superblock::minimum_size, rmm::cuda_stream_view{}); - mr.deallocate(ptr2, 32_KiB, rmm::cuda_stream_view{}); + mr.deallocate(rmm::cuda_stream_per_thread, thread_ptr, 256); + mr.deallocate(rmm::cuda_stream_view{}, ptr1, superblock::minimum_size); + mr.deallocate(rmm::cuda_stream_view{}, ptr2, 32_KiB); } TEST_F(ArenaTest, DumpLogOnFailure) // NOLINT @@ -606,8 +605,8 @@ TEST_F(ArenaTest, DumpLogOnFailure) // NOLINT threads.reserve(num_threads); for (std::size_t i = 0; i < num_threads; ++i) { threads.emplace_back([&] { - void* ptr = mr.allocate(32_KiB); - mr.deallocate(ptr, 32_KiB); + void* ptr = mr.allocate_sync(32_KiB); + mr.deallocate_sync(ptr, 32_KiB); }); } @@ -617,7 +616,7 @@ TEST_F(ArenaTest, DumpLogOnFailure) // NOLINT } // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) - EXPECT_THROW(mr.allocate(8_MiB), rmm::out_of_memory); + EXPECT_THROW(mr.allocate_sync(8_MiB), rmm::out_of_memory); struct stat file_status{}; EXPECT_EQ(stat("rmm_arena_memory_dump.log", &file_status), 0); diff --git a/cpp/tests/mr/device/binning_mr_tests.cpp b/cpp/tests/mr/device/binning_mr_tests.cpp index 409b4a3d9..cc50de469 100644 --- a/cpp/tests/mr/device/binning_mr_tests.cpp +++ b/cpp/tests/mr/device/binning_mr_tests.cpp @@ -39,9 +39,9 @@ TEST(BinningTest, ExplicitBinMR) cuda_mr cuda{}; binning_mr mr{&cuda}; mr.add_bin(1024, &cuda); - auto* ptr = mr.allocate(512); + auto* ptr = mr.allocate_sync(512); EXPECT_NE(ptr, nullptr); - mr.deallocate(ptr, 512); + mr.deallocate_sync(ptr, 512); } } // namespace rmm::test diff --git a/cpp/tests/mr/device/callback_mr_tests.cpp b/cpp/tests/mr/device/callback_mr_tests.cpp index eae3d6789..bf1cddbde 100644 --- a/cpp/tests/mr/device/callback_mr_tests.cpp +++ b/cpp/tests/mr/device/callback_mr_tests.cpp @@ -43,17 +43,17 @@ TEST(CallbackTest, TestCallbacksAreInvoked) auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) { auto base_mr = *static_cast(arg); - return base_mr.allocate_async(size, stream); + return base_mr.allocate(stream, size); }; auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) { auto base_mr = *static_cast(arg); - base_mr.deallocate_async(ptr, size, stream); + base_mr.deallocate(stream, ptr, size); }; auto mr = rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_ref, &base_ref); auto const size = std::size_t{10_MiB}; - auto* ptr = mr.allocate(size); - mr.deallocate(ptr, size); + auto* ptr = mr.allocate_sync(size); + mr.deallocate_sync(ptr, size); } TEST(CallbackTest, LoggingTest) @@ -64,19 +64,19 @@ TEST(CallbackTest, LoggingTest) auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) { std::cout << "Allocating " << size << " bytes" << std::endl; auto base_mr = *static_cast(arg); - return base_mr.allocate_async(size, stream); + return base_mr.allocate(stream, size); }; auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) { std::cout << "Deallocating " << size << " bytes" << std::endl; auto base_mr = *static_cast(arg); - base_mr.deallocate_async(ptr, size, stream); + base_mr.deallocate(stream, ptr, size); }; auto mr = rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_mr, &base_mr); auto const size = std::size_t{10_MiB}; - auto* ptr = mr.allocate(size); - mr.deallocate(ptr, size); + auto* ptr = mr.allocate_sync(size); + mr.deallocate_sync(ptr, size); auto output = testing::internal::GetCapturedStdout(); auto expect = std::string("Allocating ") + std::to_string(size) + " bytes\nDeallocating " + diff --git a/cpp/tests/mr/device/cuda_async_managed_mr_tests.cpp b/cpp/tests/mr/device/cuda_async_managed_mr_tests.cpp index 1b5e43999..193f45e4e 100644 --- a/cpp/tests/mr/device/cuda_async_managed_mr_tests.cpp +++ b/cpp/tests/mr/device/cuda_async_managed_mr_tests.cpp @@ -43,9 +43,9 @@ TEST_F(AsyncManagedMRTest, BasicAllocateDeallocate) { const auto alloc_size{100}; cuda_async_managed_mr mr{}; - void* ptr = mr.allocate(alloc_size); + void* ptr = mr.allocate_sync(alloc_size); ASSERT_NE(nullptr, ptr); - mr.deallocate(ptr, alloc_size); + mr.deallocate_sync(ptr, alloc_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } @@ -62,7 +62,7 @@ TEST_F(AsyncManagedMRTest, AllocatedPointerIsManaged) { const auto alloc_size{1024}; cuda_async_managed_mr mr{}; - void* ptr = mr.allocate(alloc_size); + void* ptr = mr.allocate_sync(alloc_size); ASSERT_NE(nullptr, ptr); // Verify the pointer is managed memory using cudaPointerGetAttributes @@ -70,7 +70,7 @@ TEST_F(AsyncManagedMRTest, AllocatedPointerIsManaged) RMM_CUDA_TRY(cudaPointerGetAttributes(&attrs, ptr)); EXPECT_EQ(attrs.type, cudaMemoryTypeManaged); - mr.deallocate(ptr, alloc_size); + mr.deallocate_sync(ptr, alloc_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } @@ -78,7 +78,7 @@ TEST_F(AsyncManagedMRTest, AllocatedPointerIsAccessibleFromHost) { const auto alloc_size{sizeof(int) * 100}; cuda_async_managed_mr mr{}; - auto* ptr = static_cast(mr.allocate(alloc_size)); + auto* ptr = static_cast(mr.allocate_sync(alloc_size)); ASSERT_NE(nullptr, ptr); // Synchronize to ensure allocation is complete @@ -97,7 +97,7 @@ TEST_F(AsyncManagedMRTest, AllocatedPointerIsAccessibleFromHost) EXPECT_EQ(ptr[50], 50); EXPECT_EQ(ptr[99], 99); - mr.deallocate(ptr, alloc_size); + mr.deallocate_sync(ptr, alloc_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } @@ -106,9 +106,9 @@ TEST_F(AsyncManagedMRTest, MultipleAllocationsAreManaged) const auto alloc_size{512}; cuda_async_managed_mr mr{}; - void* ptr1 = mr.allocate(alloc_size); - void* ptr2 = mr.allocate(alloc_size * 2); - void* ptr3 = mr.allocate(alloc_size / 2); + void* ptr1 = mr.allocate_sync(alloc_size); + void* ptr2 = mr.allocate_sync(alloc_size * 2); + void* ptr3 = mr.allocate_sync(alloc_size / 2); ASSERT_NE(nullptr, ptr1); ASSERT_NE(nullptr, ptr2); @@ -127,9 +127,9 @@ TEST_F(AsyncManagedMRTest, MultipleAllocationsAreManaged) EXPECT_EQ(attrs2.type, cudaMemoryTypeManaged); EXPECT_EQ(attrs3.type, cudaMemoryTypeManaged); - mr.deallocate(ptr1, alloc_size); - mr.deallocate(ptr2, alloc_size * 2); - mr.deallocate(ptr3, alloc_size / 2); + mr.deallocate_sync(ptr1, alloc_size); + mr.deallocate_sync(ptr2, alloc_size * 2); + mr.deallocate_sync(ptr3, alloc_size / 2); RMM_CUDA_TRY(cudaDeviceSynchronize()); } diff --git a/cpp/tests/mr/device/cuda_async_mr_tests.cpp b/cpp/tests/mr/device/cuda_async_mr_tests.cpp index 0c3db3f69..41e4adada 100644 --- a/cpp/tests/mr/device/cuda_async_mr_tests.cpp +++ b/cpp/tests/mr/device/cuda_async_mr_tests.cpp @@ -46,8 +46,8 @@ TEST_F(AsyncMRTest, ExplicitInitialPoolSize) { const auto pool_init_size{100}; cuda_async_mr mr{pool_init_size}; - void* ptr = mr.allocate(pool_init_size); - mr.deallocate(ptr, pool_init_size); + void* ptr = mr.allocate_sync(pool_init_size); + mr.deallocate_sync(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } @@ -56,8 +56,8 @@ TEST_F(AsyncMRTest, ExplicitReleaseThreshold) const auto pool_init_size{100}; const auto pool_release_threshold{1000}; cuda_async_mr mr{pool_init_size, pool_release_threshold}; - void* ptr = mr.allocate(pool_init_size); - mr.deallocate(ptr, pool_init_size); + void* ptr = mr.allocate_sync(pool_init_size); + mr.deallocate_sync(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } @@ -90,8 +90,8 @@ TEST_F(AsyncMRFabricTest, FabricHandlesSupport) cuda_async_mr mr{pool_init_size, pool_release_threshold, rmm::mr::cuda_async_memory_resource::allocation_handle_type::fabric}; - void* ptr = mr.allocate(pool_init_size); - mr.deallocate(ptr, pool_init_size); + void* ptr = mr.allocate_sync(pool_init_size); + mr.deallocate_sync(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } diff --git a/cpp/tests/mr/device/cuda_async_view_mr_tests.cpp b/cpp/tests/mr/device/cuda_async_view_mr_tests.cpp index 48181edfe..f8885d59f 100644 --- a/cpp/tests/mr/device/cuda_async_view_mr_tests.cpp +++ b/cpp/tests/mr/device/cuda_async_view_mr_tests.cpp @@ -40,7 +40,7 @@ TEST(PoolTest, UsePool) const auto pool_init_size{100}; cuda_async_view_mr mr{memPool}; void* ptr = mr.allocate(pool_init_size); - mr.deallocate(ptr, pool_init_size); + mr.deallocate_sync(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } @@ -59,7 +59,7 @@ TEST(PoolTest, NotTakingOwnershipOfPool) const auto pool_init_size{100}; cuda_async_view_mr mr{memPool}; void* ptr = mr.allocate(pool_init_size); - mr.deallocate(ptr, pool_init_size); + mr.deallocate_sync(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } diff --git a/cpp/tests/mr/device/failure_callback_mr_tests.cpp b/cpp/tests/mr/device/failure_callback_mr_tests.cpp index 49a5f7522..c32cf6791 100644 --- a/cpp/tests/mr/device/failure_callback_mr_tests.cpp +++ b/cpp/tests/mr/device/failure_callback_mr_tests.cpp @@ -49,7 +49,7 @@ TEST(FailureCallbackTest, RetryAllocationOnce) failure_callback_adaptor<> mr{ rmm::mr::get_current_device_resource_ref(), failure_handler, &retried}; EXPECT_EQ(retried, false); - EXPECT_THROW(mr.allocate(512_GiB), std::bad_alloc); + EXPECT_THROW(mr.allocate_sync(512_GiB), std::bad_alloc); EXPECT_EQ(retried, true); } @@ -68,8 +68,8 @@ TEST(FailureCallbackTest, DifferentExceptionTypes) always_throw_memory_resource bad_alloc_mr; always_throw_memory_resource oom_mr; - EXPECT_THROW(bad_alloc_mr.allocate(1_MiB), rmm::bad_alloc); - EXPECT_THROW(oom_mr.allocate(1_MiB), rmm::out_of_memory); + EXPECT_THROW(bad_alloc_mr.allocate_sync(1_MiB), rmm::bad_alloc); + EXPECT_THROW(oom_mr.allocate_sync(1_MiB), rmm::out_of_memory); // Wrap a bad_alloc-catching callback adaptor around an MR that always throws bad_alloc: // Should retry once and then re-throw bad_alloc @@ -79,7 +79,7 @@ TEST(FailureCallbackTest, DifferentExceptionTypes) &bad_alloc_mr, failure_handler, &retried}; EXPECT_EQ(retried, false); - EXPECT_THROW(bad_alloc_callback_mr.allocate(1_MiB), rmm::bad_alloc); + EXPECT_THROW(bad_alloc_callback_mr.allocate_sync(1_MiB), rmm::bad_alloc); EXPECT_EQ(retried, true); } @@ -91,7 +91,7 @@ TEST(FailureCallbackTest, DifferentExceptionTypes) failure_callback_adaptor oom_callback_mr{ &oom_mr, failure_handler, &retried}; EXPECT_EQ(retried, false); - EXPECT_THROW(oom_callback_mr.allocate(1_MiB), rmm::out_of_memory); + EXPECT_THROW(oom_callback_mr.allocate_sync(1_MiB), rmm::out_of_memory); EXPECT_EQ(retried, true); } @@ -103,7 +103,7 @@ TEST(FailureCallbackTest, DifferentExceptionTypes) failure_callback_adaptor oom_callback_mr{ &bad_alloc_mr, failure_handler, &retried}; EXPECT_EQ(retried, false); - EXPECT_THROW(oom_callback_mr.allocate(1_MiB), rmm::bad_alloc); // bad_alloc passes through + EXPECT_THROW(oom_callback_mr.allocate_sync(1_MiB), rmm::bad_alloc); // bad_alloc passes through EXPECT_EQ(retried, false); // Does not catch / retry on anything except OOM } } diff --git a/cpp/tests/mr/device/hwdecompress_tests.cpp b/cpp/tests/mr/device/hwdecompress_tests.cpp index 9443dc345..77b9ca893 100644 --- a/cpp/tests/mr/device/hwdecompress_tests.cpp +++ b/cpp/tests/mr/device/hwdecompress_tests.cpp @@ -56,9 +56,9 @@ TEST_F(HWDecompressTest, CudaMalloc) { const auto allocation_size{100}; rmm::mr::cuda_memory_resource mr{}; - void* ptr = mr.allocate(allocation_size); + void* ptr = mr.allocate_sync(allocation_size); HWDecompressTest::check_decompress_capable(ptr); - mr.deallocate(ptr, allocation_size); + mr.deallocate_sync(ptr, allocation_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } @@ -70,9 +70,9 @@ TEST_F(HWDecompressTest, CudaMallocAsync) } const auto pool_init_size{100}; rmm::mr::cuda_async_memory_resource mr{pool_init_size}; - void* ptr = mr.allocate(pool_init_size); + void* ptr = mr.allocate_sync(pool_init_size); HWDecompressTest::check_decompress_capable(ptr); - mr.deallocate(ptr, pool_init_size); + mr.deallocate_sync(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } diff --git a/cpp/tests/mr/device/limiting_mr_tests.cpp b/cpp/tests/mr/device/limiting_mr_tests.cpp index d90da009b..ee72ba466 100644 --- a/cpp/tests/mr/device/limiting_mr_tests.cpp +++ b/cpp/tests/mr/device/limiting_mr_tests.cpp @@ -37,7 +37,7 @@ TEST(LimitingTest, TooBig) { auto const max_size{5_MiB}; limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size}; - EXPECT_THROW(mr.allocate(max_size + 1), rmm::out_of_memory); + EXPECT_THROW(mr.allocate_sync(max_size + 1), rmm::out_of_memory); } TEST(LimitingTest, UpstreamFailure) @@ -46,7 +46,7 @@ TEST(LimitingTest, UpstreamFailure) auto const max_size_2{5_MiB}; limiting_adaptor mr1{rmm::mr::get_current_device_resource_ref(), max_size_1}; limiting_adaptor mr2{&mr1, max_size_2}; - EXPECT_THROW(mr2.allocate(4_MiB), rmm::out_of_memory); + EXPECT_THROW(mr2.allocate_sync(4_MiB), rmm::out_of_memory); } TEST(LimitingTest, UnderLimitDueToFrees) @@ -54,27 +54,27 @@ TEST(LimitingTest, UnderLimitDueToFrees) auto const max_size{10_MiB}; limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size}; auto const size1{4_MiB}; - auto* ptr1 = mr.allocate(size1); + auto* ptr1 = mr.allocate_sync(size1); auto allocated_bytes = size1; EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); - auto* ptr2 = mr.allocate(size1); + auto* ptr2 = mr.allocate_sync(size1); allocated_bytes += size1; EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); - mr.deallocate(ptr1, size1); + mr.deallocate_sync(ptr1, size1); allocated_bytes -= size1; EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); // note that we don't keep track of fragmentation or things like page size // so this should fill 100% of the memory even though it is probably over. auto const size2{6_MiB}; - auto* ptr3 = mr.allocate(size2); + auto* ptr3 = mr.allocate_sync(size2); allocated_bytes += size2; EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 0); - mr.deallocate(ptr2, size1); - mr.deallocate(ptr3, size2); + mr.deallocate_sync(ptr2, size1); + mr.deallocate_sync(ptr3, size2); } TEST(LimitingTest, OverLimit) @@ -82,20 +82,20 @@ TEST(LimitingTest, OverLimit) auto const max_size{10_MiB}; limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size}; auto const size1{4_MiB}; - auto* ptr1 = mr.allocate(size1); + auto* ptr1 = mr.allocate_sync(size1); auto allocated_bytes = size1; EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); - auto* ptr2 = mr.allocate(size1); + auto* ptr2 = mr.allocate_sync(size1); allocated_bytes += size1; EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); auto const size2{3_MiB}; - EXPECT_THROW(mr.allocate(size2), rmm::out_of_memory); + EXPECT_THROW(mr.allocate_sync(size2), rmm::out_of_memory); EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); - mr.deallocate(ptr1, 4_MiB); - mr.deallocate(ptr2, 4_MiB); + mr.deallocate_sync(ptr1, 4_MiB); + mr.deallocate_sync(ptr2, 4_MiB); } } // namespace diff --git a/cpp/tests/mr/device/mr_ref_multithreaded_tests.cpp b/cpp/tests/mr/device/mr_ref_multithreaded_tests.cpp index 47961f4fb..fe515417d 100644 --- a/cpp/tests/mr/device/mr_ref_multithreaded_tests.cpp +++ b/cpp/tests/mr/device/mr_ref_multithreaded_tests.cpp @@ -214,7 +214,7 @@ TEST_P(mr_ref_test_mt, MixedRandomAllocationFreeStream) spawn(test_mixed_random_async_allocation_free, this->ref, default_max_size, this->stream.view()); } -void allocate_async_loop(rmm::device_async_resource_ref ref, +void async_allocate_loop(rmm::device_async_resource_ref ref, std::size_t num_allocations, std::list& allocations, std::mutex& mtx, @@ -229,7 +229,7 @@ void allocate_async_loop(rmm::device_async_resource_ref ref, for (std::size_t i = 0; i < num_allocations; ++i) { std::size_t size = size_distribution(generator); - void* ptr = ref.allocate_async(size, stream); + void* ptr = ref.allocate(stream, size); { std::lock_guard lock(mtx); RMM_CUDA_TRY(cudaEventRecord(event, stream.value())); @@ -242,7 +242,7 @@ void allocate_async_loop(rmm::device_async_resource_ref ref, cudaEventSynchronize(event); } -void deallocate_async_loop(rmm::device_async_resource_ref ref, +void async_deallocate_loop(rmm::device_async_resource_ref ref, std::size_t num_allocations, std::list& allocations, std::mutex& mtx, @@ -256,14 +256,14 @@ void deallocate_async_loop(rmm::device_async_resource_ref ref, RMM_CUDA_TRY(cudaStreamWaitEvent(stream.value(), event)); allocation alloc = allocations.front(); allocations.pop_front(); - ref.deallocate_async(alloc.ptr, alloc.size, stream); + ref.deallocate(stream, alloc.ptr, alloc.size); } // Work around for threads going away before cudaEvent has finished async processing cudaEventSynchronize(event); } -void test_allocate_async_free_different_threads(rmm::device_async_resource_ref ref, +void test_async_allocate_free_different_threads(rmm::device_async_resource_ref ref, rmm::cuda_stream_view streamA, rmm::cuda_stream_view streamB) { @@ -276,7 +276,7 @@ void test_allocate_async_free_different_threads(rmm::device_async_resource_ref r RMM_CUDA_TRY(cudaEventCreate(&event)); - std::thread producer(allocate_async_loop, + std::thread producer(async_allocate_loop, ref, num_allocations, std::ref(allocations), @@ -285,7 +285,7 @@ void test_allocate_async_free_different_threads(rmm::device_async_resource_ref r std::ref(event), streamA); - std::thread consumer(deallocate_async_loop, + std::thread consumer(async_deallocate_loop, ref, num_allocations, std::ref(allocations), @@ -302,25 +302,25 @@ void test_allocate_async_free_different_threads(rmm::device_async_resource_ref r TEST_P(mr_ref_test_mt, AllocFreeDifferentThreadsDefaultStream) { - test_allocate_async_free_different_threads( + test_async_allocate_free_different_threads( this->ref, rmm::cuda_stream_default, rmm::cuda_stream_default); } TEST_P(mr_ref_test_mt, AllocFreeDifferentThreadsPerThreadDefaultStream) { - test_allocate_async_free_different_threads( + test_async_allocate_free_different_threads( this->ref, rmm::cuda_stream_per_thread, rmm::cuda_stream_per_thread); } TEST_P(mr_ref_test_mt, AllocFreeDifferentThreadsSameStream) { - test_allocate_async_free_different_threads(this->ref, this->stream, this->stream); + test_async_allocate_free_different_threads(this->ref, this->stream, this->stream); } TEST_P(mr_ref_test_mt, AllocFreeDifferentThreadsDifferentStream) { rmm::cuda_stream streamB; - test_allocate_async_free_different_threads(this->ref, this->stream, streamB); + test_async_allocate_free_different_threads(this->ref, this->stream, streamB); streamB.synchronize(); } diff --git a/cpp/tests/mr/device/mr_ref_test.hpp b/cpp/tests/mr/device/mr_ref_test.hpp index aaffb8ad0..6d2f09f02 100644 --- a/cpp/tests/mr/device/mr_ref_test.hpp +++ b/cpp/tests/mr/device/mr_ref_test.hpp @@ -105,46 +105,46 @@ struct allocation { inline void test_get_current_device_resource() { EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); - void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB); + void* ptr = rmm::mr::get_current_device_resource()->allocate_sync(1_MiB); EXPECT_NE(nullptr, ptr); EXPECT_TRUE(is_properly_aligned(ptr)); EXPECT_TRUE(is_device_accessible_memory(ptr)); - rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); + rmm::mr::get_current_device_resource()->deallocate_sync(ptr, 1_MiB); } inline void test_get_current_device_resource_ref() { - void* ptr = rmm::mr::get_current_device_resource_ref().allocate(1_MiB); + void* ptr = rmm::mr::get_current_device_resource_ref().allocate_sync(1_MiB); EXPECT_NE(nullptr, ptr); EXPECT_TRUE(is_properly_aligned(ptr)); EXPECT_TRUE(is_device_accessible_memory(ptr)); - rmm::mr::get_current_device_resource_ref().deallocate(ptr, 1_MiB); + rmm::mr::get_current_device_resource_ref().deallocate_sync(ptr, 1_MiB); } inline void test_allocate(resource_ref ref, std::size_t bytes) { try { - void* ptr = ref.allocate(bytes); + void* ptr = ref.allocate_sync(bytes); EXPECT_NE(nullptr, ptr); EXPECT_TRUE(is_properly_aligned(ptr)); EXPECT_TRUE(is_device_accessible_memory(ptr)); - ref.deallocate(ptr, bytes); + ref.deallocate_sync(ptr, bytes); } catch (rmm::out_of_memory const& e) { EXPECT_NE(std::string{e.what()}.find("out_of_memory"), std::string::npos); } } -inline void test_allocate_async(rmm::device_async_resource_ref ref, +inline void test_async_allocate(rmm::device_async_resource_ref ref, std::size_t bytes, cuda_stream_view stream = {}) { try { - void* ptr = ref.allocate_async(bytes, stream); + void* ptr = ref.allocate(stream, bytes); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); EXPECT_TRUE(is_properly_aligned(ptr)); EXPECT_TRUE(is_device_accessible_memory(ptr)); - ref.deallocate_async(ptr, bytes, stream); + ref.deallocate(stream, ptr, bytes); if (not stream.is_default()) { stream.synchronize(); } } catch (rmm::out_of_memory const& e) { EXPECT_NE(std::string{e.what()}.find("out_of_memory"), std::string::npos); @@ -155,34 +155,34 @@ inline void test_allocate_async(rmm::device_async_resource_ref ref, inline void concurrent_allocations_are_different(resource_ref ref) { const auto size{8_B}; - void* ptr1 = ref.allocate(size); - void* ptr2 = ref.allocate(size); + void* ptr1 = ref.allocate_sync(size); + void* ptr2 = ref.allocate_sync(size); EXPECT_NE(ptr1, ptr2); - ref.deallocate(ptr1, size); - ref.deallocate(ptr2, size); + ref.deallocate_sync(ptr1, size); + ref.deallocate_sync(ptr2, size); } inline void concurrent_async_allocations_are_different(rmm::device_async_resource_ref ref, cuda_stream_view stream) { const auto size{8_B}; - void* ptr1 = ref.allocate_async(size, stream); - void* ptr2 = ref.allocate_async(size, stream); + void* ptr1 = ref.allocate(stream, size); + void* ptr2 = ref.allocate(stream, size); EXPECT_NE(ptr1, ptr2); - ref.deallocate_async(ptr1, size, stream); - ref.deallocate_async(ptr2, size, stream); + ref.deallocate(stream, ptr1, size); + ref.deallocate(stream, ptr2, size); } inline void test_various_allocations(resource_ref ref) { // test allocating zero bytes on non-default stream { - void* ptr = ref.allocate(0); - EXPECT_NO_THROW(ref.deallocate(ptr, 0)); + void* ptr = ref.allocate_sync(0); + EXPECT_NO_THROW(ref.deallocate_sync(ptr, 0)); } test_allocate(ref, 4_B); @@ -193,12 +193,12 @@ inline void test_various_allocations(resource_ref ref) // should fail to allocate too much { void* ptr{nullptr}; - EXPECT_THROW(ptr = ref.allocate(1_PiB), rmm::out_of_memory); + EXPECT_THROW(ptr = ref.allocate_sync(1_PiB), rmm::out_of_memory); EXPECT_EQ(nullptr, ptr); // test e.what(); try { - ptr = ref.allocate(1_PiB); + ptr = ref.allocate_sync(1_PiB); } catch (rmm::out_of_memory const& e) { EXPECT_NE(std::string{e.what()}.find("out_of_memory"), std::string::npos); } @@ -210,26 +210,26 @@ inline void test_various_async_allocations(rmm::device_async_resource_ref ref, { // test allocating zero bytes on non-default stream { - void* ptr = ref.allocate_async(0, stream); + void* ptr = ref.allocate(stream, 0); stream.synchronize(); - EXPECT_NO_THROW(ref.deallocate_async(ptr, 0, stream)); + EXPECT_NO_THROW(ref.deallocate(stream, ptr, 0)); stream.synchronize(); } - test_allocate_async(ref, 4_B, stream); - test_allocate_async(ref, 1_KiB, stream); - test_allocate_async(ref, 1_MiB, stream); - test_allocate_async(ref, 1_GiB, stream); + test_async_allocate(ref, 4_B, stream); + test_async_allocate(ref, 1_KiB, stream); + test_async_allocate(ref, 1_MiB, stream); + test_async_allocate(ref, 1_GiB, stream); // should fail to allocate too much { void* ptr{nullptr}; - EXPECT_THROW(ptr = ref.allocate_async(1_PiB, stream), rmm::out_of_memory); + EXPECT_THROW(ptr = ref.allocate(stream, 1_PiB), rmm::out_of_memory); EXPECT_EQ(nullptr, ptr); // test e.what(); try { - ptr = ref.allocate_async(1_PiB, stream); + ptr = ref.allocate(stream, 1_PiB); } catch (rmm::out_of_memory const& e) { EXPECT_NE(std::string{e.what()}.find("out_of_memory"), std::string::npos); } @@ -249,13 +249,13 @@ inline void test_random_allocations(resource_ref ref, std::for_each( allocations.begin(), allocations.end(), [&generator, &distribution, &ref](allocation& alloc) { alloc.size = distribution(generator); - EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size)); + EXPECT_NO_THROW(alloc.ptr = ref.allocate_sync(alloc.size)); EXPECT_NE(nullptr, alloc.ptr); EXPECT_TRUE(is_properly_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [&ref](allocation& alloc) { - EXPECT_NO_THROW(ref.deallocate(alloc.ptr, alloc.size)); + EXPECT_NO_THROW(ref.deallocate_sync(alloc.ptr, alloc.size)); }); } @@ -274,14 +274,14 @@ inline void test_random_async_allocations(rmm::device_async_resource_ref ref, allocations.end(), [&generator, &distribution, &ref, stream](allocation& alloc) { alloc.size = distribution(generator); - EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size)); + EXPECT_NO_THROW(alloc.ptr = ref.allocate_sync(alloc.size)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); EXPECT_TRUE(is_properly_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, &ref](allocation& alloc) { - EXPECT_NO_THROW(ref.deallocate(alloc.ptr, alloc.size)); + EXPECT_NO_THROW(ref.deallocate_sync(alloc.ptr, alloc.size)); if (not stream.is_default()) { stream.synchronize(); } }); } @@ -315,7 +315,7 @@ inline void test_mixed_random_allocation_free(resource_ref ref, std::size_t size = size_distribution(generator); active_allocations++; allocation_count++; - EXPECT_NO_THROW(allocations.emplace_back(ref.allocate(size), size)); + EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_sync(size), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); EXPECT_TRUE(is_properly_aligned(new_allocation.ptr)); @@ -324,7 +324,7 @@ inline void test_mixed_random_allocation_free(resource_ref ref, active_allocations--; allocation to_free = allocations[index]; allocations.erase(std::next(allocations.begin(), index)); - EXPECT_NO_THROW(ref.deallocate(to_free.ptr, to_free.size)); + EXPECT_NO_THROW(ref.deallocate_sync(to_free.ptr, to_free.size)); } } @@ -362,7 +362,7 @@ inline void test_mixed_random_async_allocation_free(rmm::device_async_resource_r std::size_t size = size_distribution(generator); active_allocations++; allocation_count++; - EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_async(size, stream), size)); + EXPECT_NO_THROW(allocations.emplace_back(ref.allocate(stream, size), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); EXPECT_TRUE(is_properly_aligned(new_allocation.ptr)); @@ -371,7 +371,7 @@ inline void test_mixed_random_async_allocation_free(rmm::device_async_resource_r active_allocations--; allocation to_free = allocations[index]; allocations.erase(std::next(allocations.begin(), index)); - EXPECT_NO_THROW(ref.deallocate_async(to_free.ptr, to_free.size, stream)); + EXPECT_NO_THROW(ref.deallocate(stream, to_free.ptr, to_free.size)); } } diff --git a/cpp/tests/mr/device/pool_mr_tests.cpp b/cpp/tests/mr/device/pool_mr_tests.cpp index 07168aaf7..96e6fc869 100644 --- a/cpp/tests/mr/device/pool_mr_tests.cpp +++ b/cpp/tests/mr/device/pool_mr_tests.cpp @@ -72,10 +72,10 @@ TEST(PoolTest, TwoLargeBuffers) auto two_large = []() { [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); pool_mr mr{rmm::mr::get_current_device_resource_ref(), rmm::percent_of_free_device_memory(50)}; - auto* ptr1 = mr.allocate(free / 4); - auto* ptr2 = mr.allocate(free / 4); - mr.deallocate(ptr1, free / 4); - mr.deallocate(ptr2, free / 4); + auto* ptr1 = mr.allocate_sync(free / 4); + auto* ptr2 = mr.allocate_sync(free / 4); + mr.deallocate_sync(ptr1, free / 4); + mr.deallocate_sync(ptr2, free / 4); }; EXPECT_NO_THROW(two_large()); } @@ -87,20 +87,20 @@ TEST(PoolTest, ForceGrowth) auto const max_size{6000}; limiting_mr limiter{&cuda, max_size}; pool_mr mr{&limiter, 0}; - EXPECT_NO_THROW(mr.allocate(1000)); - EXPECT_NO_THROW(mr.allocate(4000)); - EXPECT_NO_THROW(mr.allocate(500)); - EXPECT_THROW(mr.allocate(2000), rmm::out_of_memory); // too much + EXPECT_NO_THROW(mr.allocate_sync(1000)); + EXPECT_NO_THROW(mr.allocate_sync(4000)); + EXPECT_NO_THROW(mr.allocate_sync(500)); + EXPECT_THROW(mr.allocate_sync(2000), rmm::out_of_memory); // too much } { // with max pool size auto const max_size{6000}; limiting_mr limiter{&cuda, max_size}; pool_mr mr{&limiter, 0, 8192}; - EXPECT_NO_THROW(mr.allocate(1000)); - EXPECT_THROW(mr.allocate(4000), rmm::out_of_memory); // too much - EXPECT_NO_THROW(mr.allocate(500)); - EXPECT_NO_THROW(mr.allocate(2000)); // fits + EXPECT_NO_THROW(mr.allocate_sync(1000)); + EXPECT_THROW(mr.allocate_sync(4000), rmm::out_of_memory); // too much + EXPECT_NO_THROW(mr.allocate_sync(500)); + EXPECT_NO_THROW(mr.allocate_sync(2000)); // fits } } @@ -112,7 +112,7 @@ TEST(PoolTest, DeletedStream) EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); EXPECT_NO_THROW(rmm::device_buffer buff(size, cuda_stream_view{stream}, &mr)); EXPECT_EQ(cudaSuccess, cudaStreamDestroy(stream)); - EXPECT_NO_THROW(mr.allocate(size)); + EXPECT_NO_THROW(mr.allocate_sync(size)); } // Issue #527 @@ -120,7 +120,7 @@ TEST(PoolTest, InitialAndMaxPoolSizeEqual) { EXPECT_NO_THROW([]() { pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000192, 1000192); - mr.allocate(1000); + mr.allocate_sync(1000); }()); } @@ -129,14 +129,14 @@ TEST(PoolTest, NonAlignedPoolSize) EXPECT_THROW( []() { pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000031, 1000192); - mr.allocate(1000); + mr.allocate_sync(1000); }(), rmm::logic_error); EXPECT_THROW( []() { pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000192, 1000200); - mr.allocate(1000); + mr.allocate_sync(1000); }(), rmm::logic_error); } @@ -146,8 +146,8 @@ TEST(PoolTest, UpstreamDoesntSupportMemInfo) cuda_mr cuda; pool_mr mr1(&cuda, 0); pool_mr mr2(&mr1, 0); - auto* ptr = mr2.allocate(1024); - mr2.deallocate(ptr, 1024); + auto* ptr = mr2.allocate_sync(1024); + mr2.deallocate_sync(ptr, 1024); } TEST(PoolTest, MultidevicePool) @@ -193,14 +193,18 @@ namespace test_properties { class fake_async_resource { public: // To model `async_resource` + +#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE static void* allocate(std::size_t, std::size_t) { return nullptr; } - static void deallocate(void* ptr, std::size_t, std::size_t) {} + static void deallocate(void* ptr, std::size_t, std::size_t) noexcept {} static void* allocate_async(std::size_t, std::size_t, cuda::stream_ref) { return nullptr; } - static void deallocate_async(void* ptr, std::size_t, std::size_t, cuda::stream_ref) {} + static void deallocate_async(void* ptr, std::size_t, std::size_t, cuda::stream_ref) noexcept {} +#endif // RMM_ENABLE_LEGACY_MR_INTERFACE + void* allocate_sync(std::size_t, std::size_t) { return nullptr; } - void deallocate_sync(void* ptr, std::size_t, std::size_t) {} + void deallocate_sync(void* ptr, std::size_t, std::size_t) noexcept {} void* allocate(cuda_stream_view, std::size_t, std::size_t) { return nullptr; } - void deallocate(cuda_stream_view, void*, std::size_t, std::size_t) { return; } + void deallocate(cuda_stream_view, void*, std::size_t, std::size_t) noexcept { return; } bool operator==(const fake_async_resource& other) const { return true; } bool operator!=(const fake_async_resource& other) const { return false; } diff --git a/cpp/tests/mr/device/statistics_mr_tests.cpp b/cpp/tests/mr/device/statistics_mr_tests.cpp index 27b84c1fd..4667cb928 100644 --- a/cpp/tests/mr/device/statistics_mr_tests.cpp +++ b/cpp/tests/mr/device/statistics_mr_tests.cpp @@ -63,10 +63,10 @@ TEST(StatisticsTest, AllFreed) allocations.reserve(num_allocations); for (int i = 0; i < num_allocations; ++i) { - allocations.push_back(mr.allocate(ten_MiB)); + allocations.push_back(mr.allocate_sync(ten_MiB)); } for (auto* alloc : allocations) { - mr.deallocate(alloc, ten_MiB); + mr.deallocate_sync(alloc, ten_MiB); } // Counter values should be 0 @@ -80,11 +80,11 @@ TEST(StatisticsTest, PeakAllocations) std::vector allocations; for (std::size_t i = 0; i < num_allocations; ++i) { - allocations.push_back(mr.allocate(ten_MiB)); + allocations.push_back(mr.allocate_sync(ten_MiB)); } // Delete every other allocation for (auto&& it = allocations.begin(); it != allocations.end(); ++it) { - mr.deallocate(*it, ten_MiB); + mr.deallocate_sync(*it, ten_MiB); it = allocations.erase(it); } @@ -105,12 +105,12 @@ TEST(StatisticsTest, PeakAllocations) // Add 10 more to increase the peak for (std::size_t i = 0; i < num_allocations; ++i) { - allocations.push_back(mr.allocate(ten_MiB)); + allocations.push_back(mr.allocate_sync(ten_MiB)); } // Deallocate all remaining for (auto& allocation : allocations) { - mr.deallocate(allocation, ten_MiB); + mr.deallocate_sync(allocation, ten_MiB); } allocations.clear(); @@ -188,7 +188,7 @@ TEST(StatisticsTest, NegativeInnerTracking) statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; for (std::size_t i = 0; i < num_allocations; ++i) { - allocations.push_back(mr.allocate(ten_MiB)); + allocations.push_back(mr.allocate_sync(ten_MiB)); } EXPECT_EQ(mr.get_allocations_counter().value, 10); @@ -197,7 +197,7 @@ TEST(StatisticsTest, NegativeInnerTracking) // Add more allocations for (std::size_t i = 0; i < num_more_allocations; ++i) { - allocations.push_back(inner_mr.allocate(ten_MiB)); + allocations.push_back(inner_mr.allocate_sync(ten_MiB)); } // Check the outstanding allocations @@ -213,7 +213,7 @@ TEST(StatisticsTest, NegativeInnerTracking) // Deallocate all allocations using the inner_mr for (auto& allocation : allocations) { - inner_mr.deallocate(allocation, ten_MiB); + inner_mr.deallocate_sync(allocation, ten_MiB); } allocations.clear(); @@ -242,7 +242,7 @@ TEST(StatisticsTest, NegativeInnerTracking) TEST(StatisticsTest, Nested) { statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()}; - void* a0 = mr.allocate(ten_MiB); + void* a0 = mr.allocate_sync(ten_MiB); EXPECT_EQ(mr.get_bytes_counter().value, ten_MiB); EXPECT_EQ(mr.get_allocations_counter().value, 1); { @@ -252,12 +252,12 @@ TEST(StatisticsTest, Nested) } EXPECT_EQ(mr.get_bytes_counter().value, 0); EXPECT_EQ(mr.get_allocations_counter().value, 0); - void* a1 = mr.allocate(ten_MiB); + void* a1 = mr.allocate_sync(ten_MiB); mr.push_counters(); EXPECT_EQ(mr.get_bytes_counter().value, 0); EXPECT_EQ(mr.get_allocations_counter().value, 0); - void* a2 = mr.allocate(ten_MiB); - mr.deallocate(a2, ten_MiB); + void* a2 = mr.allocate_sync(ten_MiB); + mr.deallocate_sync(a2, ten_MiB); EXPECT_EQ(mr.get_bytes_counter().value, 0); EXPECT_EQ(mr.get_bytes_counter().peak, ten_MiB); EXPECT_EQ(mr.get_allocations_counter().value, 0); @@ -269,7 +269,7 @@ TEST(StatisticsTest, Nested) EXPECT_EQ(allocs.value, 0); EXPECT_EQ(allocs.peak, 1); } - mr.deallocate(a0, ten_MiB); + mr.deallocate_sync(a0, ten_MiB); { auto [bytes, allocs] = mr.pop_counters(); EXPECT_EQ(bytes.value, 0); @@ -277,7 +277,7 @@ TEST(StatisticsTest, Nested) EXPECT_EQ(allocs.value, 0); EXPECT_EQ(allocs.peak, 2); } - mr.deallocate(a1, ten_MiB); + mr.deallocate_sync(a1, ten_MiB); EXPECT_THROW(mr.pop_counters(), std::out_of_range); } diff --git a/cpp/tests/mr/device/system_mr_tests.cu b/cpp/tests/mr/device/system_mr_tests.cu index cb13e3da6..656ef0cae 100644 --- a/cpp/tests/mr/device/system_mr_tests.cu +++ b/cpp/tests/mr/device/system_mr_tests.cu @@ -92,22 +92,22 @@ TEST_F(SystemMRTest, FirstTouchOnCPU) { auto const free = rmm::available_device_memory().first; system_mr mr; - void* ptr = mr.allocate(size_mb); + void* ptr = mr.allocate_sync(size_mb); touch_on_cpu(ptr, size_mb); auto const free2 = rmm::available_device_memory().first; EXPECT_EQ(free, free2); - mr.deallocate(ptr, size_mb); + mr.deallocate_sync(ptr, size_mb); } TEST_F(SystemMRTest, FirstTouchOnGPU) { auto const free = rmm::available_device_memory().first; system_mr mr; - void* ptr = mr.allocate(size_mb); + void* ptr = mr.allocate_sync(size_mb); touch_on_gpu(ptr, size_mb); auto const free2 = rmm::available_device_memory().first; EXPECT_LT(free2, free); - mr.deallocate(ptr, size_mb); + mr.deallocate_sync(ptr, size_mb); } TEST_F(SystemMRTest, HeadroomMRReserveAllFreeMemory) @@ -115,9 +115,9 @@ TEST_F(SystemMRTest, HeadroomMRReserveAllFreeMemory) auto const free = rmm::available_device_memory().first; // All the free GPU memory is set as headroom, so allocation is only on the CPU. headroom_mr mr{free + size_gb}; - void* ptr = mr.allocate(size_mb); + void* ptr = mr.allocate_sync(size_mb); touch_on_cpu(ptr, size_mb); - mr.deallocate(ptr, size_mb); + mr.deallocate_sync(ptr, size_mb); } TEST_F(SystemMRTest, HeadroomMRDifferentParametersUnequal) diff --git a/cpp/tests/mr/device/tracking_mr_tests.cpp b/cpp/tests/mr/device/tracking_mr_tests.cpp index 28e8b2f00..6f08836f6 100644 --- a/cpp/tests/mr/device/tracking_mr_tests.cpp +++ b/cpp/tests/mr/device/tracking_mr_tests.cpp @@ -57,10 +57,10 @@ TEST(TrackingTest, AllFreed) std::vector allocations; allocations.reserve(num_allocations); for (int i = 0; i < num_allocations; ++i) { - allocations.push_back(mr.allocate(ten_MiB)); + allocations.push_back(mr.allocate_sync(ten_MiB)); } for (auto* alloc : allocations) { - mr.deallocate(alloc, ten_MiB); + mr.deallocate_sync(alloc, ten_MiB); } EXPECT_EQ(mr.get_outstanding_allocations().size(), 0); EXPECT_EQ(mr.get_allocated_bytes(), 0); @@ -72,10 +72,10 @@ TEST(TrackingTest, AllocationsLeftWithStacks) std::vector allocations; allocations.reserve(num_allocations); for (int i = 0; i < num_allocations; ++i) { - allocations.push_back(mr.allocate(ten_MiB)); + allocations.push_back(mr.allocate_sync(ten_MiB)); } for (int i = 0; i < num_allocations; i += 2) { - mr.deallocate(allocations[i], ten_MiB); + mr.deallocate_sync(allocations[i], ten_MiB); } EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations / 2); EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations / 2)); @@ -90,11 +90,11 @@ TEST(TrackingTest, AllocationsLeftWithoutStacks) std::vector allocations; allocations.reserve(num_allocations); for (int i = 0; i < num_allocations; ++i) { - allocations.push_back(mr.allocate(ten_MiB)); + allocations.push_back(mr.allocate_sync(ten_MiB)); } for (int i = 0; i < num_allocations; i += 2) { - mr.deallocate(allocations[i], ten_MiB); + mr.deallocate_sync(allocations[i], ten_MiB); } EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations / 2); EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations / 2)); @@ -156,7 +156,7 @@ TEST(TrackingTest, NegativeInnerTracking) tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; for (std::size_t i = 0; i < num_allocations; ++i) { - allocations.push_back(mr.allocate(ten_MiB)); + allocations.push_back(mr.allocate_sync(ten_MiB)); } EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations); @@ -165,7 +165,7 @@ TEST(TrackingTest, NegativeInnerTracking) // Add more allocations for (std::size_t i = 0; i < num_more_allocations; ++i) { - allocations.push_back(inner_mr.allocate(ten_MiB)); + allocations.push_back(inner_mr.allocate_sync(ten_MiB)); } // Check the outstanding allocations @@ -174,7 +174,7 @@ TEST(TrackingTest, NegativeInnerTracking) // Deallocate all allocations using the inner_mr for (auto& allocation : allocations) { - inner_mr.deallocate(allocation, ten_MiB); + inner_mr.deallocate_sync(allocation, ten_MiB); } allocations.clear(); @@ -188,12 +188,12 @@ TEST(TrackingTest, DeallocWrongBytes) tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; for (std::size_t i = 0; i < num_allocations; ++i) { - allocations.push_back(mr.allocate(ten_MiB)); + allocations.push_back(mr.allocate_sync(ten_MiB)); } // When deallocating, pass the wrong bytes to deallocate for (auto& allocation : allocations) { - mr.deallocate(allocation, ten_MiB / 2); + mr.deallocate_sync(allocation, ten_MiB / 2); } allocations.clear(); @@ -214,7 +214,7 @@ TEST(TrackingTest, LogOutstandingAllocations) tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; for (std::size_t i = 0; i < num_allocations; ++i) { - allocations.push_back(mr.allocate(ten_MiB)); + allocations.push_back(mr.allocate_sync(ten_MiB)); } rmm::default_logger().set_level(rapids_logger::level_enum::debug); @@ -225,7 +225,7 @@ TEST(TrackingTest, LogOutstandingAllocations) #endif for (auto& allocation : allocations) { - mr.deallocate(allocation, ten_MiB); + mr.deallocate_sync(allocation, ten_MiB); } rmm::default_logger().set_level(old_level); diff --git a/cpp/tests/mr/host/mr_ref_tests.cpp b/cpp/tests/mr/host/mr_ref_tests.cpp index ef9af2290..aa34b8497 100644 --- a/cpp/tests/mr/host/mr_ref_tests.cpp +++ b/cpp/tests/mr/host/mr_ref_tests.cpp @@ -93,54 +93,54 @@ TYPED_TEST(MRRefTest, SelfEquality) { EXPECT_TRUE(this->ref == this->ref); } TYPED_TEST(MRRefTest, AllocateZeroBytes) { void* ptr{nullptr}; - EXPECT_NO_THROW(ptr = this->ref.allocate(0)); - EXPECT_NO_THROW(this->ref.deallocate(ptr, 0)); + EXPECT_NO_THROW(ptr = this->ref.allocate_sync(0)); + EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, 0)); } TYPED_TEST(MRRefTest, AllocateWord) { void* ptr{nullptr}; - EXPECT_NO_THROW(ptr = this->ref.allocate(size_word)); + EXPECT_NO_THROW(ptr = this->ref.allocate_sync(size_word)); EXPECT_NE(nullptr, ptr); EXPECT_TRUE(is_aligned(ptr)); EXPECT_FALSE(is_device_memory(ptr)); - EXPECT_NO_THROW(this->ref.deallocate(ptr, size_word)); + EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, size_word)); } TYPED_TEST(MRRefTest, AllocateKB) { void* ptr{nullptr}; - EXPECT_NO_THROW(ptr = this->ref.allocate(size_kb)); + EXPECT_NO_THROW(ptr = this->ref.allocate_sync(size_kb)); EXPECT_NE(nullptr, ptr); EXPECT_TRUE(is_aligned(ptr)); EXPECT_FALSE(is_device_memory(ptr)); - EXPECT_NO_THROW(this->ref.deallocate(ptr, size_kb)); + EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, size_kb)); } TYPED_TEST(MRRefTest, AllocateMB) { void* ptr{nullptr}; - EXPECT_NO_THROW(ptr = this->ref.allocate(size_mb)); + EXPECT_NO_THROW(ptr = this->ref.allocate_sync(size_mb)); EXPECT_NE(nullptr, ptr); EXPECT_TRUE(is_aligned(ptr)); EXPECT_FALSE(is_device_memory(ptr)); - EXPECT_NO_THROW(this->ref.deallocate(ptr, size_mb)); + EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, size_mb)); } TYPED_TEST(MRRefTest, AllocateGB) { void* ptr{nullptr}; - EXPECT_NO_THROW(ptr = this->ref.allocate(size_gb)); + EXPECT_NO_THROW(ptr = this->ref.allocate_sync(size_gb)); EXPECT_NE(nullptr, ptr); EXPECT_TRUE(is_aligned(ptr)); EXPECT_FALSE(is_device_memory(ptr)); - EXPECT_NO_THROW(this->ref.deallocate(ptr, size_gb)); + EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, size_gb)); } TYPED_TEST(MRRefTest, AllocateTooMuch) { void* ptr{nullptr}; - EXPECT_THROW(ptr = this->ref.allocate(size_pb), std::bad_alloc); + EXPECT_THROW(ptr = this->ref.allocate_sync(size_pb), std::bad_alloc); EXPECT_EQ(nullptr, ptr); } @@ -158,13 +158,13 @@ TYPED_TEST(MRRefTest, RandomAllocations) std::for_each( allocations.begin(), allocations.end(), [&generator, &distribution, this](allocation& alloc) { alloc.size = distribution(generator); - EXPECT_NO_THROW(alloc.ptr = this->ref.allocate(alloc.size)); + EXPECT_NO_THROW(alloc.ptr = this->ref.allocate_sync(alloc.size)); EXPECT_NE(nullptr, alloc.ptr); EXPECT_TRUE(is_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [this](allocation& alloc) { - EXPECT_NO_THROW(this->ref.deallocate(alloc.ptr, alloc.size)); + EXPECT_NO_THROW(this->ref.deallocate_sync(alloc.ptr, alloc.size)); }); } @@ -186,7 +186,8 @@ TYPED_TEST(MRRefTest, MixedRandomAllocationFree) constexpr std::size_t num_allocations{100}; for (std::size_t i = 0; i < num_allocations; ++i) { std::size_t allocation_size = size_distribution(generator); - EXPECT_NO_THROW(allocations.emplace_back(this->ref.allocate(allocation_size), allocation_size)); + EXPECT_NO_THROW( + allocations.emplace_back(this->ref.allocate_sync(allocation_size), allocation_size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); EXPECT_TRUE(is_aligned(new_allocation.ptr)); @@ -195,13 +196,13 @@ TYPED_TEST(MRRefTest, MixedRandomAllocationFree) if (free_front) { auto front = allocations.front(); - EXPECT_NO_THROW(this->ref.deallocate(front.ptr, front.size)); + EXPECT_NO_THROW(this->ref.deallocate_sync(front.ptr, front.size)); allocations.pop_front(); } } // free any remaining allocations for (auto alloc : allocations) { - EXPECT_NO_THROW(this->ref.deallocate(alloc.ptr, alloc.size)); + EXPECT_NO_THROW(this->ref.deallocate_sync(alloc.ptr, alloc.size)); allocations.pop_front(); } } @@ -222,9 +223,9 @@ TYPED_TEST(MRRefTest, AlignmentTest) alignment *= TestedAlignmentMultiplier) { auto allocation_size = size_distribution(generator); void* ptr{nullptr}; - EXPECT_NO_THROW(ptr = this->ref.allocate(allocation_size, alignment)); + EXPECT_NO_THROW(ptr = this->ref.allocate_sync(allocation_size, alignment)); EXPECT_TRUE(is_aligned(ptr, alignment)); - EXPECT_NO_THROW(this->ref.deallocate(ptr, allocation_size, alignment)); + EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, allocation_size, alignment)); } } } @@ -245,9 +246,9 @@ TYPED_TEST(MRRefTest, UnsupportedAlignmentTest) // alignment of `alignof(std::max_align_t)` auto const bad_alignment = alignment + 1; - EXPECT_NO_THROW(ptr = this->ref.allocate(allocation_size, bad_alignment)); + EXPECT_NO_THROW(ptr = this->ref.allocate_sync(allocation_size, bad_alignment)); EXPECT_TRUE(is_aligned(ptr, alignof(std::max_align_t))); - EXPECT_NO_THROW(this->ref.deallocate(ptr, allocation_size, bad_alignment)); + EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, allocation_size, bad_alignment)); #endif } } @@ -258,8 +259,8 @@ TEST(PinnedResource, isPinned) rmm::mr::pinned_memory_resource mr; rmm::host_resource_ref ref{mr}; void* ptr{nullptr}; - EXPECT_NO_THROW(ptr = ref.allocate(100)); + EXPECT_NO_THROW(ptr = ref.allocate_sync(100)); EXPECT_TRUE(is_pinned_memory(ptr)); - EXPECT_NO_THROW(ref.deallocate(ptr, 100)); + EXPECT_NO_THROW(ref.deallocate_sync(ptr, 100)); } } // namespace rmm::test diff --git a/cpp/tests/mr/host/pinned_mr_tests.cpp b/cpp/tests/mr/host/pinned_mr_tests.cpp index 32d6a76e3..e193f6f66 100644 --- a/cpp/tests/mr/host/pinned_mr_tests.cpp +++ b/cpp/tests/mr/host/pinned_mr_tests.cpp @@ -37,10 +37,10 @@ TEST(PinnedMemoryResource, AllocateBytesOverload) rmm::mr::pinned_memory_resource mr; void* ptr{nullptr}; - EXPECT_NO_THROW(ptr = mr.allocate(128)); + EXPECT_NO_THROW(ptr = mr.allocate_sync(128)); EXPECT_NE(nullptr, ptr); EXPECT_TRUE(is_pinned_memory(ptr)); - EXPECT_NO_THROW(mr.deallocate(ptr, 128)); + EXPECT_NO_THROW(mr.deallocate_sync(ptr, 128)); } } // namespace rmm::test diff --git a/cpp/tests/mr/host/pinned_pool_mr_tests.cpp b/cpp/tests/mr/host/pinned_pool_mr_tests.cpp index 0a28ead7c..e1645a5de 100644 --- a/cpp/tests/mr/host/pinned_pool_mr_tests.cpp +++ b/cpp/tests/mr/host/pinned_pool_mr_tests.cpp @@ -65,7 +65,7 @@ TEST(PinnedPoolTest, InitialAndMaxPoolSizeEqual) EXPECT_NO_THROW([]() { rmm::mr::pinned_memory_resource pinned_mr{}; pool_mr mr(pinned_mr, 1000192, 1000192); - mr.allocate(1000); + mr.allocate_sync(1000); }()); } @@ -75,7 +75,7 @@ TEST(PinnedPoolTest, NonAlignedPoolSize) []() { rmm::mr::pinned_memory_resource pinned_mr{}; pool_mr mr(pinned_mr, 1000031, 1000192); - mr.allocate(1000); + mr.allocate_sync(1000); }(), rmm::logic_error); @@ -83,7 +83,7 @@ TEST(PinnedPoolTest, NonAlignedPoolSize) []() { rmm::mr::pinned_memory_resource pinned_mr{}; pool_mr mr(pinned_mr, 1000192, 1000200); - mr.allocate(1000); + mr.allocate_sync(1000); }(), rmm::logic_error); } @@ -94,9 +94,9 @@ TEST(PinnedPoolTest, ThrowOutOfMemory) const auto initial{0}; const auto maximum{1024}; pool_mr mr{pinned_mr, initial, maximum}; - mr.allocate(1024); + mr.allocate_sync(1024); - EXPECT_THROW(mr.allocate(1024), rmm::out_of_memory); + EXPECT_THROW(mr.allocate_sync(1024), rmm::out_of_memory); } } // namespace diff --git a/python/rmm/rmm/librmm/_torch_allocator.cpp b/python/rmm/rmm/librmm/_torch_allocator.cpp index bfe94c2d0..bd9991d9a 100644 --- a/python/rmm/rmm/librmm/_torch_allocator.cpp +++ b/python/rmm/rmm/librmm/_torch_allocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,8 +40,8 @@ extern "C" void* allocate(std::size_t size, int device, void* stream) rmm::cuda_device_id const device_id{device}; rmm::cuda_set_device_raii with_device{device_id}; auto mr = rmm::mr::get_per_device_resource_ref(device_id); - return mr.allocate_async( - size, rmm::CUDA_ALLOCATION_ALIGNMENT, rmm::cuda_stream_view{static_cast(stream)}); + return mr.allocate( + rmm::cuda_stream_view{static_cast(stream)}, size, rmm::CUDA_ALLOCATION_ALIGNMENT); } /** @@ -57,8 +57,8 @@ extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream rmm::cuda_device_id const device_id{device}; rmm::cuda_set_device_raii with_device{device_id}; auto mr = rmm::mr::get_per_device_resource_ref(device_id); - mr.deallocate_async(ptr, - size, - rmm::CUDA_ALLOCATION_ALIGNMENT, - rmm::cuda_stream_view{static_cast(stream)}); + mr.deallocate(rmm::cuda_stream_view{static_cast(stream)}, + ptr, + size, + rmm::CUDA_ALLOCATION_ALIGNMENT); } diff --git a/python/rmm/rmm/librmm/memory_resource.pxd b/python/rmm/rmm/librmm/memory_resource.pxd index 75e59e50d..67c5e8484 100644 --- a/python/rmm/rmm/librmm/memory_resource.pxd +++ b/python/rmm/rmm/librmm/memory_resource.pxd @@ -32,6 +32,7 @@ from rmm.librmm.memory_resource cimport device_memory_resource cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ namespace "rmm::mr" nogil: cdef cppclass device_memory_resource: + # Legacy functions void* allocate(size_t bytes) except + void* allocate(size_t bytes, cuda_stream_view stream) except + void deallocate(void* ptr, size_t bytes) noexcept @@ -40,6 +41,21 @@ cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ size_t bytes, cuda_stream_view stream ) noexcept + # End legacy functions + + void* allocate_sync(size_t bytes, size_t alignment) except + + void deallocate_sync(void* ptr, size_t bytes, size_t alignment) noexcept + void* allocate( + cuda_stream_view stream, + size_t bytes, + size_t alignment=256 + ) except + + void deallocate( + cuda_stream_view stream, + void* ptr, + size_t bytes, + size_t alignment=256 + ) noexcept cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: size_t percent_of_free_device_memory(int percent) except + diff --git a/python/rmm/rmm/pylibrmm/memory_resource/_memory_resource.pyx b/python/rmm/rmm/pylibrmm/memory_resource/_memory_resource.pyx index 0e32f017a..8921cf266 100644 --- a/python/rmm/rmm/pylibrmm/memory_resource/_memory_resource.pyx +++ b/python/rmm/rmm/pylibrmm/memory_resource/_memory_resource.pyx @@ -106,7 +106,7 @@ cdef class DeviceMemoryResource: """ cdef uintptr_t ptr with nogil: - ptr = self.c_obj.get().allocate(nbytes, stream.view()) + ptr = self.c_obj.get().allocate(stream.view(), nbytes) return ptr def deallocate(self, uintptr_t ptr, size_t nbytes, Stream stream=DEFAULT_STREAM): @@ -122,7 +122,7 @@ cdef class DeviceMemoryResource: Optional stream for the deallocation """ with nogil: - self.c_obj.get().deallocate((ptr), nbytes, stream.view()) + self.c_obj.get().deallocate(stream.view(), (ptr), nbytes) def __dealloc__(self): # See the __dealloc__ method on DeviceBuffer for discussion of why we must