diff --git a/README.md b/README.md
index 6c00e6c9a..69f3783ea 100644
--- a/README.md
+++ b/README.md
@@ -482,7 +482,7 @@ rmm::mr::polymorphic_allocator<int> stream_alloc;
 // Constructs an adaptor that forwards all (de)allocations to `stream_alloc` on `stream`.
 auto adapted = rmm::mr::stream_allocator_adaptor(stream_alloc, stream);
 
-// Allocates 100 bytes using `stream_alloc` on `stream`
+// Allocates storage for 100 ints using `stream_alloc` on `stream`
 auto p = adapted.allocate(100);
 ...
 // Deallocates using `stream_alloc` on `stream`
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index fb3f6455d..0f31cfe41 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -44,6 +44,7 @@ option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF)
 # This is mostly so that dependent libraries are configured in shared mode for downstream dependents
 # of RMM that get their common dependencies transitively.
 option(BUILD_SHARED_LIBS "Build RMM shared libraries" ON)
+option(RMM_ENABLE_LEGACY_MR_INTERFACE "Enable legacy memory resource interface" ON)
 set(RMM_LOGGING_LEVEL
     "INFO"
     CACHE STRING "Choose the logging level.")
@@ -54,6 +55,7 @@ message(VERBOSE "RMM: Build with NVTX support: ${RMM_NVTX}")
 # Set logging level. Must go before including gtests and benchmarks. Set the possible values of
 # build type for cmake-gui.
 message(STATUS "RMM: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'")
+message(STATUS "RMM: Legacy MR interface enabled: ${RMM_ENABLE_LEGACY_MR_INTERFACE}")
 
 # cudart can be linked statically or dynamically
 option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
@@ -132,6 +134,11 @@ if(RMM_NVTX)
   target_compile_definitions(rmm PUBLIC RMM_NVTX)
 endif()
 
+# Control legacy MR interface visibility
+if(RMM_ENABLE_LEGACY_MR_INTERFACE)
+  target_compile_definitions(rmm PUBLIC RMM_ENABLE_LEGACY_MR_INTERFACE)
+endif()
+
 # ##################################################################################################
 # * tests and benchmarks ---------------------------------------------------------------------------
 
diff --git a/cpp/benchmarks/async_priming/async_priming_bench.cpp b/cpp/benchmarks/async_priming/async_priming_bench.cpp
index e391c77c6..c2fc27bfb 100644
--- a/cpp/benchmarks/async_priming/async_priming_bench.cpp
+++ b/cpp/benchmarks/async_priming/async_priming_bench.cpp
@@ -83,7 +83,7 @@ void BM_AsyncPrimingImpact(benchmark::State& state, MRFactoryFunc factory)
 
     // Deallocate all
     for (auto* ptr : allocations) {
-      mr->deallocate(ptr, allocation_size);
+      mr->deallocate_sync(ptr, allocation_size);
     }
     allocations.clear();
 
@@ -118,7 +118,7 @@ void BM_AsyncPrimingImpact(benchmark::State& state, MRFactoryFunc factory)
 
     // Clean up for next iteration
     for (auto* ptr : allocations) {
-      mr->deallocate(ptr, allocation_size);
+      mr->deallocate_sync(ptr, allocation_size);
     }
     allocations.clear();
   }
diff --git a/cpp/benchmarks/random_allocations/random_allocations.cpp b/cpp/benchmarks/random_allocations/random_allocations.cpp
index 2971f7e40..d44966b22 100644
--- a/cpp/benchmarks/random_allocations/random_allocations.cpp
+++ b/cpp/benchmarks/random_allocations/random_allocations.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -94,7 +94,7 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr,
     void* ptr = nullptr;
     if (do_alloc) {  // try to allocate
       try {
-        ptr = mr.allocate(size, stream);
+        ptr = mr.allocate(stream, size);
       } catch (rmm::bad_alloc const&) {
         do_alloc = false;
 #if VERBOSE
@@ -118,7 +118,7 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr,
         std::size_t index = index_distribution(generator) % active_allocations;
         active_allocations--;
         allocation to_free = remove_at(allocations, index);
-        mr.deallocate(to_free.ptr, to_free.size, stream);
+        mr.deallocate(stream, to_free.ptr, to_free.size);
         allocation_size -= to_free.size;
 
 #if VERBOSE
diff --git a/cpp/include/rmm/aligned.hpp b/cpp/include/rmm/aligned.hpp
index 57e91c217..f58a66ebb 100644
--- a/cpp/include/rmm/aligned.hpp
+++ b/cpp/include/rmm/aligned.hpp
@@ -20,7 +20,6 @@
 
 #include <cassert>
 #include <cstddef>
-#include <cstdint>
 
 namespace RMM_EXPORT rmm {
 
diff --git a/cpp/include/rmm/detail/cccl_adaptors.hpp b/cpp/include/rmm/detail/cccl_adaptors.hpp
index 77614848d..6803c4cbd 100644
--- a/cpp/include/rmm/detail/cccl_adaptors.hpp
+++ b/cpp/include/rmm/detail/cccl_adaptors.hpp
@@ -34,6 +34,7 @@ class cccl_resource_ref : public ResourceType {
 
   cccl_resource_ref(base&& other) : base(std::move(other)) {}
 
+#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE
   void* allocate(std::size_t bytes) { return this->allocate_sync(bytes); }
 
   void* allocate(std::size_t bytes, std::size_t alignment)
@@ -50,8 +51,8 @@ class cccl_resource_ref : public ResourceType {
   {
     return this->deallocate_sync(ptr, bytes, alignment);
   }
+#endif  // RMM_ENABLE_LEGACY_MR_INTERFACE
 
-#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
   void* allocate_sync(std::size_t bytes) { return base::allocate_sync(bytes); }
 
   void* allocate_sync(std::size_t bytes, std::size_t alignment)
@@ -68,24 +69,6 @@ class cccl_resource_ref : public ResourceType {
   {
     return base::deallocate_sync(ptr, bytes, alignment);
   }
-#else
-  void* allocate_sync(std::size_t bytes) { return base::allocate(bytes); }
-
-  void* allocate_sync(std::size_t bytes, std::size_t alignment)
-  {
-    return base::allocate(bytes, alignment);
-  }
-
-  void deallocate_sync(void* ptr, std::size_t bytes) noexcept
-  {
-    return base::deallocate(ptr, bytes);
-  }
-
-  void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) noexcept
-  {
-    return base::deallocate(ptr, bytes, alignment);
-  }
-#endif
 };
 
 template <typename ResourceType>
@@ -98,8 +81,7 @@ class cccl_async_resource_ref : public ResourceType {
   cccl_async_resource_ref(base const& other) : base(other) {}
   cccl_async_resource_ref(base&& other) : base(std::move(other)) {}
 
-  // BEGINNING OF LEGACY MR METHODS
-
+#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE
   void* allocate(std::size_t bytes) { return this->allocate_sync(bytes); }
 
   void* allocate(std::size_t bytes, std::size_t alignment)
@@ -140,9 +122,8 @@ class cccl_async_resource_ref : public ResourceType {
     return this->deallocate(stream, ptr, bytes, alignment);
   }
 
-  // END OF LEGACY MR METHODS
+#endif  // RMM_ENABLE_LEGACY_MR_INTERFACE
 
-#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
   void* allocate_sync(std::size_t bytes) { return base::allocate_sync(bytes); }
 
   void* allocate_sync(std::size_t bytes, std::size_t alignment)
@@ -182,47 +163,6 @@ class cccl_async_resource_ref : public ResourceType {
   {
     return base::deallocate(stream, ptr, bytes, alignment);
   }
-#else
-  void* allocate_sync(std::size_t bytes) { return base::allocate(bytes); }
-
-  void* allocate_sync(std::size_t bytes, std::size_t alignment)
-  {
-    return base::allocate(bytes, alignment);
-  }
-
-  void deallocate_sync(void* ptr, std::size_t bytes) noexcept
-  {
-    return base::deallocate(ptr, bytes);
-  }
-
-  void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) noexcept
-  {
-    return base::deallocate(ptr, bytes, alignment);
-  }
-
-  void* allocate(cuda_stream_view stream, std::size_t bytes)
-  {
-    return base::allocate_async(bytes, stream);
-  }
-
-  void* allocate(cuda_stream_view stream, std::size_t bytes, std::size_t alignment)
-  {
-    return base::allocate_async(bytes, alignment, stream);
-  }
-
-  void deallocate(cuda_stream_view stream, void* ptr, std::size_t bytes) noexcept
-  {
-    return base::deallocate_async(ptr, bytes, stream);
-  }
-
-  void deallocate(cuda_stream_view stream,
-                  void* ptr,
-                  std::size_t bytes,
-                  std::size_t alignment) noexcept
-  {
-    return base::deallocate_async(ptr, bytes, alignment, stream);
-  }
-#endif
 };
 
 }  // namespace detail
diff --git a/cpp/include/rmm/detail/cuda_memory_resource.hpp b/cpp/include/rmm/detail/cuda_memory_resource.hpp
index fedce42fe..a4319742b 100644
--- a/cpp/include/rmm/detail/cuda_memory_resource.hpp
+++ b/cpp/include/rmm/detail/cuda_memory_resource.hpp
@@ -18,7 +18,7 @@
 #ifndef LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE
 #error \
   "RMM requires LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE to be defined. Please add -DLIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE to the compiler flags (this is done automatically when using RMM via CMake)."
-#endif
+#endif  // LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE
 
 #include <rmm/detail/export.hpp>
 
@@ -28,7 +28,6 @@ namespace RMM_NAMESPACE {
 namespace detail {
 namespace polyfill {
 
-#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
 template <class Resource>
 inline constexpr bool resource = cuda::mr::synchronous_resource<Resource>;
 template <class Resource, class... Properties>
@@ -37,16 +36,6 @@ template <class Resource>
 inline constexpr bool async_resource = cuda::mr::resource<Resource>;
 template <class Resource, class... Properties>
 inline constexpr bool async_resource_with = cuda::mr::resource_with<Resource, Properties...>;
-#else   // ^^^ CCCL >= 3.1 ^^^ / vvv CCCL < 3.1 vvv
-template <class Resource>
-inline constexpr bool resource = cuda::mr::resource<Resource>;
-template <class Resource, class... Properties>
-inline constexpr bool resource_with = cuda::mr::resource_with<Resource, Properties...>;
-template <class Resource>
-inline constexpr bool async_resource = cuda::mr::async_resource<Resource>;
-template <class Resource, class... Properties>
-inline constexpr bool async_resource_with = cuda::mr::async_resource_with<Resource, Properties...>;
-#endif  // CCCL < 3.1
 
 }  // namespace polyfill
 }  // namespace detail
diff --git a/cpp/include/rmm/mr/device/aligned_resource_adaptor.hpp b/cpp/include/rmm/mr/device/aligned_resource_adaptor.hpp
index 091e17804..5fbea1c7e 100644
--- a/cpp/include/rmm/mr/device/aligned_resource_adaptor.hpp
+++ b/cpp/include/rmm/mr/device/aligned_resource_adaptor.hpp
@@ -143,10 +143,10 @@ class aligned_resource_adaptor final : public device_memory_resource {
   void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
   {
     if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
-      return get_upstream_resource().allocate_async(bytes, 1, stream);
+      return get_upstream_resource().allocate(stream, bytes, 1);
     }
     auto const size = upstream_allocation_size(bytes);
-    void* pointer   = get_upstream_resource().allocate_async(size, 1, stream);
+    void* pointer   = get_upstream_resource().allocate(stream, size, 1);
     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
     auto const address         = reinterpret_cast<std::size_t>(pointer);
     auto const aligned_address = rmm::align_up(address, alignment_);
@@ -169,7 +169,7 @@ class aligned_resource_adaptor final : public device_memory_resource {
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
   {
     if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
-      get_upstream_resource().deallocate_async(ptr, bytes, 1, stream);
+      get_upstream_resource().deallocate(stream, ptr, bytes, 1);
     } else {
       {
         lock_guard lock(mtx_);
@@ -179,7 +179,7 @@ class aligned_resource_adaptor final : public device_memory_resource {
           pointers_.erase(iter);
         }
       }
-      get_upstream_resource().deallocate_async(ptr, upstream_allocation_size(bytes), 1, stream);
+      get_upstream_resource().deallocate(stream, ptr, upstream_allocation_size(bytes), 1);
     }
   }
 
diff --git a/cpp/include/rmm/mr/device/arena_memory_resource.hpp b/cpp/include/rmm/mr/device/arena_memory_resource.hpp
index b9c1591dc..64c196954 100644
--- a/cpp/include/rmm/mr/device/arena_memory_resource.hpp
+++ b/cpp/include/rmm/mr/device/arena_memory_resource.hpp
@@ -156,14 +156,14 @@ class arena_memory_resource final : public device_memory_resource {
 
     {
       std::shared_lock lock(mtx_);
-      void* pointer = arena.allocate(bytes);
+      void* pointer = arena.allocate_sync(bytes);
       if (pointer != nullptr) { return pointer; }
     }
 
     {
       std::unique_lock lock(mtx_);
       defragment();
-      void* pointer = arena.allocate(bytes);
+      void* pointer = arena.allocate_sync(bytes);
       if (pointer == nullptr) {
         if (dump_log_on_failure_) { dump_memory_log(bytes); }
         auto const msg = std::string("Maximum pool size exceeded (failed to allocate ") +
@@ -209,7 +209,7 @@ class arena_memory_resource final : public device_memory_resource {
     {
       std::shared_lock lock(mtx_);
       // If the memory being freed does not belong to the arena, the following will return false.
-      if (arena.deallocate(ptr, bytes, stream)) { return; }
+      if (arena.deallocate(stream, ptr, bytes)) { return; }
     }
 
     {
@@ -218,31 +218,31 @@ class arena_memory_resource final : public device_memory_resource {
       stream.synchronize_no_throw();
 
       std::unique_lock lock(mtx_);
-      deallocate_from_other_arena(ptr, bytes, stream);
+      deallocate_from_other_arena(stream, ptr, bytes);
     }
   }
 
   /**
    * @brief Deallocate memory pointed to by `ptr` that was allocated in a different arena.
    *
+   * @param stream Stream on which to perform deallocation.
    * @param ptr Pointer to be deallocated.
    * @param bytes The size in bytes of the allocation. This must be equal to the
    * value of `bytes` that was passed to the `allocate` call that returned `ptr`.
-   * @param stream Stream on which to perform deallocation.
    */
-  void deallocate_from_other_arena(void* ptr, std::size_t bytes, cuda_stream_view stream)
+  void deallocate_from_other_arena(cuda_stream_view stream, void* ptr, std::size_t bytes)
   {
     if (use_per_thread_arena(stream)) {
       for (auto const& thread_arena : thread_arenas_) {
-        if (thread_arena.second->deallocate(ptr, bytes)) { return; }
+        if (thread_arena.second->deallocate_sync(ptr, bytes)) { return; }
       }
     } else {
       for (auto& stream_arena : stream_arenas_) {
-        if (stream_arena.second.deallocate(ptr, bytes)) { return; }
+        if (stream_arena.second.deallocate_sync(ptr, bytes)) { return; }
       }
     }
 
-    if (!global_arena_.deallocate(ptr, bytes)) {
+    if (!global_arena_.deallocate_sync(ptr, bytes)) {
       // It's possible to use per thread default streams along with another pool of streams.
       // This means that it's possible for an allocation to move from a thread or stream arena
       // back into the global arena during a defragmentation and then move down into another arena
@@ -253,11 +253,11 @@ class arena_memory_resource final : public device_memory_resource {
       // arenas all the time.
       if (use_per_thread_arena(stream)) {
         for (auto& stream_arena : stream_arenas_) {
-          if (stream_arena.second.deallocate(ptr, bytes)) { return; }
+          if (stream_arena.second.deallocate_sync(ptr, bytes)) { return; }
         }
       } else {
         for (auto const& thread_arena : thread_arenas_) {
-          if (thread_arena.second->deallocate(ptr, bytes)) { return; }
+          if (thread_arena.second->deallocate_sync(ptr, bytes)) { return; }
         }
       }
       RMM_FAIL("allocation not found");
diff --git a/cpp/include/rmm/mr/device/binning_memory_resource.hpp b/cpp/include/rmm/mr/device/binning_memory_resource.hpp
index fe427aee3..81f514a8b 100644
--- a/cpp/include/rmm/mr/device/binning_memory_resource.hpp
+++ b/cpp/include/rmm/mr/device/binning_memory_resource.hpp
@@ -196,7 +196,7 @@ class binning_memory_resource final : public device_memory_resource {
   void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
   {
     if (bytes <= 0) { return nullptr; }
-    return get_resource_ref(bytes).allocate_async(bytes, stream);
+    return get_resource_ref(bytes).allocate(stream, bytes);
   }
 
   /**
@@ -209,7 +209,7 @@ class binning_memory_resource final : public device_memory_resource {
    */
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
   {
-    get_resource_ref(bytes).deallocate_async(ptr, bytes, stream);
+    get_resource_ref(bytes).deallocate(stream, ptr, bytes);
   }
 
   device_async_resource_ref
diff --git a/cpp/include/rmm/mr/device/cuda_async_managed_memory_resource.hpp b/cpp/include/rmm/mr/device/cuda_async_managed_memory_resource.hpp
index 8c64883ca..c4401dd08 100644
--- a/cpp/include/rmm/mr/device/cuda_async_managed_memory_resource.hpp
+++ b/cpp/include/rmm/mr/device/cuda_async_managed_memory_resource.hpp
@@ -99,7 +99,7 @@ class cuda_async_managed_memory_resource final : public device_memory_resource {
    */
   void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
   {
-    return pool_.allocate(bytes, stream);
+    return pool_.allocate(stream, bytes);
   }
 
   /**
@@ -112,7 +112,7 @@ class cuda_async_managed_memory_resource final : public device_memory_resource {
    */
   void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override
   {
-    pool_.deallocate(ptr, bytes, stream);
+    pool_.deallocate(stream, ptr, bytes);
   }
 
   /**
diff --git a/cpp/include/rmm/mr/device/cuda_async_memory_resource.hpp b/cpp/include/rmm/mr/device/cuda_async_memory_resource.hpp
index eb7e6b4d2..e81013edd 100644
--- a/cpp/include/rmm/mr/device/cuda_async_memory_resource.hpp
+++ b/cpp/include/rmm/mr/device/cuda_async_memory_resource.hpp
@@ -180,7 +180,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
   void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
   {
     void* ptr{nullptr};
-    ptr = pool_.allocate(bytes, stream);
+    ptr = pool_.allocate(stream, bytes);
     return ptr;
   }
 
@@ -194,7 +194,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
    */
   void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override
   {
-    pool_.deallocate(ptr, bytes, stream);
+    pool_.deallocate(stream, ptr, bytes);
   }
 
   /**
diff --git a/cpp/include/rmm/mr/device/detail/arena.hpp b/cpp/include/rmm/mr/device/detail/arena.hpp
index 1b52387a4..4fd524165 100644
--- a/cpp/include/rmm/mr/device/detail/arena.hpp
+++ b/cpp/include/rmm/mr/device/detail/arena.hpp
@@ -525,7 +525,7 @@ class global_arena final {
   ~global_arena()
   {
     std::lock_guard lock(mtx_);
-    upstream_mr_.deallocate(upstream_block_.pointer(), upstream_block_.size());
+    upstream_mr_.deallocate_sync(upstream_block_.pointer(), upstream_block_.size());
   }
 
   /**
@@ -583,7 +583,7 @@ class global_arena final {
    * @param size The size in bytes of the allocation.
    * @return void* Pointer to the newly allocated memory.
    */
-  void* allocate(std::size_t size)
+  void* allocate_sync(std::size_t size)
   {
     RMM_LOGGING_ASSERT(handles(size));
     std::lock_guard lock(mtx_);
@@ -599,17 +599,17 @@ class global_arena final {
   /**
    * @brief Deallocate memory pointed to by `ptr`.
    *
+   * @param stream Stream on which to perform deallocation.
    * @param ptr Pointer to be deallocated.
    * @param size The size in bytes of the allocation. This must be equal to the value of `size`
    * that was passed to the `allocate` call that returned `p`.
-   * @param stream Stream on which to perform deallocation.
    * @return bool true if the allocation is found, false otherwise.
    */
-  bool deallocate_async(void* ptr, std::size_t size, cuda_stream_view stream)
+  bool deallocate(cuda_stream_view stream, void* ptr, std::size_t size)
   {
     RMM_LOGGING_ASSERT(handles(size));
     stream.synchronize_no_throw();
-    return deallocate(ptr, size);
+    return deallocate_sync(ptr, size);
   }
 
   /**
@@ -620,7 +620,7 @@ class global_arena final {
    * value of `bytes` that was passed to the `allocate` call that returned `ptr`.
    * @return bool true if the allocation is found, false otherwise.
    */
-  bool deallocate(void* ptr, std::size_t bytes)
+  bool deallocate_sync(void* ptr, std::size_t bytes)
   {
     std::lock_guard lock(mtx_);
 
@@ -701,7 +701,7 @@ class global_arena final {
    */
   void initialize(std::size_t size)
   {
-    upstream_block_ = {upstream_mr_.allocate(size), size};
+    upstream_block_ = {upstream_mr_.allocate_sync(size), size};
     superblocks_.emplace(upstream_block_.pointer(), size);
   }
 
@@ -814,9 +814,9 @@ class arena {
    * @param size The size in bytes of the allocation.
    * @return void* Pointer to the newly allocated memory.
    */
-  void* allocate(std::size_t size)
+  void* allocate_sync(std::size_t size)
   {
-    if (global_arena_.handles(size)) { return global_arena_.allocate(size); }
+    if (global_arena_.handles(size)) { return global_arena_.allocate_sync(size); }
     std::lock_guard lock(mtx_);
     return get_block(size).pointer();
   }
@@ -824,18 +824,16 @@ class arena {
   /**
    * @brief Deallocate memory pointed to by `ptr`, and possibly return superblocks to upstream.
    *
+   * @param stream Stream on which to perform deallocation.
    * @param ptr Pointer to be deallocated.
    * @param size The size in bytes of the allocation. This must be equal to the value of `size`
    * that was passed to the `allocate` call that returned `p`.
-   * @param stream Stream on which to perform deallocation.
    * @return bool true if the allocation is found, false otherwise.
    */
-  bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream)
+  bool deallocate(cuda_stream_view stream, void* ptr, std::size_t size)
   {
-    if (global_arena::handles(size) && global_arena_.deallocate_async(ptr, size, stream)) {
-      return true;
-    }
-    return deallocate(ptr, size);
+    if (global_arena::handles(size) && global_arena_.deallocate(stream, ptr, size)) { return true; }
+    return deallocate_sync(ptr, size);
   }
 
   /**
@@ -846,10 +844,10 @@ class arena {
    * that was passed to the `allocate` call that returned `p`.
    * @return bool true if the allocation is found, false otherwise.
    */
-  bool deallocate(void* ptr, std::size_t size)
+  bool deallocate_sync(void* ptr, std::size_t size)
   {
     std::lock_guard lock(mtx_);
-    return deallocate_from_superblock({ptr, size});
+    return deallocate_from_superblock_sync({ptr, size});
   }
 
   /**
@@ -925,7 +923,7 @@ class arena {
    * @param blk The block to deallocate.
    * @return true if the block is found.
    */
-  bool deallocate_from_superblock(block const& blk)
+  bool deallocate_from_superblock_sync(block const& blk)
   {
     auto const iter = std::find_if(superblocks_.cbegin(),
                                    superblocks_.cend(),
diff --git a/cpp/include/rmm/mr/device/device_memory_resource.hpp b/cpp/include/rmm/mr/device/device_memory_resource.hpp
index 7b2e0acfb..0418cdda4 100644
--- a/cpp/include/rmm/mr/device/device_memory_resource.hpp
+++ b/cpp/include/rmm/mr/device/device_memory_resource.hpp
@@ -103,6 +103,7 @@ class device_memory_resource {
   device_memory_resource& operator=(device_memory_resource&&) noexcept =
     default;  ///< @default_move_assignment{device_memory_resource}
 
+#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE
   /**
    * @brief Allocates memory of size at least \p bytes.
    *
@@ -151,24 +152,6 @@ class device_memory_resource {
     do_deallocate(ptr, bytes, stream);
   }
 
-  /**
-   * @brief Compare this resource to another.
-   *
-   * Two device_memory_resources compare equal if and only if memory allocated
-   * from one device_memory_resource can be deallocated from the other and vice
-   * versa.
-   *
-   * By default, simply checks if \p *this and \p other refer to the same
-   * object, i.e., does not check if they are two objects of the same class.
-   *
-   * @param other The other resource to compare to
-   * @returns If the two resources are equivalent
-   */
-  [[nodiscard]] bool is_equal(device_memory_resource const& other) const noexcept
-  {
-    return do_is_equal(other);
-  }
-
   /**
    * @brief Allocates memory of size at least \p bytes.
    *
@@ -293,9 +276,7 @@ class device_memory_resource {
     RMM_FUNC_RANGE();
     do_deallocate(ptr, bytes, stream);
   }
-
-#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
-  // CCCL >= 3.1 needs a different set of methods to satisfy the memory resource concepts
+#endif  // RMM_ENABLE_LEGACY_MR_INTERFACE
 
   /**
    * @brief Allocates memory of size at least \p bytes.
@@ -369,7 +350,24 @@ class device_memory_resource {
   {
     do_deallocate(ptr, rmm::align_up(bytes, alignment), stream);
   }
-#endif  // CCCL >= 3.1
+
+  /**
+   * @brief Compare this resource to another.
+   *
+   * Two device_memory_resources compare equal if and only if memory allocated
+   * from one device_memory_resource can be deallocated from the other and vice
+   * versa.
+   *
+   * By default, simply checks if \p *this and \p other refer to the same
+   * object, i.e., does not check if they are two objects of the same class.
+   *
+   * @param other The other resource to compare to
+   * @returns If the two resources are equivalent
+   */
+  [[nodiscard]] bool is_equal(device_memory_resource const& other) const noexcept
+  {
+    return do_is_equal(other);
+  }
 
   /**
    * @brief Comparison operator with another device_memory_resource
diff --git a/cpp/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/cpp/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
index f1948daca..70dbeab29 100644
--- a/cpp/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
+++ b/cpp/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
@@ -165,7 +165,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource {
 
     while (true) {
       try {
-        ret = get_upstream_resource().allocate_async(bytes, stream);
+        ret = get_upstream_resource().allocate(stream, bytes);
         break;
       } catch (exception_type const& e) {
         if (!callback_(bytes, callback_arg_)) { throw; }
@@ -183,7 +183,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource {
    */
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
   {
-    get_upstream_resource().deallocate_async(ptr, bytes, stream);
+    get_upstream_resource().deallocate(stream, ptr, bytes);
   }
 
   /**
diff --git a/cpp/include/rmm/mr/device/fixed_size_memory_resource.hpp b/cpp/include/rmm/mr/device/fixed_size_memory_resource.hpp
index b59e5c312..2d845d5b1 100644
--- a/cpp/include/rmm/mr/device/fixed_size_memory_resource.hpp
+++ b/cpp/include/rmm/mr/device/fixed_size_memory_resource.hpp
@@ -176,7 +176,7 @@ class fixed_size_memory_resource
    */
   free_list blocks_from_upstream(cuda_stream_view stream)
   {
-    void* ptr = get_upstream_resource().allocate_async(upstream_chunk_size_, stream);
+    void* ptr = get_upstream_resource().allocate(stream, upstream_chunk_size_);
     block_type block{ptr};
     upstream_blocks_.push_back(block);
 
@@ -231,7 +231,7 @@ class fixed_size_memory_resource
     lock_guard lock(this->get_mutex());
 
     for (auto block : upstream_blocks_) {
-      get_upstream_resource().deallocate(block.pointer(), upstream_chunk_size_);
+      get_upstream_resource().deallocate_sync(block.pointer(), upstream_chunk_size_);
     }
     upstream_blocks_.clear();
   }
diff --git a/cpp/include/rmm/mr/device/limiting_resource_adaptor.hpp b/cpp/include/rmm/mr/device/limiting_resource_adaptor.hpp
index 891fcd048..02e924dda 100644
--- a/cpp/include/rmm/mr/device/limiting_resource_adaptor.hpp
+++ b/cpp/include/rmm/mr/device/limiting_resource_adaptor.hpp
@@ -143,7 +143,7 @@ class limiting_resource_adaptor final : public device_memory_resource {
     auto const old           = allocated_bytes_.fetch_add(proposed_size);
     if (old + proposed_size <= allocation_limit_) {
       try {
-        return get_upstream_resource().allocate_async(bytes, stream);
+        return get_upstream_resource().allocate(stream, bytes);
       } catch (...) {
         allocated_bytes_ -= proposed_size;
         throw;
@@ -166,7 +166,7 @@ class limiting_resource_adaptor final : public device_memory_resource {
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
   {
     std::size_t allocated_size = align_up(bytes, alignment_);
-    get_upstream_resource().deallocate_async(ptr, bytes, stream);
+    get_upstream_resource().deallocate(stream, ptr, bytes);
     allocated_bytes_ -= allocated_size;
   }
 
diff --git a/cpp/include/rmm/mr/device/logging_resource_adaptor.hpp b/cpp/include/rmm/mr/device/logging_resource_adaptor.hpp
index 9723631e9..6d675fdd5 100644
--- a/cpp/include/rmm/mr/device/logging_resource_adaptor.hpp
+++ b/cpp/include/rmm/mr/device/logging_resource_adaptor.hpp
@@ -287,7 +287,7 @@ class logging_resource_adaptor final : public device_memory_resource {
   void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
   {
     try {
-      auto const ptr = get_upstream_resource().allocate_async(bytes, stream);
+      auto const ptr = get_upstream_resource().allocate(stream, bytes);
       logger_->info("allocate,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream));
       return ptr;
     } catch (...) {
@@ -314,7 +314,7 @@ class logging_resource_adaptor final : public device_memory_resource {
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
   {
     logger_->info("free,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream));
-    get_upstream_resource().deallocate_async(ptr, bytes, stream);
+    get_upstream_resource().deallocate(stream, ptr, bytes);
   }
 
   /**
diff --git a/cpp/include/rmm/mr/device/owning_wrapper.hpp b/cpp/include/rmm/mr/device/owning_wrapper.hpp
index 5c4737415..41fcb5ec5 100644
--- a/cpp/include/rmm/mr/device/owning_wrapper.hpp
+++ b/cpp/include/rmm/mr/device/owning_wrapper.hpp
@@ -170,7 +170,7 @@ class owning_wrapper : public device_memory_resource {
    */
   void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
   {
-    return wrapped().allocate(bytes, stream);
+    return wrapped().allocate(stream, bytes);
   }
 
   /**
@@ -184,7 +184,7 @@ class owning_wrapper : public device_memory_resource {
    */
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
   {
-    wrapped().deallocate(ptr, bytes, stream);
+    wrapped().deallocate(stream, ptr, bytes);
   }
 
   /**
diff --git a/cpp/include/rmm/mr/device/polymorphic_allocator.hpp b/cpp/include/rmm/mr/device/polymorphic_allocator.hpp
index 6f5e8caf6..ea5b65461 100644
--- a/cpp/include/rmm/mr/device/polymorphic_allocator.hpp
+++ b/cpp/include/rmm/mr/device/polymorphic_allocator.hpp
@@ -86,8 +86,7 @@ class polymorphic_allocator {
    */
   value_type* allocate(std::size_t num, cuda_stream_view stream)
   {
-    return static_cast<value_type*>(
-      get_upstream_resource().allocate_async(num * sizeof(T), stream));
+    return static_cast<value_type*>(get_upstream_resource().allocate(stream, num * sizeof(T)));
   }
 
   /**
@@ -102,7 +101,7 @@ class polymorphic_allocator {
    */
   void deallocate(value_type* ptr, std::size_t num, cuda_stream_view stream) noexcept
   {
-    get_upstream_resource().deallocate_async(ptr, num * sizeof(T), stream);
+    get_upstream_resource().deallocate(stream, ptr, num * sizeof(T));
   }
 
   /**
diff --git a/cpp/include/rmm/mr/device/pool_memory_resource.hpp b/cpp/include/rmm/mr/device/pool_memory_resource.hpp
index e06e1eb7d..d3edc05e0 100644
--- a/cpp/include/rmm/mr/device/pool_memory_resource.hpp
+++ b/cpp/include/rmm/mr/device/pool_memory_resource.hpp
@@ -352,7 +352,7 @@ class pool_memory_resource final
    *
    * @param size The size in bytes to allocate from the upstream resource
    * @param stream The stream on which the memory is to be used.
-   * @throws if call to allocate_async() throws
+   * @throws if call to allocate() throws
    * @return block_type The allocated block
    */
   block_type block_from_upstream(std::size_t size, cuda_stream_view stream)
@@ -361,7 +361,7 @@ class pool_memory_resource final
 
     if (size == 0) { return {}; }
 
-    void* ptr = get_upstream_resource().allocate_async(size, stream);
+    void* ptr = get_upstream_resource().allocate(stream, size);
     return *upstream_blocks_.emplace(static_cast<char*>(ptr), size, true).first;
   }
 
@@ -424,7 +424,7 @@ class pool_memory_resource final
     lock_guard lock(this->get_mutex());
 
     for (auto block : upstream_blocks_) {
-      get_upstream_resource().deallocate(block.pointer(), block.size());
+      get_upstream_resource().deallocate_sync(block.pointer(), block.size());
     }
     upstream_blocks_.clear();
 #ifdef RMM_POOL_TRACK_ALLOCATIONS
diff --git a/cpp/include/rmm/mr/device/prefetch_resource_adaptor.hpp b/cpp/include/rmm/mr/device/prefetch_resource_adaptor.hpp
index bee009fa2..bf6cb3204 100644
--- a/cpp/include/rmm/mr/device/prefetch_resource_adaptor.hpp
+++ b/cpp/include/rmm/mr/device/prefetch_resource_adaptor.hpp
@@ -92,7 +92,7 @@ class prefetch_resource_adaptor final : public device_memory_resource {
    */
   void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
   {
-    void* ptr = get_upstream_resource().allocate_async(bytes, stream);
+    void* ptr = get_upstream_resource().allocate(stream, bytes);
     rmm::prefetch(ptr, bytes, rmm::get_current_cuda_device(), stream);
     return ptr;
   }
@@ -106,7 +106,7 @@ class prefetch_resource_adaptor final : public device_memory_resource {
    */
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
   {
-    get_upstream_resource().deallocate_async(ptr, bytes, stream);
+    get_upstream_resource().deallocate(stream, ptr, bytes);
   }
 
   /**
diff --git a/cpp/include/rmm/mr/device/sam_headroom_memory_resource.hpp b/cpp/include/rmm/mr/device/sam_headroom_memory_resource.hpp
index f554b22ae..46241d625 100644
--- a/cpp/include/rmm/mr/device/sam_headroom_memory_resource.hpp
+++ b/cpp/include/rmm/mr/device/sam_headroom_memory_resource.hpp
@@ -75,7 +75,7 @@ class sam_headroom_memory_resource final : public device_memory_resource {
    */
   void* do_allocate(std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) override
   {
-    void* pointer = system_mr_.allocate_async(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
+    void* pointer = system_mr_.allocate(stream, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
 
     auto const free        = rmm::available_device_memory().first;
     auto const allocatable = free > headroom_ ? free - headroom_ : 0UL;
@@ -126,7 +126,7 @@ class sam_headroom_memory_resource final : public device_memory_resource {
                      [[maybe_unused]] std::size_t bytes,
                      [[maybe_unused]] cuda_stream_view stream) noexcept override
   {
-    system_mr_.deallocate_async(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
+    system_mr_.deallocate(stream, ptr, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
   }
 
   /**
diff --git a/cpp/include/rmm/mr/device/statistics_resource_adaptor.hpp b/cpp/include/rmm/mr/device/statistics_resource_adaptor.hpp
index acededa82..a37df9a8e 100644
--- a/cpp/include/rmm/mr/device/statistics_resource_adaptor.hpp
+++ b/cpp/include/rmm/mr/device/statistics_resource_adaptor.hpp
@@ -230,7 +230,7 @@ class statistics_resource_adaptor final : public device_memory_resource {
    */
   void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
   {
-    void* ptr = get_upstream_resource().allocate_async(bytes, stream);
+    void* ptr = get_upstream_resource().allocate(stream, bytes);
 
     // increment the stats
     {
@@ -253,7 +253,7 @@ class statistics_resource_adaptor final : public device_memory_resource {
    */
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
   {
-    get_upstream_resource().deallocate_async(ptr, bytes, stream);
+    get_upstream_resource().deallocate(stream, ptr, bytes);
 
     {
       write_lock_t lock(mtx_);
diff --git a/cpp/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/cpp/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
index 5176e3169..3bf0ac077 100644
--- a/cpp/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
+++ b/cpp/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
@@ -100,7 +100,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource {
   void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
   {
     lock_t lock(mtx);
-    return get_upstream_resource().allocate_async(bytes, stream);
+    return get_upstream_resource().allocate(stream, bytes);
   }
 
   /**
@@ -113,7 +113,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource {
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
   {
     lock_t lock(mtx);
-    get_upstream_resource().deallocate_async(ptr, bytes, stream);
+    get_upstream_resource().deallocate(stream, ptr, bytes);
   }
 
   /**
diff --git a/cpp/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/cpp/include/rmm/mr/device/thrust_allocator_adaptor.hpp
index 197941270..4778daf44 100644
--- a/cpp/include/rmm/mr/device/thrust_allocator_adaptor.hpp
+++ b/cpp/include/rmm/mr/device/thrust_allocator_adaptor.hpp
@@ -111,8 +111,7 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
   pointer allocate(size_type num)
   {
     cuda_set_device_raii dev{_device};
-    return thrust::device_pointer_cast(
-      static_cast<T*>(_mr.allocate_async(num * sizeof(T), _stream)));
+    return thrust::device_pointer_cast(static_cast<T*>(_mr.allocate(_stream, num * sizeof(T))));
   }
 
   /**
@@ -125,7 +124,7 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
   void deallocate(pointer ptr, size_type num) noexcept
   {
     cuda_set_device_raii dev{_device};
-    return _mr.deallocate_async(thrust::raw_pointer_cast(ptr), num * sizeof(T), _stream);
+    return _mr.deallocate(_stream, thrust::raw_pointer_cast(ptr), num * sizeof(T));
   }
 
   /**
diff --git a/cpp/include/rmm/mr/device/tracking_resource_adaptor.hpp b/cpp/include/rmm/mr/device/tracking_resource_adaptor.hpp
index 37e17e2d9..86fac81d6 100644
--- a/cpp/include/rmm/mr/device/tracking_resource_adaptor.hpp
+++ b/cpp/include/rmm/mr/device/tracking_resource_adaptor.hpp
@@ -207,7 +207,7 @@ class tracking_resource_adaptor final : public device_memory_resource {
    */
   void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
   {
-    void* ptr = get_upstream_resource().allocate_async(bytes, stream);
+    void* ptr = get_upstream_resource().allocate(stream, bytes);
     // track it.
     {
       write_lock_t lock(mtx_);
@@ -227,7 +227,7 @@ class tracking_resource_adaptor final : public device_memory_resource {
    */
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
   {
-    get_upstream_resource().deallocate_async(ptr, bytes, stream);
+    get_upstream_resource().deallocate(stream, ptr, bytes);
     {
       write_lock_t lock(mtx_);
 
diff --git a/cpp/include/rmm/mr/host/host_memory_resource.hpp b/cpp/include/rmm/mr/host/host_memory_resource.hpp
index 30741fd07..bd50fd5f0 100644
--- a/cpp/include/rmm/mr/host/host_memory_resource.hpp
+++ b/cpp/include/rmm/mr/host/host_memory_resource.hpp
@@ -65,10 +65,11 @@ class host_memory_resource {
   host_memory_resource& operator=(host_memory_resource&&) noexcept =
     default;  ///< @default_move_assignment{host_memory_resource}
 
+#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE
   /**
    * @brief Allocates memory on the host of size at least `bytes` bytes.
    *
-   * The returned storage is aligned to the specified `alignment` if supported, and to
+   * The returned storage is aligned to the specified `alignment` if provided, and to
    * `alignof(std::max_align_t)` otherwise.
    *
    * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated.
@@ -86,7 +87,7 @@ class host_memory_resource {
   /**
    * @brief Deallocate memory pointed to by `ptr`.
    *
-   * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a
+   * `ptr` must have been returned by a prior call to `allocate(bytes, alignment)` on a
    * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not
    * yet have been deallocated, otherwise behavior is undefined.
    *
@@ -103,6 +104,46 @@ class host_memory_resource {
     RMM_FUNC_RANGE();
     do_deallocate(ptr, bytes, alignment);
   }
+#endif  // RMM_ENABLE_LEGACY_MR_INTERFACE
+
+  /**
+   * @brief Allocates memory on the host of size at least `bytes` bytes.
+   *
+   * The returned storage is aligned to the specified `alignment` if provided, and to
+   * `alignof(std::max_align_t)` otherwise.
+   *
+   * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated.
+   *
+   * @param bytes The size of the allocation
+   * @param alignment Alignment of the allocation
+   * @return void* Pointer to the newly allocated memory
+   */
+  void* allocate_sync(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
+  {
+    RMM_FUNC_RANGE();
+    return do_allocate(bytes, alignment);
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by `ptr`.
+   *
+   * `ptr` must have been returned by a prior call to `allocate(bytes, alignment)` on a
+   * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not
+   * yet have been deallocated, otherwise behavior is undefined.
+   *
+   * @param ptr Pointer to be deallocated
+   * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes`
+   *              that was passed to the `allocate` call that returned `ptr`.
+   * @param alignment Alignment of the allocation. This must be equal to the value of `alignment`
+   *                  that was passed to the `allocate` call that returned `ptr`.
+   */
+  void deallocate_sync(void* ptr,
+                       std::size_t bytes,
+                       std::size_t alignment = alignof(std::max_align_t)) noexcept
+  {
+    RMM_FUNC_RANGE();
+    do_deallocate(ptr, bytes, alignment);
+  }
 
   /**
    * @brief Compare this resource to another.
@@ -201,46 +242,6 @@ class host_memory_resource {
   {
     return this == &other;
   }
-
-#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
-
- public:
-  /**
-   * @brief Allocates memory on the host of size at least `bytes` bytes.
-   *
-   * The returned storage is aligned to the specified `alignment` if supported, and to
-   * `alignof(std::max_align_t)` otherwise.
-   *
-   * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated.
-   *
-   * @param bytes The size of the allocation
-   * @param alignment Alignment of the allocation
-   * @return void* Pointer to the newly allocated memory
-   */
-  void* allocate_sync(std::size_t bytes, std::size_t alignment)
-  {
-    return allocate(bytes, alignment);
-  }
-
-  /**
-   * @brief Deallocate memory pointed to by `ptr`.
-   *
-   * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a
-   * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not
-   * yet have been deallocated, otherwise behavior is undefined.
-   *
-   * @param ptr Pointer to be deallocated
-   * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes`
-   *              that was passed to the `allocate` call that returned `ptr`.
-   * @param alignment Alignment of the allocation. This must be equal to the value of `alignment`
-   *                  that was passed to the `allocate` call that returned `ptr`.
-   */
-  void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment)
-  {
-    return deallocate(ptr, bytes, alignment);
-  }
-
-#endif
 };
 
 // static property checks
diff --git a/cpp/include/rmm/mr/host/pinned_memory_resource.hpp b/cpp/include/rmm/mr/host/pinned_memory_resource.hpp
index 62699da92..19a12cfe8 100644
--- a/cpp/include/rmm/mr/host/pinned_memory_resource.hpp
+++ b/cpp/include/rmm/mr/host/pinned_memory_resource.hpp
@@ -49,6 +49,7 @@ class pinned_memory_resource final : public host_memory_resource {
   pinned_memory_resource& operator=(pinned_memory_resource&&) =
     default;  ///< @default_move_assignment{pinned_memory_resource}
 
+#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE
   /**
    * @brief Pretend to support the allocate_async interface, falling back to stream 0
    *
@@ -94,6 +95,48 @@ class pinned_memory_resource final : public host_memory_resource {
     do_deallocate(ptr, rmm::align_up(bytes, alignment));
   }
 
+  // Explicitly inherit the allocate and deallocate functions from the host_memory_resource class.
+  // Due to inheritance and name hiding rules, we need to declare these with "using" when we
+  // override allocate and deallocate for CCCL 3.1.0+ compatibility.
+  using host_memory_resource::allocate;
+  using host_memory_resource::deallocate;
+#endif  // RMM_ENABLE_LEGACY_MR_INTERFACE
+
+  /**
+   * @brief Pretend to support the allocate_async interface, falling back to stream 0
+   *
+   * @throws rmm::bad_alloc When the requested `bytes` cannot be allocated on
+   * the specified `stream`.
+   *
+   * @param stream CUDA stream on which to perform the deallocation (ignored).
+   * @param bytes The size of the allocation
+   * @param alignment The expected alignment of the allocation
+   * @return void* Pointer to the newly allocated memory
+   */
+  void* allocate(cuda_stream_view stream,
+                 std::size_t bytes,
+                 std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
+  {
+    return do_allocate(bytes, alignment);
+  }
+
+  /**
+   * @brief Pretend to support the deallocate_async interface, falling back to stream 0
+   *
+   * @param stream CUDA stream on which to perform the deallocation (ignored).
+   * @param ptr Pointer to be deallocated
+   * @param bytes The size in bytes of the allocation. This must be equal to the
+   * value of `bytes` that was passed to the `allocate` call that returned `p`.
+   * @param alignment The alignment that was passed to the `allocate` call that returned `p`
+   */
+  void deallocate(cuda_stream_view stream,
+                  void* ptr,
+                  std::size_t bytes,
+                  std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
+  {
+    return do_deallocate(ptr, bytes, alignment);
+  }
+
   /**
    * @brief Enables the `cuda::mr::device_accessible` property
    *
@@ -106,7 +149,7 @@ class pinned_memory_resource final : public host_memory_resource {
    * @brief Allocates pinned memory on the host of size at least `bytes` bytes.
    *
    * The returned storage is aligned to the specified `alignment` if supported, and to
-   * `alignof(std::max_align_t)` otherwise.
+   * `rmm::RMM_DEFAULT_HOST_ALIGNMENT` otherwise.
    *
    * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated.
    *
@@ -114,7 +157,8 @@ class pinned_memory_resource final : public host_memory_resource {
    * @param alignment Alignment of the allocation
    * @return void* Pointer to the newly allocated memory
    */
-  void* do_allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) override
+  void* do_allocate(std::size_t bytes,
+                    std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override
   {
     // don't allocate anything if the user requested zero bytes
     if (0 == bytes) { return nullptr; }
@@ -145,56 +189,12 @@ class pinned_memory_resource final : public host_memory_resource {
    */
   void do_deallocate(void* ptr,
                      std::size_t bytes,
-                     std::size_t alignment = alignof(std::max_align_t)) noexcept override
+                     std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept override
   {
     if (nullptr == ptr) { return; }
     rmm::detail::aligned_host_deallocate(
       ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); });
   }
-
-#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
-
- public:
-  // Explicitly inherit the allocate and deallocate functions from the host_memory_resource class.
-  // Due to inheritance and name hiding rules, we need to declare these with "using" when we
-  // override allocate and deallocate for CCCL 3.1.0+ compatibility.
-  using host_memory_resource::allocate;
-  using host_memory_resource::deallocate;
-
-  /**
-   * @brief Pretend to support the allocate_async interface, falling back to stream 0
-   *
-   * @throws rmm::bad_alloc When the requested `bytes` cannot be allocated on
-   * the specified `stream`.
-   *
-   * @param stream CUDA stream on which to perform the deallocation (ignored).
-   * @param bytes The size of the allocation
-   * @param alignment The expected alignment of the allocation
-   * @return void* Pointer to the newly allocated memory
-   */
-  void* allocate(cuda_stream_view stream, std::size_t bytes, std::size_t alignment)
-  {
-    return this->allocate_async(bytes, alignment, stream);
-  }
-
-  /**
-   * @brief Pretend to support the deallocate_async interface, falling back to stream 0
-   *
-   * @param stream CUDA stream on which to perform the deallocation (ignored).
-   * @param ptr Pointer to be deallocated
-   * @param bytes The size in bytes of the allocation. This must be equal to the
-   * value of `bytes` that was passed to the `allocate` call that returned `p`.
-   * @param alignment The alignment that was passed to the `allocate` call that returned `p`
-   */
-  void deallocate(cuda_stream_view stream,
-                  void* ptr,
-                  std::size_t bytes,
-                  std::size_t alignment) noexcept
-  {
-    return this->deallocate_async(ptr, bytes, alignment, stream);
-  }
-
-#endif
 };
 
 // static property checks
diff --git a/cpp/include/rmm/mr/pinned_host_memory_resource.hpp b/cpp/include/rmm/mr/pinned_host_memory_resource.hpp
index 60f943054..fa3d67c82 100644
--- a/cpp/include/rmm/mr/pinned_host_memory_resource.hpp
+++ b/cpp/include/rmm/mr/pinned_host_memory_resource.hpp
@@ -46,6 +46,7 @@ namespace mr {
  */
 class pinned_host_memory_resource {
  public:
+#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE
   // Disable clang-tidy complaining about the easily swappable size and alignment parameters
   // of allocate and deallocate
   // NOLINTBEGIN(bugprone-easily-swappable-parameters)
@@ -66,16 +67,7 @@ class pinned_host_memory_resource {
   static void* allocate(std::size_t bytes,
                         [[maybe_unused]] std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
   {
-    RMM_FUNC_RANGE();
-
-    // don't allocate anything if the user requested zero bytes
-    if (0 == bytes) { return nullptr; }
-
-    return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) {
-      void* ptr{nullptr};
-      RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault), size);
-      return ptr;
-    });
+    return allocate_sync(bytes, alignment);
   }
 
   /**
@@ -89,10 +81,7 @@ class pinned_host_memory_resource {
                          std::size_t bytes,
                          std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
   {
-    RMM_FUNC_RANGE();
-
-    rmm::detail::aligned_host_deallocate(
-      ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); });
+    return deallocate_sync(ptr, bytes, alignment);
   }
 
   /**
@@ -111,9 +100,7 @@ class pinned_host_memory_resource {
    */
   static void* allocate_async(std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream)
   {
-    RMM_FUNC_RANGE();
-
-    return allocate(bytes);
+    return allocate(stream, bytes);
   }
 
   /**
@@ -135,9 +122,7 @@ class pinned_host_memory_resource {
                               std::size_t alignment,
                               [[maybe_unused]] cuda::stream_ref stream)
   {
-    RMM_FUNC_RANGE();
-
-    return allocate(bytes, alignment);
+    return allocate(stream, bytes, alignment);
   }
 
   /**
@@ -153,9 +138,7 @@ class pinned_host_memory_resource {
                                std::size_t bytes,
                                [[maybe_unused]] cuda::stream_ref stream) noexcept
   {
-    RMM_FUNC_RANGE();
-
-    return deallocate(ptr, bytes);
+    return deallocate(stream, ptr, bytes);
   }
 
   /**
@@ -174,44 +157,11 @@ class pinned_host_memory_resource {
                                std::size_t alignment,
                                [[maybe_unused]] cuda::stream_ref stream) noexcept
   {
-    RMM_FUNC_RANGE();
-
-    return deallocate(ptr, bytes, alignment);
+    return deallocate(stream, ptr, bytes, alignment);
   }
   // NOLINTEND(bugprone-easily-swappable-parameters)
+#endif  // RMM_ENABLE_LEGACY_MR_INTERFACE
 
-  /**
-   * @briefreturn{true if the specified resource is the same type as this resource.}
-   */
-  bool operator==(const pinned_host_memory_resource&) const { return true; }
-
-  /**
-   * @briefreturn{true if the specified resource is not the same type as this resource, otherwise
-   * false.}
-   */
-  bool operator!=(const pinned_host_memory_resource&) const { return false; }
-
-  /**
-   * @brief Enables the `cuda::mr::device_accessible` property
-   *
-   * This property declares that a `pinned_host_memory_resource` provides device accessible memory
-   */
-  friend void get_property(pinned_host_memory_resource const&, cuda::mr::device_accessible) noexcept
-  {
-  }
-
-  /**
-   * @brief Enables the `cuda::mr::host_accessible` property
-   *
-   * This property declares that a `pinned_host_memory_resource` provides host accessible memory
-   */
-  friend void get_property(pinned_host_memory_resource const&, cuda::mr::host_accessible) noexcept
-  {
-  }
-
-#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
-
- public:
   /**
    * @brief Allocates pinned host memory of size at least \p bytes bytes.
    *
@@ -225,9 +175,19 @@ class pinned_host_memory_resource {
    *
    * @return Pointer to the newly allocated memory.
    */
-  static void* allocate_sync(std::size_t bytes, std::size_t alignment)
+  static void* allocate_sync(std::size_t bytes,
+                             std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
   {
-    return allocate(bytes, alignment);
+    RMM_FUNC_RANGE();
+
+    // don't allocate anything if the user requested zero bytes
+    if (0 == bytes) { return nullptr; }
+
+    return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) {
+      void* ptr{nullptr};
+      RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault), size);
+      return ptr;
+    });
   }
 
   /**
@@ -237,15 +197,19 @@ class pinned_host_memory_resource {
    * @param bytes Size of the allocation.
    * @param alignment Alignment in bytes. Default alignment is used if unspecified.
    */
-  static void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment)
+  static void deallocate_sync(void* ptr,
+                              std::size_t bytes,
+                              std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
   {
-    return deallocate(ptr, bytes, alignment);
+    RMM_FUNC_RANGE();
+    rmm::detail::aligned_host_deallocate(
+      ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); });
   }
 
   /**
    * @brief Allocates pinned host memory of size at least \p bytes bytes and alignment \p alignment.
    *
-   * @note Stream argument is ignored and behavior is identical to allocate.
+   * @note Stream argument is ignored and behavior is identical to allocate_sync.
    *
    * @throws rmm::out_of_memory if the requested allocation could not be fulfilled due to to a
    * CUDA out of memory error.
@@ -257,9 +221,12 @@ class pinned_host_memory_resource {
    * @param alignment Alignment in bytes.
    * @return Pointer to the newly allocated memory.
    */
-  static void* allocate(cuda_stream_view stream, std::size_t bytes, std::size_t alignment)
+  static void* allocate(cuda_stream_view stream,
+                        std::size_t bytes,
+                        std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
   {
-    return allocate_async(bytes, alignment, stream);
+    RMM_FUNC_RANGE();
+    return allocate_sync(bytes, alignment);
   }
 
   /**
@@ -276,12 +243,40 @@ class pinned_host_memory_resource {
   static void deallocate(cuda_stream_view stream,
                          void* ptr,
                          std::size_t bytes,
-                         std::size_t alignment) noexcept
+                         std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
   {
-    return deallocate_async(ptr, bytes, alignment, stream);
+    RMM_FUNC_RANGE();
+    return deallocate_sync(ptr, bytes, alignment);
   }
 
-#endif
+  /**
+   * @briefreturn{true if the specified resource is the same type as this resource.}
+   */
+  bool operator==(const pinned_host_memory_resource&) const { return true; }
+
+  /**
+   * @briefreturn{true if the specified resource is not the same type as this resource, otherwise
+   * false.}
+   */
+  bool operator!=(const pinned_host_memory_resource&) const { return false; }
+
+  /**
+   * @brief Enables the `cuda::mr::device_accessible` property
+   *
+   * This property declares that a `pinned_host_memory_resource` provides device accessible memory
+   */
+  friend void get_property(pinned_host_memory_resource const&, cuda::mr::device_accessible) noexcept
+  {
+  }
+
+  /**
+   * @brief Enables the `cuda::mr::host_accessible` property
+   *
+   * This property declares that a `pinned_host_memory_resource` provides host accessible memory
+   */
+  friend void get_property(pinned_host_memory_resource const&, cuda::mr::host_accessible) noexcept
+  {
+  }
 };
 
 static_assert(rmm::detail::polyfill::async_resource_with<pinned_host_memory_resource,
diff --git a/cpp/include/rmm/resource_ref.hpp b/cpp/include/rmm/resource_ref.hpp
index 7dffe059f..d135b3701 100644
--- a/cpp/include/rmm/resource_ref.hpp
+++ b/cpp/include/rmm/resource_ref.hpp
@@ -28,8 +28,6 @@ namespace RMM_NAMESPACE {
  * @file
  */
 
-#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
-
 /**
  * @brief Alias for a `cuda::mr::resource_ref` with the property
  * `cuda::mr::device_accessible`.
@@ -72,52 +70,6 @@ using host_device_resource_ref = detail::cccl_resource_ref<
 using host_device_async_resource_ref = detail::cccl_async_resource_ref<
   cuda::mr::resource_ref<cuda::mr::host_accessible, cuda::mr::device_accessible>>;
 
-#else
-
-/**
- * @brief Alias for a `cuda::mr::resource_ref` with the property
- * `cuda::mr::device_accessible`.
- */
-using device_resource_ref =
-  detail::cccl_resource_ref<cuda::mr::resource_ref<cuda::mr::device_accessible>>;
-
-/**
- * @brief Alias for a `cuda::mr::async_resource_ref` with the property
- * `cuda::mr::device_accessible`.
- */
-using device_async_resource_ref =
-  detail::cccl_async_resource_ref<cuda::mr::async_resource_ref<cuda::mr::device_accessible>>;
-
-/**
- * @brief Alias for a `cuda::mr::resource_ref` with the property
- * `cuda::mr::host_accessible`.
- */
-using host_resource_ref =
-  detail::cccl_resource_ref<cuda::mr::resource_ref<cuda::mr::host_accessible>>;
-
-/**
- * @brief Alias for a `cuda::mr::async_resource_ref` with the property
- * `cuda::mr::host_accessible`.
- */
-using host_async_resource_ref =
-  detail::cccl_async_resource_ref<cuda::mr::async_resource_ref<cuda::mr::host_accessible>>;
-
-/**
- * @brief Alias for a `cuda::mr::resource_ref` with the properties
- * `cuda::mr::host_accessible` and `cuda::mr::device_accessible`.
- */
-using host_device_resource_ref = detail::cccl_resource_ref<
-  cuda::mr::resource_ref<cuda::mr::host_accessible, cuda::mr::device_accessible>>;
-
-/**
- * @brief Alias for a `cuda::mr::async_resource_ref` with the properties
- * `cuda::mr::host_accessible` and `cuda::mr::device_accessible`.
- */
-using host_device_async_resource_ref = detail::cccl_async_resource_ref<
-  cuda::mr::async_resource_ref<cuda::mr::host_accessible, cuda::mr::device_accessible>>;
-
-#endif
-
 /**
  * @brief Convert pointer to memory resource into `device_async_resource_ref`, checking for
  * `nullptr`
diff --git a/cpp/src/device_buffer.cpp b/cpp/src/device_buffer.cpp
index c2857b756..1d6659259 100644
--- a/cpp/src/device_buffer.cpp
+++ b/cpp/src/device_buffer.cpp
@@ -98,12 +98,12 @@ void device_buffer::allocate_async(std::size_t bytes)
 {
   _size     = bytes;
   _capacity = bytes;
-  _data     = (bytes > 0) ? _mr.allocate_async(bytes, stream()) : nullptr;
+  _data     = (bytes > 0) ? _mr.allocate(stream(), bytes) : nullptr;
 }
 
 void device_buffer::deallocate_async() noexcept
 {
-  if (capacity() > 0) { _mr.deallocate_async(data(), capacity(), stream()); }
+  if (capacity() > 0) { _mr.deallocate(stream(), data(), capacity()); }
   _size     = 0;
   _capacity = 0;
   _data     = nullptr;
diff --git a/cpp/tests/device_check_resource_adaptor.hpp b/cpp/tests/device_check_resource_adaptor.hpp
index 5bbbb9915..dcc8b898e 100644
--- a/cpp/tests/device_check_resource_adaptor.hpp
+++ b/cpp/tests/device_check_resource_adaptor.hpp
@@ -44,7 +44,7 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour
   {
     bool const is_correct_device = check_device_id();
     EXPECT_TRUE(is_correct_device);
-    if (is_correct_device) { return get_upstream_resource().allocate_async(bytes, stream); }
+    if (is_correct_device) { return get_upstream_resource().allocate(stream, bytes); }
     return nullptr;
   }
 
@@ -52,7 +52,7 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour
   {
     bool const is_correct_device = check_device_id();
     EXPECT_TRUE(is_correct_device);
-    if (is_correct_device) { get_upstream_resource().deallocate_async(ptr, bytes, stream); }
+    if (is_correct_device) { get_upstream_resource().deallocate(stream, ptr, bytes); }
   }
 
   [[nodiscard]] bool do_is_equal(
diff --git a/cpp/tests/logger_tests.cpp b/cpp/tests/logger_tests.cpp
index 2b38792d6..34bde54f3 100644
--- a/cpp/tests/logger_tests.cpp
+++ b/cpp/tests/logger_tests.cpp
@@ -134,10 +134,10 @@ TEST(Adaptor, FilenameConstructor)
   auto const size0{100};
   auto const size1{42};
 
-  auto* ptr0 = log_mr.allocate(size0);
-  auto* ptr1 = log_mr.allocate(size1);
-  log_mr.deallocate(ptr0, size0);
-  log_mr.deallocate(ptr1, size1);
+  auto* ptr0 = log_mr.allocate_sync(size0);
+  auto* ptr1 = log_mr.allocate_sync(size1);
+  log_mr.deallocate_sync(ptr0, size0);
+  log_mr.deallocate_sync(ptr1, size1);
   log_mr.flush();
 
   using rmm::detail::action;
@@ -167,10 +167,10 @@ TEST(Adaptor, MultiSinkConstructor)
   auto const size0{100};
   auto const size1{42};
 
-  auto* ptr0 = log_mr.allocate(size0);
-  auto* ptr1 = log_mr.allocate(size1);
-  log_mr.deallocate(ptr0, size0);
-  log_mr.deallocate(ptr1, size1);
+  auto* ptr0 = log_mr.allocate_sync(size0);
+  auto* ptr1 = log_mr.allocate_sync(size1);
+  log_mr.deallocate_sync(ptr0, size0);
+  log_mr.deallocate_sync(ptr1, size1);
   log_mr.flush();
 
   using rmm::detail::action;
@@ -196,10 +196,10 @@ TEST(Adaptor, Factory)
   auto const size0{99};
   auto const size1{42};
 
-  auto* ptr0 = log_mr.allocate(size0);
-  log_mr.deallocate(ptr0, size0);
-  auto* ptr1 = log_mr.allocate(size1);
-  log_mr.deallocate(ptr1, size1);
+  auto* ptr0 = log_mr.allocate_sync(size0);
+  log_mr.deallocate_sync(ptr0, size0);
+  auto* ptr1 = log_mr.allocate_sync(size1);
+  log_mr.deallocate_sync(ptr1, size1);
   log_mr.flush();
 
   using rmm::detail::action;
@@ -235,8 +235,8 @@ TEST(Adaptor, EnvironmentPath)
 
   auto const size{100};
 
-  auto* ptr = log_mr.allocate(size);
-  log_mr.deallocate(ptr, size);
+  auto* ptr = log_mr.allocate_sync(size);
+  log_mr.deallocate_sync(ptr, size);
 
   log_mr.flush();
 
@@ -262,10 +262,10 @@ TEST(Adaptor, AllocateFailure)
   auto const size0{99};
   auto const size1{1_TiB};
 
-  auto* ptr0 = log_mr.allocate(size0);
-  log_mr.deallocate(ptr0, size0);
+  auto* ptr0 = log_mr.allocate_sync(size0);
+  log_mr.deallocate_sync(ptr0, size0);
   try {
-    log_mr.allocate(size1);
+    log_mr.allocate_sync(size1);
   } catch (...) {
   }
   log_mr.flush();
@@ -290,8 +290,8 @@ TEST(Adaptor, STDOUT)
 
   auto const size{100};
 
-  auto* ptr = log_mr.allocate(size);
-  log_mr.deallocate(ptr, size);
+  auto* ptr = log_mr.allocate_sync(size);
+  log_mr.deallocate_sync(ptr, size);
 
   std::string output = testing::internal::GetCapturedStdout();
   std::string header = output.substr(0, output.find('\n'));
@@ -308,8 +308,8 @@ TEST(Adaptor, STDERR)
 
   auto const size{100};
 
-  auto* ptr = log_mr.allocate(size);
-  log_mr.deallocate(ptr, size);
+  auto* ptr = log_mr.allocate_sync(size);
+  log_mr.deallocate_sync(ptr, size);
 
   std::string output = testing::internal::GetCapturedStderr();
   std::string header = output.substr(0, output.find('\n'));
diff --git a/cpp/tests/mr/device/adaptor_tests.cpp b/cpp/tests/mr/device/adaptor_tests.cpp
index a7739f0cc..b3ff1dc08 100644
--- a/cpp/tests/mr/device/adaptor_tests.cpp
+++ b/cpp/tests/mr/device/adaptor_tests.cpp
@@ -150,9 +150,9 @@ TYPED_TEST(AdaptorTest, GetUpstreamResource)
 TYPED_TEST(AdaptorTest, AllocFree)
 {
   void* ptr{nullptr};
-  EXPECT_NO_THROW(ptr = this->mr->allocate(1024));
+  EXPECT_NO_THROW(ptr = this->mr->allocate_sync(1024));
   EXPECT_NE(ptr, nullptr);
-  EXPECT_NO_THROW(this->mr->deallocate(ptr, 1024));
+  EXPECT_NO_THROW(this->mr->deallocate_sync(ptr, 1024));
 }
 
 }  // namespace rmm::test
diff --git a/cpp/tests/mr/device/aligned_mr_tests.cpp b/cpp/tests/mr/device/aligned_mr_tests.cpp
index e0fb5ef0b..9c6930b89 100644
--- a/cpp/tests/mr/device/aligned_mr_tests.cpp
+++ b/cpp/tests/mr/device/aligned_mr_tests.cpp
@@ -80,8 +80,8 @@ TEST(AlignedTest, DefaultAllocationAlignmentPassthrough)
 
   {
     auto const size{5};
-    EXPECT_EQ(mr.allocate(size, stream), pointer);
-    mr.deallocate(pointer, size, stream);
+    EXPECT_EQ(mr.allocate(stream, size), pointer);
+    mr.deallocate(stream, pointer, size);
   }
 }
 
@@ -102,8 +102,8 @@ TEST(AlignedTest, BelowAlignmentThresholdPassthrough)
 
   {
     auto const size{3};
-    EXPECT_EQ(mr.allocate(size, stream), pointer);
-    mr.deallocate(pointer, size, stream);
+    EXPECT_EQ(mr.allocate(stream, size), pointer);
+    mr.deallocate(stream, pointer, size);
   }
 
   {
@@ -111,8 +111,8 @@ TEST(AlignedTest, BelowAlignmentThresholdPassthrough)
     void* const pointer1 = int_to_address(456);
     EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer1));
     EXPECT_CALL(mock, do_deallocate(pointer1, size, stream)).Times(1);
-    EXPECT_EQ(mr.allocate(size, stream), pointer1);
-    mr.deallocate(pointer1, size, stream);
+    EXPECT_EQ(mr.allocate(stream, size), pointer1);
+    mr.deallocate(stream, pointer1, size);
   }
 }
 
@@ -134,8 +134,8 @@ TEST(AlignedTest, UpstreamAddressAlreadyAligned)
 
   {
     auto const size{65536};
-    EXPECT_EQ(mr.allocate(size, stream), pointer);
-    mr.deallocate(pointer, size, stream);
+    EXPECT_EQ(mr.allocate(stream, size), pointer);
+    mr.deallocate(stream, pointer, size);
   }
 }
 
@@ -157,8 +157,8 @@ TEST(AlignedTest, AlignUpstreamAddress)
   {
     void* const expected_pointer = int_to_address(4096);
     auto const size{65536};
-    EXPECT_EQ(mr.allocate(size, stream), expected_pointer);
-    mr.deallocate(expected_pointer, size, stream);
+    EXPECT_EQ(mr.allocate(stream, size), expected_pointer);
+    mr.deallocate(stream, expected_pointer, size);
   }
 }
 
@@ -193,12 +193,12 @@ TEST(AlignedTest, AlignMultiple)
     auto const size1{65536};
     auto const size2{73728};
     auto const size3{77800};
-    EXPECT_EQ(mr.allocate(size1, stream), expected_pointer1);
-    EXPECT_EQ(mr.allocate(size2, stream), expected_pointer2);
-    EXPECT_EQ(mr.allocate(size3, stream), expected_pointer3);
-    mr.deallocate(expected_pointer1, size1, stream);
-    mr.deallocate(expected_pointer2, size2, stream);
-    mr.deallocate(expected_pointer3, size3, stream);
+    EXPECT_EQ(mr.allocate(stream, size1), expected_pointer1);
+    EXPECT_EQ(mr.allocate(stream, size2), expected_pointer2);
+    EXPECT_EQ(mr.allocate(stream, size3), expected_pointer3);
+    mr.deallocate(stream, expected_pointer1, size1);
+    mr.deallocate(stream, expected_pointer2, size2);
+    mr.deallocate(stream, expected_pointer3, size3);
   }
 }
 
@@ -207,9 +207,9 @@ TEST(AlignedTest, AlignRealPointer)
   auto const alignment{4096};
   auto const threshold{65536};
   aligned_real mr{rmm::mr::get_current_device_resource_ref(), alignment, threshold};
-  void* alloc = mr.allocate(threshold);
+  void* alloc = mr.allocate_sync(threshold);
   EXPECT_TRUE(rmm::is_pointer_aligned(alloc, alignment));
-  mr.deallocate(alloc, threshold);
+  mr.deallocate_sync(alloc, threshold);
 }
 
 TEST(AlignedTest, SmallAlignmentsBumpedTo256Bytes)
@@ -218,7 +218,7 @@ TEST(AlignedTest, SmallAlignmentsBumpedTo256Bytes)
   for (auto requested_alignment : {32UL, 64UL, 128UL}) {
     aligned_real mr{rmm::mr::get_current_device_resource_ref(), requested_alignment};
 
-    void* ptr = mr.allocate(requested_alignment);
+    void* ptr = mr.allocate_sync(requested_alignment);
 
     // Even though we requested smaller alignment, pointer should be 256-byte
     // aligned for CUDA requirements
@@ -226,7 +226,7 @@ TEST(AlignedTest, SmallAlignmentsBumpedTo256Bytes)
     // And also aligned to the originally requested alignment
     EXPECT_TRUE(rmm::is_pointer_aligned(ptr, requested_alignment));
 
-    mr.deallocate(ptr, requested_alignment);
+    mr.deallocate_sync(ptr, requested_alignment);
   }
 }
 
diff --git a/cpp/tests/mr/device/arena_mr_tests.cpp b/cpp/tests/mr/device/arena_mr_tests.cpp
index 4a45eb01b..448f19f20 100644
--- a/cpp/tests/mr/device/arena_mr_tests.cpp
+++ b/cpp/tests/mr/device/arena_mr_tests.cpp
@@ -16,6 +16,7 @@
 
 #include "../../byte_literals.hpp"
 
+#include <rmm/aligned.hpp>
 #include <rmm/cuda_device.hpp>
 #include <rmm/cuda_stream.hpp>
 #include <rmm/error.hpp>
@@ -40,8 +41,23 @@
 namespace rmm::test {
 namespace {
 
-class mock_memory_resource {
+class mock_memory_resource_interface {
  public:
+  // We must define an interface class so that we can mock methods with default arguments.
+  virtual void* allocate_sync(std::size_t, std::size_t)                     = 0;
+  virtual void deallocate_sync(void*, std::size_t, std::size_t) noexcept    = 0;
+  virtual void* allocate(cuda_stream_view,
+                         std::size_t,
+                         std::size_t = CUDA_ALLOCATION_ALIGNMENT)           = 0;
+  virtual void deallocate(cuda_stream_view,
+                          void*,
+                          std::size_t,
+                          std::size_t = CUDA_ALLOCATION_ALIGNMENT) noexcept = 0;
+};
+
+class mock_memory_resource : public mock_memory_resource_interface {
+ public:
+#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE
   MOCK_METHOD(void*, allocate, (std::size_t, std::size_t));
   MOCK_METHOD(void, deallocate, (void*, std::size_t, std::size_t), (noexcept));
   MOCK_METHOD(void*, allocate_async, (std::size_t, std::size_t, cuda::stream_ref));
@@ -49,29 +65,12 @@ class mock_memory_resource {
               deallocate_async,
               (void*, std::size_t, std::size_t, cuda::stream_ref),
               (noexcept));
+#endif  // RMM_ENABLE_LEGACY_MR_INTERFACE
 
-  void* allocate_sync(std::size_t bytes, std::size_t alignment)
-  {
-    return allocate(bytes, alignment);
-  }
-
-  void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) noexcept
-  {
-    deallocate(ptr, bytes, alignment);
-  }
-
-  void* allocate(cuda_stream_view stream, std::size_t bytes, std::size_t alignment)
-  {
-    return allocate_async(bytes, alignment, stream);
-  }
-
-  void deallocate(cuda_stream_view stream,
-                  void* ptr,
-                  std::size_t bytes,
-                  std::size_t alignment) noexcept
-  {
-    return deallocate_async(ptr, bytes, alignment, stream);
-  }
+  MOCK_METHOD(void*, allocate_sync, (std::size_t, std::size_t));
+  MOCK_METHOD(void, deallocate_sync, (void*, std::size_t, std::size_t), (noexcept));
+  MOCK_METHOD(void*, allocate, (cuda_stream_view, std::size_t, std::size_t));
+  MOCK_METHOD(void, deallocate, (cuda_stream_view, void*, std::size_t, std::size_t), (noexcept));
 
   bool operator==(mock_memory_resource const&) const noexcept { return true; }
   bool operator!=(mock_memory_resource const&) const { return false; }
@@ -100,8 +99,8 @@ auto const fake_address4 = reinterpret_cast<void*>(superblock::minimum_size * 2)
 struct ArenaTest : public ::testing::Test {
   void SetUp() override
   {
-    EXPECT_CALL(mock_mr, allocate(arena_size, ::testing::_)).WillOnce(Return(fake_address3));
-    EXPECT_CALL(mock_mr, deallocate(fake_address3, arena_size, ::testing::_));
+    EXPECT_CALL(mock_mr, allocate_sync(arena_size, ::testing::_)).WillOnce(Return(fake_address3));
+    EXPECT_CALL(mock_mr, deallocate_sync(fake_address3, arena_size, ::testing::_));
 
     global     = std::make_unique<global_arena>(mock_mr, arena_size);
     per_thread = std::make_unique<arena>(*global);
@@ -356,7 +355,7 @@ TEST_F(ArenaTest, GlobalArenaReleaseMergeNext)  // NOLINT
 {
   auto sblk = global->acquire(256);
   global->release(std::move(sblk));
-  auto* ptr = global->allocate(arena_size);
+  auto* ptr = global->allocate_sync(arena_size);
   EXPECT_EQ(ptr, fake_address3);
 }
 
@@ -367,7 +366,7 @@ TEST_F(ArenaTest, GlobalArenaReleaseMergePrevious)  // NOLINT
   global->acquire(512);
   global->release(std::move(sblk));
   global->release(std::move(sb2));
-  auto* ptr = global->allocate(superblock::minimum_size * 2);
+  auto* ptr = global->allocate_sync(superblock::minimum_size * 2);
   EXPECT_EQ(ptr, fake_address3);
 }
 
@@ -379,7 +378,7 @@ TEST_F(ArenaTest, GlobalArenaReleaseMergePreviousAndNext)  // NOLINT
   global->release(std::move(sblk));
   global->release(std::move(sb3));
   global->release(std::move(sb2));
-  auto* ptr = global->allocate(arena_size);
+  auto* ptr = global->allocate_sync(arena_size);
   EXPECT_EQ(ptr, fake_address3);
 }
 
@@ -393,38 +392,38 @@ TEST_F(ArenaTest, GlobalArenaReleaseMultiple)  // NOLINT
   auto sb3 = global->acquire(512);
   superblocks.insert(std::move(sb3));
   global->release(superblocks);
-  auto* ptr = global->allocate(arena_size);
+  auto* ptr = global->allocate_sync(arena_size);
   EXPECT_EQ(ptr, fake_address3);
 }
 
 TEST_F(ArenaTest, GlobalArenaAllocate)  // NOLINT
 {
-  auto* ptr = global->allocate(superblock::minimum_size * 2);
+  auto* ptr = global->allocate_sync(superblock::minimum_size * 2);
   EXPECT_EQ(ptr, fake_address3);
 }
 
 TEST_F(ArenaTest, GlobalArenaAllocateExtraLarge)  // NOLINT
 {
-  EXPECT_EQ(global->allocate(1_PiB), nullptr);
-  EXPECT_EQ(global->allocate(1_PiB), nullptr);
+  EXPECT_EQ(global->allocate_sync(1_PiB), nullptr);
+  EXPECT_EQ(global->allocate_sync(1_PiB), nullptr);
 }
 
 TEST_F(ArenaTest, GlobalArenaDeallocate)  // NOLINT
 {
-  auto* ptr = global->allocate(superblock::minimum_size * 2);
+  auto* ptr = global->allocate_sync(superblock::minimum_size * 2);
   EXPECT_EQ(ptr, fake_address3);
-  global->deallocate_async(ptr, superblock::minimum_size * 2, {});
-  ptr = global->allocate(superblock::minimum_size * 2);
+  global->deallocate_sync(ptr, superblock::minimum_size * 2);
+  ptr = global->allocate_sync(superblock::minimum_size * 2);
   EXPECT_EQ(ptr, fake_address3);
 }
 
 TEST_F(ArenaTest, GlobalArenaDeallocateAlignUp)  // NOLINT
 {
-  auto* ptr  = global->allocate(superblock::minimum_size + 256);
-  auto* ptr2 = global->allocate(superblock::minimum_size + 512);
-  global->deallocate_async(ptr, superblock::minimum_size + 256, {});
-  global->deallocate_async(ptr2, superblock::minimum_size + 512, {});
-  EXPECT_EQ(global->allocate(arena_size), fake_address3);
+  auto* ptr  = global->allocate_sync(superblock::minimum_size + 256);
+  auto* ptr2 = global->allocate_sync(superblock::minimum_size + 512);
+  global->deallocate_sync(ptr, superblock::minimum_size + 256);
+  global->deallocate_sync(ptr2, superblock::minimum_size + 512);
+  EXPECT_EQ(global->allocate_sync(arena_size), fake_address3);
 }
 
 TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena)  // NOLINT
@@ -433,9 +432,9 @@ TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena)  // NOLINT
   auto const blk  = sblk.first_fit(512);
   auto const blk2 = sblk.first_fit(1024);
   global->release(std::move(sblk));
-  global->deallocate(blk.pointer(), blk.size());
-  global->deallocate(blk2.pointer(), blk2.size());
-  EXPECT_EQ(global->allocate(arena_size), fake_address3);
+  global->deallocate_sync(blk.pointer(), blk.size());
+  global->deallocate_sync(blk2.pointer(), blk2.size());
+  EXPECT_EQ(global->allocate_sync(arena_size), fake_address3);
 }
 
 /**
@@ -444,46 +443,46 @@ TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena)  // NOLINT
 
 TEST_F(ArenaTest, ArenaAllocate)  // NOLINT
 {
-  EXPECT_EQ(per_thread->allocate(superblock::minimum_size), fake_address3);
-  EXPECT_EQ(per_thread->allocate(256), fake_address4);
+  EXPECT_EQ(per_thread->allocate_sync(superblock::minimum_size), fake_address3);
+  EXPECT_EQ(per_thread->allocate_sync(256), fake_address4);
 }
 
 TEST_F(ArenaTest, ArenaDeallocate)  // NOLINT
 {
-  auto* ptr = per_thread->allocate(superblock::minimum_size);
-  per_thread->deallocate(ptr, superblock::minimum_size, {});
-  auto* ptr2 = per_thread->allocate(256);
-  per_thread->deallocate(ptr2, 256, {});
-  EXPECT_EQ(per_thread->allocate(superblock::minimum_size), fake_address3);
+  auto* ptr = per_thread->allocate_sync(superblock::minimum_size);
+  per_thread->deallocate_sync(ptr, superblock::minimum_size);
+  auto* ptr2 = per_thread->allocate_sync(256);
+  per_thread->deallocate_sync(ptr2, 256);
+  EXPECT_EQ(per_thread->allocate_sync(superblock::minimum_size), fake_address3);
 }
 
 TEST_F(ArenaTest, ArenaDeallocateMergePrevious)  // NOLINT
 {
-  auto* ptr  = per_thread->allocate(256);
-  auto* ptr2 = per_thread->allocate(256);
-  per_thread->allocate(256);
-  per_thread->deallocate(ptr, 256, {});
-  per_thread->deallocate(ptr2, 256, {});
-  EXPECT_EQ(per_thread->allocate(512), fake_address3);
+  auto* ptr  = per_thread->allocate_sync(256);
+  auto* ptr2 = per_thread->allocate_sync(256);
+  per_thread->allocate_sync(256);
+  per_thread->deallocate_sync(ptr, 256);
+  per_thread->deallocate_sync(ptr2, 256);
+  EXPECT_EQ(per_thread->allocate_sync(512), fake_address3);
 }
 
 TEST_F(ArenaTest, ArenaDeallocateMergeNext)  // NOLINT
 {
-  auto* ptr  = per_thread->allocate(256);
-  auto* ptr2 = per_thread->allocate(256);
-  per_thread->allocate(256);
-  per_thread->deallocate(ptr2, 256, {});
-  per_thread->deallocate(ptr, 256, {});
-  EXPECT_EQ(per_thread->allocate(512), fake_address3);
+  auto* ptr  = per_thread->allocate_sync(256);
+  auto* ptr2 = per_thread->allocate_sync(256);
+  per_thread->allocate_sync(256);
+  per_thread->deallocate_sync(ptr2, 256);
+  per_thread->deallocate_sync(ptr, 256);
+  EXPECT_EQ(per_thread->allocate_sync(512), fake_address3);
 }
 
 TEST_F(ArenaTest, ArenaDeallocateMergePreviousAndNext)  // NOLINT
 {
-  auto* ptr  = per_thread->allocate(256);
-  auto* ptr2 = per_thread->allocate(256);
-  per_thread->deallocate(ptr, 256, {});
-  per_thread->deallocate(ptr2, 256, {});
-  EXPECT_EQ(per_thread->allocate(2_KiB), fake_address3);
+  auto* ptr  = per_thread->allocate_sync(256);
+  auto* ptr2 = per_thread->allocate_sync(256);
+  per_thread->deallocate_sync(ptr, 256);
+  per_thread->deallocate_sync(ptr2, 256);
+  EXPECT_EQ(per_thread->allocate_sync(2_KiB), fake_address3);
 }
 
 TEST_F(ArenaTest, ArenaDefragment)  // NOLINT
@@ -491,14 +490,14 @@ TEST_F(ArenaTest, ArenaDefragment)  // NOLINT
   std::vector<void*> pointers;
   std::size_t num_pointers{4};
   for (std::size_t i = 0; i < num_pointers; i++) {
-    pointers.push_back(per_thread->allocate(superblock::minimum_size));
+    pointers.push_back(per_thread->allocate_sync(superblock::minimum_size));
   }
   for (auto* ptr : pointers) {
-    per_thread->deallocate(ptr, superblock::minimum_size, {});
+    per_thread->deallocate_sync(ptr, superblock::minimum_size);
   }
-  EXPECT_EQ(global->allocate(arena_size), nullptr);
+  EXPECT_EQ(global->allocate_sync(arena_size), nullptr);
   per_thread->defragment();
-  EXPECT_EQ(global->allocate(arena_size), fake_address3);
+  EXPECT_EQ(global->allocate_sync(arena_size), fake_address3);
 }
 
 /**
@@ -531,13 +530,13 @@ TEST_F(ArenaTest, SmallMediumLarge)  // NOLINT
 {
   EXPECT_NO_THROW([]() {  // NOLINT(cppcoreguidelines-avoid-goto)
     arena_mr mr(rmm::mr::get_current_device_resource_ref());
-    auto* small     = mr.allocate(256);
-    auto* medium    = mr.allocate(64_MiB);
+    auto* small     = mr.allocate_sync(256);
+    auto* medium    = mr.allocate_sync(64_MiB);
     auto const free = rmm::available_device_memory().first;
-    auto* large     = mr.allocate(free / 3);
-    mr.deallocate(small, 256);
-    mr.deallocate(medium, 64_MiB);
-    mr.deallocate(large, free / 3);
+    auto* large     = mr.allocate_sync(free / 3);
+    mr.deallocate_sync(small, 256);
+    mr.deallocate_sync(medium, 64_MiB);
+    mr.deallocate_sync(large, free / 3);
   }());
 }
 
@@ -552,16 +551,16 @@ TEST_F(ArenaTest, Defragment)  // NOLINT
     for (std::size_t i = 0; i < num_threads; ++i) {
       threads.emplace_back(std::thread([&] {
         cuda_stream stream{};
-        void* ptr = mr.allocate(32_KiB, stream);
-        mr.deallocate(ptr, 32_KiB, stream);
+        void* ptr = mr.allocate(stream, 32_KiB);
+        mr.deallocate(stream, ptr, 32_KiB);
       }));
     }
     for (auto& thread : threads) {
       thread.join();
     }
 
-    auto* ptr = mr.allocate(arena_size);
-    mr.deallocate(ptr, arena_size);
+    auto* ptr = mr.allocate_sync(arena_size);
+    mr.deallocate_sync(ptr, arena_size);
   }());
 }
 
@@ -574,26 +573,26 @@ TEST_F(ArenaTest, PerThreadToStreamDealloc)  // NOLINT
   auto const arena_size = superblock::minimum_size * 2;
   arena_mr mr(rmm::mr::get_current_device_resource_ref(), arena_size);
   // Create an allocation from a per thread arena
-  void* thread_ptr = mr.allocate(256, rmm::cuda_stream_per_thread);
+  void* thread_ptr = mr.allocate(rmm::cuda_stream_per_thread, 256);
   // Create an allocation in a stream arena to force global arena
   // to be empty
   cuda_stream stream{};
-  void* ptr = mr.allocate(32_KiB, stream);
-  mr.deallocate(ptr, 32_KiB, stream);
+  void* ptr = mr.allocate(stream, 32_KiB);
+  mr.deallocate(stream, ptr, 32_KiB);
   // at this point the global arena doesn't have any superblocks so
   // the next allocation causes defrag. Defrag causes all superblocks
   // from the thread and stream arena allocated above to go back to
   // global arena and it allocates one superblock to the stream arena.
-  auto* ptr1 = mr.allocate(superblock::minimum_size, rmm::cuda_stream_view{});
+  auto* ptr1 = mr.allocate(rmm::cuda_stream_view{}, superblock::minimum_size);
   // Allocate again to make sure all superblocks from
   // global arena are owned by a stream arena instead of a thread arena
   // or the global arena.
-  auto* ptr2 = mr.allocate(32_KiB, rmm::cuda_stream_view{});
+  auto* ptr2 = mr.allocate(rmm::cuda_stream_view{}, 32_KiB);
   // The original thread ptr is now owned by a stream arena so make
   // sure deallocation works.
-  mr.deallocate(thread_ptr, 256, rmm::cuda_stream_per_thread);
-  mr.deallocate(ptr1, superblock::minimum_size, rmm::cuda_stream_view{});
-  mr.deallocate(ptr2, 32_KiB, rmm::cuda_stream_view{});
+  mr.deallocate(rmm::cuda_stream_per_thread, thread_ptr, 256);
+  mr.deallocate(rmm::cuda_stream_view{}, ptr1, superblock::minimum_size);
+  mr.deallocate(rmm::cuda_stream_view{}, ptr2, 32_KiB);
 }
 
 TEST_F(ArenaTest, DumpLogOnFailure)  // NOLINT
@@ -606,8 +605,8 @@ TEST_F(ArenaTest, DumpLogOnFailure)  // NOLINT
     threads.reserve(num_threads);
     for (std::size_t i = 0; i < num_threads; ++i) {
       threads.emplace_back([&] {
-        void* ptr = mr.allocate(32_KiB);
-        mr.deallocate(ptr, 32_KiB);
+        void* ptr = mr.allocate_sync(32_KiB);
+        mr.deallocate_sync(ptr, 32_KiB);
       });
     }
 
@@ -617,7 +616,7 @@ TEST_F(ArenaTest, DumpLogOnFailure)  // NOLINT
   }
 
   // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto)
-  EXPECT_THROW(mr.allocate(8_MiB), rmm::out_of_memory);
+  EXPECT_THROW(mr.allocate_sync(8_MiB), rmm::out_of_memory);
 
   struct stat file_status{};
   EXPECT_EQ(stat("rmm_arena_memory_dump.log", &file_status), 0);
diff --git a/cpp/tests/mr/device/binning_mr_tests.cpp b/cpp/tests/mr/device/binning_mr_tests.cpp
index 409b4a3d9..cc50de469 100644
--- a/cpp/tests/mr/device/binning_mr_tests.cpp
+++ b/cpp/tests/mr/device/binning_mr_tests.cpp
@@ -39,9 +39,9 @@ TEST(BinningTest, ExplicitBinMR)
   cuda_mr cuda{};
   binning_mr mr{&cuda};
   mr.add_bin(1024, &cuda);
-  auto* ptr = mr.allocate(512);
+  auto* ptr = mr.allocate_sync(512);
   EXPECT_NE(ptr, nullptr);
-  mr.deallocate(ptr, 512);
+  mr.deallocate_sync(ptr, 512);
 }
 
 }  // namespace rmm::test
diff --git a/cpp/tests/mr/device/callback_mr_tests.cpp b/cpp/tests/mr/device/callback_mr_tests.cpp
index eae3d6789..bf1cddbde 100644
--- a/cpp/tests/mr/device/callback_mr_tests.cpp
+++ b/cpp/tests/mr/device/callback_mr_tests.cpp
@@ -43,17 +43,17 @@ TEST(CallbackTest, TestCallbacksAreInvoked)
 
   auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) {
     auto base_mr = *static_cast<rmm::device_async_resource_ref*>(arg);
-    return base_mr.allocate_async(size, stream);
+    return base_mr.allocate(stream, size);
   };
   auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) {
     auto base_mr = *static_cast<rmm::device_async_resource_ref*>(arg);
-    base_mr.deallocate_async(ptr, size, stream);
+    base_mr.deallocate(stream, ptr, size);
   };
   auto mr =
     rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_ref, &base_ref);
   auto const size = std::size_t{10_MiB};
-  auto* ptr       = mr.allocate(size);
-  mr.deallocate(ptr, size);
+  auto* ptr       = mr.allocate_sync(size);
+  mr.deallocate_sync(ptr, size);
 }
 
 TEST(CallbackTest, LoggingTest)
@@ -64,19 +64,19 @@ TEST(CallbackTest, LoggingTest)
   auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) {
     std::cout << "Allocating " << size << " bytes" << std::endl;
     auto base_mr = *static_cast<rmm::device_async_resource_ref*>(arg);
-    return base_mr.allocate_async(size, stream);
+    return base_mr.allocate(stream, size);
   };
 
   auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) {
     std::cout << "Deallocating " << size << " bytes" << std::endl;
     auto base_mr = *static_cast<rmm::device_async_resource_ref*>(arg);
-    base_mr.deallocate_async(ptr, size, stream);
+    base_mr.deallocate(stream, ptr, size);
   };
   auto mr =
     rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_mr, &base_mr);
   auto const size = std::size_t{10_MiB};
-  auto* ptr       = mr.allocate(size);
-  mr.deallocate(ptr, size);
+  auto* ptr       = mr.allocate_sync(size);
+  mr.deallocate_sync(ptr, size);
 
   auto output = testing::internal::GetCapturedStdout();
   auto expect = std::string("Allocating ") + std::to_string(size) + " bytes\nDeallocating " +
diff --git a/cpp/tests/mr/device/cuda_async_managed_mr_tests.cpp b/cpp/tests/mr/device/cuda_async_managed_mr_tests.cpp
index 1b5e43999..193f45e4e 100644
--- a/cpp/tests/mr/device/cuda_async_managed_mr_tests.cpp
+++ b/cpp/tests/mr/device/cuda_async_managed_mr_tests.cpp
@@ -43,9 +43,9 @@ TEST_F(AsyncManagedMRTest, BasicAllocateDeallocate)
 {
   const auto alloc_size{100};
   cuda_async_managed_mr mr{};
-  void* ptr = mr.allocate(alloc_size);
+  void* ptr = mr.allocate_sync(alloc_size);
   ASSERT_NE(nullptr, ptr);
-  mr.deallocate(ptr, alloc_size);
+  mr.deallocate_sync(ptr, alloc_size);
   RMM_CUDA_TRY(cudaDeviceSynchronize());
 }
 
@@ -62,7 +62,7 @@ TEST_F(AsyncManagedMRTest, AllocatedPointerIsManaged)
 {
   const auto alloc_size{1024};
   cuda_async_managed_mr mr{};
-  void* ptr = mr.allocate(alloc_size);
+  void* ptr = mr.allocate_sync(alloc_size);
   ASSERT_NE(nullptr, ptr);
 
   // Verify the pointer is managed memory using cudaPointerGetAttributes
@@ -70,7 +70,7 @@ TEST_F(AsyncManagedMRTest, AllocatedPointerIsManaged)
   RMM_CUDA_TRY(cudaPointerGetAttributes(&attrs, ptr));
   EXPECT_EQ(attrs.type, cudaMemoryTypeManaged);
 
-  mr.deallocate(ptr, alloc_size);
+  mr.deallocate_sync(ptr, alloc_size);
   RMM_CUDA_TRY(cudaDeviceSynchronize());
 }
 
@@ -78,7 +78,7 @@ TEST_F(AsyncManagedMRTest, AllocatedPointerIsAccessibleFromHost)
 {
   const auto alloc_size{sizeof(int) * 100};
   cuda_async_managed_mr mr{};
-  auto* ptr = static_cast<int*>(mr.allocate(alloc_size));
+  auto* ptr = static_cast<int*>(mr.allocate_sync(alloc_size));
   ASSERT_NE(nullptr, ptr);
 
   // Synchronize to ensure allocation is complete
@@ -97,7 +97,7 @@ TEST_F(AsyncManagedMRTest, AllocatedPointerIsAccessibleFromHost)
   EXPECT_EQ(ptr[50], 50);
   EXPECT_EQ(ptr[99], 99);
 
-  mr.deallocate(ptr, alloc_size);
+  mr.deallocate_sync(ptr, alloc_size);
   RMM_CUDA_TRY(cudaDeviceSynchronize());
 }
 
@@ -106,9 +106,9 @@ TEST_F(AsyncManagedMRTest, MultipleAllocationsAreManaged)
   const auto alloc_size{512};
   cuda_async_managed_mr mr{};
 
-  void* ptr1 = mr.allocate(alloc_size);
-  void* ptr2 = mr.allocate(alloc_size * 2);
-  void* ptr3 = mr.allocate(alloc_size / 2);
+  void* ptr1 = mr.allocate_sync(alloc_size);
+  void* ptr2 = mr.allocate_sync(alloc_size * 2);
+  void* ptr3 = mr.allocate_sync(alloc_size / 2);
 
   ASSERT_NE(nullptr, ptr1);
   ASSERT_NE(nullptr, ptr2);
@@ -127,9 +127,9 @@ TEST_F(AsyncManagedMRTest, MultipleAllocationsAreManaged)
   EXPECT_EQ(attrs2.type, cudaMemoryTypeManaged);
   EXPECT_EQ(attrs3.type, cudaMemoryTypeManaged);
 
-  mr.deallocate(ptr1, alloc_size);
-  mr.deallocate(ptr2, alloc_size * 2);
-  mr.deallocate(ptr3, alloc_size / 2);
+  mr.deallocate_sync(ptr1, alloc_size);
+  mr.deallocate_sync(ptr2, alloc_size * 2);
+  mr.deallocate_sync(ptr3, alloc_size / 2);
   RMM_CUDA_TRY(cudaDeviceSynchronize());
 }
 
diff --git a/cpp/tests/mr/device/cuda_async_mr_tests.cpp b/cpp/tests/mr/device/cuda_async_mr_tests.cpp
index 0c3db3f69..41e4adada 100644
--- a/cpp/tests/mr/device/cuda_async_mr_tests.cpp
+++ b/cpp/tests/mr/device/cuda_async_mr_tests.cpp
@@ -46,8 +46,8 @@ TEST_F(AsyncMRTest, ExplicitInitialPoolSize)
 {
   const auto pool_init_size{100};
   cuda_async_mr mr{pool_init_size};
-  void* ptr = mr.allocate(pool_init_size);
-  mr.deallocate(ptr, pool_init_size);
+  void* ptr = mr.allocate_sync(pool_init_size);
+  mr.deallocate_sync(ptr, pool_init_size);
   RMM_CUDA_TRY(cudaDeviceSynchronize());
 }
 
@@ -56,8 +56,8 @@ TEST_F(AsyncMRTest, ExplicitReleaseThreshold)
   const auto pool_init_size{100};
   const auto pool_release_threshold{1000};
   cuda_async_mr mr{pool_init_size, pool_release_threshold};
-  void* ptr = mr.allocate(pool_init_size);
-  mr.deallocate(ptr, pool_init_size);
+  void* ptr = mr.allocate_sync(pool_init_size);
+  mr.deallocate_sync(ptr, pool_init_size);
   RMM_CUDA_TRY(cudaDeviceSynchronize());
 }
 
@@ -90,8 +90,8 @@ TEST_F(AsyncMRFabricTest, FabricHandlesSupport)
   cuda_async_mr mr{pool_init_size,
                    pool_release_threshold,
                    rmm::mr::cuda_async_memory_resource::allocation_handle_type::fabric};
-  void* ptr = mr.allocate(pool_init_size);
-  mr.deallocate(ptr, pool_init_size);
+  void* ptr = mr.allocate_sync(pool_init_size);
+  mr.deallocate_sync(ptr, pool_init_size);
   RMM_CUDA_TRY(cudaDeviceSynchronize());
 }
 
diff --git a/cpp/tests/mr/device/cuda_async_view_mr_tests.cpp b/cpp/tests/mr/device/cuda_async_view_mr_tests.cpp
index 48181edfe..f8885d59f 100644
--- a/cpp/tests/mr/device/cuda_async_view_mr_tests.cpp
+++ b/cpp/tests/mr/device/cuda_async_view_mr_tests.cpp
@@ -40,7 +40,7 @@ TEST(PoolTest, UsePool)
   const auto pool_init_size{100};
   cuda_async_view_mr mr{memPool};
   void* ptr = mr.allocate(pool_init_size);
-  mr.deallocate(ptr, pool_init_size);
+  mr.deallocate_sync(ptr, pool_init_size);
   RMM_CUDA_TRY(cudaDeviceSynchronize());
 }
 
@@ -59,7 +59,7 @@ TEST(PoolTest, NotTakingOwnershipOfPool)
     const auto pool_init_size{100};
     cuda_async_view_mr mr{memPool};
     void* ptr = mr.allocate(pool_init_size);
-    mr.deallocate(ptr, pool_init_size);
+    mr.deallocate_sync(ptr, pool_init_size);
     RMM_CUDA_TRY(cudaDeviceSynchronize());
   }
 
diff --git a/cpp/tests/mr/device/failure_callback_mr_tests.cpp b/cpp/tests/mr/device/failure_callback_mr_tests.cpp
index 49a5f7522..c32cf6791 100644
--- a/cpp/tests/mr/device/failure_callback_mr_tests.cpp
+++ b/cpp/tests/mr/device/failure_callback_mr_tests.cpp
@@ -49,7 +49,7 @@ TEST(FailureCallbackTest, RetryAllocationOnce)
   failure_callback_adaptor<> mr{
     rmm::mr::get_current_device_resource_ref(), failure_handler, &retried};
   EXPECT_EQ(retried, false);
-  EXPECT_THROW(mr.allocate(512_GiB), std::bad_alloc);
+  EXPECT_THROW(mr.allocate_sync(512_GiB), std::bad_alloc);
   EXPECT_EQ(retried, true);
 }
 
@@ -68,8 +68,8 @@ TEST(FailureCallbackTest, DifferentExceptionTypes)
   always_throw_memory_resource<rmm::bad_alloc> bad_alloc_mr;
   always_throw_memory_resource<rmm::out_of_memory> oom_mr;
 
-  EXPECT_THROW(bad_alloc_mr.allocate(1_MiB), rmm::bad_alloc);
-  EXPECT_THROW(oom_mr.allocate(1_MiB), rmm::out_of_memory);
+  EXPECT_THROW(bad_alloc_mr.allocate_sync(1_MiB), rmm::bad_alloc);
+  EXPECT_THROW(oom_mr.allocate_sync(1_MiB), rmm::out_of_memory);
 
   // Wrap a bad_alloc-catching callback adaptor around an MR that always throws bad_alloc:
   // Should retry once and then re-throw bad_alloc
@@ -79,7 +79,7 @@ TEST(FailureCallbackTest, DifferentExceptionTypes)
       &bad_alloc_mr, failure_handler, &retried};
 
     EXPECT_EQ(retried, false);
-    EXPECT_THROW(bad_alloc_callback_mr.allocate(1_MiB), rmm::bad_alloc);
+    EXPECT_THROW(bad_alloc_callback_mr.allocate_sync(1_MiB), rmm::bad_alloc);
     EXPECT_EQ(retried, true);
   }
 
@@ -91,7 +91,7 @@ TEST(FailureCallbackTest, DifferentExceptionTypes)
     failure_callback_adaptor<rmm::out_of_memory> oom_callback_mr{
       &oom_mr, failure_handler, &retried};
     EXPECT_EQ(retried, false);
-    EXPECT_THROW(oom_callback_mr.allocate(1_MiB), rmm::out_of_memory);
+    EXPECT_THROW(oom_callback_mr.allocate_sync(1_MiB), rmm::out_of_memory);
     EXPECT_EQ(retried, true);
   }
 
@@ -103,7 +103,7 @@ TEST(FailureCallbackTest, DifferentExceptionTypes)
     failure_callback_adaptor<rmm::out_of_memory> oom_callback_mr{
       &bad_alloc_mr, failure_handler, &retried};
     EXPECT_EQ(retried, false);
-    EXPECT_THROW(oom_callback_mr.allocate(1_MiB), rmm::bad_alloc);  // bad_alloc passes through
+    EXPECT_THROW(oom_callback_mr.allocate_sync(1_MiB), rmm::bad_alloc);  // bad_alloc passes through
     EXPECT_EQ(retried, false);  // Does not catch / retry on anything except OOM
   }
 }
diff --git a/cpp/tests/mr/device/hwdecompress_tests.cpp b/cpp/tests/mr/device/hwdecompress_tests.cpp
index 9443dc345..77b9ca893 100644
--- a/cpp/tests/mr/device/hwdecompress_tests.cpp
+++ b/cpp/tests/mr/device/hwdecompress_tests.cpp
@@ -56,9 +56,9 @@ TEST_F(HWDecompressTest, CudaMalloc)
 {
   const auto allocation_size{100};
   rmm::mr::cuda_memory_resource mr{};
-  void* ptr = mr.allocate(allocation_size);
+  void* ptr = mr.allocate_sync(allocation_size);
   HWDecompressTest::check_decompress_capable(ptr);
-  mr.deallocate(ptr, allocation_size);
+  mr.deallocate_sync(ptr, allocation_size);
   RMM_CUDA_TRY(cudaDeviceSynchronize());
 }
 
@@ -70,9 +70,9 @@ TEST_F(HWDecompressTest, CudaMallocAsync)
   }
   const auto pool_init_size{100};
   rmm::mr::cuda_async_memory_resource mr{pool_init_size};
-  void* ptr = mr.allocate(pool_init_size);
+  void* ptr = mr.allocate_sync(pool_init_size);
   HWDecompressTest::check_decompress_capable(ptr);
-  mr.deallocate(ptr, pool_init_size);
+  mr.deallocate_sync(ptr, pool_init_size);
   RMM_CUDA_TRY(cudaDeviceSynchronize());
 }
 
diff --git a/cpp/tests/mr/device/limiting_mr_tests.cpp b/cpp/tests/mr/device/limiting_mr_tests.cpp
index d90da009b..ee72ba466 100644
--- a/cpp/tests/mr/device/limiting_mr_tests.cpp
+++ b/cpp/tests/mr/device/limiting_mr_tests.cpp
@@ -37,7 +37,7 @@ TEST(LimitingTest, TooBig)
 {
   auto const max_size{5_MiB};
   limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size};
-  EXPECT_THROW(mr.allocate(max_size + 1), rmm::out_of_memory);
+  EXPECT_THROW(mr.allocate_sync(max_size + 1), rmm::out_of_memory);
 }
 
 TEST(LimitingTest, UpstreamFailure)
@@ -46,7 +46,7 @@ TEST(LimitingTest, UpstreamFailure)
   auto const max_size_2{5_MiB};
   limiting_adaptor mr1{rmm::mr::get_current_device_resource_ref(), max_size_1};
   limiting_adaptor mr2{&mr1, max_size_2};
-  EXPECT_THROW(mr2.allocate(4_MiB), rmm::out_of_memory);
+  EXPECT_THROW(mr2.allocate_sync(4_MiB), rmm::out_of_memory);
 }
 
 TEST(LimitingTest, UnderLimitDueToFrees)
@@ -54,27 +54,27 @@ TEST(LimitingTest, UnderLimitDueToFrees)
   auto const max_size{10_MiB};
   limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size};
   auto const size1{4_MiB};
-  auto* ptr1           = mr.allocate(size1);
+  auto* ptr1           = mr.allocate_sync(size1);
   auto allocated_bytes = size1;
   EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes);
   EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes);
-  auto* ptr2 = mr.allocate(size1);
+  auto* ptr2 = mr.allocate_sync(size1);
   allocated_bytes += size1;
   EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes);
   EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes);
-  mr.deallocate(ptr1, size1);
+  mr.deallocate_sync(ptr1, size1);
   allocated_bytes -= size1;
   EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes);
   EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes);
   // note that we don't keep track of fragmentation or things like page size
   // so this should fill 100% of the memory even though it is probably over.
   auto const size2{6_MiB};
-  auto* ptr3 = mr.allocate(size2);
+  auto* ptr3 = mr.allocate_sync(size2);
   allocated_bytes += size2;
   EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes);
   EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 0);
-  mr.deallocate(ptr2, size1);
-  mr.deallocate(ptr3, size2);
+  mr.deallocate_sync(ptr2, size1);
+  mr.deallocate_sync(ptr3, size2);
 }
 
 TEST(LimitingTest, OverLimit)
@@ -82,20 +82,20 @@ TEST(LimitingTest, OverLimit)
   auto const max_size{10_MiB};
   limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size};
   auto const size1{4_MiB};
-  auto* ptr1           = mr.allocate(size1);
+  auto* ptr1           = mr.allocate_sync(size1);
   auto allocated_bytes = size1;
   EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes);
   EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes);
-  auto* ptr2 = mr.allocate(size1);
+  auto* ptr2 = mr.allocate_sync(size1);
   allocated_bytes += size1;
   EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes);
   EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes);
   auto const size2{3_MiB};
-  EXPECT_THROW(mr.allocate(size2), rmm::out_of_memory);
+  EXPECT_THROW(mr.allocate_sync(size2), rmm::out_of_memory);
   EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes);
   EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes);
-  mr.deallocate(ptr1, 4_MiB);
-  mr.deallocate(ptr2, 4_MiB);
+  mr.deallocate_sync(ptr1, 4_MiB);
+  mr.deallocate_sync(ptr2, 4_MiB);
 }
 
 }  // namespace
diff --git a/cpp/tests/mr/device/mr_ref_multithreaded_tests.cpp b/cpp/tests/mr/device/mr_ref_multithreaded_tests.cpp
index 47961f4fb..fe515417d 100644
--- a/cpp/tests/mr/device/mr_ref_multithreaded_tests.cpp
+++ b/cpp/tests/mr/device/mr_ref_multithreaded_tests.cpp
@@ -214,7 +214,7 @@ TEST_P(mr_ref_test_mt, MixedRandomAllocationFreeStream)
   spawn(test_mixed_random_async_allocation_free, this->ref, default_max_size, this->stream.view());
 }
 
-void allocate_async_loop(rmm::device_async_resource_ref ref,
+void async_allocate_loop(rmm::device_async_resource_ref ref,
                          std::size_t num_allocations,
                          std::list<allocation>& allocations,
                          std::mutex& mtx,
@@ -229,7 +229,7 @@ void allocate_async_loop(rmm::device_async_resource_ref ref,
 
   for (std::size_t i = 0; i < num_allocations; ++i) {
     std::size_t size = size_distribution(generator);
-    void* ptr        = ref.allocate_async(size, stream);
+    void* ptr        = ref.allocate(stream, size);
     {
       std::lock_guard<std::mutex> lock(mtx);
       RMM_CUDA_TRY(cudaEventRecord(event, stream.value()));
@@ -242,7 +242,7 @@ void allocate_async_loop(rmm::device_async_resource_ref ref,
   cudaEventSynchronize(event);
 }
 
-void deallocate_async_loop(rmm::device_async_resource_ref ref,
+void async_deallocate_loop(rmm::device_async_resource_ref ref,
                            std::size_t num_allocations,
                            std::list<allocation>& allocations,
                            std::mutex& mtx,
@@ -256,14 +256,14 @@ void deallocate_async_loop(rmm::device_async_resource_ref ref,
     RMM_CUDA_TRY(cudaStreamWaitEvent(stream.value(), event));
     allocation alloc = allocations.front();
     allocations.pop_front();
-    ref.deallocate_async(alloc.ptr, alloc.size, stream);
+    ref.deallocate(stream, alloc.ptr, alloc.size);
   }
 
   // Work around for threads going away before cudaEvent has finished async processing
   cudaEventSynchronize(event);
 }
 
-void test_allocate_async_free_different_threads(rmm::device_async_resource_ref ref,
+void test_async_allocate_free_different_threads(rmm::device_async_resource_ref ref,
                                                 rmm::cuda_stream_view streamA,
                                                 rmm::cuda_stream_view streamB)
 {
@@ -276,7 +276,7 @@ void test_allocate_async_free_different_threads(rmm::device_async_resource_ref r
 
   RMM_CUDA_TRY(cudaEventCreate(&event));
 
-  std::thread producer(allocate_async_loop,
+  std::thread producer(async_allocate_loop,
                        ref,
                        num_allocations,
                        std::ref(allocations),
@@ -285,7 +285,7 @@ void test_allocate_async_free_different_threads(rmm::device_async_resource_ref r
                        std::ref(event),
                        streamA);
 
-  std::thread consumer(deallocate_async_loop,
+  std::thread consumer(async_deallocate_loop,
                        ref,
                        num_allocations,
                        std::ref(allocations),
@@ -302,25 +302,25 @@ void test_allocate_async_free_different_threads(rmm::device_async_resource_ref r
 
 TEST_P(mr_ref_test_mt, AllocFreeDifferentThreadsDefaultStream)
 {
-  test_allocate_async_free_different_threads(
+  test_async_allocate_free_different_threads(
     this->ref, rmm::cuda_stream_default, rmm::cuda_stream_default);
 }
 
 TEST_P(mr_ref_test_mt, AllocFreeDifferentThreadsPerThreadDefaultStream)
 {
-  test_allocate_async_free_different_threads(
+  test_async_allocate_free_different_threads(
     this->ref, rmm::cuda_stream_per_thread, rmm::cuda_stream_per_thread);
 }
 
 TEST_P(mr_ref_test_mt, AllocFreeDifferentThreadsSameStream)
 {
-  test_allocate_async_free_different_threads(this->ref, this->stream, this->stream);
+  test_async_allocate_free_different_threads(this->ref, this->stream, this->stream);
 }
 
 TEST_P(mr_ref_test_mt, AllocFreeDifferentThreadsDifferentStream)
 {
   rmm::cuda_stream streamB;
-  test_allocate_async_free_different_threads(this->ref, this->stream, streamB);
+  test_async_allocate_free_different_threads(this->ref, this->stream, streamB);
   streamB.synchronize();
 }
 
diff --git a/cpp/tests/mr/device/mr_ref_test.hpp b/cpp/tests/mr/device/mr_ref_test.hpp
index aaffb8ad0..6d2f09f02 100644
--- a/cpp/tests/mr/device/mr_ref_test.hpp
+++ b/cpp/tests/mr/device/mr_ref_test.hpp
@@ -105,46 +105,46 @@ struct allocation {
 inline void test_get_current_device_resource()
 {
   EXPECT_NE(nullptr, rmm::mr::get_current_device_resource());
-  void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB);
+  void* ptr = rmm::mr::get_current_device_resource()->allocate_sync(1_MiB);
   EXPECT_NE(nullptr, ptr);
   EXPECT_TRUE(is_properly_aligned(ptr));
   EXPECT_TRUE(is_device_accessible_memory(ptr));
-  rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB);
+  rmm::mr::get_current_device_resource()->deallocate_sync(ptr, 1_MiB);
 }
 
 inline void test_get_current_device_resource_ref()
 {
-  void* ptr = rmm::mr::get_current_device_resource_ref().allocate(1_MiB);
+  void* ptr = rmm::mr::get_current_device_resource_ref().allocate_sync(1_MiB);
   EXPECT_NE(nullptr, ptr);
   EXPECT_TRUE(is_properly_aligned(ptr));
   EXPECT_TRUE(is_device_accessible_memory(ptr));
-  rmm::mr::get_current_device_resource_ref().deallocate(ptr, 1_MiB);
+  rmm::mr::get_current_device_resource_ref().deallocate_sync(ptr, 1_MiB);
 }
 
 inline void test_allocate(resource_ref ref, std::size_t bytes)
 {
   try {
-    void* ptr = ref.allocate(bytes);
+    void* ptr = ref.allocate_sync(bytes);
     EXPECT_NE(nullptr, ptr);
     EXPECT_TRUE(is_properly_aligned(ptr));
     EXPECT_TRUE(is_device_accessible_memory(ptr));
-    ref.deallocate(ptr, bytes);
+    ref.deallocate_sync(ptr, bytes);
   } catch (rmm::out_of_memory const& e) {
     EXPECT_NE(std::string{e.what()}.find("out_of_memory"), std::string::npos);
   }
 }
 
-inline void test_allocate_async(rmm::device_async_resource_ref ref,
+inline void test_async_allocate(rmm::device_async_resource_ref ref,
                                 std::size_t bytes,
                                 cuda_stream_view stream = {})
 {
   try {
-    void* ptr = ref.allocate_async(bytes, stream);
+    void* ptr = ref.allocate(stream, bytes);
     if (not stream.is_default()) { stream.synchronize(); }
     EXPECT_NE(nullptr, ptr);
     EXPECT_TRUE(is_properly_aligned(ptr));
     EXPECT_TRUE(is_device_accessible_memory(ptr));
-    ref.deallocate_async(ptr, bytes, stream);
+    ref.deallocate(stream, ptr, bytes);
     if (not stream.is_default()) { stream.synchronize(); }
   } catch (rmm::out_of_memory const& e) {
     EXPECT_NE(std::string{e.what()}.find("out_of_memory"), std::string::npos);
@@ -155,34 +155,34 @@ inline void test_allocate_async(rmm::device_async_resource_ref ref,
 inline void concurrent_allocations_are_different(resource_ref ref)
 {
   const auto size{8_B};
-  void* ptr1 = ref.allocate(size);
-  void* ptr2 = ref.allocate(size);
+  void* ptr1 = ref.allocate_sync(size);
+  void* ptr2 = ref.allocate_sync(size);
 
   EXPECT_NE(ptr1, ptr2);
 
-  ref.deallocate(ptr1, size);
-  ref.deallocate(ptr2, size);
+  ref.deallocate_sync(ptr1, size);
+  ref.deallocate_sync(ptr2, size);
 }
 
 inline void concurrent_async_allocations_are_different(rmm::device_async_resource_ref ref,
                                                        cuda_stream_view stream)
 {
   const auto size{8_B};
-  void* ptr1 = ref.allocate_async(size, stream);
-  void* ptr2 = ref.allocate_async(size, stream);
+  void* ptr1 = ref.allocate(stream, size);
+  void* ptr2 = ref.allocate(stream, size);
 
   EXPECT_NE(ptr1, ptr2);
 
-  ref.deallocate_async(ptr1, size, stream);
-  ref.deallocate_async(ptr2, size, stream);
+  ref.deallocate(stream, ptr1, size);
+  ref.deallocate(stream, ptr2, size);
 }
 
 inline void test_various_allocations(resource_ref ref)
 {
   // test allocating zero bytes on non-default stream
   {
-    void* ptr = ref.allocate(0);
-    EXPECT_NO_THROW(ref.deallocate(ptr, 0));
+    void* ptr = ref.allocate_sync(0);
+    EXPECT_NO_THROW(ref.deallocate_sync(ptr, 0));
   }
 
   test_allocate(ref, 4_B);
@@ -193,12 +193,12 @@ inline void test_various_allocations(resource_ref ref)
   // should fail to allocate too much
   {
     void* ptr{nullptr};
-    EXPECT_THROW(ptr = ref.allocate(1_PiB), rmm::out_of_memory);
+    EXPECT_THROW(ptr = ref.allocate_sync(1_PiB), rmm::out_of_memory);
     EXPECT_EQ(nullptr, ptr);
 
     // test e.what();
     try {
-      ptr = ref.allocate(1_PiB);
+      ptr = ref.allocate_sync(1_PiB);
     } catch (rmm::out_of_memory const& e) {
       EXPECT_NE(std::string{e.what()}.find("out_of_memory"), std::string::npos);
     }
@@ -210,26 +210,26 @@ inline void test_various_async_allocations(rmm::device_async_resource_ref ref,
 {
   // test allocating zero bytes on non-default stream
   {
-    void* ptr = ref.allocate_async(0, stream);
+    void* ptr = ref.allocate(stream, 0);
     stream.synchronize();
-    EXPECT_NO_THROW(ref.deallocate_async(ptr, 0, stream));
+    EXPECT_NO_THROW(ref.deallocate(stream, ptr, 0));
     stream.synchronize();
   }
 
-  test_allocate_async(ref, 4_B, stream);
-  test_allocate_async(ref, 1_KiB, stream);
-  test_allocate_async(ref, 1_MiB, stream);
-  test_allocate_async(ref, 1_GiB, stream);
+  test_async_allocate(ref, 4_B, stream);
+  test_async_allocate(ref, 1_KiB, stream);
+  test_async_allocate(ref, 1_MiB, stream);
+  test_async_allocate(ref, 1_GiB, stream);
 
   // should fail to allocate too much
   {
     void* ptr{nullptr};
-    EXPECT_THROW(ptr = ref.allocate_async(1_PiB, stream), rmm::out_of_memory);
+    EXPECT_THROW(ptr = ref.allocate(stream, 1_PiB), rmm::out_of_memory);
     EXPECT_EQ(nullptr, ptr);
 
     // test e.what();
     try {
-      ptr = ref.allocate_async(1_PiB, stream);
+      ptr = ref.allocate(stream, 1_PiB);
     } catch (rmm::out_of_memory const& e) {
       EXPECT_NE(std::string{e.what()}.find("out_of_memory"), std::string::npos);
     }
@@ -249,13 +249,13 @@ inline void test_random_allocations(resource_ref ref,
   std::for_each(
     allocations.begin(), allocations.end(), [&generator, &distribution, &ref](allocation& alloc) {
       alloc.size = distribution(generator);
-      EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size));
+      EXPECT_NO_THROW(alloc.ptr = ref.allocate_sync(alloc.size));
       EXPECT_NE(nullptr, alloc.ptr);
       EXPECT_TRUE(is_properly_aligned(alloc.ptr));
     });
 
   std::for_each(allocations.begin(), allocations.end(), [&ref](allocation& alloc) {
-    EXPECT_NO_THROW(ref.deallocate(alloc.ptr, alloc.size));
+    EXPECT_NO_THROW(ref.deallocate_sync(alloc.ptr, alloc.size));
   });
 }
 
@@ -274,14 +274,14 @@ inline void test_random_async_allocations(rmm::device_async_resource_ref ref,
                 allocations.end(),
                 [&generator, &distribution, &ref, stream](allocation& alloc) {
                   alloc.size = distribution(generator);
-                  EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size));
+                  EXPECT_NO_THROW(alloc.ptr = ref.allocate_sync(alloc.size));
                   if (not stream.is_default()) { stream.synchronize(); }
                   EXPECT_NE(nullptr, alloc.ptr);
                   EXPECT_TRUE(is_properly_aligned(alloc.ptr));
                 });
 
   std::for_each(allocations.begin(), allocations.end(), [stream, &ref](allocation& alloc) {
-    EXPECT_NO_THROW(ref.deallocate(alloc.ptr, alloc.size));
+    EXPECT_NO_THROW(ref.deallocate_sync(alloc.ptr, alloc.size));
     if (not stream.is_default()) { stream.synchronize(); }
   });
 }
@@ -315,7 +315,7 @@ inline void test_mixed_random_allocation_free(resource_ref ref,
       std::size_t size = size_distribution(generator);
       active_allocations++;
       allocation_count++;
-      EXPECT_NO_THROW(allocations.emplace_back(ref.allocate(size), size));
+      EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_sync(size), size));
       auto new_allocation = allocations.back();
       EXPECT_NE(nullptr, new_allocation.ptr);
       EXPECT_TRUE(is_properly_aligned(new_allocation.ptr));
@@ -324,7 +324,7 @@ inline void test_mixed_random_allocation_free(resource_ref ref,
       active_allocations--;
       allocation to_free = allocations[index];
       allocations.erase(std::next(allocations.begin(), index));
-      EXPECT_NO_THROW(ref.deallocate(to_free.ptr, to_free.size));
+      EXPECT_NO_THROW(ref.deallocate_sync(to_free.ptr, to_free.size));
     }
   }
 
@@ -362,7 +362,7 @@ inline void test_mixed_random_async_allocation_free(rmm::device_async_resource_r
       std::size_t size = size_distribution(generator);
       active_allocations++;
       allocation_count++;
-      EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_async(size, stream), size));
+      EXPECT_NO_THROW(allocations.emplace_back(ref.allocate(stream, size), size));
       auto new_allocation = allocations.back();
       EXPECT_NE(nullptr, new_allocation.ptr);
       EXPECT_TRUE(is_properly_aligned(new_allocation.ptr));
@@ -371,7 +371,7 @@ inline void test_mixed_random_async_allocation_free(rmm::device_async_resource_r
       active_allocations--;
       allocation to_free = allocations[index];
       allocations.erase(std::next(allocations.begin(), index));
-      EXPECT_NO_THROW(ref.deallocate_async(to_free.ptr, to_free.size, stream));
+      EXPECT_NO_THROW(ref.deallocate(stream, to_free.ptr, to_free.size));
     }
   }
 
diff --git a/cpp/tests/mr/device/pool_mr_tests.cpp b/cpp/tests/mr/device/pool_mr_tests.cpp
index 07168aaf7..96e6fc869 100644
--- a/cpp/tests/mr/device/pool_mr_tests.cpp
+++ b/cpp/tests/mr/device/pool_mr_tests.cpp
@@ -72,10 +72,10 @@ TEST(PoolTest, TwoLargeBuffers)
   auto two_large = []() {
     [[maybe_unused]] auto const [free, total] = rmm::available_device_memory();
     pool_mr mr{rmm::mr::get_current_device_resource_ref(), rmm::percent_of_free_device_memory(50)};
-    auto* ptr1 = mr.allocate(free / 4);
-    auto* ptr2 = mr.allocate(free / 4);
-    mr.deallocate(ptr1, free / 4);
-    mr.deallocate(ptr2, free / 4);
+    auto* ptr1 = mr.allocate_sync(free / 4);
+    auto* ptr2 = mr.allocate_sync(free / 4);
+    mr.deallocate_sync(ptr1, free / 4);
+    mr.deallocate_sync(ptr2, free / 4);
   };
   EXPECT_NO_THROW(two_large());
 }
@@ -87,20 +87,20 @@ TEST(PoolTest, ForceGrowth)
     auto const max_size{6000};
     limiting_mr limiter{&cuda, max_size};
     pool_mr mr{&limiter, 0};
-    EXPECT_NO_THROW(mr.allocate(1000));
-    EXPECT_NO_THROW(mr.allocate(4000));
-    EXPECT_NO_THROW(mr.allocate(500));
-    EXPECT_THROW(mr.allocate(2000), rmm::out_of_memory);  // too much
+    EXPECT_NO_THROW(mr.allocate_sync(1000));
+    EXPECT_NO_THROW(mr.allocate_sync(4000));
+    EXPECT_NO_THROW(mr.allocate_sync(500));
+    EXPECT_THROW(mr.allocate_sync(2000), rmm::out_of_memory);  // too much
   }
   {
     // with max pool size
     auto const max_size{6000};
     limiting_mr limiter{&cuda, max_size};
     pool_mr mr{&limiter, 0, 8192};
-    EXPECT_NO_THROW(mr.allocate(1000));
-    EXPECT_THROW(mr.allocate(4000), rmm::out_of_memory);  // too much
-    EXPECT_NO_THROW(mr.allocate(500));
-    EXPECT_NO_THROW(mr.allocate(2000));  // fits
+    EXPECT_NO_THROW(mr.allocate_sync(1000));
+    EXPECT_THROW(mr.allocate_sync(4000), rmm::out_of_memory);  // too much
+    EXPECT_NO_THROW(mr.allocate_sync(500));
+    EXPECT_NO_THROW(mr.allocate_sync(2000));  // fits
   }
 }
 
@@ -112,7 +112,7 @@ TEST(PoolTest, DeletedStream)
   EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream));
   EXPECT_NO_THROW(rmm::device_buffer buff(size, cuda_stream_view{stream}, &mr));
   EXPECT_EQ(cudaSuccess, cudaStreamDestroy(stream));
-  EXPECT_NO_THROW(mr.allocate(size));
+  EXPECT_NO_THROW(mr.allocate_sync(size));
 }
 
 // Issue #527
@@ -120,7 +120,7 @@ TEST(PoolTest, InitialAndMaxPoolSizeEqual)
 {
   EXPECT_NO_THROW([]() {
     pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000192, 1000192);
-    mr.allocate(1000);
+    mr.allocate_sync(1000);
   }());
 }
 
@@ -129,14 +129,14 @@ TEST(PoolTest, NonAlignedPoolSize)
   EXPECT_THROW(
     []() {
       pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000031, 1000192);
-      mr.allocate(1000);
+      mr.allocate_sync(1000);
     }(),
     rmm::logic_error);
 
   EXPECT_THROW(
     []() {
       pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000192, 1000200);
-      mr.allocate(1000);
+      mr.allocate_sync(1000);
     }(),
     rmm::logic_error);
 }
@@ -146,8 +146,8 @@ TEST(PoolTest, UpstreamDoesntSupportMemInfo)
   cuda_mr cuda;
   pool_mr mr1(&cuda, 0);
   pool_mr mr2(&mr1, 0);
-  auto* ptr = mr2.allocate(1024);
-  mr2.deallocate(ptr, 1024);
+  auto* ptr = mr2.allocate_sync(1024);
+  mr2.deallocate_sync(ptr, 1024);
 }
 
 TEST(PoolTest, MultidevicePool)
@@ -193,14 +193,18 @@ namespace test_properties {
 class fake_async_resource {
  public:
   // To model `async_resource`
+
+#ifdef RMM_ENABLE_LEGACY_MR_INTERFACE
   static void* allocate(std::size_t, std::size_t) { return nullptr; }
-  static void deallocate(void* ptr, std::size_t, std::size_t) {}
+  static void deallocate(void* ptr, std::size_t, std::size_t) noexcept {}
   static void* allocate_async(std::size_t, std::size_t, cuda::stream_ref) { return nullptr; }
-  static void deallocate_async(void* ptr, std::size_t, std::size_t, cuda::stream_ref) {}
+  static void deallocate_async(void* ptr, std::size_t, std::size_t, cuda::stream_ref) noexcept {}
+#endif  // RMM_ENABLE_LEGACY_MR_INTERFACE
+
   void* allocate_sync(std::size_t, std::size_t) { return nullptr; }
-  void deallocate_sync(void* ptr, std::size_t, std::size_t) {}
+  void deallocate_sync(void* ptr, std::size_t, std::size_t) noexcept {}
   void* allocate(cuda_stream_view, std::size_t, std::size_t) { return nullptr; }
-  void deallocate(cuda_stream_view, void*, std::size_t, std::size_t) { return; }
+  void deallocate(cuda_stream_view, void*, std::size_t, std::size_t) noexcept { return; }
 
   bool operator==(const fake_async_resource& other) const { return true; }
   bool operator!=(const fake_async_resource& other) const { return false; }
diff --git a/cpp/tests/mr/device/statistics_mr_tests.cpp b/cpp/tests/mr/device/statistics_mr_tests.cpp
index 27b84c1fd..4667cb928 100644
--- a/cpp/tests/mr/device/statistics_mr_tests.cpp
+++ b/cpp/tests/mr/device/statistics_mr_tests.cpp
@@ -63,10 +63,10 @@ TEST(StatisticsTest, AllFreed)
 
   allocations.reserve(num_allocations);
   for (int i = 0; i < num_allocations; ++i) {
-    allocations.push_back(mr.allocate(ten_MiB));
+    allocations.push_back(mr.allocate_sync(ten_MiB));
   }
   for (auto* alloc : allocations) {
-    mr.deallocate(alloc, ten_MiB);
+    mr.deallocate_sync(alloc, ten_MiB);
   }
 
   // Counter values should be 0
@@ -80,11 +80,11 @@ TEST(StatisticsTest, PeakAllocations)
   std::vector<void*> allocations;
 
   for (std::size_t i = 0; i < num_allocations; ++i) {
-    allocations.push_back(mr.allocate(ten_MiB));
+    allocations.push_back(mr.allocate_sync(ten_MiB));
   }
   // Delete every other allocation
   for (auto&& it = allocations.begin(); it != allocations.end(); ++it) {
-    mr.deallocate(*it, ten_MiB);
+    mr.deallocate_sync(*it, ten_MiB);
     it = allocations.erase(it);
   }
 
@@ -105,12 +105,12 @@ TEST(StatisticsTest, PeakAllocations)
 
   // Add 10 more to increase the peak
   for (std::size_t i = 0; i < num_allocations; ++i) {
-    allocations.push_back(mr.allocate(ten_MiB));
+    allocations.push_back(mr.allocate_sync(ten_MiB));
   }
 
   // Deallocate all remaining
   for (auto& allocation : allocations) {
-    mr.deallocate(allocation, ten_MiB);
+    mr.deallocate_sync(allocation, ten_MiB);
   }
   allocations.clear();
 
@@ -188,7 +188,7 @@ TEST(StatisticsTest, NegativeInnerTracking)
   statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   for (std::size_t i = 0; i < num_allocations; ++i) {
-    allocations.push_back(mr.allocate(ten_MiB));
+    allocations.push_back(mr.allocate_sync(ten_MiB));
   }
 
   EXPECT_EQ(mr.get_allocations_counter().value, 10);
@@ -197,7 +197,7 @@ TEST(StatisticsTest, NegativeInnerTracking)
 
   // Add more allocations
   for (std::size_t i = 0; i < num_more_allocations; ++i) {
-    allocations.push_back(inner_mr.allocate(ten_MiB));
+    allocations.push_back(inner_mr.allocate_sync(ten_MiB));
   }
 
   // Check the outstanding allocations
@@ -213,7 +213,7 @@ TEST(StatisticsTest, NegativeInnerTracking)
 
   // Deallocate all allocations using the inner_mr
   for (auto& allocation : allocations) {
-    inner_mr.deallocate(allocation, ten_MiB);
+    inner_mr.deallocate_sync(allocation, ten_MiB);
   }
   allocations.clear();
 
@@ -242,7 +242,7 @@ TEST(StatisticsTest, NegativeInnerTracking)
 TEST(StatisticsTest, Nested)
 {
   statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()};
-  void* a0 = mr.allocate(ten_MiB);
+  void* a0 = mr.allocate_sync(ten_MiB);
   EXPECT_EQ(mr.get_bytes_counter().value, ten_MiB);
   EXPECT_EQ(mr.get_allocations_counter().value, 1);
   {
@@ -252,12 +252,12 @@ TEST(StatisticsTest, Nested)
   }
   EXPECT_EQ(mr.get_bytes_counter().value, 0);
   EXPECT_EQ(mr.get_allocations_counter().value, 0);
-  void* a1 = mr.allocate(ten_MiB);
+  void* a1 = mr.allocate_sync(ten_MiB);
   mr.push_counters();
   EXPECT_EQ(mr.get_bytes_counter().value, 0);
   EXPECT_EQ(mr.get_allocations_counter().value, 0);
-  void* a2 = mr.allocate(ten_MiB);
-  mr.deallocate(a2, ten_MiB);
+  void* a2 = mr.allocate_sync(ten_MiB);
+  mr.deallocate_sync(a2, ten_MiB);
   EXPECT_EQ(mr.get_bytes_counter().value, 0);
   EXPECT_EQ(mr.get_bytes_counter().peak, ten_MiB);
   EXPECT_EQ(mr.get_allocations_counter().value, 0);
@@ -269,7 +269,7 @@ TEST(StatisticsTest, Nested)
     EXPECT_EQ(allocs.value, 0);
     EXPECT_EQ(allocs.peak, 1);
   }
-  mr.deallocate(a0, ten_MiB);
+  mr.deallocate_sync(a0, ten_MiB);
   {
     auto [bytes, allocs] = mr.pop_counters();
     EXPECT_EQ(bytes.value, 0);
@@ -277,7 +277,7 @@ TEST(StatisticsTest, Nested)
     EXPECT_EQ(allocs.value, 0);
     EXPECT_EQ(allocs.peak, 2);
   }
-  mr.deallocate(a1, ten_MiB);
+  mr.deallocate_sync(a1, ten_MiB);
   EXPECT_THROW(mr.pop_counters(), std::out_of_range);
 }
 
diff --git a/cpp/tests/mr/device/system_mr_tests.cu b/cpp/tests/mr/device/system_mr_tests.cu
index cb13e3da6..656ef0cae 100644
--- a/cpp/tests/mr/device/system_mr_tests.cu
+++ b/cpp/tests/mr/device/system_mr_tests.cu
@@ -92,22 +92,22 @@ TEST_F(SystemMRTest, FirstTouchOnCPU)
 {
   auto const free = rmm::available_device_memory().first;
   system_mr mr;
-  void* ptr = mr.allocate(size_mb);
+  void* ptr = mr.allocate_sync(size_mb);
   touch_on_cpu(ptr, size_mb);
   auto const free2 = rmm::available_device_memory().first;
   EXPECT_EQ(free, free2);
-  mr.deallocate(ptr, size_mb);
+  mr.deallocate_sync(ptr, size_mb);
 }
 
 TEST_F(SystemMRTest, FirstTouchOnGPU)
 {
   auto const free = rmm::available_device_memory().first;
   system_mr mr;
-  void* ptr = mr.allocate(size_mb);
+  void* ptr = mr.allocate_sync(size_mb);
   touch_on_gpu(ptr, size_mb);
   auto const free2 = rmm::available_device_memory().first;
   EXPECT_LT(free2, free);
-  mr.deallocate(ptr, size_mb);
+  mr.deallocate_sync(ptr, size_mb);
 }
 
 TEST_F(SystemMRTest, HeadroomMRReserveAllFreeMemory)
@@ -115,9 +115,9 @@ TEST_F(SystemMRTest, HeadroomMRReserveAllFreeMemory)
   auto const free = rmm::available_device_memory().first;
   // All the free GPU memory is set as headroom, so allocation is only on the CPU.
   headroom_mr mr{free + size_gb};
-  void* ptr = mr.allocate(size_mb);
+  void* ptr = mr.allocate_sync(size_mb);
   touch_on_cpu(ptr, size_mb);
-  mr.deallocate(ptr, size_mb);
+  mr.deallocate_sync(ptr, size_mb);
 }
 
 TEST_F(SystemMRTest, HeadroomMRDifferentParametersUnequal)
diff --git a/cpp/tests/mr/device/tracking_mr_tests.cpp b/cpp/tests/mr/device/tracking_mr_tests.cpp
index 28e8b2f00..6f08836f6 100644
--- a/cpp/tests/mr/device/tracking_mr_tests.cpp
+++ b/cpp/tests/mr/device/tracking_mr_tests.cpp
@@ -57,10 +57,10 @@ TEST(TrackingTest, AllFreed)
   std::vector<void*> allocations;
   allocations.reserve(num_allocations);
   for (int i = 0; i < num_allocations; ++i) {
-    allocations.push_back(mr.allocate(ten_MiB));
+    allocations.push_back(mr.allocate_sync(ten_MiB));
   }
   for (auto* alloc : allocations) {
-    mr.deallocate(alloc, ten_MiB);
+    mr.deallocate_sync(alloc, ten_MiB);
   }
   EXPECT_EQ(mr.get_outstanding_allocations().size(), 0);
   EXPECT_EQ(mr.get_allocated_bytes(), 0);
@@ -72,10 +72,10 @@ TEST(TrackingTest, AllocationsLeftWithStacks)
   std::vector<void*> allocations;
   allocations.reserve(num_allocations);
   for (int i = 0; i < num_allocations; ++i) {
-    allocations.push_back(mr.allocate(ten_MiB));
+    allocations.push_back(mr.allocate_sync(ten_MiB));
   }
   for (int i = 0; i < num_allocations; i += 2) {
-    mr.deallocate(allocations[i], ten_MiB);
+    mr.deallocate_sync(allocations[i], ten_MiB);
   }
   EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations / 2);
   EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations / 2));
@@ -90,11 +90,11 @@ TEST(TrackingTest, AllocationsLeftWithoutStacks)
   std::vector<void*> allocations;
   allocations.reserve(num_allocations);
   for (int i = 0; i < num_allocations; ++i) {
-    allocations.push_back(mr.allocate(ten_MiB));
+    allocations.push_back(mr.allocate_sync(ten_MiB));
   }
 
   for (int i = 0; i < num_allocations; i += 2) {
-    mr.deallocate(allocations[i], ten_MiB);
+    mr.deallocate_sync(allocations[i], ten_MiB);
   }
   EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations / 2);
   EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations / 2));
@@ -156,7 +156,7 @@ TEST(TrackingTest, NegativeInnerTracking)
   tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   for (std::size_t i = 0; i < num_allocations; ++i) {
-    allocations.push_back(mr.allocate(ten_MiB));
+    allocations.push_back(mr.allocate_sync(ten_MiB));
   }
 
   EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations);
@@ -165,7 +165,7 @@ TEST(TrackingTest, NegativeInnerTracking)
 
   // Add more allocations
   for (std::size_t i = 0; i < num_more_allocations; ++i) {
-    allocations.push_back(inner_mr.allocate(ten_MiB));
+    allocations.push_back(inner_mr.allocate_sync(ten_MiB));
   }
 
   // Check the outstanding allocations
@@ -174,7 +174,7 @@ TEST(TrackingTest, NegativeInnerTracking)
 
   // Deallocate all allocations using the inner_mr
   for (auto& allocation : allocations) {
-    inner_mr.deallocate(allocation, ten_MiB);
+    inner_mr.deallocate_sync(allocation, ten_MiB);
   }
   allocations.clear();
 
@@ -188,12 +188,12 @@ TEST(TrackingTest, DeallocWrongBytes)
   tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   for (std::size_t i = 0; i < num_allocations; ++i) {
-    allocations.push_back(mr.allocate(ten_MiB));
+    allocations.push_back(mr.allocate_sync(ten_MiB));
   }
 
   // When deallocating, pass the wrong bytes to deallocate
   for (auto& allocation : allocations) {
-    mr.deallocate(allocation, ten_MiB / 2);
+    mr.deallocate_sync(allocation, ten_MiB / 2);
   }
   allocations.clear();
 
@@ -214,7 +214,7 @@ TEST(TrackingTest, LogOutstandingAllocations)
   tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   for (std::size_t i = 0; i < num_allocations; ++i) {
-    allocations.push_back(mr.allocate(ten_MiB));
+    allocations.push_back(mr.allocate_sync(ten_MiB));
   }
 
   rmm::default_logger().set_level(rapids_logger::level_enum::debug);
@@ -225,7 +225,7 @@ TEST(TrackingTest, LogOutstandingAllocations)
 #endif
 
   for (auto& allocation : allocations) {
-    mr.deallocate(allocation, ten_MiB);
+    mr.deallocate_sync(allocation, ten_MiB);
   }
 
   rmm::default_logger().set_level(old_level);
diff --git a/cpp/tests/mr/host/mr_ref_tests.cpp b/cpp/tests/mr/host/mr_ref_tests.cpp
index ef9af2290..aa34b8497 100644
--- a/cpp/tests/mr/host/mr_ref_tests.cpp
+++ b/cpp/tests/mr/host/mr_ref_tests.cpp
@@ -93,54 +93,54 @@ TYPED_TEST(MRRefTest, SelfEquality) { EXPECT_TRUE(this->ref == this->ref); }
 TYPED_TEST(MRRefTest, AllocateZeroBytes)
 {
   void* ptr{nullptr};
-  EXPECT_NO_THROW(ptr = this->ref.allocate(0));
-  EXPECT_NO_THROW(this->ref.deallocate(ptr, 0));
+  EXPECT_NO_THROW(ptr = this->ref.allocate_sync(0));
+  EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, 0));
 }
 
 TYPED_TEST(MRRefTest, AllocateWord)
 {
   void* ptr{nullptr};
-  EXPECT_NO_THROW(ptr = this->ref.allocate(size_word));
+  EXPECT_NO_THROW(ptr = this->ref.allocate_sync(size_word));
   EXPECT_NE(nullptr, ptr);
   EXPECT_TRUE(is_aligned(ptr));
   EXPECT_FALSE(is_device_memory(ptr));
-  EXPECT_NO_THROW(this->ref.deallocate(ptr, size_word));
+  EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, size_word));
 }
 
 TYPED_TEST(MRRefTest, AllocateKB)
 {
   void* ptr{nullptr};
-  EXPECT_NO_THROW(ptr = this->ref.allocate(size_kb));
+  EXPECT_NO_THROW(ptr = this->ref.allocate_sync(size_kb));
   EXPECT_NE(nullptr, ptr);
   EXPECT_TRUE(is_aligned(ptr));
   EXPECT_FALSE(is_device_memory(ptr));
-  EXPECT_NO_THROW(this->ref.deallocate(ptr, size_kb));
+  EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, size_kb));
 }
 
 TYPED_TEST(MRRefTest, AllocateMB)
 {
   void* ptr{nullptr};
-  EXPECT_NO_THROW(ptr = this->ref.allocate(size_mb));
+  EXPECT_NO_THROW(ptr = this->ref.allocate_sync(size_mb));
   EXPECT_NE(nullptr, ptr);
   EXPECT_TRUE(is_aligned(ptr));
   EXPECT_FALSE(is_device_memory(ptr));
-  EXPECT_NO_THROW(this->ref.deallocate(ptr, size_mb));
+  EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, size_mb));
 }
 
 TYPED_TEST(MRRefTest, AllocateGB)
 {
   void* ptr{nullptr};
-  EXPECT_NO_THROW(ptr = this->ref.allocate(size_gb));
+  EXPECT_NO_THROW(ptr = this->ref.allocate_sync(size_gb));
   EXPECT_NE(nullptr, ptr);
   EXPECT_TRUE(is_aligned(ptr));
   EXPECT_FALSE(is_device_memory(ptr));
-  EXPECT_NO_THROW(this->ref.deallocate(ptr, size_gb));
+  EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, size_gb));
 }
 
 TYPED_TEST(MRRefTest, AllocateTooMuch)
 {
   void* ptr{nullptr};
-  EXPECT_THROW(ptr = this->ref.allocate(size_pb), std::bad_alloc);
+  EXPECT_THROW(ptr = this->ref.allocate_sync(size_pb), std::bad_alloc);
   EXPECT_EQ(nullptr, ptr);
 }
 
@@ -158,13 +158,13 @@ TYPED_TEST(MRRefTest, RandomAllocations)
   std::for_each(
     allocations.begin(), allocations.end(), [&generator, &distribution, this](allocation& alloc) {
       alloc.size = distribution(generator);
-      EXPECT_NO_THROW(alloc.ptr = this->ref.allocate(alloc.size));
+      EXPECT_NO_THROW(alloc.ptr = this->ref.allocate_sync(alloc.size));
       EXPECT_NE(nullptr, alloc.ptr);
       EXPECT_TRUE(is_aligned(alloc.ptr));
     });
 
   std::for_each(allocations.begin(), allocations.end(), [this](allocation& alloc) {
-    EXPECT_NO_THROW(this->ref.deallocate(alloc.ptr, alloc.size));
+    EXPECT_NO_THROW(this->ref.deallocate_sync(alloc.ptr, alloc.size));
   });
 }
 
@@ -186,7 +186,8 @@ TYPED_TEST(MRRefTest, MixedRandomAllocationFree)
   constexpr std::size_t num_allocations{100};
   for (std::size_t i = 0; i < num_allocations; ++i) {
     std::size_t allocation_size = size_distribution(generator);
-    EXPECT_NO_THROW(allocations.emplace_back(this->ref.allocate(allocation_size), allocation_size));
+    EXPECT_NO_THROW(
+      allocations.emplace_back(this->ref.allocate_sync(allocation_size), allocation_size));
     auto new_allocation = allocations.back();
     EXPECT_NE(nullptr, new_allocation.ptr);
     EXPECT_TRUE(is_aligned(new_allocation.ptr));
@@ -195,13 +196,13 @@ TYPED_TEST(MRRefTest, MixedRandomAllocationFree)
 
     if (free_front) {
       auto front = allocations.front();
-      EXPECT_NO_THROW(this->ref.deallocate(front.ptr, front.size));
+      EXPECT_NO_THROW(this->ref.deallocate_sync(front.ptr, front.size));
       allocations.pop_front();
     }
   }
   // free any remaining allocations
   for (auto alloc : allocations) {
-    EXPECT_NO_THROW(this->ref.deallocate(alloc.ptr, alloc.size));
+    EXPECT_NO_THROW(this->ref.deallocate_sync(alloc.ptr, alloc.size));
     allocations.pop_front();
   }
 }
@@ -222,9 +223,9 @@ TYPED_TEST(MRRefTest, AlignmentTest)
          alignment *= TestedAlignmentMultiplier) {
       auto allocation_size = size_distribution(generator);
       void* ptr{nullptr};
-      EXPECT_NO_THROW(ptr = this->ref.allocate(allocation_size, alignment));
+      EXPECT_NO_THROW(ptr = this->ref.allocate_sync(allocation_size, alignment));
       EXPECT_TRUE(is_aligned(ptr, alignment));
-      EXPECT_NO_THROW(this->ref.deallocate(ptr, allocation_size, alignment));
+      EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, allocation_size, alignment));
     }
   }
 }
@@ -245,9 +246,9 @@ TYPED_TEST(MRRefTest, UnsupportedAlignmentTest)
       // alignment of `alignof(std::max_align_t)`
       auto const bad_alignment = alignment + 1;
 
-      EXPECT_NO_THROW(ptr = this->ref.allocate(allocation_size, bad_alignment));
+      EXPECT_NO_THROW(ptr = this->ref.allocate_sync(allocation_size, bad_alignment));
       EXPECT_TRUE(is_aligned(ptr, alignof(std::max_align_t)));
-      EXPECT_NO_THROW(this->ref.deallocate(ptr, allocation_size, bad_alignment));
+      EXPECT_NO_THROW(this->ref.deallocate_sync(ptr, allocation_size, bad_alignment));
 #endif
     }
   }
@@ -258,8 +259,8 @@ TEST(PinnedResource, isPinned)
   rmm::mr::pinned_memory_resource mr;
   rmm::host_resource_ref ref{mr};
   void* ptr{nullptr};
-  EXPECT_NO_THROW(ptr = ref.allocate(100));
+  EXPECT_NO_THROW(ptr = ref.allocate_sync(100));
   EXPECT_TRUE(is_pinned_memory(ptr));
-  EXPECT_NO_THROW(ref.deallocate(ptr, 100));
+  EXPECT_NO_THROW(ref.deallocate_sync(ptr, 100));
 }
 }  // namespace rmm::test
diff --git a/cpp/tests/mr/host/pinned_mr_tests.cpp b/cpp/tests/mr/host/pinned_mr_tests.cpp
index 32d6a76e3..e193f6f66 100644
--- a/cpp/tests/mr/host/pinned_mr_tests.cpp
+++ b/cpp/tests/mr/host/pinned_mr_tests.cpp
@@ -37,10 +37,10 @@ TEST(PinnedMemoryResource, AllocateBytesOverload)
   rmm::mr::pinned_memory_resource mr;
 
   void* ptr{nullptr};
-  EXPECT_NO_THROW(ptr = mr.allocate(128));
+  EXPECT_NO_THROW(ptr = mr.allocate_sync(128));
   EXPECT_NE(nullptr, ptr);
   EXPECT_TRUE(is_pinned_memory(ptr));
-  EXPECT_NO_THROW(mr.deallocate(ptr, 128));
+  EXPECT_NO_THROW(mr.deallocate_sync(ptr, 128));
 }
 
 }  // namespace rmm::test
diff --git a/cpp/tests/mr/host/pinned_pool_mr_tests.cpp b/cpp/tests/mr/host/pinned_pool_mr_tests.cpp
index 0a28ead7c..e1645a5de 100644
--- a/cpp/tests/mr/host/pinned_pool_mr_tests.cpp
+++ b/cpp/tests/mr/host/pinned_pool_mr_tests.cpp
@@ -65,7 +65,7 @@ TEST(PinnedPoolTest, InitialAndMaxPoolSizeEqual)
   EXPECT_NO_THROW([]() {
     rmm::mr::pinned_memory_resource pinned_mr{};
     pool_mr mr(pinned_mr, 1000192, 1000192);
-    mr.allocate(1000);
+    mr.allocate_sync(1000);
   }());
 }
 
@@ -75,7 +75,7 @@ TEST(PinnedPoolTest, NonAlignedPoolSize)
     []() {
       rmm::mr::pinned_memory_resource pinned_mr{};
       pool_mr mr(pinned_mr, 1000031, 1000192);
-      mr.allocate(1000);
+      mr.allocate_sync(1000);
     }(),
     rmm::logic_error);
 
@@ -83,7 +83,7 @@ TEST(PinnedPoolTest, NonAlignedPoolSize)
     []() {
       rmm::mr::pinned_memory_resource pinned_mr{};
       pool_mr mr(pinned_mr, 1000192, 1000200);
-      mr.allocate(1000);
+      mr.allocate_sync(1000);
     }(),
     rmm::logic_error);
 }
@@ -94,9 +94,9 @@ TEST(PinnedPoolTest, ThrowOutOfMemory)
   const auto initial{0};
   const auto maximum{1024};
   pool_mr mr{pinned_mr, initial, maximum};
-  mr.allocate(1024);
+  mr.allocate_sync(1024);
 
-  EXPECT_THROW(mr.allocate(1024), rmm::out_of_memory);
+  EXPECT_THROW(mr.allocate_sync(1024), rmm::out_of_memory);
 }
 
 }  // namespace
diff --git a/python/rmm/rmm/librmm/_torch_allocator.cpp b/python/rmm/rmm/librmm/_torch_allocator.cpp
index bfe94c2d0..bd9991d9a 100644
--- a/python/rmm/rmm/librmm/_torch_allocator.cpp
+++ b/python/rmm/rmm/librmm/_torch_allocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,8 +40,8 @@ extern "C" void* allocate(std::size_t size, int device, void* stream)
   rmm::cuda_device_id const device_id{device};
   rmm::cuda_set_device_raii with_device{device_id};
   auto mr = rmm::mr::get_per_device_resource_ref(device_id);
-  return mr.allocate_async(
-    size, rmm::CUDA_ALLOCATION_ALIGNMENT, rmm::cuda_stream_view{static_cast<cudaStream_t>(stream)});
+  return mr.allocate(
+    rmm::cuda_stream_view{static_cast<cudaStream_t>(stream)}, size, rmm::CUDA_ALLOCATION_ALIGNMENT);
 }
 
 /**
@@ -57,8 +57,8 @@ extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream
   rmm::cuda_device_id const device_id{device};
   rmm::cuda_set_device_raii with_device{device_id};
   auto mr = rmm::mr::get_per_device_resource_ref(device_id);
-  mr.deallocate_async(ptr,
-                      size,
-                      rmm::CUDA_ALLOCATION_ALIGNMENT,
-                      rmm::cuda_stream_view{static_cast<cudaStream_t>(stream)});
+  mr.deallocate(rmm::cuda_stream_view{static_cast<cudaStream_t>(stream)},
+                ptr,
+                size,
+                rmm::CUDA_ALLOCATION_ALIGNMENT);
 }
diff --git a/python/rmm/rmm/librmm/memory_resource.pxd b/python/rmm/rmm/librmm/memory_resource.pxd
index 75e59e50d..67c5e8484 100644
--- a/python/rmm/rmm/librmm/memory_resource.pxd
+++ b/python/rmm/rmm/librmm/memory_resource.pxd
@@ -32,6 +32,7 @@ from rmm.librmm.memory_resource cimport device_memory_resource
 cdef extern from "rmm/mr/device/device_memory_resource.hpp" \
         namespace "rmm::mr" nogil:
     cdef cppclass device_memory_resource:
+        # Legacy functions
         void* allocate(size_t bytes) except +
         void* allocate(size_t bytes, cuda_stream_view stream) except +
         void deallocate(void* ptr, size_t bytes) noexcept
@@ -40,6 +41,21 @@ cdef extern from "rmm/mr/device/device_memory_resource.hpp" \
             size_t bytes,
             cuda_stream_view stream
         ) noexcept
+        # End legacy functions
+
+        void* allocate_sync(size_t bytes, size_t alignment) except +
+        void deallocate_sync(void* ptr, size_t bytes, size_t alignment) noexcept
+        void* allocate(
+            cuda_stream_view stream,
+            size_t bytes,
+            size_t alignment=256
+        ) except +
+        void deallocate(
+            cuda_stream_view stream,
+            void* ptr,
+            size_t bytes,
+            size_t alignment=256
+        ) noexcept
 
 cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil:
     size_t percent_of_free_device_memory(int percent) except +
diff --git a/python/rmm/rmm/pylibrmm/memory_resource/_memory_resource.pyx b/python/rmm/rmm/pylibrmm/memory_resource/_memory_resource.pyx
index 0e32f017a..8921cf266 100644
--- a/python/rmm/rmm/pylibrmm/memory_resource/_memory_resource.pyx
+++ b/python/rmm/rmm/pylibrmm/memory_resource/_memory_resource.pyx
@@ -106,7 +106,7 @@ cdef class DeviceMemoryResource:
         """
         cdef uintptr_t ptr
         with nogil:
-            ptr = <uintptr_t>self.c_obj.get().allocate(nbytes, stream.view())
+            ptr = <uintptr_t>self.c_obj.get().allocate(stream.view(), nbytes)
         return ptr
 
     def deallocate(self, uintptr_t ptr, size_t nbytes, Stream stream=DEFAULT_STREAM):
@@ -122,7 +122,7 @@ cdef class DeviceMemoryResource:
             Optional stream for the deallocation
         """
         with nogil:
-            self.c_obj.get().deallocate(<void*>(ptr), nbytes, stream.view())
+            self.c_obj.get().deallocate(stream.view(), <void*>(ptr), nbytes)
 
     def __dealloc__(self):
         # See the __dealloc__ method on DeviceBuffer for discussion of why we must