@@ -4,8 +4,11 @@
 
 from __future__ import annotations
 
+from cuda.core.experimental._utils.cuda_utils cimport (
+    _check_driver_error as raise_if_driver_error,
+)
+
 import abc
-import weakref
 from typing import Tuple, TypeVar, Union
 
 from cuda.core.experimental._dlpack import DLDeviceType, make_py_capsule
@@ -23,7 +26,7 @@
 """A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting :attr:`Buffer.handle`."""
 
 
-class Buffer:
+cdef class Buffer:
     """Represent a handle to allocated memory.
 
     This generic object provides a unified representation for how
@@ -33,34 +36,26 @@ class Buffer:
     Support for data interchange mechanisms are provided by DLPack.
     """
 
-    class _MembersNeededForFinalize:
-        __slots__ = ("ptr", "size", "mr")
-
-        def __init__(self, buffer_obj, ptr, size, mr):
-            self.ptr = ptr
-            self.size = size
-            self.mr = mr
-            weakref.finalize(buffer_obj, self.close)
-
-        def close(self, stream=None):
-            if self.ptr and self.mr is not None:
-                self.mr.deallocate(self.ptr, self.size, stream)
-                self.ptr = 0
-                self.mr = None
-
-    # TODO: handle ownership? (_mr could be None)
-    __slots__ = ("__weakref__", "_mnff")
+    cdef:
+        object _ptr
+        size_t _size
+        object _mr
 
-    def __new__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs):
         raise RuntimeError("Buffer objects cannot be instantiated directly. Please use MemoryResource APIs.")
 
     @classmethod
-    def _init(cls, ptr: DevicePointerT, size: int, mr: MemoryResource | None = None):
-        self = super().__new__(cls)
-        self._mnff = Buffer._MembersNeededForFinalize(self, ptr, size, mr)
+    def _init(cls, ptr: DevicePointerT, size_t size, mr: MemoryResource | None = None):
+        cdef Buffer self = Buffer.__new__(cls)
+        self._ptr = ptr
+        self._size = size
+        self._mr = mr
         return self
 
-    def close(self, stream: Stream = None):
+    def __del__(self):
+        self.close()
+
+    cpdef close(self, stream: Stream = None):
         """Deallocate this buffer asynchronously on the given stream.
 
         This buffer is released back to their memory resource
@@ -72,7 +67,10 @@ def close(self, stream: Stream = None):
             The stream object to use for asynchronous deallocation. If None,
             the behavior depends on the underlying memory resource.
         """
-        self._mnff.close(stream)
+        if self._ptr and self._mr is not None:
+            self._mr.deallocate(self._ptr, self._size, stream)
+            self._ptr = 0
+            self._mr = None
 
     @property
     def handle(self) -> DevicePointerT:
@@ -83,37 +81,37 @@ def handle(self) -> DevicePointerT:
             This handle is a Python object. To get the memory address of the underlying C
             handle, call ``int(Buffer.handle)``.
         """
-        return self._mnff.ptr
+        return self._ptr
 
     @property
     def size(self) -> int:
         """Return the memory size of this buffer."""
-        return self._mnff.size
+        return self._size
 
     @property
     def memory_resource(self) -> MemoryResource:
         """Return the memory resource associated with this buffer."""
-        return self._mnff.mr
+        return self._mr
 
     @property
     def is_device_accessible(self) -> bool:
         """Return True if this buffer can be accessed by the GPU, otherwise False."""
-        if self._mnff.mr is not None:
-            return self._mnff.mr.is_device_accessible
+        if self._mr is not None:
+            return self._mr.is_device_accessible
         raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
 
     @property
     def is_host_accessible(self) -> bool:
         """Return True if this buffer can be accessed by the CPU, otherwise False."""
-        if self._mnff.mr is not None:
-            return self._mnff.mr.is_host_accessible
+        if self._mr is not None:
+            return self._mr.is_host_accessible
         raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
 
     @property
     def device_id(self) -> int:
         """Return the device ordinal of this buffer."""
-        if self._mnff.mr is not None:
-            return self._mnff.mr.device_id
+        if self._mr is not None:
+            return self._mr.device_id
         raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
 
     def copy_to(self, dst: Buffer = None, *, stream: Stream) -> Buffer:
@@ -134,15 +132,20 @@ def copy_to(self, dst: Buffer = None, *, stream: Stream) -> Buffer:
134132 """
135133 if stream is None:
136134 raise ValueError("stream must be provided")
135+
136+ cdef size_t src_size = self ._size
137+
137138 if dst is None:
138- if self ._mnff . mr is None :
139+ if self._mr is None:
139140 raise ValueError("a destination buffer must be provided (this buffer does not have a memory_resource )")
140- dst = self ._mnff .mr .allocate (self ._mnff .size , stream )
141- if dst ._mnff .size != self ._mnff .size :
141+ dst = self ._mr.allocate(src_size, stream)
142+
143+ cdef size_t dst_size = dst._size
144+ if dst_size != src_size:
142145 raise ValueError(
143- f"buffer sizes mismatch between src and dst (sizes are: src={ self . _mnff . size } , dst={ dst . _mnff . size } )"
146+ f"buffer sizes mismatch between src and dst (sizes are: src = {src_size }, dst = {dst_size })"
144147 )
145- handle_return (driver .cuMemcpyAsync (dst ._mnff . ptr , self ._mnff . ptr , self . _mnff . size , stream .handle ))
148+ handle_return(driver.cuMemcpyAsync(dst._ptr , self._ptr , src_size , stream.handle ))
146149 return dst
147150
148151 def copy_from(self , src: Buffer , *, stream: Stream ):
@@ -159,11 +162,15 @@ def copy_from(self, src: Buffer, *, stream: Stream):
159162 """
160163 if stream is None :
161164 raise ValueError (" stream must be provided" )
162- if src ._mnff .size != self ._mnff .size :
165+
166+ cdef size_t dst_size = self ._size
167+ cdef size_t src_size = src._size
168+
169+ if src_size != dst_size:
163170 raise ValueError (
164- f"buffer sizes mismatch between src and dst (sizes are: src={ src . _mnff . size } , dst={ self . _mnff . size } )"
171+ f" buffer sizes mismatch between src and dst (sizes are: src={src_size }, dst={dst_size })"
165172 )
166- handle_return (driver .cuMemcpyAsync (self ._mnff . ptr , src ._mnff . ptr , self . _mnff . size , stream .handle ))
173+ handle_return(driver.cuMemcpyAsync(self ._ptr , src._ptr, dst_size , stream.handle))
167174
168175 def __dlpack__ (
169176 self ,
@@ -211,7 +218,7 @@ def __release_buffer__(self, buffer: memoryview, /):
         raise NotImplementedError("WIP: Buffer.__release_buffer__ hasn't been implemented yet.")
 
     @staticmethod
-    def from_handle(ptr: DevicePointerT, size: int, mr: MemoryResource | None = None) -> Buffer:
+    def from_handle(ptr: DevicePointerT, size_t size, mr: MemoryResource | None = None) -> Buffer:
         """Create a new :class:`Buffer` object from a pointer.
 
         Parameters
@@ -326,23 +333,6 @@ def __init__(self, device_id: int):
         self._handle = handle_return(driver.cuDeviceGetMemPool(device_id))
         self._dev_id = device_id
 
-        # Set a higher release threshold to improve performance when there are no active allocations.
-        # By default, the release threshold is 0, which means memory is immediately released back
-        # to the OS when there are no active suballocations, causing performance issues.
-        # Check current release threshold
-        current_threshold = handle_return(
-            driver.cuMemPoolGetAttribute(self._handle, driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD)
-        )
-        # If threshold is 0 (default), set it to maximum to retain memory in the pool
-        if int(current_threshold) == 0:
-            handle_return(
-                driver.cuMemPoolSetAttribute(
-                    self._handle,
-                    driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
-                    driver.cuuint64_t(0xFFFFFFFFFFFFFFFF),
-                )
-            )
-
     def allocate(self, size: int, stream: Stream = None) -> Buffer:
         """Allocate a buffer of the requested size.
 
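For reviewers trying this change out: the user-facing flow is unchanged by the cythonization. Buffers are still obtained from a memory resource rather than constructed directly (the new __init__ raises, mirroring the old __new__), and close() hands the allocation back to that resource; weakref.finalize is replaced by __del__ plus a cpdef close on the cdef class. A minimal usage sketch follows, assuming the existing cuda.core.experimental Device/Stream API (the sketch itself is not part of this diff):

from cuda.core.experimental import Device

dev = Device()
dev.set_current()
s = dev.create_stream()

src = dev.allocate(1024, stream=s)   # Buffer handed out by the device's memory resource
dst = src.copy_to(stream=s)          # allocates a same-sized Buffer and copies into it
assert dst.size == src.size

dst.close(s)                         # released back to the memory resource via the new cpdef close
src.close(s)
s.sync()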