Skip to content

Commit af2d901

Browse files
committed
Use cydriver to query memory attributes, fix managed memory handling, add tests for the attributes
Signed-off-by: Kamil Tokarski <[email protected]>
1 parent 53568b5 commit af2d901

File tree

2 files changed

+164
-25
lines changed

2 files changed

+164
-25
lines changed

cuda_core/cuda/core/experimental/_memory/_buffer.pyx

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from __future__ import annotations
66

7+
cimport cython
78
from libc.stdint cimport uintptr_t
89

910
from cuda.core.experimental._memory._device_memory_resource cimport DeviceMemoryResource
@@ -12,7 +13,9 @@ from cuda.core.experimental._memory cimport _ipc
1213
from cuda.core.experimental._stream cimport Stream_accept, Stream
1314
from cuda.core.experimental._utils.cuda_utils cimport (
1415
_check_driver_error as raise_if_driver_error,
16+
HANDLE_RETURN,
1517
)
18+
from cuda.bindings cimport cydriver
1619

1720
import abc
1821
from typing import TypeVar, Union
@@ -310,46 +313,64 @@ cdef Buffer_init_mem_attrs(Buffer self):
310313
self._mem_attrs_inited = True
311314

312315

313-
# Classify the memory behind ``ptr`` and store the result in ``out``.
#
# The memory type, managed flag and device ordinal are fetched through
# _query_memory_attrs and mapped onto ``out`` as follows:
#   * memory type 0 (unregistered host pointer): host accessible only,
#     device_id is set to -1
#   * managed memory or CU_MEMORYTYPE_HOST: host and device accessible
#   * CU_MEMORYTYPE_DEVICE: device accessible only
#
# Returns 0 on success. Raises ValueError for any other memory type; errors
# from the underlying query propagate through the ``except -1`` convention.
cdef int query_memory_attrs(_MemAttrs &out, uintptr_t ptr) except -1 nogil:
    cdef unsigned int memory_type = 0
    cdef int is_managed = 0
    cdef int device_id = 0
    _query_memory_attrs(memory_type, is_managed, device_id, <cydriver.CUdeviceptr>ptr)

    if memory_type == 0:
        # unregistered host pointer
        out.is_host_accessible = True
        out.is_device_accessible = False
        out.device_id = -1
    # For managed memory, the memory type can be CU_MEMORYTYPE_DEVICE,
    # so the managed flag has to be checked first in order not to falsely
    # claim the memory is not host accessible.
    elif (
        is_managed
        or memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_HOST
    ):
        # For pinned memory allocated with cudaMallocHost or page-locked
        # with cudaHostRegister, the memory_type is
        # cydriver.CUmemorytype.CU_MEMORYTYPE_HOST.
        # TODO(ktokarski): In some cases, the registered memory requires
        # using different ptr for device and host, we could check
        # cuMemHostGetDevicePointer and
        # CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM
        # to double check the device accessibility.
        out.is_host_accessible = True
        out.is_device_accessible = True
        out.device_id = device_id
    elif memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_DEVICE:
        out.is_host_accessible = False
        out.is_device_accessible = True
        out.device_id = device_id
    else:
        # This function runs without the GIL; formatting the message creates
        # Python objects, so the GIL is acquired explicitly before raising.
        with cython.gil:
            raise ValueError(f"Unsupported memory type: {memory_type}")
    return 0
345352

346353

347-
# Query the memory type, managed flag and device ordinal of ``ptr`` with a
# single cuPointerGetAttributes call, writing the results into the
# reference arguments ``memory_type``, ``is_managed`` and ``device_id``.
#
# If the driver reports CUDA_ERROR_NOT_INITIALIZED, a Device object is
# created (which handles cuInit internally) and the query is retried once.
# Any remaining error code is passed to HANDLE_RETURN. Returns 0 on success.
cdef inline int _query_memory_attrs(unsigned int& memory_type, int & is_managed, int& device_id, cydriver.CUdeviceptr ptr) except -1 nogil:
    cdef cydriver.CUpointer_attribute attrs[3]
    cdef uintptr_t vals[3]
    cdef cydriver.CUresult ret

    attrs[0] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMORY_TYPE
    attrs[1] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_MANAGED
    attrs[2] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL
    # Each slot holds the address of the output variable for the attribute
    # at the same index.
    vals[0] = <uintptr_t><void*>&memory_type
    vals[1] = <uintptr_t><void*>&is_managed
    vals[2] = <uintptr_t><void*>&device_id

    ret = cydriver.cuPointerGetAttributes(3, attrs, <void**>vals, ptr)
    if ret == cydriver.CUresult.CUDA_ERROR_NOT_INITIALIZED:
        with cython.gil:
            # Device class handles the cuInit call internally
            from cuda.core.experimental import Device
            Device()
        # The retry must request all three attributes; asking for only two
        # would leave device_id at its caller-provided initial value.
        ret = cydriver.cuPointerGetAttributes(3, attrs, <void**>vals, ptr)
    HANDLE_RETURN(ret)
    return 0
353374

354375

355376
cdef class MemoryResource:

cuda_core/tests/test_memory.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from cuda.bindings import driver
1010
except ImportError:
1111
from cuda import cuda as driver
12+
1213
try:
1314
import numpy as np
1415
except ImportError:
@@ -27,6 +28,9 @@
2728
VirtualMemoryResource,
2829
VirtualMemoryResourceOptions,
2930
)
31+
from cuda.core.experimental import (
32+
system as ccx_system,
33+
)
3034
from cuda.core.experimental._dlpack import DLDeviceType
3135
from cuda.core.experimental._memory import IPCBufferDescriptor
3236
from cuda.core.experimental._utils.cuda_utils import handle_return
@@ -235,6 +239,120 @@ def test_buffer_close():
235239
buffer_close(DummyPinnedMemoryResource(device))
236240

237241

242+
def test_buffer_external_host():
    """Check the attributes reported for a wrapped, unregistered ctypes array."""
    host_array = (ctypes.c_byte * 20)()
    address = ctypes.addressof(host_array)

    wrapped = Buffer.from_handle(address, 20, owner=address)

    # Expected: host accessible only, with no associated device (-1).
    assert not wrapped.is_device_accessible
    assert wrapped.is_host_accessible
    assert wrapped.device_id == -1
    wrapped.close()
250+
251+
252+
@pytest.mark.parametrize("change_device", [True, False])
def test_buffer_external_device(change_device):
    """Check the attributes of a device allocation re-wrapped from its raw handle.

    The allocation is made on the last device; with ``change_device`` set,
    device 0 is made current before the handle is wrapped.
    """
    device_count = ccx_system.num_devices
    if device_count < 1:
        pytest.skip("No devices found")

    dev_id = device_count - 1
    alloc_device = Device(dev_id)
    alloc_device.set_current()
    source = alloc_device.allocate(size=32)

    if change_device:
        # Switch to a different device if possible, to make sure the
        # wrapped buffer still reports the original device id.
        Device(0).set_current()

    wrapped = Buffer.from_handle(int(source.handle), 32)
    assert wrapped.is_device_accessible
    assert not wrapped.is_host_accessible
    assert wrapped.device_id == dev_id
    wrapped.close()
    source.close()
274+
275+
276+
@pytest.mark.parametrize("change_device", [True, False])
def test_buffer_external_pinned_alloc(change_device):
    """Check the attributes of a DummyPinnedMemoryResource allocation wrapped by handle.

    The allocation is made while the last device is current; with
    ``change_device`` set, device 0 is made current before wrapping.
    """
    device_count = ccx_system.num_devices
    if device_count < 1:
        pytest.skip("No devices found")

    dev_id = device_count - 1
    alloc_device = Device(dev_id)
    alloc_device.set_current()
    pinned_mr = DummyPinnedMemoryResource(alloc_device)
    source = pinned_mr.allocate(size=32)

    if change_device:
        # Switch to a different device if possible, to make sure the
        # wrapped buffer still reports the original device id.
        Device(0).set_current()

    wrapped = Buffer.from_handle(int(source.handle), 32)
    assert wrapped.is_device_accessible
    assert wrapped.is_host_accessible
    assert wrapped.device_id == dev_id
    wrapped.close()
    source.close()
299+
300+
301+
@pytest.mark.parametrize("change_device", [True, False])
def test_buffer_external_pinned_registered(change_device):
    """Check buffer attributes before and after registering host memory.

    A ctypes array is first wrapped while unregistered, then registered with
    cuMemHostRegister while the last device is current and wrapped again.
    With ``change_device`` set, device 0 is made current before the second
    wrap. The registration is always undone before the array goes away.
    """
    n = ccx_system.num_devices
    if n < 1:
        pytest.skip("No devices found")
    dev_id = n - 1
    d = Device(dev_id)
    d.set_current()
    a = (ctypes.c_byte * 20)()
    ptr = ctypes.addressof(a)

    # Before registration the pointer is reported as plain host memory.
    buffer = Buffer.from_handle(ptr, 20, owner=ptr)
    assert not buffer.is_device_accessible
    assert buffer.is_host_accessible
    assert buffer.device_id == -1

    handle_return(driver.cuMemHostRegister(ptr, 20, 0))
    try:
        if change_device:
            # Switch to a different device if possible, to make sure the
            # wrapped buffer still reports the original device id.
            d = Device(0)
            d.set_current()

        buffer = Buffer.from_handle(ptr, 20, owner=ptr)
        assert buffer.is_device_accessible
        assert buffer.is_host_accessible
        assert buffer.device_id == dev_id
        buffer.close()
    finally:
        # Undo the registration even when an assertion fails, so the ctypes
        # array is not released while it is still registered with the driver.
        handle_return(driver.cuMemHostUnregister(ptr))
329+
330+
331+
@pytest.mark.parametrize("change_device", [True, False])
def test_buffer_external_managed(change_device):
    """Check the attributes of a cuMemAllocManaged allocation wrapped by handle.

    The allocation is made while the last device is current; with
    ``change_device`` set, device 0 is made current before wrapping. The
    allocation is released with cuMemFree whether or not the checks pass.
    """
    device_count = ccx_system.num_devices
    if device_count < 1:
        pytest.skip("No devices found")

    dev_id = device_count - 1
    Device(dev_id).set_current()

    managed_ptr = None
    try:
        attach_flags = driver.CUmemAttach_flags.CU_MEM_ATTACH_GLOBAL.value
        managed_ptr = handle_return(driver.cuMemAllocManaged(32, attach_flags))
        if change_device:
            # Switch to a different device if possible, to make sure the
            # wrapped buffer still reports the original device id.
            Device(0).set_current()
        wrapped = Buffer.from_handle(managed_ptr, 32)
        assert wrapped.is_device_accessible
        assert wrapped.is_host_accessible
        assert wrapped.device_id == dev_id
    finally:
        if managed_ptr is not None:
            handle_return(driver.cuMemFree(managed_ptr))
354+
355+
238356
def test_buffer_dunder_dlpack():
239357
device = Device()
240358
device.set_current()

0 commit comments

Comments
 (0)