44
55from __future__ import annotations
66
7+ cimport cython
78from libc.stdint cimport uintptr_t
89
910from cuda.core.experimental._memory._device_memory_resource cimport DeviceMemoryResource
@@ -12,7 +13,9 @@ from cuda.core.experimental._memory cimport _ipc
1213from cuda.core.experimental._stream cimport Stream_accept, Stream
1314from cuda.core.experimental._utils.cuda_utils cimport (
1415 _check_driver_error as raise_if_driver_error,
16+ HANDLE_RETURN,
1517)
18+ from cuda.bindings cimport cydriver
1619
1720import abc
1821from typing import TypeVar, Union
@@ -310,46 +313,64 @@ cdef Buffer_init_mem_attrs(Buffer self):
310313 self ._mem_attrs_inited = True
311314
312315
cdef int query_memory_attrs(_MemAttrs &out, uintptr_t ptr) except -1 nogil:
    # Classify ``ptr`` from the pointer attributes reported by the driver and
    # store the result in ``out`` (host accessibility, device accessibility,
    # device id). Returns 0 on success; failures surface as exceptions through
    # the ``except -1`` convention.
    cdef unsigned int mem_kind = 0
    cdef int managed = 0
    cdef int ordinal = 0
    _query_memory_attrs(mem_kind, managed, ordinal, <cydriver.CUdeviceptr>ptr)

    # A memory type of 0 is treated as an unregistered host pointer.
    if mem_kind == 0:
        out.is_host_accessible = True
        out.is_device_accessible = False
        out.device_id = -1
        return 0

    # For managed memory, the memory type can be CU_MEMORYTYPE_DEVICE, so the
    # managed flag has to be checked before the device case; otherwise managed
    # memory would be falsely reported as not host accessible.
    #
    # For pinned memory allocated with cudaMallocHost or page-locked with
    # cudaHostRegister, the memory type is
    # cydriver.CUmemorytype.CU_MEMORYTYPE_HOST.
    # TODO(ktokarski): In some cases, the registered memory requires
    # using different ptr for device and host, we could check
    # cuMemHostGetDevicePointer and
    # CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM
    # to double check the device accessibility.
    if managed or mem_kind == cydriver.CUmemorytype.CU_MEMORYTYPE_HOST:
        out.is_host_accessible = True
        out.is_device_accessible = True
        out.device_id = ordinal
        return 0

    # Plain device memory: reachable from the device only.
    if mem_kind == cydriver.CUmemorytype.CU_MEMORYTYPE_DEVICE:
        out.is_host_accessible = False
        out.is_device_accessible = True
        out.device_id = ordinal
        return 0

    # Any other memory type reported by the driver is rejected.
    raise ValueError(f"Unsupported memory type: {mem_kind}")
345352
346353
cdef inline int _query_memory_attrs(
    unsigned int &memory_type,
    int &is_managed,
    int &device_id,
    cydriver.CUdeviceptr ptr
) except -1 nogil:
    # Query the memory type, managed flag and device ordinal of ``ptr`` with a
    # single cuPointerGetAttributes call, writing the results into the three
    # reference parameters.
    #
    # If the driver reports CUDA_ERROR_NOT_INITIALIZED, a ``Device`` object is
    # constructed under the GIL (the Device class handles the cuInit call
    # internally) and the query is issued once more.
    #
    # Returns 0 on success. The final driver status is passed to
    # HANDLE_RETURN; failures propagate through the ``except -1`` convention.
    cdef cydriver.CUpointer_attribute attrs[3]
    cdef uintptr_t vals[3]
    cdef cydriver.CUresult ret
    # One shared count for both calls so the retry can never query fewer
    # attributes than the first attempt.
    cdef unsigned int num_attrs = 3

    attrs[0] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMORY_TYPE
    attrs[1] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_MANAGED
    attrs[2] = cydriver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL
    # Each slot holds the address of the output variable for the attribute at
    # the same index; the array is handed to the driver as ``void**``.
    vals[0] = <uintptr_t><void*>&memory_type
    vals[1] = <uintptr_t><void*>&is_managed
    vals[2] = <uintptr_t><void*>&device_id

    ret = cydriver.cuPointerGetAttributes(num_attrs, attrs, <void**>vals, ptr)
    if ret == cydriver.CUresult.CUDA_ERROR_NOT_INITIALIZED:
        with cython.gil:
            # Device class handles the cuInit call internally
            from cuda.core.experimental import Device
            Device()
        # Retry with ALL attributes. The previous retry passed 2 here, which
        # skipped CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL and left ``device_id``
        # at whatever value the caller had initialised it to.
        ret = cydriver.cuPointerGetAttributes(num_attrs, attrs, <void**>vals, ptr)
    HANDLE_RETURN(ret)
    return 0
353374
354375
355376cdef class MemoryResource:
0 commit comments