diff --git a/cuda_bindings/cuda/bindings/driver.pyx.in b/cuda_bindings/cuda/bindings/driver.pyx.in index f05485f95..c6d2e3ca2 100644 --- a/cuda_bindings/cuda/bindings/driver.pyx.in +++ b/cuda_bindings/cuda/bindings/driver.pyx.in @@ -36,6 +36,7 @@ ctypedef unsigned long long unsigned_ptr ctypedef unsigned long long unsigned_long_long_ptr ctypedef unsigned long long long_long_ptr ctypedef unsigned long long size_t_ptr +ctypedef unsigned long long long_ptr ctypedef unsigned long long float_ptr ctypedef unsigned long long double_ptr ctypedef unsigned long long void_ptr @@ -22785,7 +22786,8 @@ def cuGetErrorString(error not None : CUresult): """ cdef cydriver.CUresult cyerror = error.value cdef const char* pStr = NULL - err = cydriver.cuGetErrorString(cyerror, &pStr) + with nogil: + err = cydriver.cuGetErrorString(cyerror, &pStr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pStr if pStr != NULL else None) @@ -22820,7 +22822,8 @@ def cuGetErrorName(error not None : CUresult): """ cdef cydriver.CUresult cyerror = error.value cdef const char* pStr = NULL - err = cydriver.cuGetErrorName(cyerror, &pStr) + with nogil: + err = cydriver.cuGetErrorName(cyerror, &pStr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pStr if pStr != NULL else None) @@ -22842,7 +22845,8 @@ def cuInit(unsigned int Flags): CUresult :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH`, :py:obj:`~.CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE` """ - err = cydriver.cuInit(Flags) + with nogil: + err = cydriver.cuInit(Flags) return (_dict_CUresult[err],) {{endif}} @@ -22871,7 +22875,8 @@ def cuDriverGetVersion(): :py:obj:`~.cudaDriverGetVersion`, :py:obj:`~.cudaRuntimeGetVersion` """ cdef int driverVersion = 0 - err = cydriver.cuDriverGetVersion(&driverVersion) + with nogil: + err = 
cydriver.cuDriverGetVersion(&driverVersion) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], driverVersion) @@ -22903,7 +22908,8 @@ def cuDeviceGet(int ordinal): :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGetLuid`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport` """ cdef CUdevice device = CUdevice() - err = cydriver.cuDeviceGet(device._pvt_ptr, ordinal) + with nogil: + err = cydriver.cuDeviceGet(device._pvt_ptr, ordinal) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], device) @@ -22931,7 +22937,8 @@ def cuDeviceGetCount(): :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGetLuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport`, :py:obj:`~.cudaGetDeviceCount` """ cdef int count = 0 - err = cydriver.cuDeviceGetCount(&count) + with nogil: + err = cydriver.cuDeviceGetCount(&count) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], count) @@ -22976,7 +22983,8 @@ def cuDeviceGetName(int length, dev): cydev = pdev pyname = b" " * length cdef char* name = pyname - err = cydriver.cuDeviceGetName(name, length, cydev) + with nogil: + err = cydriver.cuDeviceGetName(name, length, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pyname) @@ -23020,7 +23028,8 @@ def cuDeviceGetUuid(dev): pdev = int(CUdevice(dev)) cydev = pdev cdef CUuuid uuid = CUuuid() - err = cydriver.cuDeviceGetUuid(uuid._pvt_ptr, cydev) + with nogil: + err = cydriver.cuDeviceGetUuid(uuid._pvt_ptr, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], uuid) @@ -23061,7 +23070,8 @@ def cuDeviceGetUuid_v2(dev): pdev = int(CUdevice(dev)) 
cydev = pdev cdef CUuuid uuid = CUuuid() - err = cydriver.cuDeviceGetUuid_v2(uuid._pvt_ptr, cydev) + with nogil: + err = cydriver.cuDeviceGetUuid_v2(uuid._pvt_ptr, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], uuid) @@ -23104,7 +23114,8 @@ def cuDeviceGetLuid(dev): cydev = pdev cdef char luid[8] cdef unsigned int deviceNodeMask = 0 - err = cydriver.cuDeviceGetLuid(luid, &deviceNodeMask, cydev) + with nogil: + err = cydriver.cuDeviceGetLuid(luid, &deviceNodeMask, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], luid, deviceNodeMask) @@ -23144,7 +23155,8 @@ def cuDeviceTotalMem(dev): pdev = int(CUdevice(dev)) cydev = pdev cdef size_t numbytes = 0 - err = cydriver.cuDeviceTotalMem(&numbytes, cydev) + with nogil: + err = cydriver.cuDeviceTotalMem(&numbytes, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], numbytes) @@ -23191,7 +23203,8 @@ def cuDeviceGetTexture1DLinearMaxWidth(pformat not None : CUarray_format, unsign cydev = pdev cdef size_t maxWidthInElements = 0 cdef cydriver.CUarray_format cypformat = pformat.value - err = cydriver.cuDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, cypformat, numChannels, cydev) + with nogil: + err = cydriver.cuDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, cypformat, numChannels, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], maxWidthInElements) @@ -23636,7 +23649,8 @@ def cuDeviceGetAttribute(attrib not None : CUdevice_attribute, dev): cydev = pdev cdef int pi = 0 cdef cydriver.CUdevice_attribute cyattrib = attrib.value - err = cydriver.cuDeviceGetAttribute(&pi, cyattrib, cydev) + with nogil: + err = cydriver.cuDeviceGetAttribute(&pi, cyattrib, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pi) @@ -23735,7 +23749,8 @@ def 
cuDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, dev, int flags): cydev = pdev cynvSciSyncAttrList = utils.HelperInputVoidPtr(nvSciSyncAttrList) cdef void* cynvSciSyncAttrList_ptr = cynvSciSyncAttrList.cptr - err = cydriver.cuDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList_ptr, cydev, flags) + with nogil: + err = cydriver.cuDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList_ptr, cydev, flags) return (_dict_CUresult[err],) {{endif}} @@ -23786,7 +23801,8 @@ def cuDeviceSetMemPool(dev, pool): else: pdev = int(CUdevice(dev)) cydev = pdev - err = cydriver.cuDeviceSetMemPool(cydev, cypool) + with nogil: + err = cydriver.cuDeviceSetMemPool(cydev, cypool) return (_dict_CUresult[err],) {{endif}} @@ -23827,7 +23843,8 @@ def cuDeviceGetMemPool(dev): pdev = int(CUdevice(dev)) cydev = pdev cdef CUmemoryPool pool = CUmemoryPool() - err = cydriver.cuDeviceGetMemPool(pool._pvt_ptr, cydev) + with nogil: + err = cydriver.cuDeviceGetMemPool(pool._pvt_ptr, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pool) @@ -23867,7 +23884,8 @@ def cuDeviceGetDefaultMemPool(dev): pdev = int(CUdevice(dev)) cydev = pdev cdef CUmemoryPool pool_out = CUmemoryPool() - err = cydriver.cuDeviceGetDefaultMemPool(pool_out._pvt_ptr, cydev) + with nogil: + err = cydriver.cuDeviceGetDefaultMemPool(pool_out._pvt_ptr, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pool_out) @@ -23914,7 +23932,8 @@ def cuDeviceGetExecAffinitySupport(typename not None : CUexecAffinityType, dev): cydev = pdev cdef int pi = 0 cdef cydriver.CUexecAffinityType cytypename = typename.value - err = cydriver.cuDeviceGetExecAffinitySupport(&pi, cytypename, cydev) + with nogil: + err = cydriver.cuDeviceGetExecAffinitySupport(&pi, cytypename, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pi) @@ -23966,7 +23985,8 @@ def cuFlushGPUDirectRDMAWrites(target not None : 
CUflushGPUDirectRDMAWritesTarge """ cdef cydriver.CUflushGPUDirectRDMAWritesTarget cytarget = target.value cdef cydriver.CUflushGPUDirectRDMAWritesScope cyscope = scope.value - err = cydriver.cuFlushGPUDirectRDMAWrites(cytarget, cyscope) + with nogil: + err = cydriver.cuFlushGPUDirectRDMAWrites(cytarget, cyscope) return (_dict_CUresult[err],) {{endif}} @@ -24043,7 +24063,8 @@ def cuDeviceGetProperties(dev): pdev = int(CUdevice(dev)) cydev = pdev cdef CUdevprop prop = CUdevprop() - err = cydriver.cuDeviceGetProperties(prop._pvt_ptr, cydev) + with nogil: + err = cydriver.cuDeviceGetProperties(prop._pvt_ptr, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], prop) @@ -24091,7 +24112,8 @@ def cuDeviceComputeCapability(dev): cydev = pdev cdef int major = 0 cdef int minor = 0 - err = cydriver.cuDeviceComputeCapability(&major, &minor, cydev) + with nogil: + err = cydriver.cuDeviceComputeCapability(&major, &minor, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], major, minor) @@ -24148,7 +24170,8 @@ def cuDevicePrimaryCtxRetain(dev): pdev = int(CUdevice(dev)) cydev = pdev cdef CUcontext pctx = CUcontext() - err = cydriver.cuDevicePrimaryCtxRetain(pctx._pvt_ptr, cydev) + with nogil: + err = cydriver.cuDevicePrimaryCtxRetain(pctx._pvt_ptr, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pctx) @@ -24195,7 +24218,8 @@ def cuDevicePrimaryCtxRelease(dev): else: pdev = int(CUdevice(dev)) cydev = pdev - err = cydriver.cuDevicePrimaryCtxRelease(cydev) + with nogil: + err = cydriver.cuDevicePrimaryCtxRelease(cydev) return (_dict_CUresult[err],) {{endif}} @@ -24308,7 +24332,8 @@ def cuDevicePrimaryCtxSetFlags(dev, unsigned int flags): else: pdev = int(CUdevice(dev)) cydev = pdev - err = cydriver.cuDevicePrimaryCtxSetFlags(cydev, flags) + with nogil: + err = cydriver.cuDevicePrimaryCtxSetFlags(cydev, flags) return 
(_dict_CUresult[err],) {{endif}} @@ -24350,7 +24375,8 @@ def cuDevicePrimaryCtxGetState(dev): cydev = pdev cdef unsigned int flags = 0 cdef int active = 0 - err = cydriver.cuDevicePrimaryCtxGetState(cydev, &flags, &active) + with nogil: + err = cydriver.cuDevicePrimaryCtxGetState(cydev, &flags, &active) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], flags, active) @@ -24396,7 +24422,8 @@ def cuDevicePrimaryCtxReset(dev): else: pdev = int(CUdevice(dev)) cydev = pdev - err = cydriver.cuDevicePrimaryCtxReset(cydev) + with nogil: + err = cydriver.cuDevicePrimaryCtxReset(cydev) return (_dict_CUresult[err],) {{endif}} @@ -24538,7 +24565,8 @@ def cuCtxCreate(unsigned int flags, dev): pdev = int(CUdevice(dev)) cydev = pdev cdef CUcontext pctx = CUcontext() - err = cydriver.cuCtxCreate(pctx._pvt_ptr, flags, cydev) + with nogil: + err = cydriver.cuCtxCreate(pctx._pvt_ptr, flags, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pctx) @@ -24696,14 +24724,17 @@ def cuCtxCreate_v3(paramsArray : Optional[Tuple[CUexecAffinityParam] | List[CUex raise TypeError("Argument 'paramsArray' is not instance of type (expected Tuple[cydriver.CUexecAffinityParam,] or List[cydriver.CUexecAffinityParam,]") cdef CUcontext pctx = CUcontext() cdef cydriver.CUexecAffinityParam* cyparamsArray = NULL - if len(paramsArray) > 0: + if len(paramsArray) > 1: cyparamsArray = calloc(len(paramsArray), sizeof(cydriver.CUexecAffinityParam)) if cyparamsArray is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cydriver.CUexecAffinityParam))) for idx in range(len(paramsArray)): string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cydriver.CUexecAffinityParam)) - err = cydriver.cuCtxCreate_v3(pctx._pvt_ptr, (paramsArray[0])._pvt_ptr if len(paramsArray) == 1 else cyparamsArray, numParams, flags, cydev) - if cyparamsArray is not 
NULL: + elif len(paramsArray) == 1: + cyparamsArray = (paramsArray[0])._pvt_ptr + with nogil: + err = cydriver.cuCtxCreate_v3(pctx._pvt_ptr, cyparamsArray, numParams, flags, cydev) + if len(paramsArray) > 1 and cyparamsArray is not NULL: free(cyparamsArray) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -24886,7 +24917,8 @@ def cuCtxCreate_v4(ctxCreateParams : Optional[CUctxCreateParams], unsigned int f cydev = pdev cdef CUcontext pctx = CUcontext() cdef cydriver.CUctxCreateParams* cyctxCreateParams_ptr = ctxCreateParams._pvt_ptr if ctxCreateParams != None else NULL - err = cydriver.cuCtxCreate_v4(pctx._pvt_ptr, cyctxCreateParams_ptr, flags, cydev) + with nogil: + err = cydriver.cuCtxCreate_v4(pctx._pvt_ptr, cyctxCreateParams_ptr, flags, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pctx) @@ -24950,7 +24982,8 @@ def cuCtxDestroy(ctx): else: pctx = int(CUcontext(ctx)) cyctx = pctx - err = cydriver.cuCtxDestroy(cyctx) + with nogil: + err = cydriver.cuCtxDestroy(cyctx) return (_dict_CUresult[err],) {{endif}} @@ -24990,7 +25023,8 @@ def cuCtxPushCurrent(ctx): else: pctx = int(CUcontext(ctx)) cyctx = pctx - err = cydriver.cuCtxPushCurrent(cyctx) + with nogil: + err = cydriver.cuCtxPushCurrent(cyctx) return (_dict_CUresult[err],) {{endif}} @@ -25020,7 +25054,8 @@ def cuCtxPopCurrent(): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize` """ cdef CUcontext pctx = CUcontext() - err = cydriver.cuCtxPopCurrent(pctx._pvt_ptr) + with nogil: + err = cydriver.cuCtxPopCurrent(pctx._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pctx) @@ -25064,7 +25099,8 @@ def 
cuCtxSetCurrent(ctx): else: pctx = int(CUcontext(ctx)) cyctx = pctx - err = cydriver.cuCtxSetCurrent(cyctx) + with nogil: + err = cydriver.cuCtxSetCurrent(cyctx) return (_dict_CUresult[err],) {{endif}} @@ -25090,7 +25126,8 @@ def cuCtxGetCurrent(): :py:obj:`~.cuCtxSetCurrent`, :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cudaGetDevice` """ cdef CUcontext pctx = CUcontext() - err = cydriver.cuCtxGetCurrent(pctx._pvt_ptr) + with nogil: + err = cydriver.cuCtxGetCurrent(pctx._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pctx) @@ -25116,7 +25153,8 @@ def cuCtxGetDevice(): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cudaGetDevice` """ cdef CUdevice device = CUdevice() - err = cydriver.cuCtxGetDevice(device._pvt_ptr) + with nogil: + err = cydriver.cuCtxGetDevice(device._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], device) @@ -25143,7 +25181,8 @@ def cuCtxGetFlags(): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetCurrent`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxGetSharedMemConfig`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuCtxSetFlags`, :py:obj:`~.cudaGetDeviceFlags` """ cdef unsigned int flags = 0 - err = cydriver.cuCtxGetFlags(&flags) + with nogil: + err = cydriver.cuCtxGetFlags(&flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], flags) @@ -25172,7 +25211,8 @@ def cuCtxSetFlags(unsigned int flags): -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, 
:py:obj:`~.cuCtxGetCurrent`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxGetSharedMemConfig`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cudaGetDeviceFlags`, :py:obj:`~.cuDevicePrimaryCtxSetFlags`, """ - err = cydriver.cuCtxSetFlags(flags) + with nogil: + err = cydriver.cuCtxSetFlags(flags) return (_dict_CUresult[err],) {{endif}} @@ -25212,7 +25252,8 @@ def cuCtxGetId(ctx): pctx = int(CUcontext(ctx)) cyctx = pctx cdef unsigned long long ctxId = 0 - err = cydriver.cuCtxGetId(cyctx, &ctxId) + with nogil: + err = cydriver.cuCtxGetId(cyctx, &ctxId) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], ctxId) @@ -25241,7 +25282,8 @@ def cuCtxSynchronize(): -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cudaDeviceSynchronize` """ - err = cydriver.cuCtxSynchronize() + with nogil: + err = cydriver.cuCtxSynchronize() return (_dict_CUresult[err],) {{endif}} @@ -25347,7 +25389,8 @@ def cuCtxSetLimit(limit not None : CUlimit, size_t value): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cudaDeviceSetLimit` """ cdef cydriver.CUlimit cylimit = limit.value - err = cydriver.cuCtxSetLimit(cylimit, value) + with nogil: + err = cydriver.cuCtxSetLimit(cylimit, value) return (_dict_CUresult[err],) {{endif}} @@ -25403,7 +25446,8 @@ def cuCtxGetLimit(limit not None : CUlimit): """ cdef size_t pvalue = 0 cdef 
cydriver.CUlimit cylimit = limit.value - err = cydriver.cuCtxGetLimit(&pvalue, cylimit) + with nogil: + err = cydriver.cuCtxGetLimit(&pvalue, cylimit) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pvalue) @@ -25450,7 +25494,8 @@ def cuCtxGetCacheConfig(): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig` """ cdef cydriver.CUfunc_cache pconfig - err = cydriver.cuCtxGetCacheConfig(&pconfig) + with nogil: + err = cydriver.cuCtxGetCacheConfig(&pconfig) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUfunc_cache(pconfig)) @@ -25509,7 +25554,8 @@ def cuCtxSetCacheConfig(config not None : CUfunc_cache): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cuKernelSetCacheConfig` """ cdef cydriver.CUfunc_cache cyconfig = config.value - err = cydriver.cuCtxSetCacheConfig(cyconfig) + with nogil: + err = cydriver.cuCtxSetCacheConfig(cyconfig) return (_dict_CUresult[err],) {{endif}} @@ -25554,7 +25600,8 @@ def cuCtxGetApiVersion(ctx): pctx = int(CUcontext(ctx)) cyctx = pctx cdef unsigned int version = 0 - err = cydriver.cuCtxGetApiVersion(cyctx, &version) + with nogil: + err = cydriver.cuCtxGetApiVersion(cyctx, &version) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return 
(_dict_CUresult[err], version) @@ -25599,7 +25646,8 @@ def cuCtxGetStreamPriorityRange(): """ cdef int leastPriority = 0 cdef int greatestPriority = 0 - err = cydriver.cuCtxGetStreamPriorityRange(&leastPriority, &greatestPriority) + with nogil: + err = cydriver.cuCtxGetStreamPriorityRange(&leastPriority, &greatestPriority) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], leastPriority, greatestPriority) @@ -25623,7 +25671,8 @@ def cuCtxResetPersistingL2Cache(): -------- :py:obj:`~.CUaccessPolicyWindow` """ - err = cydriver.cuCtxResetPersistingL2Cache() + with nogil: + err = cydriver.cuCtxResetPersistingL2Cache() return (_dict_CUresult[err],) {{endif}} @@ -25657,7 +25706,8 @@ def cuCtxGetExecAffinity(typename not None : CUexecAffinityType): """ cdef CUexecAffinityParam pExecAffinity = CUexecAffinityParam() cdef cydriver.CUexecAffinityType cytypename = typename.value - err = cydriver.cuCtxGetExecAffinity(pExecAffinity._pvt_ptr, cytypename) + with nogil: + err = cydriver.cuCtxGetExecAffinity(pExecAffinity._pvt_ptr, cytypename) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pExecAffinity) @@ -25718,7 +25768,8 @@ def cuCtxRecordEvent(hCtx, hEvent): else: phCtx = int(CUcontext(hCtx)) cyhCtx = phCtx - err = cydriver.cuCtxRecordEvent(cyhCtx, cyhEvent) + with nogil: + err = cydriver.cuCtxRecordEvent(cyhCtx, cyhEvent) return (_dict_CUresult[err],) {{endif}} @@ -25776,7 +25827,8 @@ def cuCtxWaitEvent(hCtx, hEvent): else: phCtx = int(CUcontext(hCtx)) cyhCtx = phCtx - err = cydriver.cuCtxWaitEvent(cyhCtx, cyhEvent) + with nogil: + err = cydriver.cuCtxWaitEvent(cyhCtx, cyhEvent) return (_dict_CUresult[err],) {{endif}} @@ -25814,7 +25866,8 @@ def cuCtxAttach(unsigned int flags): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxDetach`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, 
:py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize` """ cdef CUcontext pctx = CUcontext() - err = cydriver.cuCtxAttach(pctx._pvt_ptr, flags) + with nogil: + err = cydriver.cuCtxAttach(pctx._pvt_ptr, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pctx) @@ -25857,7 +25910,8 @@ def cuCtxDetach(ctx): else: pctx = int(CUcontext(ctx)) cyctx = pctx - err = cydriver.cuCtxDetach(cyctx) + with nogil: + err = cydriver.cuCtxDetach(cyctx) return (_dict_CUresult[err],) {{endif}} @@ -25897,7 +25951,8 @@ def cuCtxGetSharedMemConfig(): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cuCtxGetSharedMemConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cudaDeviceGetSharedMemConfig` """ cdef cydriver.CUsharedconfig pConfig - err = cydriver.cuCtxGetSharedMemConfig(&pConfig) + with nogil: + err = cydriver.cuCtxGetSharedMemConfig(&pConfig) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUsharedconfig(pConfig)) @@ -25953,7 +26008,8 @@ def cuCtxSetSharedMemConfig(config not None : CUsharedconfig): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cuCtxGetSharedMemConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cudaDeviceSetSharedMemConfig` """ cdef cydriver.CUsharedconfig cyconfig = config.value - err = 
cydriver.cuCtxSetSharedMemConfig(cyconfig) + with nogil: + err = cydriver.cuCtxSetSharedMemConfig(cyconfig) return (_dict_CUresult[err],) {{endif}} @@ -25989,7 +26045,8 @@ def cuModuleLoad(char* fname): :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload` """ cdef CUmodule module = CUmodule() - err = cydriver.cuModuleLoad(module._pvt_ptr, fname) + with nogil: + err = cydriver.cuModuleLoad(module._pvt_ptr, fname) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], module) @@ -26025,7 +26082,8 @@ def cuModuleLoadData(image): cdef CUmodule module = CUmodule() cyimage = utils.HelperInputVoidPtr(image) cdef void* cyimage_ptr = cyimage.cptr - err = cydriver.cuModuleLoadData(module._pvt_ptr, cyimage_ptr) + with nogil: + err = cydriver.cuModuleLoadData(module._pvt_ptr, cyimage_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], module) @@ -26076,7 +26134,9 @@ def cuModuleLoadDataEx(image, unsigned int numOptions, options : Optional[Tuple[ cdef vector[cydriver.CUjit_option] cyoptions = [pyoptions.value for pyoptions in (options)] pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] cdef utils.InputVoidPtrPtrHelper voidStarHelperoptionValues = utils.InputVoidPtrPtrHelper(pylist) - err = cydriver.cuModuleLoadDataEx(module._pvt_ptr, cyimage_ptr, numOptions, cyoptions.data(), voidStarHelperoptionValues.cptr) + cdef void** cyoptionValues_ptr = voidStarHelperoptionValues.cptr + with nogil: + err = cydriver.cuModuleLoadDataEx(module._pvt_ptr, cyimage_ptr, numOptions, cyoptions.data(), cyoptionValues_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], module) @@ -26118,7 +26178,8 @@ def 
cuModuleLoadFatBinary(fatCubin): cdef CUmodule module = CUmodule() cyfatCubin = utils.HelperInputVoidPtr(fatCubin) cdef void* cyfatCubin_ptr = cyfatCubin.cptr - err = cydriver.cuModuleLoadFatBinary(module._pvt_ptr, cyfatCubin_ptr) + with nogil: + err = cydriver.cuModuleLoadFatBinary(module._pvt_ptr, cyfatCubin_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], module) @@ -26157,7 +26218,8 @@ def cuModuleUnload(hmod): else: phmod = int(CUmodule(hmod)) cyhmod = phmod - err = cydriver.cuModuleUnload(cyhmod) + with nogil: + err = cydriver.cuModuleUnload(cyhmod) return (_dict_CUresult[err],) {{endif}} @@ -26182,7 +26244,8 @@ def cuModuleGetLoadingMode(): :py:obj:`~.cuModuleLoad`, """ cdef cydriver.CUmoduleLoadingMode mode - err = cydriver.cuModuleGetLoadingMode(&mode) + with nogil: + err = cydriver.cuModuleGetLoadingMode(&mode) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUmoduleLoadingMode(mode)) @@ -26226,7 +26289,8 @@ def cuModuleGetFunction(hmod, char* name): phmod = int(CUmodule(hmod)) cyhmod = phmod cdef CUfunction hfunc = CUfunction() - err = cydriver.cuModuleGetFunction(hfunc._pvt_ptr, cyhmod, name) + with nogil: + err = cydriver.cuModuleGetFunction(hfunc._pvt_ptr, cyhmod, name) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], hfunc) @@ -26261,7 +26325,8 @@ def cuModuleGetFunctionCount(mod): pmod = int(CUmodule(mod)) cymod = pmod cdef unsigned int count = 0 - err = cydriver.cuModuleGetFunctionCount(&count, cymod) + with nogil: + err = cydriver.cuModuleGetFunctionCount(&count, cymod) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], count) @@ -26315,7 +26380,8 @@ def cuModuleEnumerateFunctions(unsigned int numFunctions, mod): cyfunctions = calloc(numFunctions, sizeof(cydriver.CUfunction)) if cyfunctions is NULL: raise MemoryError('Failed to allocate length x size 
memory: ' + str(numFunctions) + 'x' + str(sizeof(cydriver.CUfunction))) - err = cydriver.cuModuleEnumerateFunctions(cyfunctions, numFunctions, cymod) + with nogil: + err = cydriver.cuModuleEnumerateFunctions(cyfunctions, numFunctions, cymod) if CUresult(err) == CUresult(0): pyfunctions = [CUfunction(init_value=cyfunctions[idx]) for idx in range(numFunctions)] if cyfunctions is not NULL: @@ -26367,7 +26433,8 @@ def cuModuleGetGlobal(hmod, char* name): cyhmod = phmod cdef CUdeviceptr dptr = CUdeviceptr() cdef size_t numbytes = 0 - err = cydriver.cuModuleGetGlobal(dptr._pvt_ptr, &numbytes, cyhmod, name) + with nogil: + err = cydriver.cuModuleGetGlobal(dptr._pvt_ptr, &numbytes, cyhmod, name) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], dptr, numbytes) @@ -26435,8 +26502,10 @@ def cuLinkCreate(unsigned int numOptions, options : Optional[Tuple[CUjit_option] cdef vector[cydriver.CUjit_option] cyoptions = [pyoptions.value for pyoptions in (options)] pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] cdef utils.InputVoidPtrPtrHelper voidStarHelperoptionValues = utils.InputVoidPtrPtrHelper(pylist) + cdef void** cyoptionValues_ptr = voidStarHelperoptionValues.cptr cdef CUlinkState stateOut = CUlinkState() - err = cydriver.cuLinkCreate(numOptions, cyoptions.data(), voidStarHelperoptionValues.cptr, stateOut._pvt_ptr) + with nogil: + err = cydriver.cuLinkCreate(numOptions, cyoptions.data(), cyoptionValues_ptr, stateOut._pvt_ptr) stateOut._keepalive.append(voidStarHelperoptionValues) for option in pylist: stateOut._keepalive.append(option) @@ -26513,7 +26582,9 @@ def cuLinkAddData(state, typename not None : CUjitInputType, data, size_t size, cdef vector[cydriver.CUjit_option] cyoptions = [pyoptions.value for pyoptions in (options)] pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] 
cdef utils.InputVoidPtrPtrHelper voidStarHelperoptionValues = utils.InputVoidPtrPtrHelper(pylist) - err = cydriver.cuLinkAddData(cystate, cytypename, cydata_ptr, size, name, numOptions, cyoptions.data(), voidStarHelperoptionValues.cptr) + cdef void** cyoptionValues_ptr = voidStarHelperoptionValues.cptr + with nogil: + err = cydriver.cuLinkAddData(cystate, cytypename, cydata_ptr, size, name, numOptions, cyoptions.data(), cyoptionValues_ptr) return (_dict_CUresult[err],) {{endif}} @@ -26581,7 +26652,9 @@ def cuLinkAddFile(state, typename not None : CUjitInputType, char* path, unsigne cdef vector[cydriver.CUjit_option] cyoptions = [pyoptions.value for pyoptions in (options)] pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] cdef utils.InputVoidPtrPtrHelper voidStarHelperoptionValues = utils.InputVoidPtrPtrHelper(pylist) - err = cydriver.cuLinkAddFile(cystate, cytypename, path, numOptions, cyoptions.data(), voidStarHelperoptionValues.cptr) + cdef void** cyoptionValues_ptr = voidStarHelperoptionValues.cptr + with nogil: + err = cydriver.cuLinkAddFile(cystate, cytypename, path, numOptions, cyoptions.data(), cyoptionValues_ptr) return (_dict_CUresult[err],) {{endif}} @@ -26625,7 +26698,8 @@ def cuLinkComplete(state): cystate = pstate cdef void_ptr cubinOut = 0 cdef size_t sizeOut = 0 - err = cydriver.cuLinkComplete(cystate, &cubinOut, &sizeOut) + with nogil: + err = cydriver.cuLinkComplete(cystate, &cubinOut, &sizeOut) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], cubinOut, sizeOut) @@ -26659,7 +26733,8 @@ def cuLinkDestroy(state): else: pstate = int(CUlinkState(state)) cystate = pstate - err = cydriver.cuLinkDestroy(cystate) + with nogil: + err = cydriver.cuLinkDestroy(cystate) return (_dict_CUresult[err],) {{endif}} @@ -26705,7 +26780,8 @@ def cuModuleGetTexRef(hmod, char* name): phmod = int(CUmodule(hmod)) cyhmod = phmod cdef CUtexref 
pTexRef = CUtexref() - err = cydriver.cuModuleGetTexRef(pTexRef._pvt_ptr, cyhmod, name) + with nogil: + err = cydriver.cuModuleGetTexRef(pTexRef._pvt_ptr, cyhmod, name) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pTexRef) @@ -26751,7 +26827,8 @@ def cuModuleGetSurfRef(hmod, char* name): phmod = int(CUmodule(hmod)) cyhmod = phmod cdef CUsurfref pSurfRef = CUsurfref() - err = cydriver.cuModuleGetSurfRef(pSurfRef._pvt_ptr, cyhmod, name) + with nogil: + err = cydriver.cuModuleGetSurfRef(pSurfRef._pvt_ptr, cyhmod, name) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pSurfRef) @@ -26839,14 +26916,17 @@ def cuLibraryLoadData(code, jitOptions : Optional[Tuple[CUjit_option] | List[CUj cdef vector[cydriver.CUjit_option] cyjitOptions = [pyjitOptions.value for pyjitOptions in (jitOptions)] pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] cdef utils.InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = utils.InputVoidPtrPtrHelper(pylist) + cdef void** cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) cdef vector[cydriver.CUlibraryOption] cylibraryOptions = [pylibraryOptions.value for pylibraryOptions in (libraryOptions)] pylist = [utils.HelperCUlibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] cdef utils.InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = utils.InputVoidPtrPtrHelper(pylist) + cdef void** cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too 
small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions)) if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions)) - err = cydriver.cuLibraryLoadData(library._pvt_ptr, cycode_ptr, cyjitOptions.data(), voidStarHelperjitOptionsValues.cptr, numJitOptions, cylibraryOptions.data(), voidStarHelperlibraryOptionValues.cptr, numLibraryOptions) + with nogil: + err = cydriver.cuLibraryLoadData(library._pvt_ptr, cycode_ptr, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], library) @@ -26933,14 +27013,17 @@ def cuLibraryLoadFromFile(char* fileName, jitOptions : Optional[Tuple[CUjit_opti cdef vector[cydriver.CUjit_option] cyjitOptions = [pyjitOptions.value for pyjitOptions in (jitOptions)] pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] cdef utils.InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = utils.InputVoidPtrPtrHelper(pylist) + cdef void** cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) cdef vector[cydriver.CUlibraryOption] cylibraryOptions = [pylibraryOptions.value for pylibraryOptions in (libraryOptions)] pylist = [utils.HelperCUlibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] cdef utils.InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = utils.InputVoidPtrPtrHelper(pylist) + cdef void** cylibraryOptionValues_ptr = 
voidStarHelperlibraryOptionValues.cptr if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions)) if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions)) - err = cydriver.cuLibraryLoadFromFile(library._pvt_ptr, fileName, cyjitOptions.data(), voidStarHelperjitOptionsValues.cptr, numJitOptions, cylibraryOptions.data(), voidStarHelperlibraryOptionValues.cptr, numLibraryOptions) + with nogil: + err = cydriver.cuLibraryLoadFromFile(library._pvt_ptr, fileName, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], library) @@ -26976,7 +27059,8 @@ def cuLibraryUnload(library): else: plibrary = int(CUlibrary(library)) cylibrary = plibrary - err = cydriver.cuLibraryUnload(cylibrary) + with nogil: + err = cydriver.cuLibraryUnload(cylibrary) return (_dict_CUresult[err],) {{endif}} @@ -27017,7 +27101,8 @@ def cuLibraryGetKernel(library, char* name): plibrary = int(CUlibrary(library)) cylibrary = plibrary cdef CUkernel pKernel = CUkernel() - err = cydriver.cuLibraryGetKernel(pKernel._pvt_ptr, cylibrary, name) + with nogil: + err = cydriver.cuLibraryGetKernel(pKernel._pvt_ptr, cylibrary, name) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pKernel) @@ -27052,7 +27137,8 @@ def cuLibraryGetKernelCount(lib): plib = int(CUlibrary(lib)) cylib = plib cdef unsigned int count = 0 - err = cydriver.cuLibraryGetKernelCount(&count, cylib) + with nogil: + err = cydriver.cuLibraryGetKernelCount(&count, cylib) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], count) @@ -27100,7 +27186,8 @@ def cuLibraryEnumerateKernels(unsigned int numKernels, 
lib): cykernels = calloc(numKernels, sizeof(cydriver.CUkernel)) if cykernels is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(numKernels) + 'x' + str(sizeof(cydriver.CUkernel))) - err = cydriver.cuLibraryEnumerateKernels(cykernels, numKernels, cylib) + with nogil: + err = cydriver.cuLibraryEnumerateKernels(cykernels, numKernels, cylib) if CUresult(err) == CUresult(0): pykernels = [CUkernel(init_value=cykernels[idx]) for idx in range(numKernels)] if cykernels is not NULL: @@ -27145,7 +27232,8 @@ def cuLibraryGetModule(library): plibrary = int(CUlibrary(library)) cylibrary = plibrary cdef CUmodule pMod = CUmodule() - err = cydriver.cuLibraryGetModule(pMod._pvt_ptr, cylibrary) + with nogil: + err = cydriver.cuLibraryGetModule(pMod._pvt_ptr, cylibrary) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pMod) @@ -27186,7 +27274,8 @@ def cuKernelGetFunction(kernel): pkernel = int(CUkernel(kernel)) cykernel = pkernel cdef CUfunction pFunc = CUfunction() - err = cydriver.cuKernelGetFunction(pFunc._pvt_ptr, cykernel) + with nogil: + err = cydriver.cuKernelGetFunction(pFunc._pvt_ptr, cykernel) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pFunc) @@ -27226,7 +27315,8 @@ def cuKernelGetLibrary(kernel): pkernel = int(CUkernel(kernel)) cykernel = pkernel cdef CUlibrary pLib = CUlibrary() - err = cydriver.cuKernelGetLibrary(pLib._pvt_ptr, cykernel) + with nogil: + err = cydriver.cuKernelGetLibrary(pLib._pvt_ptr, cykernel) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pLib) @@ -27274,7 +27364,8 @@ def cuLibraryGetGlobal(library, char* name): cylibrary = plibrary cdef CUdeviceptr dptr = CUdeviceptr() cdef size_t numbytes = 0 - err = cydriver.cuLibraryGetGlobal(dptr._pvt_ptr, &numbytes, cylibrary, name) + with nogil: + err = cydriver.cuLibraryGetGlobal(dptr._pvt_ptr, &numbytes, cylibrary, name) if err != 
cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], dptr, numbytes) @@ -27324,7 +27415,8 @@ def cuLibraryGetManaged(library, char* name): cylibrary = plibrary cdef CUdeviceptr dptr = CUdeviceptr() cdef size_t numbytes = 0 - err = cydriver.cuLibraryGetManaged(dptr._pvt_ptr, &numbytes, cylibrary, name) + with nogil: + err = cydriver.cuLibraryGetManaged(dptr._pvt_ptr, &numbytes, cylibrary, name) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], dptr, numbytes) @@ -27370,7 +27462,8 @@ def cuLibraryGetUnifiedFunction(library, char* symbol): plibrary = int(CUlibrary(library)) cylibrary = plibrary cdef void_ptr fptr = 0 - err = cydriver.cuLibraryGetUnifiedFunction(&fptr, cylibrary, symbol) + with nogil: + err = cydriver.cuLibraryGetUnifiedFunction(&fptr, cylibrary, symbol) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], fptr) @@ -27504,7 +27597,8 @@ def cuKernelGetAttribute(attrib not None : CUfunction_attribute, kernel, dev): cykernel = pkernel cdef int pi = 0 cdef cydriver.CUfunction_attribute cyattrib = attrib.value - err = cydriver.cuKernelGetAttribute(&pi, cyattrib, cykernel, cydev) + with nogil: + err = cydriver.cuKernelGetAttribute(&pi, cyattrib, cykernel, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pi) @@ -27620,7 +27714,8 @@ def cuKernelSetAttribute(attrib not None : CUfunction_attribute, int val, kernel pkernel = int(CUkernel(kernel)) cykernel = pkernel cdef cydriver.CUfunction_attribute cyattrib = attrib.value - err = cydriver.cuKernelSetAttribute(cyattrib, val, cykernel, cydev) + with nogil: + err = cydriver.cuKernelSetAttribute(cyattrib, val, cykernel, cydev) return (_dict_CUresult[err],) {{endif}} @@ -27703,7 +27798,8 @@ def cuKernelSetCacheConfig(kernel, config not None : CUfunc_cache, dev): pkernel = int(CUkernel(kernel)) cykernel = pkernel cdef 
cydriver.CUfunc_cache cyconfig = config.value - err = cydriver.cuKernelSetCacheConfig(cykernel, cyconfig, cydev) + with nogil: + err = cydriver.cuKernelSetCacheConfig(cykernel, cyconfig, cydev) return (_dict_CUresult[err],) {{endif}} @@ -27742,7 +27838,8 @@ def cuKernelGetName(hfunc): phfunc = int(CUkernel(hfunc)) cyhfunc = phfunc cdef const char* name = NULL - err = cydriver.cuKernelGetName(&name, cyhfunc) + with nogil: + err = cydriver.cuKernelGetName(&name, cyhfunc) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], name if name != NULL else None) @@ -27796,7 +27893,8 @@ def cuKernelGetParamInfo(kernel, size_t paramIndex): cykernel = pkernel cdef size_t paramOffset = 0 cdef size_t paramSize = 0 - err = cydriver.cuKernelGetParamInfo(cykernel, paramIndex, &paramOffset, &paramSize) + with nogil: + err = cydriver.cuKernelGetParamInfo(cykernel, paramIndex, &paramOffset, &paramSize) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], paramOffset, paramSize) @@ -27840,7 +27938,8 @@ def cuMemGetInfo(): """ cdef size_t free = 0 cdef size_t total = 0 - err = cydriver.cuMemGetInfo(&free, &total) + with nogil: + err = cydriver.cuMemGetInfo(&free, &total) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], free, total) @@ -27875,7 +27974,8 @@ def cuMemAlloc(size_t bytesize): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`,
:py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMalloc` """ cdef CUdeviceptr dptr = CUdeviceptr() - err = cydriver.cuMemAlloc(dptr._pvt_ptr, bytesize) + with nogil: + err = cydriver.cuMemAlloc(dptr._pvt_ptr, bytesize) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], dptr) @@ -27943,7 +28043,8 @@ def cuMemAllocPitch(size_t WidthInBytes, size_t Height, unsigned int ElementSize """ cdef CUdeviceptr dptr = CUdeviceptr() cdef size_t pPitch = 0 - err = cydriver.cuMemAllocPitch(dptr._pvt_ptr, &pPitch, WidthInBytes, Height, ElementSizeBytes) + with nogil: + err = cydriver.cuMemAllocPitch(dptr._pvt_ptr, &pPitch, WidthInBytes, Height, ElementSizeBytes) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], dptr, pPitch) @@ -27992,7 +28093,8 @@ def cuMemFree(dptr): else: pdptr = int(CUdeviceptr(dptr)) cydptr = pdptr - err = cydriver.cuMemFree(cydptr) + with nogil: + err = cydriver.cuMemFree(cydptr) return (_dict_CUresult[err],) {{endif}} @@ -28035,7 +28137,8 @@ def cuMemGetAddressRange(dptr): cydptr = pdptr cdef CUdeviceptr pbase = CUdeviceptr() cdef size_t psize = 0 - err = cydriver.cuMemGetAddressRange(pbase._pvt_ptr, &psize, cydptr) + with nogil: + err = cydriver.cuMemGetAddressRange(pbase._pvt_ptr, &psize, cydptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], pbase, psize) @@ -28091,7 +28194,8 @@ def cuMemAllocHost(size_t bytesize): :py:obj:`~.cuArray3DCreate`, 
:py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMallocHost` """ cdef void_ptr pp = 0 - err = cydriver.cuMemAllocHost(&pp, bytesize) + with nogil: + err = cydriver.cuMemAllocHost(&pp, bytesize) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pp) @@ -28122,7 +28226,8 @@ def cuMemFreeHost(p): """ cyp = utils.HelperInputVoidPtr(p) cdef void* cyp_ptr = cyp.cptr - err = cydriver.cuMemFreeHost(cyp_ptr) + with nogil: + err = cydriver.cuMemFreeHost(cyp_ptr) return (_dict_CUresult[err],) {{endif}} @@ -28212,7 +28317,8 @@ def cuMemHostAlloc(size_t bytesize, unsigned int Flags): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, 
:py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaHostAlloc` """ cdef void_ptr pp = 0 - err = cydriver.cuMemHostAlloc(&pp, bytesize, Flags) + with nogil: + err = cydriver.cuMemHostAlloc(&pp, bytesize, Flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pp) @@ -28274,7 +28380,8 @@ def cuMemHostGetDevicePointer(p, unsigned int Flags): cdef CUdeviceptr pdptr = CUdeviceptr() cyp = utils.HelperInputVoidPtr(p) cdef void* cyp_ptr = cyp.cptr - err = cydriver.cuMemHostGetDevicePointer(pdptr._pvt_ptr, cyp_ptr, Flags) + with nogil: + err = cydriver.cuMemHostGetDevicePointer(pdptr._pvt_ptr, cyp_ptr, Flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pdptr) @@ -28312,7 +28419,8 @@ def cuMemHostGetFlags(p): cdef unsigned int pFlags = 0 cyp = utils.HelperInputVoidPtr(p) cdef void* cyp_ptr = cyp.cptr - err = cydriver.cuMemHostGetFlags(&pFlags, cyp_ptr) + with nogil: + err = cydriver.cuMemHostGetFlags(&pFlags, cyp_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pFlags) @@ -28448,7 +28556,8 @@ def cuMemAllocManaged(size_t bytesize, unsigned int flags): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, 
:py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuStreamAttachMemAsync`, :py:obj:`~.cudaMallocManaged` """ cdef CUdeviceptr dptr = CUdeviceptr() - err = cydriver.cuMemAllocManaged(dptr._pvt_ptr, bytesize, flags) + with nogil: + err = cydriver.cuMemAllocManaged(dptr._pvt_ptr, bytesize, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], dptr) @@ -28540,7 +28649,6 @@ def cuDeviceRegisterAsyncNotification(device, callbackFunc, userData): cdef CUasyncCallbackHandle callback = CUasyncCallbackHandle() with nogil: err = cydriver.cuDeviceRegisterAsyncNotification(cydevice, cuAsyncNotificationCallbackWrapper, cbData, callback._pvt_ptr) - if err != cydriver.CUDA_SUCCESS: free(cbData) else: @@ -28592,7 +28700,8 @@ def cuDeviceUnregisterAsyncNotification(device, callback): else: pdevice = int(CUdevice(device)) cydevice = pdevice - err = cydriver.cuDeviceUnregisterAsyncNotification(cydevice, cycallback) + with nogil: + err = cydriver.cuDeviceUnregisterAsyncNotification(cydevice, cycallback) if err == cydriver.CUDA_SUCCESS: 
free(m_global._allocated[pcallback]) m_global._allocated.erase(pcallback) @@ -28627,7 +28736,8 @@ def cuDeviceGetByPCIBusId(char* pciBusId): :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetPCIBusId`, :py:obj:`~.cudaDeviceGetByPCIBusId` """ cdef CUdevice dev = CUdevice() - err = cydriver.cuDeviceGetByPCIBusId(dev._pvt_ptr, pciBusId) + with nogil: + err = cydriver.cuDeviceGetByPCIBusId(dev._pvt_ptr, pciBusId) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], dev) @@ -28675,7 +28785,8 @@ def cuDeviceGetPCIBusId(int length, dev): cydev = pdev pypciBusId = b" " * length cdef char* pciBusId = pypciBusId - err = cydriver.cuDeviceGetPCIBusId(pciBusId, length, cydev) + with nogil: + err = cydriver.cuDeviceGetPCIBusId(pciBusId, length, cydev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pypciBusId) @@ -28735,7 +28846,8 @@ def cuIpcGetEventHandle(event): pevent = int(CUevent(event)) cyevent = pevent cdef CUipcEventHandle pHandle = CUipcEventHandle() - err = cydriver.cuIpcGetEventHandle(pHandle._pvt_ptr, cyevent) + with nogil: + err = cydriver.cuIpcGetEventHandle(pHandle._pvt_ptr, cyevent) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pHandle) @@ -28781,7 +28893,8 @@ def cuIpcOpenEventHandle(handle not None : CUipcEventHandle): :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuIpcGetEventHandle`, :py:obj:`~.cuIpcGetMemHandle`, :py:obj:`~.cuIpcOpenMemHandle`, :py:obj:`~.cuIpcCloseMemHandle`, :py:obj:`~.cudaIpcOpenEventHandle` """ cdef CUevent phEvent = CUevent() - err = cydriver.cuIpcOpenEventHandle(phEvent._pvt_ptr, handle._pvt_ptr[0]) + with nogil: + err = cydriver.cuIpcOpenEventHandle(phEvent._pvt_ptr, handle._pvt_ptr[0]) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) 
return (_dict_CUresult[err], phEvent) @@ -28836,7 +28949,8 @@ def cuIpcGetMemHandle(dptr): pdptr = int(CUdeviceptr(dptr)) cydptr = pdptr cdef CUipcMemHandle pHandle = CUipcMemHandle() - err = cydriver.cuIpcGetMemHandle(pHandle._pvt_ptr, cydptr) + with nogil: + err = cydriver.cuIpcGetMemHandle(pHandle._pvt_ptr, cydptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pHandle) @@ -28904,7 +29018,8 @@ def cuIpcOpenMemHandle(handle not None : CUipcMemHandle, unsigned int Flags): No guarantees are made about the address returned in `*pdptr`. In particular, multiple processes may not receive the same address for the same `handle`. """ cdef CUdeviceptr pdptr = CUdeviceptr() - err = cydriver.cuIpcOpenMemHandle(pdptr._pvt_ptr, handle._pvt_ptr[0], Flags) + with nogil: + err = cydriver.cuIpcOpenMemHandle(pdptr._pvt_ptr, handle._pvt_ptr[0], Flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pdptr) @@ -28954,7 +29069,8 @@ def cuIpcCloseMemHandle(dptr): else: pdptr = int(CUdeviceptr(dptr)) cydptr = pdptr - err = cydriver.cuIpcCloseMemHandle(cydptr) + with nogil: + err = cydriver.cuIpcCloseMemHandle(cydptr) return (_dict_CUresult[err],) {{endif}} @@ -29058,7 +29174,8 @@ def cuMemHostRegister(p, size_t bytesize, unsigned int Flags): """ cyp = utils.HelperInputVoidPtr(p) cdef void* cyp_ptr = cyp.cptr - err = cydriver.cuMemHostRegister(cyp_ptr, bytesize, Flags) + with nogil: + err = cydriver.cuMemHostRegister(cyp_ptr, bytesize, Flags) return (_dict_CUresult[err],) {{endif}} @@ -29090,7 +29207,8 @@ def cuMemHostUnregister(p): """ cyp = utils.HelperInputVoidPtr(p) cdef void* cyp_ptr = cyp.cptr - err = cydriver.cuMemHostUnregister(cyp_ptr) + with nogil: + err = cydriver.cuMemHostUnregister(cyp_ptr) return (_dict_CUresult[err],) {{endif}} @@ -29141,7 +29259,8 @@ def cuMemcpy(dst, src, size_t ByteCount): else: pdst = int(CUdeviceptr(dst)) cydst = pdst - err = cydriver.cuMemcpy(cydst, 
cysrc, ByteCount) + with nogil: + err = cydriver.cuMemcpy(cydst, cysrc, ByteCount) return (_dict_CUresult[err],) {{endif}} @@ -29211,7 +29330,8 @@ def cuMemcpyPeer(dstDevice, dstContext, srcDevice, srcContext, size_t ByteCount) else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemcpyPeer(cydstDevice, cydstContext, cysrcDevice, cysrcContext, ByteCount) + with nogil: + err = cydriver.cuMemcpyPeer(cydstDevice, cydstContext, cysrcDevice, cysrcContext, ByteCount) return (_dict_CUresult[err],) {{endif}} @@ -29253,7 +29373,8 @@ def cuMemcpyHtoD(dstDevice, srcHost, size_t ByteCount): cydstDevice = pdstDevice cysrcHost = utils.HelperInputVoidPtr(srcHost) cdef void* cysrcHost_ptr = cysrcHost.cptr - err = cydriver.cuMemcpyHtoD(cydstDevice, cysrcHost_ptr, ByteCount) + with nogil: + err = cydriver.cuMemcpyHtoD(cydstDevice, cysrcHost_ptr, ByteCount) return (_dict_CUresult[err],) {{endif}} @@ -29295,7 +29416,8 @@ def cuMemcpyDtoH(dstHost, srcDevice, size_t ByteCount): cysrcDevice = psrcDevice cydstHost = utils.HelperInputVoidPtr(dstHost) cdef void* cydstHost_ptr = cydstHost.cptr - err = cydriver.cuMemcpyDtoH(cydstHost_ptr, cysrcDevice, ByteCount) + with nogil: + err = cydriver.cuMemcpyDtoH(cydstHost_ptr, cysrcDevice, ByteCount) return (_dict_CUresult[err],) {{endif}} @@ -29343,7 +29465,8 @@ def cuMemcpyDtoD(dstDevice, srcDevice, size_t ByteCount): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemcpyDtoD(cydstDevice, cysrcDevice, ByteCount) + with nogil: + err = cydriver.cuMemcpyDtoD(cydstDevice, cysrcDevice, ByteCount) return (_dict_CUresult[err],) {{endif}} @@ -29394,7 +29517,8 @@ def cuMemcpyDtoA(dstArray, size_t dstOffset, srcDevice, size_t ByteCount): else: pdstArray = int(CUarray(dstArray)) cydstArray = pdstArray - err = cydriver.cuMemcpyDtoA(cydstArray, dstOffset, cysrcDevice, ByteCount) + with nogil: + err = cydriver.cuMemcpyDtoA(cydstArray, dstOffset, cysrcDevice, ByteCount) return 
(_dict_CUresult[err],) {{endif}} @@ -29447,7 +29571,8 @@ def cuMemcpyAtoD(dstDevice, srcArray, size_t srcOffset, size_t ByteCount): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemcpyAtoD(cydstDevice, cysrcArray, srcOffset, ByteCount) + with nogil: + err = cydriver.cuMemcpyAtoD(cydstDevice, cysrcArray, srcOffset, ByteCount) return (_dict_CUresult[err],) {{endif}} @@ -29492,7 +29617,8 @@ def cuMemcpyHtoA(dstArray, size_t dstOffset, srcHost, size_t ByteCount): cydstArray = pdstArray cysrcHost = utils.HelperInputVoidPtr(srcHost) cdef void* cysrcHost_ptr = cysrcHost.cptr - err = cydriver.cuMemcpyHtoA(cydstArray, dstOffset, cysrcHost_ptr, ByteCount) + with nogil: + err = cydriver.cuMemcpyHtoA(cydstArray, dstOffset, cysrcHost_ptr, ByteCount) return (_dict_CUresult[err],) {{endif}} @@ -29537,7 +29663,8 @@ def cuMemcpyAtoH(dstHost, srcArray, size_t srcOffset, size_t ByteCount): cysrcArray = psrcArray cydstHost = utils.HelperInputVoidPtr(dstHost) cdef void* cydstHost_ptr = cydstHost.cptr - err = cydriver.cuMemcpyAtoH(cydstHost_ptr, cysrcArray, srcOffset, ByteCount) + with nogil: + err = cydriver.cuMemcpyAtoH(cydstHost_ptr, cysrcArray, srcOffset, ByteCount) return (_dict_CUresult[err],) {{endif}} @@ -29593,7 +29720,8 @@ def cuMemcpyAtoA(dstArray, size_t dstOffset, srcArray, size_t srcOffset, size_t else: pdstArray = int(CUarray(dstArray)) cydstArray = pdstArray - err = cydriver.cuMemcpyAtoA(cydstArray, dstOffset, cysrcArray, srcOffset, ByteCount) + with nogil: + err = cydriver.cuMemcpyAtoA(cydstArray, dstOffset, cysrcArray, srcOffset, ByteCount) return (_dict_CUresult[err],) {{endif}} @@ -29719,7 +29847,8 @@ def cuMemcpy2D(pCopy : Optional[CUDA_MEMCPY2D]): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2DAsync`, 
:py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray` """ cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr = pCopy._pvt_ptr if pCopy != None else NULL - err = cydriver.cuMemcpy2D(cypCopy_ptr) + with nogil: + err = cydriver.cuMemcpy2D(cypCopy_ptr) return (_dict_CUresult[err],) {{endif}} @@ -29845,7 +29974,8 @@ def cuMemcpy2DUnaligned(pCopy : Optional[CUDA_MEMCPY2D]): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, 
:py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray` """ cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr = pCopy._pvt_ptr if pCopy != None else NULL - err = cydriver.cuMemcpy2DUnaligned(cypCopy_ptr) + with nogil: + err = cydriver.cuMemcpy2DUnaligned(cypCopy_ptr) return (_dict_CUresult[err],) {{endif}} @@ -29974,7 +30104,8 @@ def cuMemcpy3D(pCopy : Optional[CUDA_MEMCPY3D]): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy3D` """ cdef cydriver.CUDA_MEMCPY3D* cypCopy_ptr = pCopy._pvt_ptr if pCopy != None else NULL - err = cydriver.cuMemcpy3D(cypCopy_ptr) + with nogil: + err = cydriver.cuMemcpy3D(cypCopy_ptr) return (_dict_CUresult[err],) {{endif}} @@ -30003,7 +30134,8 @@ def cuMemcpy3DPeer(pCopy : 
Optional[CUDA_MEMCPY3D_PEER]): :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyPeerAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cudaMemcpy3DPeer` """ cdef cydriver.CUDA_MEMCPY3D_PEER* cypCopy_ptr = pCopy._pvt_ptr if pCopy != None else NULL - err = cydriver.cuMemcpy3DPeer(cypCopy_ptr) + with nogil: + err = cydriver.cuMemcpy3DPeer(cypCopy_ptr) return (_dict_CUresult[err],) {{endif}} @@ -30064,7 +30196,8 @@ def cuMemcpyAsync(dst, src, size_t ByteCount, hStream): else: pdst = int(CUdeviceptr(dst)) cydst = pdst - err = cydriver.cuMemcpyAsync(cydst, cysrc, ByteCount, cyhStream) + with nogil: + err = cydriver.cuMemcpyAsync(cydst, cysrc, ByteCount, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -30144,7 +30277,8 @@ def cuMemcpyPeerAsync(dstDevice, dstContext, srcDevice, srcContext, size_t ByteC else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemcpyPeerAsync(cydstDevice, cydstContext, cysrcDevice, cysrcContext, ByteCount, cyhStream) + with nogil: + err = cydriver.cuMemcpyPeerAsync(cydstDevice, cydstContext, cysrcDevice, cysrcContext, ByteCount, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -30196,7 +30330,8 @@ def cuMemcpyHtoDAsync(dstDevice, srcHost, size_t ByteCount, hStream): cydstDevice = pdstDevice cysrcHost = utils.HelperInputVoidPtr(srcHost) cdef void* cysrcHost_ptr = cysrcHost.cptr - err = cydriver.cuMemcpyHtoDAsync(cydstDevice, cysrcHost_ptr, ByteCount, cyhStream) + with nogil: + err = cydriver.cuMemcpyHtoDAsync(cydstDevice, cysrcHost_ptr, ByteCount, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -30248,7 +30383,8 @@ def cuMemcpyDtoHAsync(dstHost, srcDevice, size_t ByteCount, hStream): cysrcDevice = psrcDevice cydstHost = utils.HelperInputVoidPtr(dstHost) cdef void* cydstHost_ptr = cydstHost.cptr - err = cydriver.cuMemcpyDtoHAsync(cydstHost_ptr, cysrcDevice, ByteCount, cyhStream) + with nogil: + err = 
cydriver.cuMemcpyDtoHAsync(cydstHost_ptr, cysrcDevice, ByteCount, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -30306,7 +30442,8 @@ def cuMemcpyDtoDAsync(dstDevice, srcDevice, size_t ByteCount, hStream): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemcpyDtoDAsync(cydstDevice, cysrcDevice, ByteCount, cyhStream) + with nogil: + err = cydriver.cuMemcpyDtoDAsync(cydstDevice, cysrcDevice, ByteCount, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -30361,7 +30498,8 @@ def cuMemcpyHtoAAsync(dstArray, size_t dstOffset, srcHost, size_t ByteCount, hSt cydstArray = pdstArray cysrcHost = utils.HelperInputVoidPtr(srcHost) cdef void* cysrcHost_ptr = cysrcHost.cptr - err = cydriver.cuMemcpyHtoAAsync(cydstArray, dstOffset, cysrcHost_ptr, ByteCount, cyhStream) + with nogil: + err = cydriver.cuMemcpyHtoAAsync(cydstArray, dstOffset, cysrcHost_ptr, ByteCount, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -30416,7 +30554,8 @@ def cuMemcpyAtoHAsync(dstHost, srcArray, size_t srcOffset, size_t ByteCount, hSt cysrcArray = psrcArray cydstHost = utils.HelperInputVoidPtr(dstHost) cdef void* cydstHost_ptr = cydstHost.cptr - err = cydriver.cuMemcpyAtoHAsync(cydstHost_ptr, cysrcArray, srcOffset, ByteCount, cyhStream) + with nogil: + err = cydriver.cuMemcpyAtoHAsync(cydstHost_ptr, cysrcArray, srcOffset, ByteCount, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -30559,7 +30698,8 @@ def cuMemcpy2DAsync(pCopy : Optional[CUDA_MEMCPY2D], hStream): phStream = int(CUstream(hStream)) cyhStream = phStream cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr = pCopy._pvt_ptr if pCopy != None else NULL - err = cydriver.cuMemcpy2DAsync(cypCopy_ptr, cyhStream) + with nogil: + err = cydriver.cuMemcpy2DAsync(cypCopy_ptr, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -30698,7 +30838,8 @@ def cuMemcpy3DAsync(pCopy : Optional[CUDA_MEMCPY3D], hStream): phStream = int(CUstream(hStream)) cyhStream = phStream cdef cydriver.CUDA_MEMCPY3D* 
cypCopy_ptr = pCopy._pvt_ptr if pCopy != None else NULL - err = cydriver.cuMemcpy3DAsync(cypCopy_ptr, cyhStream) + with nogil: + err = cydriver.cuMemcpy3DAsync(cypCopy_ptr, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -30737,7 +30878,8 @@ def cuMemcpy3DPeerAsync(pCopy : Optional[CUDA_MEMCPY3D_PEER], hStream): phStream = int(CUstream(hStream)) cyhStream = phStream cdef cydriver.CUDA_MEMCPY3D_PEER* cypCopy_ptr = pCopy._pvt_ptr if pCopy != None else NULL - err = cydriver.cuMemcpy3DPeerAsync(cypCopy_ptr, cyhStream) + with nogil: + err = cydriver.cuMemcpy3DPeerAsync(cypCopy_ptr, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -30875,42 +31017,49 @@ def cuMemcpyBatchAsync(dsts : Optional[Tuple[CUdeviceptr] | List[CUdeviceptr]], if not all(isinstance(_x, (CUdeviceptr,)) for _x in dsts): raise TypeError("Argument 'dsts' is not instance of type (expected Tuple[cydriver.CUdeviceptr,] or List[cydriver.CUdeviceptr,]") cdef cydriver.CUdeviceptr* cydsts = NULL - if len(dsts) > 0: + if len(dsts) > 1: cydsts = calloc(len(dsts), sizeof(cydriver.CUdeviceptr)) if cydsts is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dsts)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) else: for idx in range(len(dsts)): cydsts[idx] = (dsts[idx])._pvt_ptr[0] + elif len(dsts) == 1: + cydsts = (dsts[0])._pvt_ptr cdef cydriver.CUdeviceptr* cysrcs = NULL - if len(srcs) > 0: + if len(srcs) > 1: cysrcs = calloc(len(srcs), sizeof(cydriver.CUdeviceptr)) if cysrcs is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(srcs)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) else: for idx in range(len(srcs)): cysrcs[idx] = (srcs[idx])._pvt_ptr[0] + elif len(srcs) == 1: + cysrcs = (srcs[0])._pvt_ptr cdef vector[size_t] cysizes = sizes if count > len(dsts): raise RuntimeError("List is too small: " + str(len(dsts)) + " < " + str(count)) if count > len(srcs): raise RuntimeError("List is too small: " + str(len(srcs)) + " < " + str(count)) if count > 
len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) cdef cydriver.CUmemcpyAttributes* cyattrs = NULL - if len(attrs) > 0: + if len(attrs) > 1: cyattrs = calloc(len(attrs), sizeof(cydriver.CUmemcpyAttributes)) if cyattrs is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(attrs)) + 'x' + str(sizeof(cydriver.CUmemcpyAttributes))) for idx in range(len(attrs)): string.memcpy(&cyattrs[idx], (attrs[idx])._pvt_ptr, sizeof(cydriver.CUmemcpyAttributes)) + elif len(attrs) == 1: + cyattrs = (attrs[0])._pvt_ptr cdef vector[size_t] cyattrsIdxs = attrsIdxs if numAttrs > len(attrs): raise RuntimeError("List is too small: " + str(len(attrs)) + " < " + str(numAttrs)) if numAttrs > len(attrsIdxs): raise RuntimeError("List is too small: " + str(len(attrsIdxs)) + " < " + str(numAttrs)) cdef size_t failIdx = 0 - err = cydriver.cuMemcpyBatchAsync((dsts[0])._pvt_ptr if len(dsts) == 1 else cydsts, (srcs[0])._pvt_ptr if len(srcs) == 1 else cysrcs, cysizes.data(), count, (attrs[0])._pvt_ptr if len(attrs) == 1 else cyattrs, cyattrsIdxs.data(), numAttrs, &failIdx, cyhStream) - if cydsts is not NULL: + with nogil: + err = cydriver.cuMemcpyBatchAsync(cydsts, cysrcs, cysizes.data(), count, cyattrs, cyattrsIdxs.data(), numAttrs, &failIdx, cyhStream) + if len(dsts) > 1 and cydsts is not NULL: free(cydsts) - if cysrcs is not NULL: + if len(srcs) > 1 and cysrcs is not NULL: free(cysrcs) - if cyattrs is not NULL: + if len(attrs) > 1 and cyattrs is not NULL: free(cyattrs) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -31041,15 +31190,18 @@ def cuMemcpy3DBatchAsync(size_t numOps, opList : Optional[Tuple[CUDA_MEMCPY3D_BA raise TypeError("Argument 'opList' is not instance of type (expected Tuple[cydriver.CUDA_MEMCPY3D_BATCH_OP,] or List[cydriver.CUDA_MEMCPY3D_BATCH_OP,]") if numOps > len(opList): raise RuntimeError("List is too small: " + str(len(opList)) + " < " + str(numOps)) cdef 
cydriver.CUDA_MEMCPY3D_BATCH_OP* cyopList = NULL - if len(opList) > 0: + if len(opList) > 1: cyopList = calloc(len(opList), sizeof(cydriver.CUDA_MEMCPY3D_BATCH_OP)) if cyopList is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(opList)) + 'x' + str(sizeof(cydriver.CUDA_MEMCPY3D_BATCH_OP))) for idx in range(len(opList)): string.memcpy(&cyopList[idx], (opList[idx])._pvt_ptr, sizeof(cydriver.CUDA_MEMCPY3D_BATCH_OP)) + elif len(opList) == 1: + cyopList = (opList[0])._pvt_ptr cdef size_t failIdx = 0 - err = cydriver.cuMemcpy3DBatchAsync(numOps, (opList[0])._pvt_ptr if len(opList) == 1 else cyopList, &failIdx, flags, cyhStream) - if cyopList is not NULL: + with nogil: + err = cydriver.cuMemcpy3DBatchAsync(numOps, cyopList, &failIdx, flags, cyhStream) + if len(opList) > 1 and cyopList is not NULL: free(cyopList) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -31090,7 +31242,8 @@ def cuMemsetD8(dstDevice, unsigned char uc, size_t N): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemsetD8(cydstDevice, uc, N) + with nogil: + err = cydriver.cuMemsetD8(cydstDevice, uc, N) return (_dict_CUresult[err],) {{endif}} @@ -31129,7 +31282,8 @@ def cuMemsetD16(dstDevice, unsigned short us, size_t N): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemsetD16(cydstDevice, us, N) + with nogil: + err = cydriver.cuMemsetD16(cydstDevice, us, N) return (_dict_CUresult[err],) {{endif}} @@ -31168,7 +31322,8 @@ def cuMemsetD32(dstDevice, unsigned int ui, size_t N): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemsetD32(cydstDevice, ui, N) + with nogil: + err = cydriver.cuMemsetD32(cydstDevice, ui, N) return (_dict_CUresult[err],) {{endif}} @@ -31214,7 +31369,8 @@ def cuMemsetD2D8(dstDevice, size_t dstPitch, unsigned char uc, size_t Width, siz else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = 
pdstDevice - err = cydriver.cuMemsetD2D8(cydstDevice, dstPitch, uc, Width, Height) + with nogil: + err = cydriver.cuMemsetD2D8(cydstDevice, dstPitch, uc, Width, Height) return (_dict_CUresult[err],) {{endif}} @@ -31261,7 +31417,8 @@ def cuMemsetD2D16(dstDevice, size_t dstPitch, unsigned short us, size_t Width, s else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemsetD2D16(cydstDevice, dstPitch, us, Width, Height) + with nogil: + err = cydriver.cuMemsetD2D16(cydstDevice, dstPitch, us, Width, Height) return (_dict_CUresult[err],) {{endif}} @@ -31308,7 +31465,8 @@ def cuMemsetD2D32(dstDevice, size_t dstPitch, unsigned int ui, size_t Width, siz else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemsetD2D32(cydstDevice, dstPitch, ui, Width, Height) + with nogil: + err = cydriver.cuMemsetD2D32(cydstDevice, dstPitch, ui, Width, Height) return (_dict_CUresult[err],) {{endif}} @@ -31356,7 +31514,8 @@ def cuMemsetD8Async(dstDevice, unsigned char uc, size_t N, hStream): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemsetD8Async(cydstDevice, uc, N, cyhStream) + with nogil: + err = cydriver.cuMemsetD8Async(cydstDevice, uc, N, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -31405,7 +31564,8 @@ def cuMemsetD16Async(dstDevice, unsigned short us, size_t N, hStream): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemsetD16Async(cydstDevice, us, N, cyhStream) + with nogil: + err = cydriver.cuMemsetD16Async(cydstDevice, us, N, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -31454,7 +31614,8 @@ def cuMemsetD32Async(dstDevice, unsigned int ui, size_t N, hStream): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemsetD32Async(cydstDevice, ui, N, cyhStream) + with nogil: + err = cydriver.cuMemsetD32Async(cydstDevice, ui, N, cyhStream) return (_dict_CUresult[err],) 
{{endif}} @@ -31510,7 +31671,8 @@ def cuMemsetD2D8Async(dstDevice, size_t dstPitch, unsigned char uc, size_t Width else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemsetD2D8Async(cydstDevice, dstPitch, uc, Width, Height, cyhStream) + with nogil: + err = cydriver.cuMemsetD2D8Async(cydstDevice, dstPitch, uc, Width, Height, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -31567,7 +31729,8 @@ def cuMemsetD2D16Async(dstDevice, size_t dstPitch, unsigned short us, size_t Wid else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemsetD2D16Async(cydstDevice, dstPitch, us, Width, Height, cyhStream) + with nogil: + err = cydriver.cuMemsetD2D16Async(cydstDevice, dstPitch, us, Width, Height, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -31624,7 +31787,8 @@ def cuMemsetD2D32Async(dstDevice, size_t dstPitch, unsigned int ui, size_t Width else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice - err = cydriver.cuMemsetD2D32Async(cydstDevice, dstPitch, ui, Width, Height, cyhStream) + with nogil: + err = cydriver.cuMemsetD2D32Async(cydstDevice, dstPitch, ui, Width, Height, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -31692,7 +31856,8 @@ def cuArrayCreate(pAllocateArray : Optional[CUDA_ARRAY_DESCRIPTOR]): """ cdef CUarray pHandle = CUarray() cdef cydriver.CUDA_ARRAY_DESCRIPTOR* cypAllocateArray_ptr = pAllocateArray._pvt_ptr if pAllocateArray != None else NULL - err = cydriver.cuArrayCreate(pHandle._pvt_ptr, cypAllocateArray_ptr) + with nogil: + err = cydriver.cuArrayCreate(pHandle._pvt_ptr, cypAllocateArray_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pHandle) @@ -31734,7 +31899,8 @@ def cuArrayGetDescriptor(hArray): phArray = int(CUarray(hArray)) cyhArray = phArray cdef CUDA_ARRAY_DESCRIPTOR pArrayDescriptor = CUDA_ARRAY_DESCRIPTOR() - err = cydriver.cuArrayGetDescriptor(pArrayDescriptor._pvt_ptr, cyhArray) 
+ with nogil: + err = cydriver.cuArrayGetDescriptor(pArrayDescriptor._pvt_ptr, cyhArray) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pArrayDescriptor) @@ -31789,7 +31955,8 @@ def cuArrayGetSparseProperties(array): parray = int(CUarray(array)) cyarray = parray cdef CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties = CUDA_ARRAY_SPARSE_PROPERTIES() - err = cydriver.cuArrayGetSparseProperties(sparseProperties._pvt_ptr, cyarray) + with nogil: + err = cydriver.cuArrayGetSparseProperties(sparseProperties._pvt_ptr, cyarray) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], sparseProperties) @@ -31846,7 +32013,8 @@ def cuMipmappedArrayGetSparseProperties(mipmap): pmipmap = int(CUmipmappedArray(mipmap)) cymipmap = pmipmap cdef CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties = CUDA_ARRAY_SPARSE_PROPERTIES() - err = cydriver.cuMipmappedArrayGetSparseProperties(sparseProperties._pvt_ptr, cymipmap) + with nogil: + err = cydriver.cuMipmappedArrayGetSparseProperties(sparseProperties._pvt_ptr, cymipmap) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], sparseProperties) @@ -31903,7 +32071,8 @@ def cuArrayGetMemoryRequirements(array, device): parray = int(CUarray(array)) cyarray = parray cdef CUDA_ARRAY_MEMORY_REQUIREMENTS memoryRequirements = CUDA_ARRAY_MEMORY_REQUIREMENTS() - err = cydriver.cuArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cyarray, cydevice) + with nogil: + err = cydriver.cuArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cyarray, cydevice) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], memoryRequirements) @@ -31961,7 +32130,8 @@ def cuMipmappedArrayGetMemoryRequirements(mipmap, device): pmipmap = int(CUmipmappedArray(mipmap)) cymipmap = pmipmap cdef CUDA_ARRAY_MEMORY_REQUIREMENTS memoryRequirements = CUDA_ARRAY_MEMORY_REQUIREMENTS() - err = 
cydriver.cuMipmappedArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cymipmap, cydevice) + with nogil: + err = cydriver.cuMipmappedArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cymipmap, cydevice) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], memoryRequirements) @@ -32016,7 +32186,8 @@ def cuArrayGetPlane(hArray, unsigned int planeIdx): phArray = int(CUarray(hArray)) cyhArray = phArray cdef CUarray pPlaneArray = CUarray() - err = cydriver.cuArrayGetPlane(pPlaneArray._pvt_ptr, cyhArray, planeIdx) + with nogil: + err = cydriver.cuArrayGetPlane(pPlaneArray._pvt_ptr, cyhArray, planeIdx) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pPlaneArray) @@ -32052,7 +32223,8 @@ def cuArrayDestroy(hArray): else: phArray = int(CUarray(hArray)) cyhArray = phArray - err = cydriver.cuArrayDestroy(cyhArray) + with nogil: + err = cydriver.cuArrayDestroy(cyhArray) return (_dict_CUresult[err],) {{endif}} @@ -32184,7 +32356,8 @@ def cuArray3DCreate(pAllocateArray : Optional[CUDA_ARRAY3D_DESCRIPTOR]): """ cdef CUarray pHandle = CUarray() cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR* cypAllocateArray_ptr = pAllocateArray._pvt_ptr if pAllocateArray != None else NULL - err = cydriver.cuArray3DCreate(pHandle._pvt_ptr, cypAllocateArray_ptr) + with nogil: + err = cydriver.cuArray3DCreate(pHandle._pvt_ptr, cypAllocateArray_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pHandle) @@ -32230,7 +32403,8 @@ def cuArray3DGetDescriptor(hArray): phArray = int(CUarray(hArray)) cyhArray = phArray cdef CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor = CUDA_ARRAY3D_DESCRIPTOR() - err = cydriver.cuArray3DGetDescriptor(pArrayDescriptor._pvt_ptr, cyhArray) + with nogil: + err = cydriver.cuArray3DGetDescriptor(pArrayDescriptor._pvt_ptr, cyhArray) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], 
pArrayDescriptor) @@ -32352,7 +32526,8 @@ def cuMipmappedArrayCreate(pMipmappedArrayDesc : Optional[CUDA_ARRAY3D_DESCRIPTO """ cdef CUmipmappedArray pHandle = CUmipmappedArray() cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR* cypMipmappedArrayDesc_ptr = pMipmappedArrayDesc._pvt_ptr if pMipmappedArrayDesc != None else NULL - err = cydriver.cuMipmappedArrayCreate(pHandle._pvt_ptr, cypMipmappedArrayDesc_ptr, numMipmapLevels) + with nogil: + err = cydriver.cuMipmappedArrayCreate(pHandle._pvt_ptr, cypMipmappedArrayDesc_ptr, numMipmapLevels) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pHandle) @@ -32397,7 +32572,8 @@ def cuMipmappedArrayGetLevel(hMipmappedArray, unsigned int level): phMipmappedArray = int(CUmipmappedArray(hMipmappedArray)) cyhMipmappedArray = phMipmappedArray cdef CUarray pLevelArray = CUarray() - err = cydriver.cuMipmappedArrayGetLevel(pLevelArray._pvt_ptr, cyhMipmappedArray, level) + with nogil: + err = cydriver.cuMipmappedArrayGetLevel(pLevelArray._pvt_ptr, cyhMipmappedArray, level) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pLevelArray) @@ -32433,7 +32609,8 @@ def cuMipmappedArrayDestroy(hMipmappedArray): else: phMipmappedArray = int(CUmipmappedArray(hMipmappedArray)) cyhMipmappedArray = phMipmappedArray - err = cydriver.cuMipmappedArrayDestroy(cyhMipmappedArray) + with nogil: + err = cydriver.cuMipmappedArrayDestroy(cyhMipmappedArray) return (_dict_CUresult[err],) {{endif}} @@ -32508,7 +32685,8 @@ def cuMemGetHandleForAddressRange(dptr, size_t size, handleType not None : CUmem cdef int handle = 0 cdef void* cyhandle_ptr = &handle cdef cydriver.CUmemRangeHandleType cyhandleType = handleType.value - err = cydriver.cuMemGetHandleForAddressRange(cyhandle_ptr, cydptr, size, cyhandleType, flags) + with nogil: + err = cydriver.cuMemGetHandleForAddressRange(cyhandle_ptr, cydptr, size, cyhandleType, flags) if err != cydriver.CUDA_SUCCESS: return 
(_dict_CUresult[err], None) return (_dict_CUresult[err], handle) @@ -32593,7 +32771,8 @@ def cuMemBatchDecompressAsync(paramsArray : Optional[CUmemDecompressParams], siz cystream = pstream cdef cydriver.CUmemDecompressParams* cyparamsArray_ptr = paramsArray._pvt_ptr if paramsArray != None else NULL cdef size_t errorIndex = 0 - err = cydriver.cuMemBatchDecompressAsync(cyparamsArray_ptr, count, flags, &errorIndex, cystream) + with nogil: + err = cydriver.cuMemBatchDecompressAsync(cyparamsArray_ptr, count, flags, &errorIndex, cystream) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], errorIndex) @@ -32645,7 +32824,8 @@ def cuMemAddressReserve(size_t size, size_t alignment, addr, unsigned long long paddr = int(CUdeviceptr(addr)) cyaddr = paddr cdef CUdeviceptr ptr = CUdeviceptr() - err = cydriver.cuMemAddressReserve(ptr._pvt_ptr, size, alignment, cyaddr, flags) + with nogil: + err = cydriver.cuMemAddressReserve(ptr._pvt_ptr, size, alignment, cyaddr, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], ptr) @@ -32685,7 +32865,8 @@ def cuMemAddressFree(ptr, size_t size): else: pptr = int(CUdeviceptr(ptr)) cyptr = pptr - err = cydriver.cuMemAddressFree(cyptr, size) + with nogil: + err = cydriver.cuMemAddressFree(cyptr, size) return (_dict_CUresult[err],) {{endif}} @@ -32768,7 +32949,8 @@ def cuMemCreate(size_t size, prop : Optional[CUmemAllocationProp], unsigned long """ cdef CUmemGenericAllocationHandle handle = CUmemGenericAllocationHandle() cdef cydriver.CUmemAllocationProp* cyprop_ptr = prop._pvt_ptr if prop != None else NULL - err = cydriver.cuMemCreate(handle._pvt_ptr, size, cyprop_ptr, flags) + with nogil: + err = cydriver.cuMemCreate(handle._pvt_ptr, size, cyprop_ptr, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], handle) @@ -32813,7 +32995,8 @@ def cuMemRelease(handle): else: phandle = 
int(CUmemGenericAllocationHandle(handle)) cyhandle = phandle - err = cydriver.cuMemRelease(cyhandle) + with nogil: + err = cydriver.cuMemRelease(cyhandle) return (_dict_CUresult[err],) {{endif}} @@ -32895,7 +33078,8 @@ def cuMemMap(ptr, size_t size, size_t offset, handle, unsigned long long flags): else: pptr = int(CUdeviceptr(ptr)) cyptr = pptr - err = cydriver.cuMemMap(cyptr, size, offset, cyhandle, flags) + with nogil: + err = cydriver.cuMemMap(cyptr, size, offset, cyhandle, flags) return (_dict_CUresult[err],) {{endif}} @@ -33057,15 +33241,18 @@ def cuMemMapArrayAsync(mapInfoList : Optional[Tuple[CUarrayMapInfo] | List[CUarr if not all(isinstance(_x, (CUarrayMapInfo,)) for _x in mapInfoList): raise TypeError("Argument 'mapInfoList' is not instance of type (expected Tuple[cydriver.CUarrayMapInfo,] or List[cydriver.CUarrayMapInfo,]") cdef cydriver.CUarrayMapInfo* cymapInfoList = NULL - if len(mapInfoList) > 0: + if len(mapInfoList) > 1: cymapInfoList = calloc(len(mapInfoList), sizeof(cydriver.CUarrayMapInfo)) if cymapInfoList is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(mapInfoList)) + 'x' + str(sizeof(cydriver.CUarrayMapInfo))) for idx in range(len(mapInfoList)): string.memcpy(&cymapInfoList[idx], (mapInfoList[idx])._pvt_ptr, sizeof(cydriver.CUarrayMapInfo)) + elif len(mapInfoList) == 1: + cymapInfoList = (mapInfoList[0])._pvt_ptr if count > len(mapInfoList): raise RuntimeError("List is too small: " + str(len(mapInfoList)) + " < " + str(count)) - err = cydriver.cuMemMapArrayAsync((mapInfoList[0])._pvt_ptr if len(mapInfoList) == 1 else cymapInfoList, count, cyhStream) - if cymapInfoList is not NULL: + with nogil: + err = cydriver.cuMemMapArrayAsync(cymapInfoList, count, cyhStream) + if len(mapInfoList) > 1 and cymapInfoList is not NULL: free(cymapInfoList) return (_dict_CUresult[err],) {{endif}} @@ -33113,7 +33300,8 @@ def cuMemUnmap(ptr, size_t size): else: pptr = int(CUdeviceptr(ptr)) cyptr = pptr - err = 
cydriver.cuMemUnmap(cyptr, size) + with nogil: + err = cydriver.cuMemUnmap(cyptr, size) return (_dict_CUresult[err],) {{endif}} @@ -33174,15 +33362,18 @@ def cuMemSetAccess(ptr, size_t size, desc : Optional[Tuple[CUmemAccessDesc] | Li pptr = int(CUdeviceptr(ptr)) cyptr = pptr cdef cydriver.CUmemAccessDesc* cydesc = NULL - if len(desc) > 0: + if len(desc) > 1: cydesc = calloc(len(desc), sizeof(cydriver.CUmemAccessDesc)) if cydesc is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(desc)) + 'x' + str(sizeof(cydriver.CUmemAccessDesc))) for idx in range(len(desc)): string.memcpy(&cydesc[idx], (desc[idx])._pvt_ptr, sizeof(cydriver.CUmemAccessDesc)) + elif len(desc) == 1: + cydesc = (desc[0])._pvt_ptr if count > len(desc): raise RuntimeError("List is too small: " + str(len(desc)) + " < " + str(count)) - err = cydriver.cuMemSetAccess(cyptr, size, (desc[0])._pvt_ptr if len(desc) == 1 else cydesc, count) - if cydesc is not NULL: + with nogil: + err = cydriver.cuMemSetAccess(cyptr, size, cydesc, count) + if len(desc) > 1 and cydesc is not NULL: free(cydesc) return (_dict_CUresult[err],) {{endif}} @@ -33221,7 +33412,8 @@ def cuMemGetAccess(location : Optional[CUmemLocation], ptr): cyptr = pptr cdef unsigned long long flags = 0 cdef cydriver.CUmemLocation* cylocation_ptr = location._pvt_ptr if location != None else NULL - err = cydriver.cuMemGetAccess(&flags, cylocation_ptr, cyptr) + with nogil: + err = cydriver.cuMemGetAccess(&flags, cylocation_ptr, cyptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], flags) @@ -33280,7 +33472,8 @@ def cuMemExportToShareableHandle(handle, handleType not None : CUmemAllocationHa cdef utils.HelperCUmemAllocationHandleType cyshareableHandle = utils.HelperCUmemAllocationHandleType(handleType) cdef void* cyshareableHandle_ptr = cyshareableHandle.cptr cdef cydriver.CUmemAllocationHandleType cyhandleType = handleType.value - err = 
cydriver.cuMemExportToShareableHandle(cyshareableHandle_ptr, cyhandle, cyhandleType, flags) + with nogil: + err = cydriver.cuMemExportToShareableHandle(cyshareableHandle_ptr, cyhandle, cyhandleType, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], cyshareableHandle.pyObj()) @@ -33329,7 +33522,8 @@ def cuMemImportFromShareableHandle(osHandle, shHandleType not None : CUmemAlloca cyosHandle = utils.HelperInputVoidPtr(osHandle) cdef void* cyosHandle_ptr = cyosHandle.cptr cdef cydriver.CUmemAllocationHandleType cyshHandleType = shHandleType.value - err = cydriver.cuMemImportFromShareableHandle(handle._pvt_ptr, cyosHandle_ptr, cyshHandleType) + with nogil: + err = cydriver.cuMemImportFromShareableHandle(handle._pvt_ptr, cyosHandle_ptr, cyshHandleType) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], handle) @@ -33367,7 +33561,8 @@ def cuMemGetAllocationGranularity(prop : Optional[CUmemAllocationProp], option n cdef size_t granularity = 0 cdef cydriver.CUmemAllocationProp* cyprop_ptr = prop._pvt_ptr if prop != None else NULL cdef cydriver.CUmemAllocationGranularity_flags cyoption = option.value - err = cydriver.cuMemGetAllocationGranularity(&granularity, cyprop_ptr, cyoption) + with nogil: + err = cydriver.cuMemGetAllocationGranularity(&granularity, cyprop_ptr, cyoption) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], granularity) @@ -33405,7 +33600,8 @@ def cuMemGetAllocationPropertiesFromHandle(handle): phandle = int(CUmemGenericAllocationHandle(handle)) cyhandle = phandle cdef CUmemAllocationProp prop = CUmemAllocationProp() - err = cydriver.cuMemGetAllocationPropertiesFromHandle(prop._pvt_ptr, cyhandle) + with nogil: + err = cydriver.cuMemGetAllocationPropertiesFromHandle(prop._pvt_ptr, cyhandle) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], prop) @@ -33445,7 +33641,8 
@@ def cuMemRetainAllocationHandle(addr): cdef CUmemGenericAllocationHandle handle = CUmemGenericAllocationHandle() cyaddr = utils.HelperInputVoidPtr(addr) cdef void* cyaddr_ptr = cyaddr.cptr - err = cydriver.cuMemRetainAllocationHandle(handle._pvt_ptr, cyaddr_ptr) + with nogil: + err = cydriver.cuMemRetainAllocationHandle(handle._pvt_ptr, cyaddr_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], handle) @@ -33494,7 +33691,8 @@ def cuMemFreeAsync(dptr, hStream): else: pdptr = int(CUdeviceptr(dptr)) cydptr = pdptr - err = cydriver.cuMemFreeAsync(cydptr, cyhStream) + with nogil: + err = cydriver.cuMemFreeAsync(cydptr, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -33545,7 +33743,8 @@ def cuMemAllocAsync(size_t bytesize, hStream): phStream = int(CUstream(hStream)) cyhStream = phStream cdef CUdeviceptr dptr = CUdeviceptr() - err = cydriver.cuMemAllocAsync(dptr._pvt_ptr, bytesize, cyhStream) + with nogil: + err = cydriver.cuMemAllocAsync(dptr._pvt_ptr, bytesize, cyhStream) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], dptr) @@ -33596,7 +33795,8 @@ def cuMemPoolTrimTo(pool, size_t minBytesToKeep): else: ppool = int(CUmemoryPool(pool)) cypool = ppool - err = cydriver.cuMemPoolTrimTo(cypool, minBytesToKeep) + with nogil: + err = cydriver.cuMemPoolTrimTo(cypool, minBytesToKeep) return (_dict_CUresult[err],) {{endif}} @@ -33670,7 +33870,8 @@ def cuMemPoolSetAttribute(pool, attr not None : CUmemPool_attribute, value): cdef cydriver.CUmemPool_attribute cyattr = attr.value cdef utils.HelperCUmemPool_attribute cyvalue = utils.HelperCUmemPool_attribute(attr, value, is_getter=False) cdef void* cyvalue_ptr = cyvalue.cptr - err = cydriver.cuMemPoolSetAttribute(cypool, cyattr, cyvalue_ptr) + with nogil: + err = cydriver.cuMemPoolSetAttribute(cypool, cyattr, cyvalue_ptr) return (_dict_CUresult[err],) {{endif}} @@ -33751,7 +33952,8 @@ def cuMemPoolGetAttribute(pool, attr not 
None : CUmemPool_attribute): cdef cydriver.CUmemPool_attribute cyattr = attr.value cdef utils.HelperCUmemPool_attribute cyvalue = utils.HelperCUmemPool_attribute(attr, 0, is_getter=True) cdef void* cyvalue_ptr = cyvalue.cptr - err = cydriver.cuMemPoolGetAttribute(cypool, cyattr, cyvalue_ptr) + with nogil: + err = cydriver.cuMemPoolGetAttribute(cypool, cyattr, cyvalue_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], cyvalue.pyObj()) @@ -33794,15 +33996,18 @@ def cuMemPoolSetAccess(pool, map : Optional[Tuple[CUmemAccessDesc] | List[CUmemA ppool = int(CUmemoryPool(pool)) cypool = ppool cdef cydriver.CUmemAccessDesc* cymap = NULL - if len(map) > 0: + if len(map) > 1: cymap = calloc(len(map), sizeof(cydriver.CUmemAccessDesc)) if cymap is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(map)) + 'x' + str(sizeof(cydriver.CUmemAccessDesc))) for idx in range(len(map)): string.memcpy(&cymap[idx], (map[idx])._pvt_ptr, sizeof(cydriver.CUmemAccessDesc)) + elif len(map) == 1: + cymap = (map[0])._pvt_ptr if count > len(map): raise RuntimeError("List is too small: " + str(len(map)) + " < " + str(count)) - err = cydriver.cuMemPoolSetAccess(cypool, (map[0])._pvt_ptr if len(map) == 1 else cymap, count) - if cymap is not NULL: + with nogil: + err = cydriver.cuMemPoolSetAccess(cypool, cymap, count) + if len(map) > 1 and cymap is not NULL: free(cymap) return (_dict_CUresult[err],) {{endif}} @@ -33844,7 +34049,8 @@ def cuMemPoolGetAccess(memPool, location : Optional[CUmemLocation]): cymemPool = pmemPool cdef cydriver.CUmemAccess_flags flags cdef cydriver.CUmemLocation* cylocation_ptr = location._pvt_ptr if location != None else NULL - err = cydriver.cuMemPoolGetAccess(&flags, cymemPool, cylocation_ptr) + with nogil: + err = cydriver.cuMemPoolGetAccess(&flags, cymemPool, cylocation_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUmemAccess_flags(flags)) 
@@ -33918,7 +34124,8 @@ def cuMemPoolCreate(poolProps : Optional[CUmemPoolProps]): """ cdef CUmemoryPool pool = CUmemoryPool() cdef cydriver.CUmemPoolProps* cypoolProps_ptr = poolProps._pvt_ptr if poolProps != None else NULL - err = cydriver.cuMemPoolCreate(pool._pvt_ptr, cypoolProps_ptr) + with nogil: + err = cydriver.cuMemPoolCreate(pool._pvt_ptr, cypoolProps_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pool) @@ -33965,7 +34172,8 @@ def cuMemPoolDestroy(pool): else: ppool = int(CUmemoryPool(pool)) cypool = ppool - err = cydriver.cuMemPoolDestroy(cypool) + with nogil: + err = cydriver.cuMemPoolDestroy(cypool) return (_dict_CUresult[err],) {{endif}} @@ -34021,7 +34229,8 @@ def cuMemAllocFromPoolAsync(size_t bytesize, pool, hStream): ppool = int(CUmemoryPool(pool)) cypool = ppool cdef CUdeviceptr dptr = CUdeviceptr() - err = cydriver.cuMemAllocFromPoolAsync(dptr._pvt_ptr, bytesize, cypool, cyhStream) + with nogil: + err = cydriver.cuMemAllocFromPoolAsync(dptr._pvt_ptr, bytesize, cypool, cyhStream) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], dptr) @@ -34077,7 +34286,8 @@ def cuMemPoolExportToShareableHandle(pool, handleType not None : CUmemAllocation cdef utils.HelperCUmemAllocationHandleType cyhandle_out = utils.HelperCUmemAllocationHandleType(handleType) cdef void* cyhandle_out_ptr = cyhandle_out.cptr cdef cydriver.CUmemAllocationHandleType cyhandleType = handleType.value - err = cydriver.cuMemPoolExportToShareableHandle(cyhandle_out_ptr, cypool, cyhandleType, flags) + with nogil: + err = cydriver.cuMemPoolExportToShareableHandle(cyhandle_out_ptr, cypool, cyhandleType, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], cyhandle_out.pyObj()) @@ -34125,7 +34335,8 @@ def cuMemPoolImportFromShareableHandle(handle, handleType not None : CUmemAlloca cyhandle = utils.HelperInputVoidPtr(handle) cdef void* 
cyhandle_ptr = cyhandle.cptr cdef cydriver.CUmemAllocationHandleType cyhandleType = handleType.value - err = cydriver.cuMemPoolImportFromShareableHandle(pool_out._pvt_ptr, cyhandle_ptr, cyhandleType, flags) + with nogil: + err = cydriver.cuMemPoolImportFromShareableHandle(pool_out._pvt_ptr, cyhandle_ptr, cyhandleType, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pool_out) @@ -34167,7 +34378,8 @@ def cuMemPoolExportPointer(ptr): pptr = int(CUdeviceptr(ptr)) cyptr = pptr cdef CUmemPoolPtrExportData shareData_out = CUmemPoolPtrExportData() - err = cydriver.cuMemPoolExportPointer(shareData_out._pvt_ptr, cyptr) + with nogil: + err = cydriver.cuMemPoolExportPointer(shareData_out._pvt_ptr, cyptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], shareData_out) @@ -34219,7 +34431,8 @@ def cuMemPoolImportPointer(pool, shareData : Optional[CUmemPoolPtrExportData]): cypool = ppool cdef CUdeviceptr ptr_out = CUdeviceptr() cdef cydriver.CUmemPoolPtrExportData* cyshareData_ptr = shareData._pvt_ptr if shareData != None else NULL - err = cydriver.cuMemPoolImportPointer(ptr_out._pvt_ptr, cypool, cyshareData_ptr) + with nogil: + err = cydriver.cuMemPoolImportPointer(ptr_out._pvt_ptr, cypool, cyshareData_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], ptr_out) @@ -34279,7 +34492,8 @@ def cuMulticastCreate(prop : Optional[CUmulticastObjectProp]): """ cdef CUmemGenericAllocationHandle mcHandle = CUmemGenericAllocationHandle() cdef cydriver.CUmulticastObjectProp* cyprop_ptr = prop._pvt_ptr if prop != None else NULL - err = cydriver.cuMulticastCreate(mcHandle._pvt_ptr, cyprop_ptr) + with nogil: + err = cydriver.cuMulticastCreate(mcHandle._pvt_ptr, cyprop_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], mcHandle) @@ -34336,7 +34550,8 @@ def cuMulticastAddDevice(mcHandle, dev): 
else: pmcHandle = int(CUmemGenericAllocationHandle(mcHandle)) cymcHandle = pmcHandle - err = cydriver.cuMulticastAddDevice(cymcHandle, cydev) + with nogil: + err = cydriver.cuMulticastAddDevice(cymcHandle, cydev) return (_dict_CUresult[err],) {{endif}} @@ -34415,7 +34630,8 @@ def cuMulticastBindMem(mcHandle, size_t mcOffset, memHandle, size_t memOffset, s else: pmcHandle = int(CUmemGenericAllocationHandle(mcHandle)) cymcHandle = pmcHandle - err = cydriver.cuMulticastBindMem(cymcHandle, mcOffset, cymemHandle, memOffset, size, flags) + with nogil: + err = cydriver.cuMulticastBindMem(cymcHandle, mcOffset, cymemHandle, memOffset, size, flags) return (_dict_CUresult[err],) {{endif}} @@ -34490,7 +34706,8 @@ def cuMulticastBindAddr(mcHandle, size_t mcOffset, memptr, size_t size, unsigned else: pmcHandle = int(CUmemGenericAllocationHandle(mcHandle)) cymcHandle = pmcHandle - err = cydriver.cuMulticastBindAddr(cymcHandle, mcOffset, cymemptr, size, flags) + with nogil: + err = cydriver.cuMulticastBindAddr(cymcHandle, mcOffset, cymemptr, size, flags) return (_dict_CUresult[err],) {{endif}} @@ -34548,7 +34765,8 @@ def cuMulticastUnbind(mcHandle, dev, size_t mcOffset, size_t size): else: pmcHandle = int(CUmemGenericAllocationHandle(mcHandle)) cymcHandle = pmcHandle - err = cydriver.cuMulticastUnbind(cymcHandle, cydev, mcOffset, size) + with nogil: + err = cydriver.cuMulticastUnbind(cymcHandle, cydev, mcOffset, size) return (_dict_CUresult[err],) {{endif}} @@ -34584,7 +34802,8 @@ def cuMulticastGetGranularity(prop : Optional[CUmulticastObjectProp], option not cdef size_t granularity = 0 cdef cydriver.CUmulticastObjectProp* cyprop_ptr = prop._pvt_ptr if prop != None else NULL cdef cydriver.CUmulticastGranularity_flags cyoption = option.value - err = cydriver.cuMulticastGetGranularity(&granularity, cyprop_ptr, cyoption) + with nogil: + err = cydriver.cuMulticastGetGranularity(&granularity, cyprop_ptr, cyoption) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) 
return (_dict_CUresult[err], granularity) @@ -34796,7 +35015,8 @@ def cuPointerGetAttribute(attribute not None : CUpointer_attribute, ptr): cdef utils.HelperCUpointer_attribute cydata = utils.HelperCUpointer_attribute(attribute, 0, is_getter=True) cdef void* cydata_ptr = cydata.cptr cdef cydriver.CUpointer_attribute cyattribute = attribute.value - err = cydriver.cuPointerGetAttribute(cydata_ptr, cyattribute, cyptr) + with nogil: + err = cydriver.cuPointerGetAttribute(cydata_ptr, cyattribute, cyptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], cydata.pyObj()) @@ -34914,7 +35134,8 @@ def cuMemPrefetchAsync(devPtr, size_t count, dstDevice, hStream): else: pdevPtr = int(CUdeviceptr(devPtr)) cydevPtr = pdevPtr - err = cydriver.cuMemPrefetchAsync(cydevPtr, count, cydstDevice, cyhStream) + with nogil: + err = cydriver.cuMemPrefetchAsync(cydevPtr, count, cydstDevice, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -35035,7 +35256,8 @@ def cuMemPrefetchAsync_v2(devPtr, size_t count, location not None : CUmemLocatio else: pdevPtr = int(CUdeviceptr(devPtr)) cydevPtr = pdevPtr - err = cydriver.cuMemPrefetchAsync_v2(cydevPtr, count, location._pvt_ptr[0], flags, cyhStream) + with nogil: + err = cydriver.cuMemPrefetchAsync_v2(cydevPtr, count, location._pvt_ptr[0], flags, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -35219,7 +35441,8 @@ def cuMemAdvise(devPtr, size_t count, advice not None : CUmem_advise, device): pdevPtr = int(CUdeviceptr(devPtr)) cydevPtr = pdevPtr cdef cydriver.CUmem_advise cyadvice = advice.value - err = cydriver.cuMemAdvise(cydevPtr, count, cyadvice, cydevice) + with nogil: + err = cydriver.cuMemAdvise(cydevPtr, count, cyadvice, cydevice) return (_dict_CUresult[err],) {{endif}} @@ -35424,7 +35647,8 @@ def cuMemAdvise_v2(devPtr, size_t count, advice not None : CUmem_advise, locatio pdevPtr = int(CUdeviceptr(devPtr)) cydevPtr = pdevPtr cdef cydriver.CUmem_advise cyadvice = advice.value - err = 
cydriver.cuMemAdvise_v2(cydevPtr, count, cyadvice, location._pvt_ptr[0]) + with nogil: + err = cydriver.cuMemAdvise_v2(cydevPtr, count, cyadvice, location._pvt_ptr[0]) return (_dict_CUresult[err],) {{endif}} @@ -35579,7 +35803,8 @@ def cuMemRangeGetAttribute(size_t dataSize, attribute not None : CUmem_range_att cdef utils.HelperCUmem_range_attribute cydata = utils.HelperCUmem_range_attribute(attribute, dataSize) cdef void* cydata_ptr = cydata.cptr cdef cydriver.CUmem_range_attribute cyattribute = attribute.value - err = cydriver.cuMemRangeGetAttribute(cydata_ptr, dataSize, cyattribute, cydevPtr, count) + with nogil: + err = cydriver.cuMemRangeGetAttribute(cydata_ptr, dataSize, cyattribute, cydevPtr, count) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], cydata.pyObj()) @@ -35665,7 +35890,8 @@ def cuMemRangeGetAttributes(dataSizes : Tuple[int] | List[int], attributes : Opt cdef vector[cydriver.CUmem_range_attribute] cyattributes = [pyattributes.value for pyattributes in (attributes)] if numAttributes > len(dataSizes): raise RuntimeError("List is too small: " + str(len(dataSizes)) + " < " + str(numAttributes)) if numAttributes > len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes)) - err = cydriver.cuMemRangeGetAttributes(cyvoidStarHelper_ptr, cydataSizes.data(), cyattributes.data(), numAttributes, cydevPtr, count) + with nogil: + err = cydriver.cuMemRangeGetAttributes(cyvoidStarHelper_ptr, cydataSizes.data(), cyattributes.data(), numAttributes, cydevPtr, count) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], [obj.pyObj() for obj in pylist]) @@ -35722,7 +35948,8 @@ def cuPointerSetAttribute(value, attribute not None : CUpointer_attribute, ptr): cdef utils.HelperCUpointer_attribute cyvalue = utils.HelperCUpointer_attribute(attribute, value, is_getter=False) cdef void* cyvalue_ptr = cyvalue.cptr cdef 
cydriver.CUpointer_attribute cyattribute = attribute.value - err = cydriver.cuPointerSetAttribute(cyvalue_ptr, cyattribute, cyptr) + with nogil: + err = cydriver.cuPointerSetAttribute(cyvalue_ptr, cyattribute, cyptr) return (_dict_CUresult[err],) {{endif}} @@ -35813,7 +36040,8 @@ def cuPointerGetAttributes(unsigned int numAttributes, attributes : Optional[Tup pylist = [utils.HelperCUpointer_attribute(pyattributes, 0, is_getter=True) for pyattributes in attributes] cdef utils.InputVoidPtrPtrHelper voidStarHelperdata = utils.InputVoidPtrPtrHelper(pylist) cdef void** cyvoidStarHelper_ptr = voidStarHelperdata.cptr - err = cydriver.cuPointerGetAttributes(numAttributes, cyattributes.data(), cyvoidStarHelper_ptr, cyptr) + with nogil: + err = cydriver.cuPointerGetAttributes(numAttributes, cyattributes.data(), cyvoidStarHelper_ptr, cyptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], [obj.pyObj() for obj in pylist]) @@ -35854,7 +36082,8 @@ def cuStreamCreate(unsigned int Flags): :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamGetDevice` :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags` """ cdef CUstream phStream = CUstream() - err = cydriver.cuStreamCreate(phStream._pvt_ptr, Flags) + with nogil: + err = cydriver.cuStreamCreate(phStream._pvt_ptr, Flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phStream) @@ -35908,7 +36137,8 @@ def cuStreamCreateWithPriority(unsigned int flags, int priority): In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. 
Stream priorities have no effect on host-to-device and device-to-host memory operations. """ cdef CUstream phStream = CUstream() - err = cydriver.cuStreamCreateWithPriority(phStream._pvt_ptr, flags, priority) + with nogil: + err = cydriver.cuStreamCreateWithPriority(phStream._pvt_ptr, flags, priority) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phStream) @@ -35955,7 +36185,8 @@ def cuStreamGetPriority(hStream): phStream = int(CUstream(hStream)) cyhStream = phStream cdef int priority = 0 - err = cydriver.cuStreamGetPriority(cyhStream, &priority) + with nogil: + err = cydriver.cuStreamGetPriority(cyhStream, &priority) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], priority) @@ -35994,7 +36225,8 @@ def cuStreamGetDevice(hStream): phStream = int(CUstream(hStream)) cyhStream = phStream cdef CUdevice device = CUdevice() - err = cydriver.cuStreamGetDevice(cyhStream, device._pvt_ptr) + with nogil: + err = cydriver.cuStreamGetDevice(cyhStream, device._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], device) @@ -36038,7 +36270,8 @@ def cuStreamGetFlags(hStream): phStream = int(CUstream(hStream)) cyhStream = phStream cdef unsigned int flags = 0 - err = cydriver.cuStreamGetFlags(cyhStream, &flags) + with nogil: + err = cydriver.cuStreamGetFlags(cyhStream, &flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], flags) @@ -36094,7 +36327,8 @@ def cuStreamGetId(hStream): phStream = int(CUstream(hStream)) cyhStream = phStream cdef unsigned long long streamId = 0 - err = cydriver.cuStreamGetId(cyhStream, &streamId) + with nogil: + err = cydriver.cuStreamGetId(cyhStream, &streamId) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], streamId) @@ -36159,7 +36393,8 @@ def cuStreamGetCtx(hStream): phStream = int(CUstream(hStream)) 
cyhStream = phStream cdef CUcontext pctx = CUcontext() - err = cydriver.cuStreamGetCtx(cyhStream, pctx._pvt_ptr) + with nogil: + err = cydriver.cuStreamGetCtx(cyhStream, pctx._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pctx) @@ -36236,7 +36471,8 @@ def cuStreamGetCtx_v2(hStream): cyhStream = phStream cdef CUcontext pCtx = CUcontext() cdef CUgreenCtx pGreenCtx = CUgreenCtx() - err = cydriver.cuStreamGetCtx_v2(cyhStream, pCtx._pvt_ptr, pGreenCtx._pvt_ptr) + with nogil: + err = cydriver.cuStreamGetCtx_v2(cyhStream, pCtx._pvt_ptr, pGreenCtx._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], pCtx, pGreenCtx) @@ -36296,7 +36532,8 @@ def cuStreamWaitEvent(hStream, hEvent, unsigned int Flags): else: phStream = int(CUstream(hStream)) cyhStream = phStream - err = cydriver.cuStreamWaitEvent(cyhStream, cyhEvent, Flags) + with nogil: + err = cydriver.cuStreamWaitEvent(cyhStream, cyhEvent, Flags) return (_dict_CUresult[err],) {{endif}} @@ -36412,7 +36649,6 @@ def cuStreamAddCallback(hStream, callback, userData, unsigned int flags): with nogil: err = cydriver.cuStreamAddCallback(cyhStream, cuStreamCallbackWrapper, cbData, flags) - if err != cydriver.CUDA_SUCCESS: free(cbData) return (_dict_CUresult[err],) @@ -36469,7 +36705,8 @@ def cuStreamBeginCapture(hStream, mode not None : CUstreamCaptureMode): phStream = int(CUstream(hStream)) cyhStream = phStream cdef cydriver.CUstreamCaptureMode cymode = mode.value - err = cydriver.cuStreamBeginCapture(cyhStream, cymode) + with nogil: + err = cydriver.cuStreamBeginCapture(cyhStream, cymode) return (_dict_CUresult[err],) {{endif}} @@ -36550,27 +36787,32 @@ def cuStreamBeginCaptureToGraph(hStream, hGraph, dependencies : Optional[Tuple[C phStream = int(CUstream(hStream)) cyhStream = phStream cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = 
calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr cdef cydriver.CUgraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 0: + if len(dependencyData) > 1: cydependencyData = calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData)) if cydependencyData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) for idx in range(len(dependencyData)): string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) if numDependencies > len(dependencyData): raise RuntimeError("List is too small: " + str(len(dependencyData)) + " < " + str(numDependencies)) cdef cydriver.CUstreamCaptureMode cymode = mode.value - err = cydriver.cuStreamBeginCaptureToGraph(cyhStream, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, (dependencyData[0])._pvt_ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, cymode) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuStreamBeginCaptureToGraph(cyhStream, cyhGraph, cydependencies, cydependencyData, numDependencies, cymode) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) - if cydependencyData is not NULL: + if len(dependencyData) > 1 and cydependencyData is not NULL: free(cydependencyData) return (_dict_CUresult[err],) {{endif}} @@ -36642,7 +36884,8 @@ def 
cuThreadExchangeStreamCaptureMode(mode not None : CUstreamCaptureMode): :py:obj:`~.cuStreamBeginCapture` """ cdef cydriver.CUstreamCaptureMode cymode = mode.value - err = cydriver.cuThreadExchangeStreamCaptureMode(&cymode) + with nogil: + err = cydriver.cuThreadExchangeStreamCaptureMode(&cymode) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUstreamCaptureMode(cymode)) @@ -36689,7 +36932,8 @@ def cuStreamEndCapture(hStream): phStream = int(CUstream(hStream)) cyhStream = phStream cdef CUgraph phGraph = CUgraph() - err = cydriver.cuStreamEndCapture(cyhStream, phGraph._pvt_ptr) + with nogil: + err = cydriver.cuStreamEndCapture(cyhStream, phGraph._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phGraph) @@ -36751,7 +36995,8 @@ def cuStreamIsCapturing(hStream): phStream = int(CUstream(hStream)) cyhStream = phStream cdef cydriver.CUstreamCaptureStatus captureStatus - err = cydriver.cuStreamIsCapturing(cyhStream, &captureStatus) + with nogil: + err = cydriver.cuStreamIsCapturing(cyhStream, &captureStatus) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUstreamCaptureStatus(captureStatus)) @@ -36832,7 +37077,8 @@ def cuStreamGetCaptureInfo(hStream): cdef const cydriver.CUgraphNode* cydependencies_out = NULL pydependencies_out = [] cdef size_t numDependencies_out = 0 - err = cydriver.cuStreamGetCaptureInfo(cyhStream, &captureStatus_out, id_out._pvt_ptr, graph_out._pvt_ptr, &cydependencies_out, &numDependencies_out) + with nogil: + err = cydriver.cuStreamGetCaptureInfo(cyhStream, &captureStatus_out, id_out._pvt_ptr, graph_out._pvt_ptr, &cydependencies_out, &numDependencies_out) if CUresult(err) == CUresult(0): pydependencies_out = [CUgraphNode(init_value=cydependencies_out[idx]) for idx in range(numDependencies_out)] if err != cydriver.CUDA_SUCCESS: @@ -36929,7 +37175,8 @@ def cuStreamGetCaptureInfo_v3(hStream): cdef 
const cydriver.CUgraphEdgeData* cyedgeData_out = NULL pyedgeData_out = [] cdef size_t numDependencies_out = 0 - err = cydriver.cuStreamGetCaptureInfo_v3(cyhStream, &captureStatus_out, id_out._pvt_ptr, graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out) + with nogil: + err = cydriver.cuStreamGetCaptureInfo_v3(cyhStream, &captureStatus_out, id_out._pvt_ptr, graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out) if CUresult(err) == CUresult(0): pydependencies_out = [CUgraphNode(init_value=cydependencies_out[idx]) for idx in range(numDependencies_out)] if CUresult(err) == CUresult(0): @@ -36998,16 +37245,19 @@ def cuStreamUpdateCaptureDependencies(hStream, dependencies : Optional[Tuple[CUg phStream = int(CUstream(hStream)) cyhStream = phStream cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - err = cydriver.cuStreamUpdateCaptureDependencies(cyhStream, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, flags) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuStreamUpdateCaptureDependencies(cyhStream, cydependencies, numDependencies, flags) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) return (_dict_CUresult[err],) {{endif}} @@ -37072,24 +37322,29 @@ def cuStreamUpdateCaptureDependencies_v2(hStream, dependencies : Optional[Tuple[ phStream = 
int(CUstream(hStream)) cyhStream = phStream cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr cdef cydriver.CUgraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 0: + if len(dependencyData) > 1: cydependencyData = calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData)) if cydependencyData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) for idx in range(len(dependencyData)): string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) - err = cydriver.cuStreamUpdateCaptureDependencies_v2(cyhStream, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, (dependencyData[0])._pvt_ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, flags) - if cydependencies is not NULL: + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr + with nogil: + err = cydriver.cuStreamUpdateCaptureDependencies_v2(cyhStream, cydependencies, cydependencyData, numDependencies, flags) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) - if cydependencyData is not NULL: + if len(dependencyData) > 1 and cydependencyData is not NULL: free(cydependencyData) return (_dict_CUresult[err],) {{endif}} @@ -37202,7 +37457,8 @@ def cuStreamAttachMemAsync(hStream, dptr, size_t length, unsigned int flags): else: phStream = int(CUstream(hStream)) cyhStream = phStream - err = 
cydriver.cuStreamAttachMemAsync(cyhStream, cydptr, length, flags) + with nogil: + err = cydriver.cuStreamAttachMemAsync(cyhStream, cydptr, length, flags) return (_dict_CUresult[err],) {{endif}} @@ -37242,7 +37498,8 @@ def cuStreamQuery(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - err = cydriver.cuStreamQuery(cyhStream) + with nogil: + err = cydriver.cuStreamQuery(cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -37281,7 +37538,8 @@ def cuStreamSynchronize(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - err = cydriver.cuStreamSynchronize(cyhStream) + with nogil: + err = cydriver.cuStreamSynchronize(cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -37321,7 +37579,8 @@ def cuStreamDestroy(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - err = cydriver.cuStreamDestroy(cyhStream) + with nogil: + err = cydriver.cuStreamDestroy(cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -37366,7 +37625,8 @@ def cuStreamCopyAttributes(dst, src): else: pdst = int(CUstream(dst)) cydst = pdst - err = cydriver.cuStreamCopyAttributes(cydst, cysrc) + with nogil: + err = cydriver.cuStreamCopyAttributes(cydst, cysrc) return (_dict_CUresult[err],) {{endif}} @@ -37407,7 +37667,8 @@ def cuStreamGetAttribute(hStream, attr not None : CUstreamAttrID): cyhStream = phStream cdef cydriver.CUstreamAttrID cyattr = attr.value cdef CUstreamAttrValue value_out = CUstreamAttrValue() - err = cydriver.cuStreamGetAttribute(cyhStream, cyattr, value_out._pvt_ptr) + with nogil: + err = cydriver.cuStreamGetAttribute(cyhStream, cyattr, value_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], value_out) @@ -37451,7 +37712,8 @@ def cuStreamSetAttribute(hStream, attr not None : CUstreamAttrID, value : Option cyhStream = phStream cdef cydriver.CUstreamAttrID cyattr = attr.value cdef cydriver.CUstreamAttrValue* cyvalue_ptr = value._pvt_ptr if value != None else NULL - 
err = cydriver.cuStreamSetAttribute(cyhStream, cyattr, cyvalue_ptr) + with nogil: + err = cydriver.cuStreamSetAttribute(cyhStream, cyattr, cyvalue_ptr) return (_dict_CUresult[err],) {{endif}} @@ -37499,7 +37761,8 @@ def cuEventCreate(unsigned int Flags): :py:obj:`~.cuEventRecord`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventElapsedTime`, :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventCreateWithFlags` """ cdef CUevent phEvent = CUevent() - err = cydriver.cuEventCreate(phEvent._pvt_ptr, Flags) + with nogil: + err = cydriver.cuEventCreate(phEvent._pvt_ptr, Flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phEvent) @@ -37560,7 +37823,8 @@ def cuEventRecord(hEvent, hStream): else: phEvent = int(CUevent(hEvent)) cyhEvent = phEvent - err = cydriver.cuEventRecord(cyhEvent, cyhStream) + with nogil: + err = cydriver.cuEventRecord(cyhEvent, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -37629,7 +37893,8 @@ def cuEventRecordWithFlags(hEvent, hStream, unsigned int flags): else: phEvent = int(CUevent(hEvent)) cyhEvent = phEvent - err = cydriver.cuEventRecordWithFlags(cyhEvent, cyhStream, flags) + with nogil: + err = cydriver.cuEventRecordWithFlags(cyhEvent, cyhStream, flags) return (_dict_CUresult[err],) {{endif}} @@ -37673,7 +37938,8 @@ def cuEventQuery(hEvent): else: phEvent = int(CUevent(hEvent)) cyhEvent = phEvent - err = cydriver.cuEventQuery(cyhEvent) + with nogil: + err = cydriver.cuEventQuery(cyhEvent) return (_dict_CUresult[err],) {{endif}} @@ -37716,7 +37982,8 @@ def cuEventSynchronize(hEvent): else: phEvent = int(CUevent(hEvent)) cyhEvent = phEvent - err = cydriver.cuEventSynchronize(cyhEvent) + with nogil: + err = cydriver.cuEventSynchronize(cyhEvent) return (_dict_CUresult[err],) {{endif}} @@ -37756,7 +38023,8 @@ def cuEventDestroy(hEvent): else: phEvent = int(CUevent(hEvent)) cyhEvent = phEvent - err = cydriver.cuEventDestroy(cyhEvent) + with 
nogil: + err = cydriver.cuEventDestroy(cyhEvent) return (_dict_CUresult[err],) {{endif}} @@ -37826,7 +38094,8 @@ def cuEventElapsedTime(hStart, hEnd): phStart = int(CUevent(hStart)) cyhStart = phStart cdef float pMilliseconds = 0 - err = cydriver.cuEventElapsedTime(&pMilliseconds, cyhStart, cyhEnd) + with nogil: + err = cydriver.cuEventElapsedTime(&pMilliseconds, cyhStart, cyhEnd) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pMilliseconds) @@ -37898,7 +38167,8 @@ def cuEventElapsedTime_v2(hStart, hEnd): phStart = int(CUevent(hStart)) cyhStart = phStart cdef float pMilliseconds = 0 - err = cydriver.cuEventElapsedTime_v2(&pMilliseconds, cyhStart, cyhEnd) + with nogil: + err = cydriver.cuEventElapsedTime_v2(&pMilliseconds, cyhStart, cyhEnd) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pMilliseconds) @@ -38053,7 +38323,8 @@ def cuImportExternalMemory(memHandleDesc : Optional[CUDA_EXTERNAL_MEMORY_HANDLE_ """ cdef CUexternalMemory extMem_out = CUexternalMemory() cdef cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC* cymemHandleDesc_ptr = memHandleDesc._pvt_ptr if memHandleDesc != None else NULL - err = cydriver.cuImportExternalMemory(extMem_out._pvt_ptr, cymemHandleDesc_ptr) + with nogil: + err = cydriver.cuImportExternalMemory(extMem_out._pvt_ptr, cymemHandleDesc_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], extMem_out) @@ -38122,7 +38393,8 @@ def cuExternalMemoryGetMappedBuffer(extMem, bufferDesc : Optional[CUDA_EXTERNAL_ cyextMem = pextMem cdef CUdeviceptr devPtr = CUdeviceptr() cdef cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC* cybufferDesc_ptr = bufferDesc._pvt_ptr if bufferDesc != None else NULL - err = cydriver.cuExternalMemoryGetMappedBuffer(devPtr._pvt_ptr, cyextMem, cybufferDesc_ptr) + with nogil: + err = cydriver.cuExternalMemoryGetMappedBuffer(devPtr._pvt_ptr, cyextMem, cybufferDesc_ptr) if err != 
cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], devPtr) @@ -38194,7 +38466,8 @@ def cuExternalMemoryGetMappedMipmappedArray(extMem, mipmapDesc : Optional[CUDA_E cyextMem = pextMem cdef CUmipmappedArray mipmap = CUmipmappedArray() cdef cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC* cymipmapDesc_ptr = mipmapDesc._pvt_ptr if mipmapDesc != None else NULL - err = cydriver.cuExternalMemoryGetMappedMipmappedArray(mipmap._pvt_ptr, cyextMem, cymipmapDesc_ptr) + with nogil: + err = cydriver.cuExternalMemoryGetMappedMipmappedArray(mipmap._pvt_ptr, cyextMem, cymipmapDesc_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], mipmap) @@ -38233,7 +38506,8 @@ def cuDestroyExternalMemory(extMem): else: pextMem = int(CUexternalMemory(extMem)) cyextMem = pextMem - err = cydriver.cuDestroyExternalMemory(cyextMem) + with nogil: + err = cydriver.cuDestroyExternalMemory(cyextMem) return (_dict_CUresult[err],) {{endif}} @@ -38381,7 +38655,8 @@ def cuImportExternalSemaphore(semHandleDesc : Optional[CUDA_EXTERNAL_SEMAPHORE_H """ cdef CUexternalSemaphore extSem_out = CUexternalSemaphore() cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC* cysemHandleDesc_ptr = semHandleDesc._pvt_ptr if semHandleDesc != None else NULL - err = cydriver.cuImportExternalSemaphore(extSem_out._pvt_ptr, cysemHandleDesc_ptr) + with nogil: + err = cydriver.cuImportExternalSemaphore(extSem_out._pvt_ptr, cysemHandleDesc_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], extSem_out) @@ -38497,26 +38772,31 @@ def cuSignalExternalSemaphoresAsync(extSemArray : Optional[Tuple[CUexternalSemap if not all(isinstance(_x, (CUexternalSemaphore,)) for _x in extSemArray): raise TypeError("Argument 'extSemArray' is not instance of type (expected Tuple[cydriver.CUexternalSemaphore,] or List[cydriver.CUexternalSemaphore,]") cdef cydriver.CUexternalSemaphore* cyextSemArray = NULL - if 
len(extSemArray) > 0: + if len(extSemArray) > 1: cyextSemArray = calloc(len(extSemArray), sizeof(cydriver.CUexternalSemaphore)) if cyextSemArray is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore))) else: for idx in range(len(extSemArray)): cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] + elif len(extSemArray) == 1: + cyextSemArray = (extSemArray[0])._pvt_ptr cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* cyparamsArray = NULL - if len(paramsArray) > 0: + if len(paramsArray) > 1: cyparamsArray = calloc(len(paramsArray), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS)) if cyparamsArray is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS))) for idx in range(len(paramsArray)): string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS)) + elif len(paramsArray) == 1: + cyparamsArray = (paramsArray[0])._pvt_ptr if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) - err = cydriver.cuSignalExternalSemaphoresAsync((extSemArray[0])._pvt_ptr if len(extSemArray) == 1 else cyextSemArray, (paramsArray[0])._pvt_ptr if len(paramsArray) == 1 else cyparamsArray, numExtSems, cystream) - if cyextSemArray is not NULL: + with nogil: + err = cydriver.cuSignalExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) + if len(extSemArray) > 1 and cyextSemArray is not NULL: free(cyextSemArray) - if cyparamsArray is not NULL: + if len(paramsArray) > 1 and cyparamsArray is not NULL: free(cyparamsArray) return (_dict_CUresult[err],) {{endif}} @@ -38622,26 +38902,31 @@ def 
cuWaitExternalSemaphoresAsync(extSemArray : Optional[Tuple[CUexternalSemapho if not all(isinstance(_x, (CUexternalSemaphore,)) for _x in extSemArray): raise TypeError("Argument 'extSemArray' is not instance of type (expected Tuple[cydriver.CUexternalSemaphore,] or List[cydriver.CUexternalSemaphore,]") cdef cydriver.CUexternalSemaphore* cyextSemArray = NULL - if len(extSemArray) > 0: + if len(extSemArray) > 1: cyextSemArray = calloc(len(extSemArray), sizeof(cydriver.CUexternalSemaphore)) if cyextSemArray is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore))) else: for idx in range(len(extSemArray)): cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] + elif len(extSemArray) == 1: + cyextSemArray = (extSemArray[0])._pvt_ptr cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* cyparamsArray = NULL - if len(paramsArray) > 0: + if len(paramsArray) > 1: cyparamsArray = calloc(len(paramsArray), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS)) if cyparamsArray is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS))) for idx in range(len(paramsArray)): string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS)) + elif len(paramsArray) == 1: + cyparamsArray = (paramsArray[0])._pvt_ptr if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) - err = cydriver.cuWaitExternalSemaphoresAsync((extSemArray[0])._pvt_ptr if len(extSemArray) == 1 else cyextSemArray, (paramsArray[0])._pvt_ptr if len(paramsArray) == 1 else cyparamsArray, numExtSems, cystream) - if cyextSemArray is not NULL: + with nogil: + err = 
cydriver.cuWaitExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) + if len(extSemArray) > 1 and cyextSemArray is not NULL: free(cyextSemArray) - if cyparamsArray is not NULL: + if len(paramsArray) > 1 and cyparamsArray is not NULL: free(cyparamsArray) return (_dict_CUresult[err],) {{endif}} @@ -38678,7 +38963,8 @@ def cuDestroyExternalSemaphore(extSem): else: pextSem = int(CUexternalSemaphore(extSem)) cyextSem = pextSem - err = cydriver.cuDestroyExternalSemaphore(cyextSem) + with nogil: + err = cydriver.cuDestroyExternalSemaphore(cyextSem) return (_dict_CUresult[err],) {{endif}} @@ -38751,7 +39037,8 @@ def cuStreamWaitValue32(stream, addr, value, unsigned int flags): else: pstream = int(CUstream(stream)) cystream = pstream - err = cydriver.cuStreamWaitValue32(cystream, cyaddr, cyvalue, flags) + with nogil: + err = cydriver.cuStreamWaitValue32(cystream, cyaddr, cyvalue, flags) return (_dict_CUresult[err],) {{endif}} @@ -38822,7 +39109,8 @@ def cuStreamWaitValue64(stream, addr, value, unsigned int flags): else: pstream = int(CUstream(stream)) cystream = pstream - err = cydriver.cuStreamWaitValue64(cystream, cyaddr, cyvalue, flags) + with nogil: + err = cydriver.cuStreamWaitValue64(cystream, cyaddr, cyvalue, flags) return (_dict_CUresult[err],) {{endif}} @@ -38883,7 +39171,8 @@ def cuStreamWriteValue32(stream, addr, value, unsigned int flags): else: pstream = int(CUstream(stream)) cystream = pstream - err = cydriver.cuStreamWriteValue32(cystream, cyaddr, cyvalue, flags) + with nogil: + err = cydriver.cuStreamWriteValue32(cystream, cyaddr, cyvalue, flags) return (_dict_CUresult[err],) {{endif}} @@ -38946,7 +39235,8 @@ def cuStreamWriteValue64(stream, addr, value, unsigned int flags): else: pstream = int(CUstream(stream)) cystream = pstream - err = cydriver.cuStreamWriteValue64(cystream, cyaddr, cyvalue, flags) + with nogil: + err = cydriver.cuStreamWriteValue64(cystream, cyaddr, cyvalue, flags) return (_dict_CUresult[err],) {{endif}} @@ 
-39008,14 +39298,17 @@ def cuStreamBatchMemOp(stream, unsigned int count, paramArray : Optional[Tuple[C cystream = pstream if count > len(paramArray): raise RuntimeError("List is too small: " + str(len(paramArray)) + " < " + str(count)) cdef cydriver.CUstreamBatchMemOpParams* cyparamArray = NULL - if len(paramArray) > 0: + if len(paramArray) > 1: cyparamArray = calloc(len(paramArray), sizeof(cydriver.CUstreamBatchMemOpParams)) if cyparamArray is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramArray)) + 'x' + str(sizeof(cydriver.CUstreamBatchMemOpParams))) for idx in range(len(paramArray)): string.memcpy(&cyparamArray[idx], (paramArray[idx])._pvt_ptr, sizeof(cydriver.CUstreamBatchMemOpParams)) - err = cydriver.cuStreamBatchMemOp(cystream, count, (paramArray[0])._pvt_ptr if len(paramArray) == 1 else cyparamArray, flags) - if cyparamArray is not NULL: + elif len(paramArray) == 1: + cyparamArray = (paramArray[0])._pvt_ptr + with nogil: + err = cydriver.cuStreamBatchMemOp(cystream, count, cyparamArray, flags) + if len(paramArray) > 1 and cyparamArray is not NULL: free(cyparamArray) return (_dict_CUresult[err],) {{endif}} @@ -39149,7 +39442,8 @@ def cuFuncGetAttribute(attrib not None : CUfunction_attribute, hfunc): cyhfunc = phfunc cdef int pi = 0 cdef cydriver.CUfunction_attribute cyattrib = attrib.value - err = cydriver.cuFuncGetAttribute(&pi, cyattrib, cyhfunc) + with nogil: + err = cydriver.cuFuncGetAttribute(&pi, cyattrib, cyhfunc) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pi) @@ -39244,7 +39538,8 @@ def cuFuncSetAttribute(hfunc, attrib not None : CUfunction_attribute, int value) phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc cdef cydriver.CUfunction_attribute cyattrib = attrib.value - err = cydriver.cuFuncSetAttribute(cyhfunc, cyattrib, value) + with nogil: + err = cydriver.cuFuncSetAttribute(cyhfunc, cyattrib, value) return (_dict_CUresult[err],) {{endif}} @@ -39309,7 
+39604,8 @@ def cuFuncSetCacheConfig(hfunc, config not None : CUfunc_cache): phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc cdef cydriver.CUfunc_cache cyconfig = config.value - err = cydriver.cuFuncSetCacheConfig(cyhfunc, cyconfig) + with nogil: + err = cydriver.cuFuncSetCacheConfig(cyhfunc, cyconfig) return (_dict_CUresult[err],) {{endif}} @@ -39350,7 +39646,8 @@ def cuFuncGetModule(hfunc): phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc cdef CUmodule hmod = CUmodule() - err = cydriver.cuFuncGetModule(hmod._pvt_ptr, cyhfunc) + with nogil: + err = cydriver.cuFuncGetModule(hmod._pvt_ptr, cyhfunc) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], hmod) @@ -39391,7 +39688,8 @@ def cuFuncGetName(hfunc): phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc cdef const char* name = NULL - err = cydriver.cuFuncGetName(&name, cyhfunc) + with nogil: + err = cydriver.cuFuncGetName(&name, cyhfunc) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], name if name != NULL else None) @@ -39445,7 +39743,8 @@ def cuFuncGetParamInfo(func, size_t paramIndex): cyfunc = pfunc cdef size_t paramOffset = 0 cdef size_t paramSize = 0 - err = cydriver.cuFuncGetParamInfo(cyfunc, paramIndex, &paramOffset, &paramSize) + with nogil: + err = cydriver.cuFuncGetParamInfo(cyfunc, paramIndex, &paramOffset, &paramSize) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], paramOffset, paramSize) @@ -39484,7 +39783,8 @@ def cuFuncIsLoaded(function): pfunction = int(CUfunction(function)) cyfunction = pfunction cdef cydriver.CUfunctionLoadingState state - err = cydriver.cuFuncIsLoaded(&state, cyfunction) + with nogil: + err = cydriver.cuFuncIsLoaded(&state, cyfunction) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUfunctionLoadingState(state)) @@ -39521,7 +39821,8 @@ def cuFuncLoad(function): else: pfunction = 
int(CUfunction(function)) cyfunction = pfunction - err = cydriver.cuFuncLoad(cyfunction) + with nogil: + err = cydriver.cuFuncLoad(cyfunction) return (_dict_CUresult[err],) {{endif}} @@ -39652,7 +39953,9 @@ def cuLaunchKernel(f, unsigned int gridDimX, unsigned int gridDimY, unsigned int pf = int(CUfunction(f)) cyf = pf cykernelParams = utils.HelperKernelParams(kernelParams) - err = cydriver.cuLaunchKernel(cyf, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, cyhStream, cykernelParams.ckernelParams, extra) + cdef void** cykernelParams_ptr = cykernelParams.ckernelParams + with nogil: + err = cydriver.cuLaunchKernel(cyf, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, cyhStream, cykernelParams_ptr, extra) return (_dict_CUresult[err],) {{endif}} @@ -39891,7 +40194,9 @@ def cuLaunchKernelEx(config : Optional[CUlaunchConfig], f, kernelParams, void_pt cyf = pf cdef cydriver.CUlaunchConfig* cyconfig_ptr = config._pvt_ptr if config != None else NULL cykernelParams = utils.HelperKernelParams(kernelParams) - err = cydriver.cuLaunchKernelEx(cyconfig_ptr, cyf, cykernelParams.ckernelParams, extra) + cdef void** cykernelParams_ptr = cykernelParams.ckernelParams + with nogil: + err = cydriver.cuLaunchKernelEx(cyconfig_ptr, cyf, cykernelParams_ptr, extra) return (_dict_CUresult[err],) {{endif}} @@ -40002,7 +40307,9 @@ def cuLaunchCooperativeKernel(f, unsigned int gridDimX, unsigned int gridDimY, u pf = int(CUfunction(f)) cyf = pf cykernelParams = utils.HelperKernelParams(kernelParams) - err = cydriver.cuLaunchCooperativeKernel(cyf, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, cyhStream, cykernelParams.ckernelParams) + cdef void** cykernelParams_ptr = cykernelParams.ckernelParams + with nogil: + err = cydriver.cuLaunchCooperativeKernel(cyf, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, cyhStream, cykernelParams_ptr) return (_dict_CUresult[err],) {{endif}} 
@@ -40164,15 +40471,18 @@ def cuLaunchCooperativeKernelMultiDevice(launchParamsList : Optional[Tuple[CUDA_ if not all(isinstance(_x, (CUDA_LAUNCH_PARAMS,)) for _x in launchParamsList): raise TypeError("Argument 'launchParamsList' is not instance of type (expected Tuple[cydriver.CUDA_LAUNCH_PARAMS,] or List[cydriver.CUDA_LAUNCH_PARAMS,]") cdef cydriver.CUDA_LAUNCH_PARAMS* cylaunchParamsList = NULL - if len(launchParamsList) > 0: + if len(launchParamsList) > 1: cylaunchParamsList = calloc(len(launchParamsList), sizeof(cydriver.CUDA_LAUNCH_PARAMS)) if cylaunchParamsList is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(launchParamsList)) + 'x' + str(sizeof(cydriver.CUDA_LAUNCH_PARAMS))) for idx in range(len(launchParamsList)): string.memcpy(&cylaunchParamsList[idx], (launchParamsList[idx])._pvt_ptr, sizeof(cydriver.CUDA_LAUNCH_PARAMS)) + elif len(launchParamsList) == 1: + cylaunchParamsList = (launchParamsList[0])._pvt_ptr if numDevices > len(launchParamsList): raise RuntimeError("List is too small: " + str(len(launchParamsList)) + " < " + str(numDevices)) - err = cydriver.cuLaunchCooperativeKernelMultiDevice((launchParamsList[0])._pvt_ptr if len(launchParamsList) == 1 else cylaunchParamsList, numDevices, flags) - if cylaunchParamsList is not NULL: + with nogil: + err = cydriver.cuLaunchCooperativeKernelMultiDevice(cylaunchParamsList, numDevices, flags) + if len(launchParamsList) > 1 and cylaunchParamsList is not NULL: free(cylaunchParamsList) return (_dict_CUresult[err],) {{endif}} @@ -40281,7 +40591,6 @@ def cuLaunchHostFunc(hStream, fn, userData): with nogil: err = cydriver.cuLaunchHostFunc(cyhStream, cuHostCallbackWrapper, cbData) - if err != cydriver.CUDA_SUCCESS: free(cbData) return (_dict_CUresult[err],) @@ -40326,7 +40635,8 @@ def cuFuncSetBlockShape(hfunc, int x, int y, int z): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - err = cydriver.cuFuncSetBlockShape(cyhfunc, x, y, z) + with nogil: + err = 
cydriver.cuFuncSetBlockShape(cyhfunc, x, y, z) return (_dict_CUresult[err],) {{endif}} @@ -40366,7 +40676,8 @@ def cuFuncSetSharedSize(hfunc, unsigned int numbytes): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - err = cydriver.cuFuncSetSharedSize(cyhfunc, numbytes) + with nogil: + err = cydriver.cuFuncSetSharedSize(cyhfunc, numbytes) return (_dict_CUresult[err],) {{endif}} @@ -40405,7 +40716,8 @@ def cuParamSetSize(hfunc, unsigned int numbytes): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - err = cydriver.cuParamSetSize(cyhfunc, numbytes) + with nogil: + err = cydriver.cuParamSetSize(cyhfunc, numbytes) return (_dict_CUresult[err],) {{endif}} @@ -40447,7 +40759,8 @@ def cuParamSeti(hfunc, int offset, unsigned int value): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - err = cydriver.cuParamSeti(cyhfunc, offset, value) + with nogil: + err = cydriver.cuParamSeti(cyhfunc, offset, value) return (_dict_CUresult[err],) {{endif}} @@ -40489,7 +40802,8 @@ def cuParamSetf(hfunc, int offset, float value): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - err = cydriver.cuParamSetf(cyhfunc, offset, value) + with nogil: + err = cydriver.cuParamSetf(cyhfunc, offset, value) return (_dict_CUresult[err],) {{endif}} @@ -40535,7 +40849,8 @@ def cuParamSetv(hfunc, int offset, ptr, unsigned int numbytes): cyhfunc = phfunc cyptr = utils.HelperInputVoidPtr(ptr) cdef void* cyptr_ptr = cyptr.cptr - err = cydriver.cuParamSetv(cyhfunc, offset, cyptr_ptr, numbytes) + with nogil: + err = cydriver.cuParamSetv(cyhfunc, offset, cyptr_ptr, numbytes) return (_dict_CUresult[err],) {{endif}} @@ -40585,7 +40900,8 @@ def cuLaunch(f): else: pf = int(CUfunction(f)) cyf = pf - err = cydriver.cuLaunch(cyf) + with nogil: + err = cydriver.cuLaunch(cyf) return (_dict_CUresult[err],) {{endif}} @@ -40639,7 +40955,8 @@ def cuLaunchGrid(f, int grid_width, int grid_height): else: pf = int(CUfunction(f)) cyf = pf - err = cydriver.cuLaunchGrid(cyf, grid_width, grid_height) + with 
nogil: + err = cydriver.cuLaunchGrid(cyf, grid_width, grid_height) return (_dict_CUresult[err],) {{endif}} @@ -40709,7 +41026,8 @@ def cuLaunchGridAsync(f, int grid_width, int grid_height, hStream): else: pf = int(CUfunction(f)) cyf = pf - err = cydriver.cuLaunchGridAsync(cyf, grid_width, grid_height, cyhStream) + with nogil: + err = cydriver.cuLaunchGridAsync(cyf, grid_width, grid_height, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -40757,7 +41075,8 @@ def cuParamSetTexRef(hfunc, int texunit, hTexRef): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - err = cydriver.cuParamSetTexRef(cyhfunc, texunit, cyhTexRef) + with nogil: + err = cydriver.cuParamSetTexRef(cyhfunc, texunit, cyhTexRef) return (_dict_CUresult[err],) {{endif}} @@ -40829,7 +41148,8 @@ def cuFuncSetSharedMemConfig(hfunc, config not None : CUsharedconfig): phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc cdef cydriver.CUsharedconfig cyconfig = config.value - err = cydriver.cuFuncSetSharedMemConfig(cyhfunc, cyconfig) + with nogil: + err = cydriver.cuFuncSetSharedMemConfig(cyhfunc, cyconfig) return (_dict_CUresult[err],) {{endif}} @@ -40858,7 +41178,8 @@ def cuGraphCreate(unsigned int flags): :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`, :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphDestroy`, :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphClone` """ cdef CUgraph phGraph = CUgraph() - err = cydriver.cuGraphCreate(phGraph._pvt_ptr, flags) + with nogil: + err = cydriver.cuGraphCreate(phGraph._pvt_ptr, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phGraph) @@ -40968,17 +41289,20 @@ def cuGraphAddKernelNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | Li cyhGraph = phGraph cdef CUgraphNode phGraphNode = 
CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphAddKernelNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddKernelNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -41027,7 +41351,8 @@ def cuGraphKernelNodeGetParams(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUDA_KERNEL_NODE_PARAMS nodeParams = CUDA_KERNEL_NODE_PARAMS() - err = cydriver.cuGraphKernelNodeGetParams(cyhNode, nodeParams._pvt_ptr) + with nogil: + err = cydriver.cuGraphKernelNodeGetParams(cyhNode, nodeParams._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], nodeParams) @@ -41066,7 +41391,8 @@ def cuGraphKernelNodeSetParams(hNode, nodeParams : Optional[CUDA_KERNEL_NODE_PAR phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = 
cydriver.cuGraphKernelNodeSetParams(cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphKernelNodeSetParams(cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -41141,17 +41467,20 @@ def cuGraphAddMemcpyNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | Li cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) cdef cydriver.CUDA_MEMCPY3D* cycopyParams_ptr = copyParams._pvt_ptr if copyParams != None else NULL - err = cydriver.cuGraphAddMemcpyNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cycopyParams_ptr, cyctx) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddMemcpyNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cycopyParams_ptr, cyctx) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -41191,7 +41520,8 @@ def cuGraphMemcpyNodeGetParams(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUDA_MEMCPY3D nodeParams = CUDA_MEMCPY3D() - err = cydriver.cuGraphMemcpyNodeGetParams(cyhNode, nodeParams._pvt_ptr) + with nogil: + err = cydriver.cuGraphMemcpyNodeGetParams(cyhNode, nodeParams._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], 
None) return (_dict_CUresult[err], nodeParams) @@ -41230,7 +41560,8 @@ def cuGraphMemcpyNodeSetParams(hNode, nodeParams : Optional[CUDA_MEMCPY3D]): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef cydriver.CUDA_MEMCPY3D* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphMemcpyNodeSetParams(cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphMemcpyNodeSetParams(cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -41295,17 +41626,20 @@ def cuGraphAddMemsetNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | Li cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cymemsetParams_ptr = memsetParams._pvt_ptr if memsetParams != None else NULL - err = cydriver.cuGraphAddMemsetNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cymemsetParams_ptr, cyctx) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddMemsetNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cymemsetParams_ptr, cyctx) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -41345,7 +41679,8 @@ def 
cuGraphMemsetNodeGetParams(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUDA_MEMSET_NODE_PARAMS nodeParams = CUDA_MEMSET_NODE_PARAMS() - err = cydriver.cuGraphMemsetNodeGetParams(cyhNode, nodeParams._pvt_ptr) + with nogil: + err = cydriver.cuGraphMemsetNodeGetParams(cyhNode, nodeParams._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], nodeParams) @@ -41384,7 +41719,8 @@ def cuGraphMemsetNodeSetParams(hNode, nodeParams : Optional[CUDA_MEMSET_NODE_PAR phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphMemsetNodeSetParams(cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphMemsetNodeSetParams(cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -41439,17 +41775,20 @@ def cuGraphAddHostNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphAddHostNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr) - if 
cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddHostNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -41489,7 +41828,8 @@ def cuGraphHostNodeGetParams(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUDA_HOST_NODE_PARAMS nodeParams = CUDA_HOST_NODE_PARAMS() - err = cydriver.cuGraphHostNodeGetParams(cyhNode, nodeParams._pvt_ptr) + with nogil: + err = cydriver.cuGraphHostNodeGetParams(cyhNode, nodeParams._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], nodeParams) @@ -41528,7 +41868,8 @@ def cuGraphHostNodeSetParams(hNode, nodeParams : Optional[CUDA_HOST_NODE_PARAMS] phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphHostNodeSetParams(cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphHostNodeSetParams(cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -41594,16 +41935,19 @@ def cuGraphAddChildGraphNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " 
+ str(numDependencies)) - err = cydriver.cuGraphAddChildGraphNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cychildGraph) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddChildGraphNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cychildGraph) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -41648,7 +41992,8 @@ def cuGraphChildGraphNodeGetGraph(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUgraph phGraph = CUgraph() - err = cydriver.cuGraphChildGraphNodeGetGraph(cyhNode, phGraph._pvt_ptr) + with nogil: + err = cydriver.cuGraphChildGraphNodeGetGraph(cyhNode, phGraph._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phGraph) @@ -41706,16 +42051,19 @@ def cuGraphAddEmptyNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | Lis cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - err = cydriver.cuGraphAddEmptyNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies) - if cydependencies is not NULL: + with nogil: + err = 
cydriver.cuGraphAddEmptyNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -41781,16 +42129,19 @@ def cuGraphAddEventRecordNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - err = cydriver.cuGraphAddEventRecordNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cyevent) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddEventRecordNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cyevent) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -41830,7 +42181,8 @@ def cuGraphEventRecordNodeGetEvent(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUevent event_out = CUevent() - err = cydriver.cuGraphEventRecordNodeGetEvent(cyhNode, event_out._pvt_ptr) + with nogil: + err = cydriver.cuGraphEventRecordNodeGetEvent(cyhNode, event_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], event_out) @@ -41876,7 +42228,8 @@ 
def cuGraphEventRecordNodeSetEvent(hNode, event): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - err = cydriver.cuGraphEventRecordNodeSetEvent(cyhNode, cyevent) + with nogil: + err = cydriver.cuGraphEventRecordNodeSetEvent(cyhNode, cyevent) return (_dict_CUresult[err],) {{endif}} @@ -41941,16 +42294,19 @@ def cuGraphAddEventWaitNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - err = cydriver.cuGraphAddEventWaitNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cyevent) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddEventWaitNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cyevent) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -41990,7 +42346,8 @@ def cuGraphEventWaitNodeGetEvent(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUevent event_out = CUevent() - err = cydriver.cuGraphEventWaitNodeGetEvent(cyhNode, event_out._pvt_ptr) + with nogil: + err = cydriver.cuGraphEventWaitNodeGetEvent(cyhNode, event_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return 
(_dict_CUresult[err], event_out) @@ -42036,7 +42393,8 @@ def cuGraphEventWaitNodeSetEvent(hNode, event): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - err = cydriver.cuGraphEventWaitNodeSetEvent(cyhNode, cyevent) + with nogil: + err = cydriver.cuGraphEventWaitNodeSetEvent(cyhNode, cyevent) return (_dict_CUresult[err],) {{endif}} @@ -42092,17 +42450,20 @@ def cuGraphAddExternalSemaphoresSignalNode(hGraph, dependencies : Optional[Tuple cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphAddExternalSemaphoresSignalNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddExternalSemaphoresSignalNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -42148,7 +42509,8 @@ def cuGraphExternalSemaphoresSignalNodeGetParams(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUDA_EXT_SEM_SIGNAL_NODE_PARAMS params_out = 
CUDA_EXT_SEM_SIGNAL_NODE_PARAMS() - err = cydriver.cuGraphExternalSemaphoresSignalNodeGetParams(cyhNode, params_out._pvt_ptr) + with nogil: + err = cydriver.cuGraphExternalSemaphoresSignalNodeGetParams(cyhNode, params_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], params_out) @@ -42188,7 +42550,8 @@ def cuGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams : Optional[CU phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphExternalSemaphoresSignalNodeSetParams(cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphExternalSemaphoresSignalNodeSetParams(cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -42244,17 +42607,20 @@ def cuGraphAddExternalSemaphoresWaitNode(hGraph, dependencies : Optional[Tuple[C cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphAddExternalSemaphoresWaitNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr) - if cydependencies is 
not NULL: + with nogil: + err = cydriver.cuGraphAddExternalSemaphoresWaitNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -42300,7 +42666,8 @@ def cuGraphExternalSemaphoresWaitNodeGetParams(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUDA_EXT_SEM_WAIT_NODE_PARAMS params_out = CUDA_EXT_SEM_WAIT_NODE_PARAMS() - err = cydriver.cuGraphExternalSemaphoresWaitNodeGetParams(cyhNode, params_out._pvt_ptr) + with nogil: + err = cydriver.cuGraphExternalSemaphoresWaitNodeGetParams(cyhNode, params_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], params_out) @@ -42340,7 +42707,8 @@ def cuGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams : Optional[CUDA phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphExternalSemaphoresWaitNodeSetParams(cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphExternalSemaphoresWaitNodeSetParams(cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -42399,17 +42767,20 @@ def cuGraphAddBatchMemOpNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr 
if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphAddBatchMemOpNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddBatchMemOpNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -42454,7 +42825,8 @@ def cuGraphBatchMemOpNodeGetParams(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUDA_BATCH_MEM_OP_NODE_PARAMS nodeParams_out = CUDA_BATCH_MEM_OP_NODE_PARAMS() - err = cydriver.cuGraphBatchMemOpNodeGetParams(cyhNode, nodeParams_out._pvt_ptr) + with nogil: + err = cydriver.cuGraphBatchMemOpNodeGetParams(cyhNode, nodeParams_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], nodeParams_out) @@ -42496,7 +42868,8 @@ def cuGraphBatchMemOpNodeSetParams(hNode, nodeParams : Optional[CUDA_BATCH_MEM_O phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphBatchMemOpNodeSetParams(cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphBatchMemOpNodeSetParams(cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -42566,7 +42939,8 @@ def cuGraphExecBatchMemOpNodeSetParams(hGraphExec, hNode, nodeParams : Optional[ phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != 
None else NULL - err = cydriver.cuGraphExecBatchMemOpNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphExecBatchMemOpNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -42662,17 +43036,20 @@ def cuGraphAddMemAllocNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) cdef cydriver.CUDA_MEM_ALLOC_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphAddMemAllocNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddMemAllocNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -42715,7 +43092,8 @@ def cuGraphMemAllocNodeGetParams(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUDA_MEM_ALLOC_NODE_PARAMS params_out = CUDA_MEM_ALLOC_NODE_PARAMS() - err = cydriver.cuGraphMemAllocNodeGetParams(cyhNode, params_out._pvt_ptr) + with nogil: + err = 
cydriver.cuGraphMemAllocNodeGetParams(cyhNode, params_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], params_out) @@ -42798,16 +43176,19 @@ def cuGraphAddMemFreeNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | L cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - err = cydriver.cuGraphAddMemFreeNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cydptr) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddMemFreeNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cydptr) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -42847,7 +43228,8 @@ def cuGraphMemFreeNodeGetParams(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef CUdeviceptr dptr_out = CUdeviceptr() - err = cydriver.cuGraphMemFreeNodeGetParams(cyhNode, dptr_out._pvt_ptr) + with nogil: + err = cydriver.cuGraphMemFreeNodeGetParams(cyhNode, dptr_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], dptr_out) @@ -42885,7 +43267,8 @@ def cuDeviceGraphMemTrim(device): else: pdevice = int(CUdevice(device)) cydevice 
= pdevice - err = cydriver.cuDeviceGraphMemTrim(cydevice) + with nogil: + err = cydriver.cuDeviceGraphMemTrim(cydevice) return (_dict_CUresult[err],) {{endif}} @@ -42941,7 +43324,8 @@ def cuDeviceGetGraphMemAttribute(device, attr not None : CUgraphMem_attribute): cdef cydriver.CUgraphMem_attribute cyattr = attr.value cdef utils.HelperCUgraphMem_attribute cyvalue = utils.HelperCUgraphMem_attribute(attr, 0, is_getter=True) cdef void* cyvalue_ptr = cyvalue.cptr - err = cydriver.cuDeviceGetGraphMemAttribute(cydevice, cyattr, cyvalue_ptr) + with nogil: + err = cydriver.cuDeviceGetGraphMemAttribute(cydevice, cyattr, cyvalue_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], cyvalue.pyObj()) @@ -42992,7 +43376,8 @@ def cuDeviceSetGraphMemAttribute(device, attr not None : CUgraphMem_attribute, v cdef cydriver.CUgraphMem_attribute cyattr = attr.value cdef utils.HelperCUgraphMem_attribute cyvalue = utils.HelperCUgraphMem_attribute(attr, value, is_getter=False) cdef void* cyvalue_ptr = cyvalue.cptr - err = cydriver.cuDeviceSetGraphMemAttribute(cydevice, cyattr, cyvalue_ptr) + with nogil: + err = cydriver.cuDeviceSetGraphMemAttribute(cydevice, cyattr, cyvalue_ptr) return (_dict_CUresult[err],) {{endif}} @@ -43039,7 +43424,8 @@ def cuGraphClone(originalGraph): poriginalGraph = int(CUgraph(originalGraph)) cyoriginalGraph = poriginalGraph cdef CUgraph phGraphClone = CUgraph() - err = cydriver.cuGraphClone(phGraphClone._pvt_ptr, cyoriginalGraph) + with nogil: + err = cydriver.cuGraphClone(phGraphClone._pvt_ptr, cyoriginalGraph) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phGraphClone) @@ -43095,7 +43481,8 @@ def cuGraphNodeFindInClone(hOriginalNode, hClonedGraph): phOriginalNode = int(CUgraphNode(hOriginalNode)) cyhOriginalNode = phOriginalNode cdef CUgraphNode phNode = CUgraphNode() - err = cydriver.cuGraphNodeFindInClone(phNode._pvt_ptr, cyhOriginalNode, cyhClonedGraph) + with 
nogil: + err = cydriver.cuGraphNodeFindInClone(phNode._pvt_ptr, cyhOriginalNode, cyhClonedGraph) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phNode) @@ -43134,7 +43521,8 @@ def cuGraphNodeGetType(hNode): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef cydriver.CUgraphNodeType typename - err = cydriver.cuGraphNodeGetType(cyhNode, &typename) + with nogil: + err = cydriver.cuGraphNodeGetType(cyhNode, &typename) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUgraphNodeType(typename)) @@ -43188,7 +43576,8 @@ def cuGraphGetNodes(hGraph, size_t numNodes = 0): cynodes = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) if cynodes is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) - err = cydriver.cuGraphGetNodes(cyhGraph, cynodes, &numNodes) + with nogil: + err = cydriver.cuGraphGetNodes(cyhGraph, cynodes, &numNodes) if CUresult(err) == CUresult(0): pynodes = [CUgraphNode(init_value=cynodes[idx]) for idx in range(_graph_length)] if cynodes is not NULL: @@ -43246,7 +43635,8 @@ def cuGraphGetRootNodes(hGraph, size_t numRootNodes = 0): cyrootNodes = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) if cyrootNodes is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) - err = cydriver.cuGraphGetRootNodes(cyhGraph, cyrootNodes, &numRootNodes) + with nogil: + err = cydriver.cuGraphGetRootNodes(cyhGraph, cyrootNodes, &numRootNodes) if CUresult(err) == CUresult(0): pyrootNodes = [CUgraphNode(init_value=cyrootNodes[idx]) for idx in range(_graph_length)] if cyrootNodes is not NULL: @@ -43314,7 +43704,8 @@ def cuGraphGetEdges(hGraph, size_t numEdges = 0): cyto = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) if cyto is NULL: raise MemoryError('Failed to allocate length x size memory: ' + 
str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) - err = cydriver.cuGraphGetEdges(cyhGraph, cyfrom_, cyto, &numEdges) + with nogil: + err = cydriver.cuGraphGetEdges(cyhGraph, cyfrom_, cyto, &numEdges) if CUresult(err) == CUresult(0): pyfrom_ = [CUgraphNode(init_value=cyfrom_[idx]) for idx in range(_graph_length)] if cyfrom_ is not NULL: @@ -43399,7 +43790,8 @@ def cuGraphGetEdges_v2(hGraph, size_t numEdges = 0): cyedgeData = calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData)) if cyedgeData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) - err = cydriver.cuGraphGetEdges_v2(cyhGraph, cyfrom_, cyto, cyedgeData, &numEdges) + with nogil: + err = cydriver.cuGraphGetEdges_v2(cyhGraph, cyfrom_, cyto, cyedgeData, &numEdges) if CUresult(err) == CUresult(0): pyfrom_ = [CUgraphNode(init_value=cyfrom_[idx]) for idx in range(_graph_length)] if cyfrom_ is not NULL: @@ -43466,7 +43858,8 @@ def cuGraphNodeGetDependencies(hNode, size_t numDependencies = 0): cydependencies = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) - err = cydriver.cuGraphNodeGetDependencies(cyhNode, cydependencies, &numDependencies) + with nogil: + err = cydriver.cuGraphNodeGetDependencies(cyhNode, cydependencies, &numDependencies) if CUresult(err) == CUresult(0): pydependencies = [CUgraphNode(init_value=cydependencies[idx]) for idx in range(_graph_length)] if cydependencies is not NULL: @@ -43538,7 +43931,8 @@ def cuGraphNodeGetDependencies_v2(hNode, size_t numDependencies = 0): cyedgeData = calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData)) if cyedgeData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) - err = 
cydriver.cuGraphNodeGetDependencies_v2(cyhNode, cydependencies, cyedgeData, &numDependencies) + with nogil: + err = cydriver.cuGraphNodeGetDependencies_v2(cyhNode, cydependencies, cyedgeData, &numDependencies) if CUresult(err) == CUresult(0): pydependencies = [CUgraphNode(init_value=cydependencies[idx]) for idx in range(_graph_length)] if cydependencies is not NULL: @@ -43601,7 +43995,8 @@ def cuGraphNodeGetDependentNodes(hNode, size_t numDependentNodes = 0): cydependentNodes = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) if cydependentNodes is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) - err = cydriver.cuGraphNodeGetDependentNodes(cyhNode, cydependentNodes, &numDependentNodes) + with nogil: + err = cydriver.cuGraphNodeGetDependentNodes(cyhNode, cydependentNodes, &numDependentNodes) if CUresult(err) == CUresult(0): pydependentNodes = [CUgraphNode(init_value=cydependentNodes[idx]) for idx in range(_graph_length)] if cydependentNodes is not NULL: @@ -43673,7 +44068,8 @@ def cuGraphNodeGetDependentNodes_v2(hNode, size_t numDependentNodes = 0): cyedgeData = calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData)) if cyedgeData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) - err = cydriver.cuGraphNodeGetDependentNodes_v2(cyhNode, cydependentNodes, cyedgeData, &numDependentNodes) + with nogil: + err = cydriver.cuGraphNodeGetDependentNodes_v2(cyhNode, cydependentNodes, cyedgeData, &numDependentNodes) if CUresult(err) == CUresult(0): pydependentNodes = [CUgraphNode(init_value=cydependentNodes[idx]) for idx in range(_graph_length)] if cydependentNodes is not NULL: @@ -43735,25 +44131,30 @@ def cuGraphAddDependencies(hGraph, from_ : Optional[Tuple[CUgraphNode] | List[CU phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph cdef cydriver.CUgraphNode* cyfrom_ = NULL - if len(from_) > 0: + if 
len(from_) > 1: cyfrom_ = calloc(len(from_), sizeof(cydriver.CUgraphNode)) if cyfrom_ is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(from_)): cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr cdef cydriver.CUgraphNode* cyto = NULL - if len(to) > 0: + if len(to) > 1: cyto = calloc(len(to), sizeof(cydriver.CUgraphNode)) if cyto is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(to)): cyto[idx] = (to[idx])._pvt_ptr[0] - err = cydriver.cuGraphAddDependencies(cyhGraph, (from_[0])._pvt_ptr if len(from_) == 1 else cyfrom_, (to[0])._pvt_ptr if len(to) == 1 else cyto, numDependencies) - if cyfrom_ is not NULL: + elif len(to) == 1: + cyto = (to[0])._pvt_ptr + with nogil: + err = cydriver.cuGraphAddDependencies(cyhGraph, cyfrom_, cyto, numDependencies) + if len(from_) > 1 and cyfrom_ is not NULL: free(cyfrom_) - if cyto is not NULL: + if len(to) > 1 and cyto is not NULL: free(cyto) return (_dict_CUresult[err],) {{endif}} @@ -43812,34 +44213,41 @@ def cuGraphAddDependencies_v2(hGraph, from_ : Optional[Tuple[CUgraphNode] | List phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph cdef cydriver.CUgraphNode* cyfrom_ = NULL - if len(from_) > 0: + if len(from_) > 1: cyfrom_ = calloc(len(from_), sizeof(cydriver.CUgraphNode)) if cyfrom_ is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(from_)): cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr cdef cydriver.CUgraphNode* cyto = NULL - if len(to) > 0: + if len(to) > 1: cyto = calloc(len(to), sizeof(cydriver.CUgraphNode)) if cyto is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + 
str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(to)): cyto[idx] = (to[idx])._pvt_ptr[0] + elif len(to) == 1: + cyto = (to[0])._pvt_ptr cdef cydriver.CUgraphEdgeData* cyedgeData = NULL - if len(edgeData) > 0: + if len(edgeData) > 1: cyedgeData = calloc(len(edgeData), sizeof(cydriver.CUgraphEdgeData)) if cyedgeData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) for idx in range(len(edgeData)): string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) - err = cydriver.cuGraphAddDependencies_v2(cyhGraph, (from_[0])._pvt_ptr if len(from_) == 1 else cyfrom_, (to[0])._pvt_ptr if len(to) == 1 else cyto, (edgeData[0])._pvt_ptr if len(edgeData) == 1 else cyedgeData, numDependencies) - if cyfrom_ is not NULL: + elif len(edgeData) == 1: + cyedgeData = (edgeData[0])._pvt_ptr + with nogil: + err = cydriver.cuGraphAddDependencies_v2(cyhGraph, cyfrom_, cyto, cyedgeData, numDependencies) + if len(from_) > 1 and cyfrom_ is not NULL: free(cyfrom_) - if cyto is not NULL: + if len(to) > 1 and cyto is not NULL: free(cyto) - if cyedgeData is not NULL: + if len(edgeData) > 1 and cyedgeData is not NULL: free(cyedgeData) return (_dict_CUresult[err],) {{endif}} @@ -43896,25 +44304,30 @@ def cuGraphRemoveDependencies(hGraph, from_ : Optional[Tuple[CUgraphNode] | List phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph cdef cydriver.CUgraphNode* cyfrom_ = NULL - if len(from_) > 0: + if len(from_) > 1: cyfrom_ = calloc(len(from_), sizeof(cydriver.CUgraphNode)) if cyfrom_ is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(from_)): cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr cdef cydriver.CUgraphNode* cyto = NULL - if len(to) > 0: + if len(to) > 1: cyto = calloc(len(to), sizeof(cydriver.CUgraphNode)) if 
cyto is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(to)): cyto[idx] = (to[idx])._pvt_ptr[0] - err = cydriver.cuGraphRemoveDependencies(cyhGraph, (from_[0])._pvt_ptr if len(from_) == 1 else cyfrom_, (to[0])._pvt_ptr if len(to) == 1 else cyto, numDependencies) - if cyfrom_ is not NULL: + elif len(to) == 1: + cyto = (to[0])._pvt_ptr + with nogil: + err = cydriver.cuGraphRemoveDependencies(cyhGraph, cyfrom_, cyto, numDependencies) + if len(from_) > 1 and cyfrom_ is not NULL: free(cyfrom_) - if cyto is not NULL: + if len(to) > 1 and cyto is not NULL: free(cyto) return (_dict_CUresult[err],) {{endif}} @@ -43979,34 +44392,41 @@ def cuGraphRemoveDependencies_v2(hGraph, from_ : Optional[Tuple[CUgraphNode] | L phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph cdef cydriver.CUgraphNode* cyfrom_ = NULL - if len(from_) > 0: + if len(from_) > 1: cyfrom_ = calloc(len(from_), sizeof(cydriver.CUgraphNode)) if cyfrom_ is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(from_)): cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr cdef cydriver.CUgraphNode* cyto = NULL - if len(to) > 0: + if len(to) > 1: cyto = calloc(len(to), sizeof(cydriver.CUgraphNode)) if cyto is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(to)): cyto[idx] = (to[idx])._pvt_ptr[0] + elif len(to) == 1: + cyto = (to[0])._pvt_ptr cdef cydriver.CUgraphEdgeData* cyedgeData = NULL - if len(edgeData) > 0: + if len(edgeData) > 1: cyedgeData = calloc(len(edgeData), sizeof(cydriver.CUgraphEdgeData)) if cyedgeData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) for idx in 
range(len(edgeData)): string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) - err = cydriver.cuGraphRemoveDependencies_v2(cyhGraph, (from_[0])._pvt_ptr if len(from_) == 1 else cyfrom_, (to[0])._pvt_ptr if len(to) == 1 else cyto, (edgeData[0])._pvt_ptr if len(edgeData) == 1 else cyedgeData, numDependencies) - if cyfrom_ is not NULL: + elif len(edgeData) == 1: + cyedgeData = (edgeData[0])._pvt_ptr + with nogil: + err = cydriver.cuGraphRemoveDependencies_v2(cyhGraph, cyfrom_, cyto, cyedgeData, numDependencies) + if len(from_) > 1 and cyfrom_ is not NULL: free(cyfrom_) - if cyto is not NULL: + if len(to) > 1 and cyto is not NULL: free(cyto) - if cyedgeData is not NULL: + if len(edgeData) > 1 and cyedgeData is not NULL: free(cyedgeData) return (_dict_CUresult[err],) {{endif}} @@ -44045,7 +44465,8 @@ def cuGraphDestroyNode(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - err = cydriver.cuGraphDestroyNode(cyhNode) + with nogil: + err = cydriver.cuGraphDestroyNode(cyhNode) return (_dict_CUresult[err],) {{endif}} @@ -44147,7 +44568,8 @@ def cuGraphInstantiate(hGraph, unsigned long long flags): phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph cdef CUgraphExec phGraphExec = CUgraphExec() - err = cydriver.cuGraphInstantiate(phGraphExec._pvt_ptr, cyhGraph, flags) + with nogil: + err = cydriver.cuGraphInstantiate(phGraphExec._pvt_ptr, cyhGraph, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phGraphExec) @@ -44292,7 +44714,8 @@ def cuGraphInstantiateWithParams(hGraph, instantiateParams : Optional[CUDA_GRAPH cyhGraph = phGraph cdef CUgraphExec phGraphExec = CUgraphExec() cdef cydriver.CUDA_GRAPH_INSTANTIATE_PARAMS* cyinstantiateParams_ptr = instantiateParams._pvt_ptr if instantiateParams != None else NULL - err = cydriver.cuGraphInstantiateWithParams(phGraphExec._pvt_ptr, cyhGraph, cyinstantiateParams_ptr) + with nogil: + err = 
cydriver.cuGraphInstantiateWithParams(phGraphExec._pvt_ptr, cyhGraph, cyinstantiateParams_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phGraphExec) @@ -44334,7 +44757,8 @@ def cuGraphExecGetFlags(hGraphExec): phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec cdef cuuint64_t flags = cuuint64_t() - err = cydriver.cuGraphExecGetFlags(cyhGraphExec, flags._pvt_ptr) + with nogil: + err = cydriver.cuGraphExecGetFlags(cyhGraphExec, flags._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], flags) @@ -44416,7 +44840,8 @@ def cuGraphExecKernelNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUDA phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphExecKernelNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphExecKernelNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -44490,7 +44915,8 @@ def cuGraphExecMemcpyNodeSetParams(hGraphExec, hNode, copyParams : Optional[CUDA phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec cdef cydriver.CUDA_MEMCPY3D* cycopyParams_ptr = copyParams._pvt_ptr if copyParams != None else NULL - err = cydriver.cuGraphExecMemcpyNodeSetParams(cyhGraphExec, cyhNode, cycopyParams_ptr, cyctx) + with nogil: + err = cydriver.cuGraphExecMemcpyNodeSetParams(cyhGraphExec, cyhNode, cycopyParams_ptr, cyctx) return (_dict_CUresult[err],) {{endif}} @@ -44569,7 +44995,8 @@ def cuGraphExecMemsetNodeSetParams(hGraphExec, hNode, memsetParams : Optional[CU phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cymemsetParams_ptr = memsetParams._pvt_ptr if memsetParams != None else NULL - err = 
cydriver.cuGraphExecMemsetNodeSetParams(cyhGraphExec, cyhNode, cymemsetParams_ptr, cyctx) + with nogil: + err = cydriver.cuGraphExecMemsetNodeSetParams(cyhGraphExec, cyhNode, cymemsetParams_ptr, cyctx) return (_dict_CUresult[err],) {{endif}} @@ -44623,7 +45050,8 @@ def cuGraphExecHostNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUDA_H phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphExecHostNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphExecHostNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -44692,7 +45120,8 @@ def cuGraphExecChildGraphNodeSetParams(hGraphExec, hNode, childGraph): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - err = cydriver.cuGraphExecChildGraphNodeSetParams(cyhGraphExec, cyhNode, cychildGraph) + with nogil: + err = cydriver.cuGraphExecChildGraphNodeSetParams(cyhGraphExec, cyhNode, cychildGraph) return (_dict_CUresult[err],) {{endif}} @@ -44754,7 +45183,8 @@ def cuGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - err = cydriver.cuGraphExecEventRecordNodeSetEvent(cyhGraphExec, cyhNode, cyevent) + with nogil: + err = cydriver.cuGraphExecEventRecordNodeSetEvent(cyhGraphExec, cyhNode, cyevent) return (_dict_CUresult[err],) {{endif}} @@ -44816,7 +45246,8 @@ def cuGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - err = cydriver.cuGraphExecEventWaitNodeSetEvent(cyhGraphExec, cyhNode, cyevent) + with nogil: + err = cydriver.cuGraphExecEventWaitNodeSetEvent(cyhGraphExec, cyhNode, cyevent) return (_dict_CUresult[err],) {{endif}} @@ -44875,7 +45306,8 @@ def 
cuGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodePara phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphExecExternalSemaphoresSignalNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphExecExternalSemaphoresSignalNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -44934,7 +45366,8 @@ def cuGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodeParams phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphExecExternalSemaphoresWaitNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphExecExternalSemaphoresWaitNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -45002,7 +45435,8 @@ def cuGraphNodeSetEnabled(hGraphExec, hNode, unsigned int isEnabled): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - err = cydriver.cuGraphNodeSetEnabled(cyhGraphExec, cyhNode, isEnabled) + with nogil: + err = cydriver.cuGraphNodeSetEnabled(cyhGraphExec, cyhNode, isEnabled) return (_dict_CUresult[err],) {{endif}} @@ -45060,7 +45494,8 @@ def cuGraphNodeGetEnabled(hGraphExec, hNode): phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec cdef unsigned int isEnabled = 0 - err = cydriver.cuGraphNodeGetEnabled(cyhGraphExec, cyhNode, &isEnabled) + with nogil: + err = cydriver.cuGraphNodeGetEnabled(cyhGraphExec, cyhNode, &isEnabled) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], isEnabled) @@ -45110,7 +45545,8 @@ def cuGraphUpload(hGraphExec, hStream): else: phGraphExec = 
int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - err = cydriver.cuGraphUpload(cyhGraphExec, cyhStream) + with nogil: + err = cydriver.cuGraphUpload(cyhGraphExec, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -45163,7 +45599,8 @@ def cuGraphLaunch(hGraphExec, hStream): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - err = cydriver.cuGraphLaunch(cyhGraphExec, cyhStream) + with nogil: + err = cydriver.cuGraphLaunch(cyhGraphExec, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -45199,7 +45636,8 @@ def cuGraphExecDestroy(hGraphExec): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - err = cydriver.cuGraphExecDestroy(cyhGraphExec) + with nogil: + err = cydriver.cuGraphExecDestroy(cyhGraphExec) return (_dict_CUresult[err],) {{endif}} @@ -45233,7 +45671,8 @@ def cuGraphDestroy(hGraph): else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - err = cydriver.cuGraphDestroy(cyhGraph) + with nogil: + err = cydriver.cuGraphDestroy(cyhGraph) return (_dict_CUresult[err],) {{endif}} @@ -45410,7 +45849,8 @@ def cuGraphExecUpdate(hGraphExec, hGraph): phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec cdef CUgraphExecUpdateResultInfo resultInfo = CUgraphExecUpdateResultInfo() - err = cydriver.cuGraphExecUpdate(cyhGraphExec, cyhGraph, resultInfo._pvt_ptr) + with nogil: + err = cydriver.cuGraphExecUpdate(cyhGraphExec, cyhGraph, resultInfo._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], resultInfo) @@ -45458,7 +45898,8 @@ def cuGraphKernelNodeCopyAttributes(dst, src): else: pdst = int(CUgraphNode(dst)) cydst = pdst - err = cydriver.cuGraphKernelNodeCopyAttributes(cydst, cysrc) + with nogil: + err = cydriver.cuGraphKernelNodeCopyAttributes(cydst, cysrc) return (_dict_CUresult[err],) {{endif}} @@ -45499,7 +45940,8 @@ def cuGraphKernelNodeGetAttribute(hNode, attr not None : CUkernelNodeAttrID): cyhNode = phNode cdef 
cydriver.CUkernelNodeAttrID cyattr = attr.value cdef CUkernelNodeAttrValue value_out = CUkernelNodeAttrValue() - err = cydriver.cuGraphKernelNodeGetAttribute(cyhNode, cyattr, value_out._pvt_ptr) + with nogil: + err = cydriver.cuGraphKernelNodeGetAttribute(cyhNode, cyattr, value_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], value_out) @@ -45542,7 +45984,8 @@ def cuGraphKernelNodeSetAttribute(hNode, attr not None : CUkernelNodeAttrID, val cyhNode = phNode cdef cydriver.CUkernelNodeAttrID cyattr = attr.value cdef cydriver.CUkernelNodeAttrValue* cyvalue_ptr = value._pvt_ptr if value != None else NULL - err = cydriver.cuGraphKernelNodeSetAttribute(cyhNode, cyattr, cyvalue_ptr) + with nogil: + err = cydriver.cuGraphKernelNodeSetAttribute(cyhNode, cyattr, cyvalue_ptr) return (_dict_CUresult[err],) {{endif}} @@ -45581,7 +46024,8 @@ def cuGraphDebugDotPrint(hGraph, char* path, unsigned int flags): else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - err = cydriver.cuGraphDebugDotPrint(cyhGraph, path, flags) + with nogil: + err = cydriver.cuGraphDebugDotPrint(cyhGraph, path, flags) return (_dict_CUresult[err],) {{endif}} @@ -45640,7 +46084,8 @@ def cuUserObjectCreate(ptr, destroy, unsigned int initialRefcount, unsigned int cdef CUuserObject object_out = CUuserObject() cyptr = utils.HelperInputVoidPtr(ptr) cdef void* cyptr_ptr = cyptr.cptr - err = cydriver.cuUserObjectCreate(object_out._pvt_ptr, cyptr_ptr, cydestroy, initialRefcount, flags) + with nogil: + err = cydriver.cuUserObjectCreate(object_out._pvt_ptr, cyptr_ptr, cydestroy, initialRefcount, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], object_out) @@ -45683,7 +46128,8 @@ def cuUserObjectRetain(object, unsigned int count): else: pobject = int(CUuserObject(object)) cyobject = pobject - err = cydriver.cuUserObjectRetain(cyobject, count) + with nogil: + err = cydriver.cuUserObjectRetain(cyobject, 
count) return (_dict_CUresult[err],) {{endif}} @@ -45727,7 +46173,8 @@ def cuUserObjectRelease(object, unsigned int count): else: pobject = int(CUuserObject(object)) cyobject = pobject - err = cydriver.cuUserObjectRelease(cyobject, count) + with nogil: + err = cydriver.cuUserObjectRelease(cyobject, count) return (_dict_CUresult[err],) {{endif}} @@ -45782,7 +46229,8 @@ def cuGraphRetainUserObject(graph, object, unsigned int count, unsigned int flag else: pgraph = int(CUgraph(graph)) cygraph = pgraph - err = cydriver.cuGraphRetainUserObject(cygraph, cyobject, count, flags) + with nogil: + err = cydriver.cuGraphRetainUserObject(cygraph, cyobject, count, flags) return (_dict_CUresult[err],) {{endif}} @@ -45832,7 +46280,8 @@ def cuGraphReleaseUserObject(graph, object, unsigned int count): else: pgraph = int(CUgraph(graph)) cygraph = pgraph - err = cydriver.cuGraphReleaseUserObject(cygraph, cyobject, count) + with nogil: + err = cydriver.cuGraphReleaseUserObject(cygraph, cyobject, count) return (_dict_CUresult[err],) {{endif}} @@ -45896,17 +46345,20 @@ def cuGraphAddNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUg cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) cdef cydriver.CUgraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = 
cydriver.cuGraphAddNode(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -45979,27 +46431,32 @@ def cuGraphAddNode_v2(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[ cyhGraph = phGraph cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr cdef cydriver.CUgraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 0: + if len(dependencyData) > 1: cydependencyData = calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData)) if cydependencyData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) for idx in range(len(dependencyData)): string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) if numDependencies > len(dependencyData): raise RuntimeError("List is too small: " + str(len(dependencyData)) + " < " + 
str(numDependencies)) cdef cydriver.CUgraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphAddNode_v2(phGraphNode._pvt_ptr, cyhGraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, (dependencyData[0])._pvt_ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, cynodeParams_ptr) - if cydependencies is not NULL: + with nogil: + err = cydriver.cuGraphAddNode_v2(phGraphNode._pvt_ptr, cyhGraph, cydependencies, cydependencyData, numDependencies, cynodeParams_ptr) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) - if cydependencyData is not NULL: + if len(dependencyData) > 1 and cydependencyData is not NULL: free(cydependencyData) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -46045,7 +46502,8 @@ def cuGraphNodeSetParams(hNode, nodeParams : Optional[CUgraphNodeParams]): phNode = int(CUgraphNode(hNode)) cyhNode = phNode cdef cydriver.CUgraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphNodeSetParams(cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphNodeSetParams(cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -46104,7 +46562,8 @@ def cuGraphExecNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUgraphNod phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec cdef cydriver.CUgraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cydriver.cuGraphExecNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) + with nogil: + err = cydriver.cuGraphExecNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_dict_CUresult[err],) {{endif}} @@ -46166,7 +46625,8 @@ def cuGraphConditionalHandleCreate(hGraph, ctx, unsigned int defaultLaunchValue, phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph cdef CUgraphConditionalHandle pHandle_out = CUgraphConditionalHandle() - 
err = cydriver.cuGraphConditionalHandleCreate(pHandle_out._pvt_ptr, cyhGraph, cyctx, defaultLaunchValue, flags) + with nogil: + err = cydriver.cuGraphConditionalHandleCreate(pHandle_out._pvt_ptr, cyhGraph, cyctx, defaultLaunchValue, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pHandle_out) @@ -46216,7 +46676,8 @@ def cuOccupancyMaxActiveBlocksPerMultiprocessor(func, int blockSize, size_t dyna pfunc = int(CUfunction(func)) cyfunc = pfunc cdef int numBlocks = 0 - err = cydriver.cuOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, cyfunc, blockSize, dynamicSMemSize) + with nogil: + err = cydriver.cuOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, cyfunc, blockSize, dynamicSMemSize) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], numBlocks) @@ -46284,7 +46745,8 @@ def cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(func, int blockSize, si pfunc = int(CUfunction(func)) cyfunc = pfunc cdef int numBlocks = 0 - err = cydriver.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, cyfunc, blockSize, dynamicSMemSize, flags) + with nogil: + err = cydriver.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, cyfunc, blockSize, dynamicSMemSize, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], numBlocks) @@ -46370,7 +46832,8 @@ def cuOccupancyMaxPotentialBlockSize(func, blockSizeToDynamicSMemSize, size_t dy cyfunc = pfunc cdef int minGridSize = 0 cdef int blockSize = 0 - err = cydriver.cuOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, cyfunc, cyblockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit) + with nogil: + err = cydriver.cuOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, cyfunc, cyblockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], minGridSize, 
blockSize) @@ -46455,7 +46918,8 @@ def cuOccupancyMaxPotentialBlockSizeWithFlags(func, blockSizeToDynamicSMemSize, cyfunc = pfunc cdef int minGridSize = 0 cdef int blockSize = 0 - err = cydriver.cuOccupancyMaxPotentialBlockSizeWithFlags(&minGridSize, &blockSize, cyfunc, cyblockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, flags) + with nogil: + err = cydriver.cuOccupancyMaxPotentialBlockSizeWithFlags(&minGridSize, &blockSize, cyfunc, cyblockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], minGridSize, blockSize) @@ -46501,7 +46965,8 @@ def cuOccupancyAvailableDynamicSMemPerBlock(func, int numBlocks, int blockSize): pfunc = int(CUfunction(func)) cyfunc = pfunc cdef size_t dynamicSmemSize = 0 - err = cydriver.cuOccupancyAvailableDynamicSMemPerBlock(&dynamicSmemSize, cyfunc, numBlocks, blockSize) + with nogil: + err = cydriver.cuOccupancyAvailableDynamicSMemPerBlock(&dynamicSmemSize, cyfunc, numBlocks, blockSize) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], dynamicSmemSize) @@ -46560,7 +47025,8 @@ def cuOccupancyMaxPotentialClusterSize(func, config : Optional[CUlaunchConfig]): cyfunc = pfunc cdef int clusterSize = 0 cdef cydriver.CUlaunchConfig* cyconfig_ptr = config._pvt_ptr if config != None else NULL - err = cydriver.cuOccupancyMaxPotentialClusterSize(&clusterSize, cyfunc, cyconfig_ptr) + with nogil: + err = cydriver.cuOccupancyMaxPotentialClusterSize(&clusterSize, cyfunc, cyconfig_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], clusterSize) @@ -46619,7 +47085,8 @@ def cuOccupancyMaxActiveClusters(func, config : Optional[CUlaunchConfig]): cyfunc = pfunc cdef int numClusters = 0 cdef cydriver.CUlaunchConfig* cyconfig_ptr = config._pvt_ptr if config != None else NULL - err = cydriver.cuOccupancyMaxActiveClusters(&numClusters, cyfunc, 
cyconfig_ptr) + with nogil: + err = cydriver.cuOccupancyMaxActiveClusters(&numClusters, cyfunc, cyconfig_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], numClusters) @@ -46673,7 +47140,8 @@ def cuTexRefSetArray(hTexRef, hArray, unsigned int Flags): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - err = cydriver.cuTexRefSetArray(cyhTexRef, cyhArray, Flags) + with nogil: + err = cydriver.cuTexRefSetArray(cyhTexRef, cyhArray, Flags) return (_dict_CUresult[err],) {{endif}} @@ -46725,7 +47193,8 @@ def cuTexRefSetMipmappedArray(hTexRef, hMipmappedArray, unsigned int Flags): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - err = cydriver.cuTexRefSetMipmappedArray(cyhTexRef, cyhMipmappedArray, Flags) + with nogil: + err = cydriver.cuTexRefSetMipmappedArray(cyhTexRef, cyhMipmappedArray, Flags) return (_dict_CUresult[err],) {{endif}} @@ -46797,7 +47266,8 @@ def cuTexRefSetAddress(hTexRef, dptr, size_t numbytes): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef size_t ByteOffset = 0 - err = cydriver.cuTexRefSetAddress(&ByteOffset, cyhTexRef, cydptr, numbytes) + with nogil: + err = cydriver.cuTexRefSetAddress(&ByteOffset, cyhTexRef, cydptr, numbytes) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], ByteOffset) @@ -46879,7 +47349,8 @@ def cuTexRefSetAddress2D(hTexRef, desc : Optional[CUDA_ARRAY_DESCRIPTOR], dptr, phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef cydriver.CUDA_ARRAY_DESCRIPTOR* cydesc_ptr = desc._pvt_ptr if desc != None else NULL - err = cydriver.cuTexRefSetAddress2D(cyhTexRef, cydesc_ptr, cydptr, Pitch) + with nogil: + err = cydriver.cuTexRefSetAddress2D(cyhTexRef, cydesc_ptr, cydptr, Pitch) return (_dict_CUresult[err],) {{endif}} @@ -46924,7 +47395,8 @@ def cuTexRefSetFormat(hTexRef, fmt not None : CUarray_format, int NumPackedCompo phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef 
cydriver.CUarray_format cyfmt = fmt.value - err = cydriver.cuTexRefSetFormat(cyhTexRef, cyfmt, NumPackedComponents) + with nogil: + err = cydriver.cuTexRefSetFormat(cyhTexRef, cyfmt, NumPackedComponents) return (_dict_CUresult[err],) {{endif}} @@ -46976,7 +47448,8 @@ def cuTexRefSetAddressMode(hTexRef, int dim, am not None : CUaddress_mode): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef cydriver.CUaddress_mode cyam = am.value - err = cydriver.cuTexRefSetAddressMode(cyhTexRef, dim, cyam) + with nogil: + err = cydriver.cuTexRefSetAddressMode(cyhTexRef, dim, cyam) return (_dict_CUresult[err],) {{endif}} @@ -47022,7 +47495,8 @@ def cuTexRefSetFilterMode(hTexRef, fm not None : CUfilter_mode): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef cydriver.CUfilter_mode cyfm = fm.value - err = cydriver.cuTexRefSetFilterMode(cyhTexRef, cyfm) + with nogil: + err = cydriver.cuTexRefSetFilterMode(cyhTexRef, cyfm) return (_dict_CUresult[err],) {{endif}} @@ -47068,7 +47542,8 @@ def cuTexRefSetMipmapFilterMode(hTexRef, fm not None : CUfilter_mode): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef cydriver.CUfilter_mode cyfm = fm.value - err = cydriver.cuTexRefSetMipmapFilterMode(cyhTexRef, cyfm) + with nogil: + err = cydriver.cuTexRefSetMipmapFilterMode(cyhTexRef, cyfm) return (_dict_CUresult[err],) {{endif}} @@ -47111,7 +47586,8 @@ def cuTexRefSetMipmapLevelBias(hTexRef, float bias): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - err = cydriver.cuTexRefSetMipmapLevelBias(cyhTexRef, bias) + with nogil: + err = cydriver.cuTexRefSetMipmapLevelBias(cyhTexRef, bias) return (_dict_CUresult[err],) {{endif}} @@ -47156,7 +47632,8 @@ def cuTexRefSetMipmapLevelClamp(hTexRef, float minMipmapLevelClamp, float maxMip else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - err = cydriver.cuTexRefSetMipmapLevelClamp(cyhTexRef, minMipmapLevelClamp, maxMipmapLevelClamp) + with nogil: + err = cydriver.cuTexRefSetMipmapLevelClamp(cyhTexRef, 
minMipmapLevelClamp, maxMipmapLevelClamp) return (_dict_CUresult[err],) {{endif}} @@ -47198,7 +47675,8 @@ def cuTexRefSetMaxAnisotropy(hTexRef, unsigned int maxAniso): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - err = cydriver.cuTexRefSetMaxAnisotropy(cyhTexRef, maxAniso) + with nogil: + err = cydriver.cuTexRefSetMaxAnisotropy(cyhTexRef, maxAniso) return (_dict_CUresult[err],) {{endif}} @@ -47245,7 +47723,8 @@ def cuTexRefSetBorderColor(hTexRef, float pBorderColor): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - err = cydriver.cuTexRefSetBorderColor(cyhTexRef, &pBorderColor) + with nogil: + err = cydriver.cuTexRefSetBorderColor(cyhTexRef, &pBorderColor) return (_dict_CUresult[err],) {{endif}} @@ -47302,7 +47781,8 @@ def cuTexRefSetFlags(hTexRef, unsigned int Flags): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - err = cydriver.cuTexRefSetFlags(cyhTexRef, Flags) + with nogil: + err = cydriver.cuTexRefSetFlags(cyhTexRef, Flags) return (_dict_CUresult[err],) {{endif}} @@ -47343,7 +47823,8 @@ def cuTexRefGetAddress(hTexRef): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef CUdeviceptr pdptr = CUdeviceptr() - err = cydriver.cuTexRefGetAddress(pdptr._pvt_ptr, cyhTexRef) + with nogil: + err = cydriver.cuTexRefGetAddress(pdptr._pvt_ptr, cyhTexRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pdptr) @@ -47386,7 +47867,8 @@ def cuTexRefGetArray(hTexRef): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef CUarray phArray = CUarray() - err = cydriver.cuTexRefGetArray(phArray._pvt_ptr, cyhTexRef) + with nogil: + err = cydriver.cuTexRefGetArray(phArray._pvt_ptr, cyhTexRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phArray) @@ -47430,7 +47912,8 @@ def cuTexRefGetMipmappedArray(hTexRef): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef CUmipmappedArray phMipmappedArray = CUmipmappedArray() - err 
= cydriver.cuTexRefGetMipmappedArray(phMipmappedArray._pvt_ptr, cyhTexRef) + with nogil: + err = cydriver.cuTexRefGetMipmappedArray(phMipmappedArray._pvt_ptr, cyhTexRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phMipmappedArray) @@ -47475,7 +47958,8 @@ def cuTexRefGetAddressMode(hTexRef, int dim): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef cydriver.CUaddress_mode pam - err = cydriver.cuTexRefGetAddressMode(&pam, cyhTexRef, dim) + with nogil: + err = cydriver.cuTexRefGetAddressMode(&pam, cyhTexRef, dim) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUaddress_mode(pam)) @@ -47517,7 +48001,8 @@ def cuTexRefGetFilterMode(hTexRef): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef cydriver.CUfilter_mode pfm - err = cydriver.cuTexRefGetFilterMode(&pfm, cyhTexRef) + with nogil: + err = cydriver.cuTexRefGetFilterMode(&pfm, cyhTexRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUfilter_mode(pfm)) @@ -47563,7 +48048,8 @@ def cuTexRefGetFormat(hTexRef): cyhTexRef = phTexRef cdef cydriver.CUarray_format pFormat cdef int pNumChannels = 0 - err = cydriver.cuTexRefGetFormat(&pFormat, &pNumChannels, cyhTexRef) + with nogil: + err = cydriver.cuTexRefGetFormat(&pFormat, &pNumChannels, cyhTexRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], CUarray_format(pFormat), pNumChannels) @@ -47605,7 +48091,8 @@ def cuTexRefGetMipmapFilterMode(hTexRef): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef cydriver.CUfilter_mode pfm - err = cydriver.cuTexRefGetMipmapFilterMode(&pfm, cyhTexRef) + with nogil: + err = cydriver.cuTexRefGetMipmapFilterMode(&pfm, cyhTexRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUfilter_mode(pfm)) @@ -47648,7 +48135,8 @@ def cuTexRefGetMipmapLevelBias(hTexRef): 
phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef float pbias = 0 - err = cydriver.cuTexRefGetMipmapLevelBias(&pbias, cyhTexRef) + with nogil: + err = cydriver.cuTexRefGetMipmapLevelBias(&pbias, cyhTexRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pbias) @@ -47694,7 +48182,8 @@ def cuTexRefGetMipmapLevelClamp(hTexRef): cyhTexRef = phTexRef cdef float pminMipmapLevelClamp = 0 cdef float pmaxMipmapLevelClamp = 0 - err = cydriver.cuTexRefGetMipmapLevelClamp(&pminMipmapLevelClamp, &pmaxMipmapLevelClamp, cyhTexRef) + with nogil: + err = cydriver.cuTexRefGetMipmapLevelClamp(&pminMipmapLevelClamp, &pmaxMipmapLevelClamp, cyhTexRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], pminMipmapLevelClamp, pmaxMipmapLevelClamp) @@ -47736,7 +48225,8 @@ def cuTexRefGetMaxAnisotropy(hTexRef): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef int pmaxAniso = 0 - err = cydriver.cuTexRefGetMaxAnisotropy(&pmaxAniso, cyhTexRef) + with nogil: + err = cydriver.cuTexRefGetMaxAnisotropy(&pmaxAniso, cyhTexRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pmaxAniso) @@ -47781,7 +48271,8 @@ def cuTexRefGetBorderColor(hTexRef): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef float pBorderColor = 0 - err = cydriver.cuTexRefGetBorderColor(&pBorderColor, cyhTexRef) + with nogil: + err = cydriver.cuTexRefGetBorderColor(&pBorderColor, cyhTexRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pBorderColor) @@ -47822,7 +48313,8 @@ def cuTexRefGetFlags(hTexRef): phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef cdef unsigned int pFlags = 0 - err = cydriver.cuTexRefGetFlags(&pFlags, cyhTexRef) + with nogil: + err = cydriver.cuTexRefGetFlags(&pFlags, cyhTexRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], 
pFlags) @@ -47855,7 +48347,8 @@ def cuTexRefCreate(): :py:obj:`~.cuTexRefDestroy` """ cdef CUtexref pTexRef = CUtexref() - err = cydriver.cuTexRefCreate(pTexRef._pvt_ptr) + with nogil: + err = cydriver.cuTexRefCreate(pTexRef._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pTexRef) @@ -47893,7 +48386,8 @@ def cuTexRefDestroy(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - err = cydriver.cuTexRefDestroy(cyhTexRef) + with nogil: + err = cydriver.cuTexRefDestroy(cyhTexRef) return (_dict_CUresult[err],) {{endif}} @@ -47946,7 +48440,8 @@ def cuSurfRefSetArray(hSurfRef, hArray, unsigned int Flags): else: phSurfRef = int(CUsurfref(hSurfRef)) cyhSurfRef = phSurfRef - err = cydriver.cuSurfRefSetArray(cyhSurfRef, cyhArray, Flags) + with nogil: + err = cydriver.cuSurfRefSetArray(cyhSurfRef, cyhArray, Flags) return (_dict_CUresult[err],) {{endif}} @@ -47987,7 +48482,8 @@ def cuSurfRefGetArray(hSurfRef): phSurfRef = int(CUsurfref(hSurfRef)) cyhSurfRef = phSurfRef cdef CUarray phArray = CUarray() - err = cydriver.cuSurfRefGetArray(phArray._pvt_ptr, cyhSurfRef) + with nogil: + err = cydriver.cuSurfRefGetArray(phArray._pvt_ptr, cyhSurfRef) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phArray) @@ -48226,7 +48722,8 @@ def cuTexObjectCreate(pResDesc : Optional[CUDA_RESOURCE_DESC], pTexDesc : Option cdef cydriver.CUDA_RESOURCE_DESC* cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc != None else NULL cdef cydriver.CUDA_TEXTURE_DESC* cypTexDesc_ptr = pTexDesc._pvt_ptr if pTexDesc != None else NULL cdef cydriver.CUDA_RESOURCE_VIEW_DESC* cypResViewDesc_ptr = pResViewDesc._pvt_ptr if pResViewDesc != None else NULL - err = cydriver.cuTexObjectCreate(pTexObject._pvt_ptr, cypResDesc_ptr, cypTexDesc_ptr, cypResViewDesc_ptr) + with nogil: + err = cydriver.cuTexObjectCreate(pTexObject._pvt_ptr, cypResDesc_ptr, cypTexDesc_ptr, cypResViewDesc_ptr) if err != 
cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pTexObject) @@ -48262,7 +48759,8 @@ def cuTexObjectDestroy(texObject): else: ptexObject = int(CUtexObject(texObject)) cytexObject = ptexObject - err = cydriver.cuTexObjectDestroy(cytexObject) + with nogil: + err = cydriver.cuTexObjectDestroy(cytexObject) return (_dict_CUresult[err],) {{endif}} @@ -48300,7 +48798,8 @@ def cuTexObjectGetResourceDesc(texObject): ptexObject = int(CUtexObject(texObject)) cytexObject = ptexObject cdef CUDA_RESOURCE_DESC pResDesc = CUDA_RESOURCE_DESC() - err = cydriver.cuTexObjectGetResourceDesc(pResDesc._pvt_ptr, cytexObject) + with nogil: + err = cydriver.cuTexObjectGetResourceDesc(pResDesc._pvt_ptr, cytexObject) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pResDesc) @@ -48340,7 +48839,8 @@ def cuTexObjectGetTextureDesc(texObject): ptexObject = int(CUtexObject(texObject)) cytexObject = ptexObject cdef CUDA_TEXTURE_DESC pTexDesc = CUDA_TEXTURE_DESC() - err = cydriver.cuTexObjectGetTextureDesc(pTexDesc._pvt_ptr, cytexObject) + with nogil: + err = cydriver.cuTexObjectGetTextureDesc(pTexDesc._pvt_ptr, cytexObject) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pTexDesc) @@ -48381,7 +48881,8 @@ def cuTexObjectGetResourceViewDesc(texObject): ptexObject = int(CUtexObject(texObject)) cytexObject = ptexObject cdef CUDA_RESOURCE_VIEW_DESC pResViewDesc = CUDA_RESOURCE_VIEW_DESC() - err = cydriver.cuTexObjectGetResourceViewDesc(pResViewDesc._pvt_ptr, cytexObject) + with nogil: + err = cydriver.cuTexObjectGetResourceViewDesc(pResViewDesc._pvt_ptr, cytexObject) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pResViewDesc) @@ -48423,7 +48924,8 @@ def cuSurfObjectCreate(pResDesc : Optional[CUDA_RESOURCE_DESC]): """ cdef CUsurfObject pSurfObject = CUsurfObject() cdef cydriver.CUDA_RESOURCE_DESC* cypResDesc_ptr = 
pResDesc._pvt_ptr if pResDesc != None else NULL - err = cydriver.cuSurfObjectCreate(pSurfObject._pvt_ptr, cypResDesc_ptr) + with nogil: + err = cydriver.cuSurfObjectCreate(pSurfObject._pvt_ptr, cypResDesc_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pSurfObject) @@ -48459,7 +48961,8 @@ def cuSurfObjectDestroy(surfObject): else: psurfObject = int(CUsurfObject(surfObject)) cysurfObject = psurfObject - err = cydriver.cuSurfObjectDestroy(cysurfObject) + with nogil: + err = cydriver.cuSurfObjectDestroy(cysurfObject) return (_dict_CUresult[err],) {{endif}} @@ -48497,7 +49000,8 @@ def cuSurfObjectGetResourceDesc(surfObject): psurfObject = int(CUsurfObject(surfObject)) cysurfObject = psurfObject cdef CUDA_RESOURCE_DESC pResDesc = CUDA_RESOURCE_DESC() - err = cydriver.cuSurfObjectGetResourceDesc(pResDesc._pvt_ptr, cysurfObject) + with nogil: + err = cydriver.cuSurfObjectGetResourceDesc(pResDesc._pvt_ptr, cysurfObject) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pResDesc) @@ -48756,49 +49260,58 @@ def cuTensorMapEncodeTiled(tensorDataType not None : CUtensorMapDataType, tensor cyglobalAddress = utils.HelperInputVoidPtr(globalAddress) cdef void* cyglobalAddress_ptr = cyglobalAddress.cptr cdef cydriver.cuuint64_t* cyglobalDim = NULL - if len(globalDim) > 0: + if len(globalDim) > 1: cyglobalDim = calloc(len(globalDim), sizeof(cydriver.cuuint64_t)) if cyglobalDim is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(globalDim)) + 'x' + str(sizeof(cydriver.cuuint64_t))) else: for idx in range(len(globalDim)): cyglobalDim[idx] = (globalDim[idx])._pvt_ptr[0] + elif len(globalDim) == 1: + cyglobalDim = (globalDim[0])._pvt_ptr cdef cydriver.cuuint64_t* cyglobalStrides = NULL - if len(globalStrides) > 0: + if len(globalStrides) > 1: cyglobalStrides = calloc(len(globalStrides), sizeof(cydriver.cuuint64_t)) if cyglobalStrides is NULL: raise 
MemoryError('Failed to allocate length x size memory: ' + str(len(globalStrides)) + 'x' + str(sizeof(cydriver.cuuint64_t))) else: for idx in range(len(globalStrides)): cyglobalStrides[idx] = (globalStrides[idx])._pvt_ptr[0] + elif len(globalStrides) == 1: + cyglobalStrides = (globalStrides[0])._pvt_ptr cdef cydriver.cuuint32_t* cyboxDim = NULL - if len(boxDim) > 0: + if len(boxDim) > 1: cyboxDim = calloc(len(boxDim), sizeof(cydriver.cuuint32_t)) if cyboxDim is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(boxDim)) + 'x' + str(sizeof(cydriver.cuuint32_t))) else: for idx in range(len(boxDim)): cyboxDim[idx] = (boxDim[idx])._pvt_ptr[0] + elif len(boxDim) == 1: + cyboxDim = (boxDim[0])._pvt_ptr cdef cydriver.cuuint32_t* cyelementStrides = NULL - if len(elementStrides) > 0: + if len(elementStrides) > 1: cyelementStrides = calloc(len(elementStrides), sizeof(cydriver.cuuint32_t)) if cyelementStrides is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(elementStrides)) + 'x' + str(sizeof(cydriver.cuuint32_t))) else: for idx in range(len(elementStrides)): cyelementStrides[idx] = (elementStrides[idx])._pvt_ptr[0] + elif len(elementStrides) == 1: + cyelementStrides = (elementStrides[0])._pvt_ptr cdef cydriver.CUtensorMapInterleave cyinterleave = interleave.value cdef cydriver.CUtensorMapSwizzle cyswizzle = swizzle.value cdef cydriver.CUtensorMapL2promotion cyl2Promotion = l2Promotion.value cdef cydriver.CUtensorMapFloatOOBfill cyoobFill = oobFill.value - err = cydriver.cuTensorMapEncodeTiled(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress_ptr, (globalDim[0])._pvt_ptr if len(globalDim) == 1 else cyglobalDim, (globalStrides[0])._pvt_ptr if len(globalStrides) == 1 else cyglobalStrides, (boxDim[0])._pvt_ptr if len(boxDim) == 1 else cyboxDim, (elementStrides[0])._pvt_ptr if len(elementStrides) == 1 else cyelementStrides, cyinterleave, cyswizzle, cyl2Promotion, cyoobFill) - if cyglobalDim is not NULL: + 
with nogil: + err = cydriver.cuTensorMapEncodeTiled(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress_ptr, cyglobalDim, cyglobalStrides, cyboxDim, cyelementStrides, cyinterleave, cyswizzle, cyl2Promotion, cyoobFill) + if len(globalDim) > 1 and cyglobalDim is not NULL: free(cyglobalDim) - if cyglobalStrides is not NULL: + if len(globalStrides) > 1 and cyglobalStrides is not NULL: free(cyglobalStrides) - if cyboxDim is not NULL: + if len(boxDim) > 1 and cyboxDim is not NULL: free(cyboxDim) - if cyelementStrides is not NULL: + if len(elementStrides) > 1 and cyelementStrides is not NULL: free(cyelementStrides) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -49104,41 +49617,48 @@ def cuTensorMapEncodeIm2col(tensorDataType not None : CUtensorMapDataType, tenso cyglobalAddress = utils.HelperInputVoidPtr(globalAddress) cdef void* cyglobalAddress_ptr = cyglobalAddress.cptr cdef cydriver.cuuint64_t* cyglobalDim = NULL - if len(globalDim) > 0: + if len(globalDim) > 1: cyglobalDim = calloc(len(globalDim), sizeof(cydriver.cuuint64_t)) if cyglobalDim is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(globalDim)) + 'x' + str(sizeof(cydriver.cuuint64_t))) else: for idx in range(len(globalDim)): cyglobalDim[idx] = (globalDim[idx])._pvt_ptr[0] + elif len(globalDim) == 1: + cyglobalDim = (globalDim[0])._pvt_ptr cdef cydriver.cuuint64_t* cyglobalStrides = NULL - if len(globalStrides) > 0: + if len(globalStrides) > 1: cyglobalStrides = calloc(len(globalStrides), sizeof(cydriver.cuuint64_t)) if cyglobalStrides is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(globalStrides)) + 'x' + str(sizeof(cydriver.cuuint64_t))) else: for idx in range(len(globalStrides)): cyglobalStrides[idx] = (globalStrides[idx])._pvt_ptr[0] + elif len(globalStrides) == 1: + cyglobalStrides = (globalStrides[0])._pvt_ptr cdef vector[int] cypixelBoxLowerCorner = pixelBoxLowerCorner cdef vector[int] 
cypixelBoxUpperCorner = pixelBoxUpperCorner cdef cydriver.cuuint32_t* cyelementStrides = NULL - if len(elementStrides) > 0: + if len(elementStrides) > 1: cyelementStrides = calloc(len(elementStrides), sizeof(cydriver.cuuint32_t)) if cyelementStrides is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(elementStrides)) + 'x' + str(sizeof(cydriver.cuuint32_t))) else: for idx in range(len(elementStrides)): cyelementStrides[idx] = (elementStrides[idx])._pvt_ptr[0] + elif len(elementStrides) == 1: + cyelementStrides = (elementStrides[0])._pvt_ptr cdef cydriver.CUtensorMapInterleave cyinterleave = interleave.value cdef cydriver.CUtensorMapSwizzle cyswizzle = swizzle.value cdef cydriver.CUtensorMapL2promotion cyl2Promotion = l2Promotion.value cdef cydriver.CUtensorMapFloatOOBfill cyoobFill = oobFill.value - err = cydriver.cuTensorMapEncodeIm2col(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress_ptr, (globalDim[0])._pvt_ptr if len(globalDim) == 1 else cyglobalDim, (globalStrides[0])._pvt_ptr if len(globalStrides) == 1 else cyglobalStrides, cypixelBoxLowerCorner.data(), cypixelBoxUpperCorner.data(), cychannelsPerPixel, cypixelsPerColumn, (elementStrides[0])._pvt_ptr if len(elementStrides) == 1 else cyelementStrides, cyinterleave, cyswizzle, cyl2Promotion, cyoobFill) - if cyglobalDim is not NULL: + with nogil: + err = cydriver.cuTensorMapEncodeIm2col(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress_ptr, cyglobalDim, cyglobalStrides, cypixelBoxLowerCorner.data(), cypixelBoxUpperCorner.data(), cychannelsPerPixel, cypixelsPerColumn, cyelementStrides, cyinterleave, cyswizzle, cyl2Promotion, cyoobFill) + if len(globalDim) > 1 and cyglobalDim is not NULL: free(cyglobalDim) - if cyglobalStrides is not NULL: + if len(globalStrides) > 1 and cyglobalStrides is not NULL: free(cyglobalStrides) - if cyelementStrides is not NULL: + if len(elementStrides) > 1 and cyelementStrides is not NULL: free(cyelementStrides) if err != 
cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -49425,40 +49945,47 @@ def cuTensorMapEncodeIm2colWide(tensorDataType not None : CUtensorMapDataType, t cyglobalAddress = utils.HelperInputVoidPtr(globalAddress) cdef void* cyglobalAddress_ptr = cyglobalAddress.cptr cdef cydriver.cuuint64_t* cyglobalDim = NULL - if len(globalDim) > 0: + if len(globalDim) > 1: cyglobalDim = calloc(len(globalDim), sizeof(cydriver.cuuint64_t)) if cyglobalDim is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(globalDim)) + 'x' + str(sizeof(cydriver.cuuint64_t))) else: for idx in range(len(globalDim)): cyglobalDim[idx] = (globalDim[idx])._pvt_ptr[0] + elif len(globalDim) == 1: + cyglobalDim = (globalDim[0])._pvt_ptr cdef cydriver.cuuint64_t* cyglobalStrides = NULL - if len(globalStrides) > 0: + if len(globalStrides) > 1: cyglobalStrides = calloc(len(globalStrides), sizeof(cydriver.cuuint64_t)) if cyglobalStrides is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(globalStrides)) + 'x' + str(sizeof(cydriver.cuuint64_t))) else: for idx in range(len(globalStrides)): cyglobalStrides[idx] = (globalStrides[idx])._pvt_ptr[0] + elif len(globalStrides) == 1: + cyglobalStrides = (globalStrides[0])._pvt_ptr cdef cydriver.cuuint32_t* cyelementStrides = NULL - if len(elementStrides) > 0: + if len(elementStrides) > 1: cyelementStrides = calloc(len(elementStrides), sizeof(cydriver.cuuint32_t)) if cyelementStrides is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(elementStrides)) + 'x' + str(sizeof(cydriver.cuuint32_t))) else: for idx in range(len(elementStrides)): cyelementStrides[idx] = (elementStrides[idx])._pvt_ptr[0] + elif len(elementStrides) == 1: + cyelementStrides = (elementStrides[0])._pvt_ptr cdef cydriver.CUtensorMapInterleave cyinterleave = interleave.value cdef cydriver.CUtensorMapIm2ColWideMode cymode = mode.value cdef cydriver.CUtensorMapSwizzle cyswizzle = swizzle.value cdef 
cydriver.CUtensorMapL2promotion cyl2Promotion = l2Promotion.value cdef cydriver.CUtensorMapFloatOOBfill cyoobFill = oobFill.value - err = cydriver.cuTensorMapEncodeIm2colWide(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress_ptr, (globalDim[0])._pvt_ptr if len(globalDim) == 1 else cyglobalDim, (globalStrides[0])._pvt_ptr if len(globalStrides) == 1 else cyglobalStrides, pixelBoxLowerCornerWidth, pixelBoxUpperCornerWidth, cychannelsPerPixel, cypixelsPerColumn, (elementStrides[0])._pvt_ptr if len(elementStrides) == 1 else cyelementStrides, cyinterleave, cymode, cyswizzle, cyl2Promotion, cyoobFill) - if cyglobalDim is not NULL: + with nogil: + err = cydriver.cuTensorMapEncodeIm2colWide(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress_ptr, cyglobalDim, cyglobalStrides, pixelBoxLowerCornerWidth, pixelBoxUpperCornerWidth, cychannelsPerPixel, cypixelsPerColumn, cyelementStrides, cyinterleave, cymode, cyswizzle, cyl2Promotion, cyoobFill) + if len(globalDim) > 1 and cyglobalDim is not NULL: free(cyglobalDim) - if cyglobalStrides is not NULL: + if len(globalStrides) > 1 and cyglobalStrides is not NULL: free(cyglobalStrides) - if cyelementStrides is not NULL: + if len(elementStrides) > 1 and cyelementStrides is not NULL: free(cyelementStrides) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -49498,7 +50025,8 @@ def cuTensorMapReplaceAddress(tensorMap : Optional[CUtensorMap], globalAddress): cdef cydriver.CUtensorMap* cytensorMap_ptr = tensorMap._pvt_ptr if tensorMap != None else NULL cyglobalAddress = utils.HelperInputVoidPtr(globalAddress) cdef void* cyglobalAddress_ptr = cyglobalAddress.cptr - err = cydriver.cuTensorMapReplaceAddress(cytensorMap_ptr, cyglobalAddress_ptr) + with nogil: + err = cydriver.cuTensorMapReplaceAddress(cytensorMap_ptr, cyglobalAddress_ptr) return (_dict_CUresult[err],) {{endif}} @@ -49551,7 +50079,8 @@ def cuDeviceCanAccessPeer(dev, peerDev): pdev = int(CUdevice(dev)) cydev = pdev cdef int 
canAccessPeer = 0 - err = cydriver.cuDeviceCanAccessPeer(&canAccessPeer, cydev, cypeerDev) + with nogil: + err = cydriver.cuDeviceCanAccessPeer(&canAccessPeer, cydev, cypeerDev) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], canAccessPeer) @@ -49622,7 +50151,8 @@ def cuCtxEnablePeerAccess(peerContext, unsigned int Flags): else: ppeerContext = int(CUcontext(peerContext)) cypeerContext = ppeerContext - err = cydriver.cuCtxEnablePeerAccess(cypeerContext, Flags) + with nogil: + err = cydriver.cuCtxEnablePeerAccess(cypeerContext, Flags) return (_dict_CUresult[err],) {{endif}} @@ -49661,7 +50191,8 @@ def cuCtxDisablePeerAccess(peerContext): else: ppeerContext = int(CUcontext(peerContext)) cypeerContext = ppeerContext - err = cydriver.cuCtxDisablePeerAccess(cypeerContext) + with nogil: + err = cydriver.cuCtxDisablePeerAccess(cypeerContext) return (_dict_CUresult[err],) {{endif}} @@ -49732,7 +50263,8 @@ def cuDeviceGetP2PAttribute(attrib not None : CUdevice_P2PAttribute, srcDevice, cysrcDevice = psrcDevice cdef int value = 0 cdef cydriver.CUdevice_P2PAttribute cyattrib = attrib.value - err = cydriver.cuDeviceGetP2PAttribute(&value, cyattrib, cysrcDevice, cydstDevice) + with nogil: + err = cydriver.cuDeviceGetP2PAttribute(&value, cyattrib, cysrcDevice, cydstDevice) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], value) @@ -49772,7 +50304,8 @@ def cuGraphicsUnregisterResource(resource): else: presource = int(CUgraphicsResource(resource)) cyresource = presource - err = cydriver.cuGraphicsUnregisterResource(cyresource) + with nogil: + err = cydriver.cuGraphicsUnregisterResource(cyresource) return (_dict_CUresult[err],) {{endif}} @@ -49827,7 +50360,8 @@ def cuGraphicsSubResourceGetMappedArray(resource, unsigned int arrayIndex, unsig presource = int(CUgraphicsResource(resource)) cyresource = presource cdef CUarray pArray = CUarray() - err = 
cydriver.cuGraphicsSubResourceGetMappedArray(pArray._pvt_ptr, cyresource, arrayIndex, mipLevel) + with nogil: + err = cydriver.cuGraphicsSubResourceGetMappedArray(pArray._pvt_ptr, cyresource, arrayIndex, mipLevel) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pArray) @@ -49873,7 +50407,8 @@ def cuGraphicsResourceGetMappedMipmappedArray(resource): presource = int(CUgraphicsResource(resource)) cyresource = presource cdef CUmipmappedArray pMipmappedArray = CUmipmappedArray() - err = cydriver.cuGraphicsResourceGetMappedMipmappedArray(pMipmappedArray._pvt_ptr, cyresource) + with nogil: + err = cydriver.cuGraphicsResourceGetMappedMipmappedArray(pMipmappedArray._pvt_ptr, cyresource) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pMipmappedArray) @@ -49919,7 +50454,8 @@ def cuGraphicsResourceGetMappedPointer(resource): cyresource = presource cdef CUdeviceptr pDevPtr = CUdeviceptr() cdef size_t pSize = 0 - err = cydriver.cuGraphicsResourceGetMappedPointer(pDevPtr._pvt_ptr, &pSize, cyresource) + with nogil: + err = cydriver.cuGraphicsResourceGetMappedPointer(pDevPtr._pvt_ptr, &pSize, cyresource) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], pDevPtr, pSize) @@ -49979,7 +50515,8 @@ def cuGraphicsResourceSetMapFlags(resource, unsigned int flags): else: presource = int(CUgraphicsResource(resource)) cyresource = presource - err = cydriver.cuGraphicsResourceSetMapFlags(cyresource, flags) + with nogil: + err = cydriver.cuGraphicsResourceSetMapFlags(cyresource, flags) return (_dict_CUresult[err],) {{endif}} @@ -50041,7 +50578,8 @@ def cuGraphicsMapResources(unsigned int count, resources, hStream): cyresources = resources else: raise TypeError("Argument 'resources' is not instance of type (expected , found " + str(type(resources))) - err = cydriver.cuGraphicsMapResources(count, cyresources, cyhStream) + with nogil: + err = 
cydriver.cuGraphicsMapResources(count, cyresources, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -50101,7 +50639,8 @@ def cuGraphicsUnmapResources(unsigned int count, resources, hStream): cyresources = resources else: raise TypeError("Argument 'resources' is not instance of type (expected , found " + str(type(resources))) - err = cydriver.cuGraphicsUnmapResources(count, cyresources, cyhStream) + with nogil: + err = cydriver.cuGraphicsUnmapResources(count, cyresources, cyhStream) return (_dict_CUresult[err],) {{endif}} @@ -50202,7 +50741,8 @@ def cuGetProcAddress(char* symbol, int cudaVersion, flags): cyflags = pflags cdef void_ptr pfn = 0 cdef cydriver.CUdriverProcAddressQueryResult symbolStatus - err = cydriver.cuGetProcAddress(symbol, &pfn, cudaVersion, cyflags, &symbolStatus) + with nogil: + err = cydriver.cuGetProcAddress(symbol, &pfn, cudaVersion, cyflags, &symbolStatus) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], pfn, CUdriverProcAddressQueryResult(symbolStatus)) @@ -50321,7 +50861,8 @@ def cuCoredumpGetAttribute(attrib not None : CUcoredumpSettings): cdef utils.HelperCUcoredumpSettings cyvalue = utils.HelperCUcoredumpSettings(attrib, 0, is_getter=True) cdef void* cyvalue_ptr = cyvalue.cptr cdef size_t size = cyvalue.size() - err = cydriver.cuCoredumpGetAttribute(cyattrib, cyvalue_ptr, &size) + with nogil: + err = cydriver.cuCoredumpGetAttribute(cyattrib, cyvalue_ptr, &size) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], cyvalue.pyObj()) @@ -50436,7 +50977,8 @@ def cuCoredumpGetAttributeGlobal(attrib not None : CUcoredumpSettings): cdef utils.HelperCUcoredumpSettings cyvalue = utils.HelperCUcoredumpSettings(attrib, 0, is_getter=True) cdef void* cyvalue_ptr = cyvalue.cptr cdef size_t size = cyvalue.size() - err = cydriver.cuCoredumpGetAttributeGlobal(cyattrib, cyvalue_ptr, &size) + with nogil: + err = 
cydriver.cuCoredumpGetAttributeGlobal(cyattrib, cyvalue_ptr, &size) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], cyvalue.pyObj()) @@ -50558,7 +51100,8 @@ def cuCoredumpSetAttribute(attrib not None : CUcoredumpSettings, value): cdef utils.HelperCUcoredumpSettings cyvalue = utils.HelperCUcoredumpSettings(attrib, value, is_getter=False) cdef void* cyvalue_ptr = cyvalue.cptr cdef size_t size = cyvalue.size() - err = cydriver.cuCoredumpSetAttribute(cyattrib, cyvalue_ptr, &size) + with nogil: + err = cydriver.cuCoredumpSetAttribute(cyattrib, cyvalue_ptr, &size) return (_dict_CUresult[err],) {{endif}} @@ -50683,7 +51226,8 @@ def cuCoredumpSetAttributeGlobal(attrib not None : CUcoredumpSettings, value): cdef utils.HelperCUcoredumpSettings cyvalue = utils.HelperCUcoredumpSettings(attrib, value, is_getter=False) cdef void* cyvalue_ptr = cyvalue.cptr cdef size_t size = cyvalue.size() - err = cydriver.cuCoredumpSetAttributeGlobal(cyattrib, cyvalue_ptr, &size) + with nogil: + err = cydriver.cuCoredumpSetAttributeGlobal(cyattrib, cyvalue_ptr, &size) return (_dict_CUresult[err],) {{endif}} @@ -50707,7 +51251,8 @@ def cuGetExportTable(pExportTableId : Optional[CUuuid]): """ cdef void_ptr ppExportTable = 0 cdef cydriver.CUuuid* cypExportTableId_ptr = pExportTableId._pvt_ptr if pExportTableId != None else NULL - err = cydriver.cuGetExportTable(&ppExportTable, cypExportTableId_ptr) + with nogil: + err = cydriver.cuGetExportTable(&ppExportTable, cypExportTableId_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], ppExportTable) @@ -50782,7 +51327,8 @@ def cuGreenCtxCreate(desc, dev, unsigned int flags): pdesc = int(CUdevResourceDesc(desc)) cydesc = pdesc cdef CUgreenCtx phCtx = CUgreenCtx() - err = cydriver.cuGreenCtxCreate(phCtx._pvt_ptr, cydesc, cydev, flags) + with nogil: + err = cydriver.cuGreenCtxCreate(phCtx._pvt_ptr, cydesc, cydev, flags) if err != cydriver.CUDA_SUCCESS: return 
(_dict_CUresult[err], None) return (_dict_CUresult[err], phCtx) @@ -50821,7 +51367,8 @@ def cuGreenCtxDestroy(hCtx): else: phCtx = int(CUgreenCtx(hCtx)) cyhCtx = phCtx - err = cydriver.cuGreenCtxDestroy(cyhCtx) + with nogil: + err = cydriver.cuGreenCtxDestroy(cyhCtx) return (_dict_CUresult[err],) {{endif}} @@ -50867,7 +51414,8 @@ def cuCtxFromGreenCtx(hCtx): phCtx = int(CUgreenCtx(hCtx)) cyhCtx = phCtx cdef CUcontext pContext = CUcontext() - err = cydriver.cuCtxFromGreenCtx(pContext._pvt_ptr, cyhCtx) + with nogil: + err = cydriver.cuCtxFromGreenCtx(pContext._pvt_ptr, cyhCtx) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pContext) @@ -50913,7 +51461,8 @@ def cuDeviceGetDevResource(device, typename not None : CUdevResourceType): cydevice = pdevice cdef CUdevResource resource = CUdevResource() cdef cydriver.CUdevResourceType cytypename = typename.value - err = cydriver.cuDeviceGetDevResource(cydevice, resource._pvt_ptr, cytypename) + with nogil: + err = cydriver.cuDeviceGetDevResource(cydevice, resource._pvt_ptr, cytypename) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], resource) @@ -50956,7 +51505,8 @@ def cuCtxGetDevResource(hCtx, typename not None : CUdevResourceType): cyhCtx = phCtx cdef CUdevResource resource = CUdevResource() cdef cydriver.CUdevResourceType cytypename = typename.value - err = cydriver.cuCtxGetDevResource(cyhCtx, resource._pvt_ptr, cytypename) + with nogil: + err = cydriver.cuCtxGetDevResource(cyhCtx, resource._pvt_ptr, cytypename) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], resource) @@ -50999,7 +51549,8 @@ def cuGreenCtxGetDevResource(hCtx, typename not None : CUdevResourceType): cyhCtx = phCtx cdef CUdevResource resource = CUdevResource() cdef cydriver.CUdevResourceType cytypename = typename.value - err = cydriver.cuGreenCtxGetDevResource(cyhCtx, resource._pvt_ptr, cytypename) + with nogil: 
+ err = cydriver.cuGreenCtxGetDevResource(cyhCtx, resource._pvt_ptr, cytypename) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], resource) @@ -51111,7 +51662,8 @@ def cuDevSmResourceSplitByCount(unsigned int nbGroups, input_ : Optional[CUdevRe cdef unsigned int cynbGroups = nbGroups cdef cydriver.CUdevResource* cyinput__ptr = input_._pvt_ptr if input_ != None else NULL cdef CUdevResource remaining = CUdevResource() - err = cydriver.cuDevSmResourceSplitByCount(cyresult, &cynbGroups, cyinput__ptr, remaining._pvt_ptr, useFlags, minCount) + with nogil: + err = cydriver.cuDevSmResourceSplitByCount(cyresult, &cynbGroups, cyinput__ptr, remaining._pvt_ptr, useFlags, minCount) if CUresult(err) == CUresult(0): for idx in range(nbGroups): string.memcpy((pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cydriver.CUdevResource)) @@ -51171,15 +51723,18 @@ def cuDevResourceGenerateDesc(resources : Optional[Tuple[CUdevResource] | List[C raise TypeError("Argument 'resources' is not instance of type (expected Tuple[cydriver.CUdevResource,] or List[cydriver.CUdevResource,]") cdef CUdevResourceDesc phDesc = CUdevResourceDesc() cdef cydriver.CUdevResource* cyresources = NULL - if len(resources) > 0: + if len(resources) > 1: cyresources = calloc(len(resources), sizeof(cydriver.CUdevResource)) if cyresources is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(resources)) + 'x' + str(sizeof(cydriver.CUdevResource))) for idx in range(len(resources)): string.memcpy(&cyresources[idx], (resources[idx])._pvt_ptr, sizeof(cydriver.CUdevResource)) + elif len(resources) == 1: + cyresources = (resources[0])._pvt_ptr if nbResources > len(resources): raise RuntimeError("List is too small: " + str(len(resources)) + " < " + str(nbResources)) - err = cydriver.cuDevResourceGenerateDesc(phDesc._pvt_ptr, (resources[0])._pvt_ptr if len(resources) == 1 else cyresources, nbResources) - if cyresources is not NULL: + with nogil: + err = 
cydriver.cuDevResourceGenerateDesc(phDesc._pvt_ptr, cyresources, nbResources) + if len(resources) > 1 and cyresources is not NULL: free(cyresources) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) @@ -51236,7 +51791,8 @@ def cuGreenCtxRecordEvent(hCtx, hEvent): else: phCtx = int(CUgreenCtx(hCtx)) cyhCtx = phCtx - err = cydriver.cuGreenCtxRecordEvent(cyhCtx, cyhEvent) + with nogil: + err = cydriver.cuGreenCtxRecordEvent(cyhCtx, cyhEvent) return (_dict_CUresult[err],) {{endif}} @@ -51290,7 +51846,8 @@ def cuGreenCtxWaitEvent(hCtx, hEvent): else: phCtx = int(CUgreenCtx(hCtx)) cyhCtx = phCtx - err = cydriver.cuGreenCtxWaitEvent(cyhCtx, cyhEvent) + with nogil: + err = cydriver.cuGreenCtxWaitEvent(cyhCtx, cyhEvent) return (_dict_CUresult[err],) {{endif}} @@ -51347,7 +51904,8 @@ def cuStreamGetGreenCtx(hStream): phStream = int(CUstream(hStream)) cyhStream = phStream cdef CUgreenCtx phCtx = CUgreenCtx() - err = cydriver.cuStreamGetGreenCtx(cyhStream, phCtx._pvt_ptr) + with nogil: + err = cydriver.cuStreamGetGreenCtx(cyhStream, phCtx._pvt_ptr) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phCtx) @@ -51421,7 +51979,8 @@ def cuGreenCtxStreamCreate(greenCtx, unsigned int flags, int priority): pgreenCtx = int(CUgreenCtx(greenCtx)) cygreenCtx = pgreenCtx cdef CUstream phStream = CUstream() - err = cydriver.cuGreenCtxStreamCreate(phStream._pvt_ptr, cygreenCtx, flags, priority) + with nogil: + err = cydriver.cuGreenCtxStreamCreate(phStream._pvt_ptr, cygreenCtx, flags, priority) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phStream) @@ -51482,7 +52041,6 @@ def cuLogsRegisterCallback(callbackFunc, userData): cdef CUlogsCallbackHandle callback_out = CUlogsCallbackHandle() with nogil: err = cydriver.cuLogsRegisterCallback(cuLogsCallbackWrapper, cbData, callback_out._pvt_ptr) - if err != cydriver.CUDA_SUCCESS: free(cbData) else: @@ -51516,7 +52074,8 @@ def 
cuLogsUnregisterCallback(callback): else: pcallback = int(CUlogsCallbackHandle(callback)) cycallback = pcallback - err = cydriver.cuLogsUnregisterCallback(cycallback) + with nogil: + err = cydriver.cuLogsUnregisterCallback(cycallback) if err == cydriver.CUDA_SUCCESS: free(m_global._allocated[pcallback]) m_global._allocated.erase(pcallback) @@ -51542,7 +52101,8 @@ def cuLogsCurrent(unsigned int flags): Location to store an iterator to the current tail of the logs """ cdef CUlogIterator iterator_out = CUlogIterator() - err = cydriver.cuLogsCurrent(iterator_out._pvt_ptr, flags) + with nogil: + err = cydriver.cuLogsCurrent(iterator_out._pvt_ptr, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], iterator_out) @@ -51582,7 +52142,11 @@ def cuLogsDumpToFile(iterator : Optional[CUlogIterator], char* pathToFile, unsig The driver reserves limited memory for storing logs. The oldest logs may be overwritten and become unrecoverable. An indication will appear in the destination outupt if the logs have been truncated. Call dump after each failed API to mitigate this risk. """ - err = cydriver.cuLogsDumpToFile(iterator._pvt_ptr if iterator != None else NULL, pathToFile, flags) + cdef cydriver.CUlogIterator* cyiterator = NULL + if iterator is not None: + cyiterator = iterator._pvt_ptr + with nogil: + err = cydriver.cuLogsDumpToFile(cyiterator, pathToFile, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], iterator) @@ -51636,7 +52200,11 @@ def cuLogsDumpToMemory(iterator : Optional[CUlogIterator], char* buffer, size_t If the provided value in `*size` is not large enough to hold all buffered messages, a message will be added at the head of the buffer indicating this. The driver then computes the number of messages it is able to store in `buffer` and writes it out. The final message in `buffer` will always be the most recent log message as of when the API is called. 
""" - err = cydriver.cuLogsDumpToMemory(iterator._pvt_ptr if iterator != None else NULL, buffer, &size, flags) + cdef cydriver.CUlogIterator* cyiterator = NULL + if iterator is not None: + cyiterator = iterator._pvt_ptr + with nogil: + err = cydriver.cuLogsDumpToMemory(cyiterator, buffer, &size, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None, None) return (_dict_CUresult[err], iterator, size) @@ -51664,7 +52232,8 @@ def cuCheckpointProcessGetRestoreThreadId(int pid): Returned restore thread ID """ cdef int tid = 0 - err = cydriver.cuCheckpointProcessGetRestoreThreadId(pid, &tid) + with nogil: + err = cydriver.cuCheckpointProcessGetRestoreThreadId(pid, &tid) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], tid) @@ -51692,7 +52261,8 @@ def cuCheckpointProcessGetState(int pid): Returned CUDA process state """ cdef cydriver.CUprocessState state - err = cydriver.cuCheckpointProcessGetState(pid, &state) + with nogil: + err = cydriver.cuCheckpointProcessGetState(pid, &state) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], CUprocessState(state)) @@ -51725,7 +52295,8 @@ def cuCheckpointProcessLock(int pid, args : Optional[CUcheckpointLockArgs]): :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_INVALID_VALUE` :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED` :py:obj:`~.CUDA_ERROR_ILLEGAL_STATE` :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` :py:obj:`~.CUDA_ERROR_NOT_READY` """ cdef cydriver.CUcheckpointLockArgs* cyargs_ptr = args._pvt_ptr if args != None else NULL - err = cydriver.cuCheckpointProcessLock(pid, cyargs_ptr) + with nogil: + err = cydriver.cuCheckpointProcessLock(pid, cyargs_ptr) return (_dict_CUresult[err],) {{endif}} @@ -51755,7 +52326,8 @@ def cuCheckpointProcessCheckpoint(int pid, args : Optional[CUcheckpointCheckpoin :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_INVALID_VALUE` :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED` :py:obj:`~.CUDA_ERROR_ILLEGAL_STATE` 
:py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` """ cdef cydriver.CUcheckpointCheckpointArgs* cyargs_ptr = args._pvt_ptr if args != None else NULL - err = cydriver.cuCheckpointProcessCheckpoint(pid, cyargs_ptr) + with nogil: + err = cydriver.cuCheckpointProcessCheckpoint(pid, cyargs_ptr) return (_dict_CUresult[err],) {{endif}} @@ -51790,7 +52362,8 @@ def cuCheckpointProcessRestore(int pid, args : Optional[CUcheckpointRestoreArgs] :py:obj:`~.cuInit` """ cdef cydriver.CUcheckpointRestoreArgs* cyargs_ptr = args._pvt_ptr if args != None else NULL - err = cydriver.cuCheckpointProcessRestore(pid, cyargs_ptr) + with nogil: + err = cydriver.cuCheckpointProcessRestore(pid, cyargs_ptr) return (_dict_CUresult[err],) {{endif}} @@ -51818,7 +52391,8 @@ def cuCheckpointProcessUnlock(int pid, args : Optional[CUcheckpointUnlockArgs]): :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_INVALID_VALUE` :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED` :py:obj:`~.CUDA_ERROR_ILLEGAL_STATE` :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` """ cdef cydriver.CUcheckpointUnlockArgs* cyargs_ptr = args._pvt_ptr if args != None else NULL - err = cydriver.cuCheckpointProcessUnlock(pid, cyargs_ptr) + with nogil: + err = cydriver.cuCheckpointProcessUnlock(pid, cyargs_ptr) return (_dict_CUresult[err],) {{endif}} @@ -51845,7 +52419,8 @@ def cuProfilerStart(): -------- :py:obj:`~.cuProfilerInitialize`, :py:obj:`~.cuProfilerStop`, :py:obj:`~.cudaProfilerStart` """ - err = cydriver.cuProfilerStart() + with nogil: + err = cydriver.cuProfilerStart() return (_dict_CUresult[err],) {{endif}} @@ -51872,7 +52447,8 @@ def cuProfilerStop(): -------- :py:obj:`~.cuProfilerInitialize`, :py:obj:`~.cuProfilerStart`, :py:obj:`~.cudaProfilerStop` """ - err = cydriver.cuProfilerStop() + with nogil: + err = cydriver.cuProfilerStop() return (_dict_CUresult[err],) {{endif}} @@ -51945,7 +52521,8 @@ def cuGraphicsEGLRegisterImage(image, unsigned int flags): pimage = int(EGLImageKHR(image)) cyimage = pimage cdef CUgraphicsResource pCudaResource = 
CUgraphicsResource() - err = cydriver.cuGraphicsEGLRegisterImage(pCudaResource._pvt_ptr, cyimage, flags) + with nogil: + err = cydriver.cuGraphicsEGLRegisterImage(pCudaResource._pvt_ptr, cyimage, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pCudaResource) @@ -51987,7 +52564,8 @@ def cuEGLStreamConsumerConnect(stream): pstream = int(EGLStreamKHR(stream)) cystream = pstream cdef CUeglStreamConnection conn = CUeglStreamConnection() - err = cydriver.cuEGLStreamConsumerConnect(conn._pvt_ptr, cystream) + with nogil: + err = cydriver.cuEGLStreamConsumerConnect(conn._pvt_ptr, cystream) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], conn) @@ -52033,7 +52611,8 @@ def cuEGLStreamConsumerConnectWithFlags(stream, unsigned int flags): pstream = int(EGLStreamKHR(stream)) cystream = pstream cdef CUeglStreamConnection conn = CUeglStreamConnection() - err = cydriver.cuEGLStreamConsumerConnectWithFlags(conn._pvt_ptr, cystream, flags) + with nogil: + err = cydriver.cuEGLStreamConsumerConnectWithFlags(conn._pvt_ptr, cystream, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], conn) @@ -52071,7 +52650,8 @@ def cuEGLStreamConsumerDisconnect(conn): cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - err = cydriver.cuEGLStreamConsumerDisconnect(cyconn) + with nogil: + err = cydriver.cuEGLStreamConsumerDisconnect(cyconn) return (_dict_CUresult[err],) {{endif}} @@ -52142,7 +52722,8 @@ def cuEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, unsigned int t cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - err = cydriver.cuEGLStreamConsumerAcquireFrame(cyconn, cypCudaResource, cypStream, timeout) + with nogil: + err = cydriver.cuEGLStreamConsumerAcquireFrame(cyconn, cypCudaResource, cypStream, 
timeout) return (_dict_CUresult[err],) {{endif}} @@ -52204,7 +52785,8 @@ def cuEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream): cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - err = cydriver.cuEGLStreamConsumerReleaseFrame(cyconn, cypCudaResource, cypStream) + with nogil: + err = cydriver.cuEGLStreamConsumerReleaseFrame(cyconn, cypCudaResource, cypStream) return (_dict_CUresult[err],) {{endif}} @@ -52264,7 +52846,8 @@ def cuEGLStreamProducerConnect(stream, width, height): pstream = int(EGLStreamKHR(stream)) cystream = pstream cdef CUeglStreamConnection conn = CUeglStreamConnection() - err = cydriver.cuEGLStreamProducerConnect(conn._pvt_ptr, cystream, cywidth, cyheight) + with nogil: + err = cydriver.cuEGLStreamProducerConnect(conn._pvt_ptr, cystream, cywidth, cyheight) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], conn) @@ -52302,7 +52885,8 @@ def cuEGLStreamProducerDisconnect(conn): cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - err = cydriver.cuEGLStreamProducerDisconnect(cyconn) + with nogil: + err = cydriver.cuEGLStreamProducerDisconnect(cyconn) return (_dict_CUresult[err],) {{endif}} @@ -52371,7 +52955,8 @@ def cuEGLStreamProducerPresentFrame(conn, eglframe not None : CUeglFrame, pStrea cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - err = cydriver.cuEGLStreamProducerPresentFrame(cyconn, eglframe._pvt_ptr[0], cypStream) + with nogil: + err = cydriver.cuEGLStreamProducerPresentFrame(cyconn, eglframe._pvt_ptr[0], cypStream) return (_dict_CUresult[err],) {{endif}} @@ -52425,7 +53010,8 @@ def cuEGLStreamProducerReturnFrame(conn, eglframe : Optional[CUeglFrame], pStrea else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) cdef cydriver.CUeglFrame* 
cyeglframe_ptr = eglframe._pvt_ptr if eglframe != None else NULL - err = cydriver.cuEGLStreamProducerReturnFrame(cyconn, cyeglframe_ptr, cypStream) + with nogil: + err = cydriver.cuEGLStreamProducerReturnFrame(cyconn, cyeglframe_ptr, cypStream) return (_dict_CUresult[err],) {{endif}} @@ -52471,7 +53057,8 @@ def cuGraphicsResourceGetMappedEglFrame(resource, unsigned int index, unsigned i presource = int(CUgraphicsResource(resource)) cyresource = presource cdef CUeglFrame eglFrame = CUeglFrame() - err = cydriver.cuGraphicsResourceGetMappedEglFrame(eglFrame._pvt_ptr, cyresource, index, mipLevel) + with nogil: + err = cydriver.cuGraphicsResourceGetMappedEglFrame(eglFrame._pvt_ptr, cyresource, index, mipLevel) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], eglFrame) @@ -52529,7 +53116,8 @@ def cuEventCreateFromEGLSync(eglSync, unsigned int flags): peglSync = int(EGLSyncKHR(eglSync)) cyeglSync = peglSync cdef CUevent phEvent = CUevent() - err = cydriver.cuEventCreateFromEGLSync(phEvent._pvt_ptr, cyeglSync, flags) + with nogil: + err = cydriver.cuEventCreateFromEGLSync(phEvent._pvt_ptr, cyeglSync, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], phEvent) @@ -52585,7 +53173,8 @@ def cuGraphicsGLRegisterBuffer(buffer, unsigned int Flags): pbuffer = int(GLuint(buffer)) cybuffer = pbuffer cdef CUgraphicsResource pCudaResource = CUgraphicsResource() - err = cydriver.cuGraphicsGLRegisterBuffer(pCudaResource._pvt_ptr, cybuffer, Flags) + with nogil: + err = cydriver.cuGraphicsGLRegisterBuffer(pCudaResource._pvt_ptr, cybuffer, Flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pCudaResource) @@ -52684,7 +53273,8 @@ def cuGraphicsGLRegisterImage(image, target, unsigned int Flags): pimage = int(GLuint(image)) cyimage = pimage cdef CUgraphicsResource pCudaResource = CUgraphicsResource() - err = 
cydriver.cuGraphicsGLRegisterImage(pCudaResource._pvt_ptr, cyimage, cytarget, Flags) + with nogil: + err = cydriver.cuGraphicsGLRegisterImage(pCudaResource._pvt_ptr, cyimage, cytarget, Flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pCudaResource) @@ -52748,7 +53338,8 @@ def cuGLGetDevices(unsigned int cudaDeviceCount, deviceList not None : CUGLDevic if cypCudaDevices is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(cudaDeviceCount) + 'x' + str(sizeof(cydriver.CUdevice))) cdef cydriver.CUGLDeviceList cydeviceList = deviceList.value - err = cydriver.cuGLGetDevices(&pCudaDeviceCount, cypCudaDevices, cudaDeviceCount, cydeviceList) + with nogil: + err = cydriver.cuGLGetDevices(&pCudaDeviceCount, cypCudaDevices, cudaDeviceCount, cydeviceList) if CUresult(err) == CUresult(0): pypCudaDevices = [CUdevice(init_value=cypCudaDevices[idx]) for idx in range(cudaDeviceCount)] if cypCudaDevices is not NULL: @@ -52804,7 +53395,8 @@ def cuVDPAUGetDevice(vdpDevice, vdpGetProcAddress): pvdpDevice = int(VdpDevice(vdpDevice)) cyvdpDevice = pvdpDevice cdef CUdevice pDevice = CUdevice() - err = cydriver.cuVDPAUGetDevice(pDevice._pvt_ptr, cyvdpDevice, cyvdpGetProcAddress) + with nogil: + err = cydriver.cuVDPAUGetDevice(pDevice._pvt_ptr, cyvdpDevice, cyvdpGetProcAddress) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pDevice) @@ -52871,7 +53463,8 @@ def cuVDPAUCtxCreate(unsigned int flags, device, vdpDevice, vdpGetProcAddress): pdevice = int(CUdevice(device)) cydevice = pdevice cdef CUcontext pCtx = CUcontext() - err = cydriver.cuVDPAUCtxCreate(pCtx._pvt_ptr, flags, cydevice, cyvdpDevice, cyvdpGetProcAddress) + with nogil: + err = cydriver.cuVDPAUCtxCreate(pCtx._pvt_ptr, flags, cydevice, cyvdpDevice, cyvdpGetProcAddress) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pCtx) @@ -52933,7 +53526,8 @@ def 
cuGraphicsVDPAURegisterVideoSurface(vdpSurface, unsigned int flags): pvdpSurface = int(VdpVideoSurface(vdpSurface)) cyvdpSurface = pvdpSurface cdef CUgraphicsResource pCudaResource = CUgraphicsResource() - err = cydriver.cuGraphicsVDPAURegisterVideoSurface(pCudaResource._pvt_ptr, cyvdpSurface, flags) + with nogil: + err = cydriver.cuGraphicsVDPAURegisterVideoSurface(pCudaResource._pvt_ptr, cyvdpSurface, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pCudaResource) @@ -52995,7 +53589,8 @@ def cuGraphicsVDPAURegisterOutputSurface(vdpSurface, unsigned int flags): pvdpSurface = int(VdpOutputSurface(vdpSurface)) cyvdpSurface = pvdpSurface cdef CUgraphicsResource pCudaResource = CUgraphicsResource() - err = cydriver.cuGraphicsVDPAURegisterOutputSurface(pCudaResource._pvt_ptr, cyvdpSurface, flags) + with nogil: + err = cydriver.cuGraphicsVDPAURegisterOutputSurface(pCudaResource._pvt_ptr, cyvdpSurface, flags) if err != cydriver.CUDA_SUCCESS: return (_dict_CUresult[err], None) return (_dict_CUresult[err], pCudaResource) diff --git a/cuda_bindings/cuda/bindings/nvrtc.pyx.in b/cuda_bindings/cuda/bindings/nvrtc.pyx.in index 2b664cda0..08abcbcf1 100644 --- a/cuda_bindings/cuda/bindings/nvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/nvrtc.pyx.in @@ -34,6 +34,7 @@ ctypedef unsigned long long unsigned_ptr ctypedef unsigned long long unsigned_long_long_ptr ctypedef unsigned long long long_long_ptr ctypedef unsigned long long size_t_ptr +ctypedef unsigned long long long_ptr ctypedef unsigned long long float_ptr ctypedef unsigned long long double_ptr ctypedef unsigned long long void_ptr @@ -133,7 +134,8 @@ def nvrtcGetErrorString(result not None : nvrtcResult): Message string for the given :py:obj:`~.nvrtcResult` code. 
""" cdef cynvrtc.nvrtcResult cyresult = result.value - err = cynvrtc.nvrtcGetErrorString(cyresult) + with nogil: + err = cynvrtc.nvrtcGetErrorString(cyresult) return (nvrtcResult.NVRTC_SUCCESS, err) {{endif}} @@ -155,7 +157,8 @@ def nvrtcVersion(): """ cdef int major = 0 cdef int minor = 0 - err = cynvrtc.nvrtcVersion(&major, &minor) + with nogil: + err = cynvrtc.nvrtcVersion(&major, &minor) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None, None) return (_dict_nvrtcResult[err], major, minor) @@ -178,7 +181,8 @@ def nvrtcGetNumSupportedArchs(): number of supported architectures. """ cdef int numArchs = 0 - err = cynvrtc.nvrtcGetNumSupportedArchs(&numArchs) + with nogil: + err = cynvrtc.nvrtcGetNumSupportedArchs(&numArchs) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], numArchs) @@ -204,7 +208,8 @@ def nvrtcGetSupportedArchs(): _, s = nvrtcGetNumSupportedArchs() supportedArchs.resize(s) - err = cynvrtc.nvrtcGetSupportedArchs(supportedArchs.data()) + with nogil: + err = cynvrtc.nvrtcGetSupportedArchs(supportedArchs.data()) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], supportedArchs) @@ -261,7 +266,8 @@ def nvrtcCreateProgram(char* src, char* name, int numHeaders, headers : Optional if numHeaders > len(includeNames): raise RuntimeError("List is too small: " + str(len(includeNames)) + " < " + str(numHeaders)) cdef vector[const char*] cyheaders = headers cdef vector[const char*] cyincludeNames = includeNames - err = cynvrtc.nvrtcCreateProgram(prog._pvt_ptr, src, name, numHeaders, cyheaders.data(), cyincludeNames.data()) + with nogil: + err = cynvrtc.nvrtcCreateProgram(prog._pvt_ptr, src, name, numHeaders, cyheaders.data(), cyincludeNames.data()) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], prog) @@ -298,7 +304,8 @@ def nvrtcDestroyProgram(prog): cyprog = prog else: raise 
TypeError("Argument 'prog' is not instance of type (expected , found " + str(type(prog))) - err = cynvrtc.nvrtcDestroyProgram(cyprog) + with nogil: + err = cynvrtc.nvrtcDestroyProgram(cyprog) return (_dict_nvrtcResult[err],) {{endif}} @@ -347,7 +354,8 @@ def nvrtcCompileProgram(prog, int numOptions, options : Optional[Tuple[bytes] | cyprog = pprog if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) cdef vector[const char*] cyoptions = options - err = cynvrtc.nvrtcCompileProgram(cyprog, numOptions, cyoptions.data()) + with nogil: + err = cynvrtc.nvrtcCompileProgram(cyprog, numOptions, cyoptions.data()) return (_dict_nvrtcResult[err],) {{endif}} @@ -384,7 +392,8 @@ def nvrtcGetPTXSize(prog): pprog = int(nvrtcProgram(prog)) cyprog = pprog cdef size_t ptxSizeRet = 0 - err = cynvrtc.nvrtcGetPTXSize(cyprog, &ptxSizeRet) + with nogil: + err = cynvrtc.nvrtcGetPTXSize(cyprog, &ptxSizeRet) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], ptxSizeRet) @@ -422,7 +431,8 @@ def nvrtcGetPTX(prog, char* ptx): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - err = cynvrtc.nvrtcGetPTX(cyprog, ptx) + with nogil: + err = cynvrtc.nvrtcGetPTX(cyprog, ptx) return (_dict_nvrtcResult[err],) {{endif}} @@ -459,7 +469,8 @@ def nvrtcGetCUBINSize(prog): pprog = int(nvrtcProgram(prog)) cyprog = pprog cdef size_t cubinSizeRet = 0 - err = cynvrtc.nvrtcGetCUBINSize(cyprog, &cubinSizeRet) + with nogil: + err = cynvrtc.nvrtcGetCUBINSize(cyprog, &cubinSizeRet) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], cubinSizeRet) @@ -497,7 +508,8 @@ def nvrtcGetCUBIN(prog, char* cubin): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - err = cynvrtc.nvrtcGetCUBIN(cyprog, cubin) + with nogil: + err = cynvrtc.nvrtcGetCUBIN(cyprog, cubin) return (_dict_nvrtcResult[err],) {{endif}} @@ -528,7 +540,8 @@ def nvrtcGetNVVMSize(prog): 
pprog = int(nvrtcProgram(prog)) cyprog = pprog cdef size_t nvvmSizeRet = 0 - err = cynvrtc.nvrtcGetNVVMSize(cyprog, &nvvmSizeRet) + with nogil: + err = cynvrtc.nvrtcGetNVVMSize(cyprog, &nvvmSizeRet) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], nvvmSizeRet) @@ -560,7 +573,8 @@ def nvrtcGetNVVM(prog, char* nvvm): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - err = cynvrtc.nvrtcGetNVVM(cyprog, nvvm) + with nogil: + err = cynvrtc.nvrtcGetNVVM(cyprog, nvvm) return (_dict_nvrtcResult[err],) {{endif}} @@ -597,7 +611,8 @@ def nvrtcGetLTOIRSize(prog): pprog = int(nvrtcProgram(prog)) cyprog = pprog cdef size_t LTOIRSizeRet = 0 - err = cynvrtc.nvrtcGetLTOIRSize(cyprog, &LTOIRSizeRet) + with nogil: + err = cynvrtc.nvrtcGetLTOIRSize(cyprog, &LTOIRSizeRet) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], LTOIRSizeRet) @@ -635,7 +650,8 @@ def nvrtcGetLTOIR(prog, char* LTOIR): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - err = cynvrtc.nvrtcGetLTOIR(cyprog, LTOIR) + with nogil: + err = cynvrtc.nvrtcGetLTOIR(cyprog, LTOIR) return (_dict_nvrtcResult[err],) {{endif}} @@ -672,7 +688,8 @@ def nvrtcGetOptiXIRSize(prog): pprog = int(nvrtcProgram(prog)) cyprog = pprog cdef size_t optixirSizeRet = 0 - err = cynvrtc.nvrtcGetOptiXIRSize(cyprog, &optixirSizeRet) + with nogil: + err = cynvrtc.nvrtcGetOptiXIRSize(cyprog, &optixirSizeRet) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], optixirSizeRet) @@ -710,7 +727,8 @@ def nvrtcGetOptiXIR(prog, char* optixir): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - err = cynvrtc.nvrtcGetOptiXIR(cyprog, optixir) + with nogil: + err = cynvrtc.nvrtcGetOptiXIR(cyprog, optixir) return (_dict_nvrtcResult[err],) {{endif}} @@ -750,7 +768,8 @@ def nvrtcGetProgramLogSize(prog): pprog = int(nvrtcProgram(prog)) cyprog = pprog cdef size_t logSizeRet = 0 - err =
cynvrtc.nvrtcGetProgramLogSize(cyprog, &logSizeRet) + with nogil: + err = cynvrtc.nvrtcGetProgramLogSize(cyprog, &logSizeRet) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], logSizeRet) @@ -788,7 +807,8 @@ def nvrtcGetProgramLog(prog, char* log): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - err = cynvrtc.nvrtcGetProgramLog(cyprog, log) + with nogil: + err = cynvrtc.nvrtcGetProgramLog(cyprog, log) return (_dict_nvrtcResult[err],) {{endif}} @@ -829,7 +849,8 @@ def nvrtcAddNameExpression(prog, char* name_expression): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - err = cynvrtc.nvrtcAddNameExpression(cyprog, name_expression) + with nogil: + err = cynvrtc.nvrtcAddNameExpression(cyprog, name_expression) return (_dict_nvrtcResult[err],) {{endif}} @@ -871,7 +892,8 @@ def nvrtcGetLoweredName(prog, char* name_expression): pprog = int(nvrtcProgram(prog)) cyprog = pprog cdef const char* lowered_name = NULL - err = cynvrtc.nvrtcGetLoweredName(cyprog, name_expression, &lowered_name) + with nogil: + err = cynvrtc.nvrtcGetLoweredName(cyprog, name_expression, &lowered_name) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], lowered_name if lowered_name != NULL else None) @@ -892,7 +914,8 @@ def nvrtcGetPCHHeapSize(): pointer to location where the size of the PCH Heap will be stored """ cdef size_t ret = 0 - err = cynvrtc.nvrtcGetPCHHeapSize(&ret) + with nogil: + err = cynvrtc.nvrtcGetPCHHeapSize(&ret) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], ret) @@ -918,7 +941,8 @@ def nvrtcSetPCHHeapSize(size_t size): nvrtcResult - :py:obj:`~.NVRTC_SUCCESS` """ - err = cynvrtc.nvrtcSetPCHHeapSize(size) + with nogil: + err = cynvrtc.nvrtcSetPCHHeapSize(size) return (_dict_nvrtcResult[err],) {{endif}} @@ -965,7 +989,8 @@ def nvrtcGetPCHCreateStatus(prog): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - err = 
cynvrtc.nvrtcGetPCHCreateStatus(cyprog) + with nogil: + err = cynvrtc.nvrtcGetPCHCreateStatus(cyprog) return (_dict_nvrtcResult[err],) {{endif}} @@ -999,7 +1024,8 @@ def nvrtcGetPCHHeapSizeRequired(prog): pprog = int(nvrtcProgram(prog)) cyprog = pprog cdef size_t size = 0 - err = cynvrtc.nvrtcGetPCHHeapSizeRequired(cyprog, &size) + with nogil: + err = cynvrtc.nvrtcGetPCHHeapSizeRequired(cyprog, &size) if err != cynvrtc.NVRTC_SUCCESS: return (_dict_nvrtcResult[err], None) return (_dict_nvrtcResult[err], size) @@ -1061,7 +1087,8 @@ def nvrtcSetFlowCallback(prog, callback, payload): cdef void* cycallback_ptr = cycallback.cptr cypayload = utils.HelperInputVoidPtr(payload) cdef void* cypayload_ptr = cypayload.cptr - err = cynvrtc.nvrtcSetFlowCallback(cyprog, cycallback_ptr, cypayload_ptr) + with nogil: + err = cynvrtc.nvrtcSetFlowCallback(cyprog, cycallback_ptr, cypayload_ptr) return (_dict_nvrtcResult[err],) {{endif}} diff --git a/cuda_bindings/cuda/bindings/runtime.pyx.in b/cuda_bindings/cuda/bindings/runtime.pyx.in index d2545ec64..426664570 100644 --- a/cuda_bindings/cuda/bindings/runtime.pyx.in +++ b/cuda_bindings/cuda/bindings/runtime.pyx.in @@ -36,6 +36,7 @@ ctypedef unsigned long long unsigned_ptr ctypedef unsigned long long unsigned_long_long_ptr ctypedef unsigned long long long_long_ptr ctypedef unsigned long long size_t_ptr +ctypedef unsigned long long long_ptr ctypedef unsigned long long float_ptr ctypedef unsigned long long double_ptr ctypedef unsigned long long void_ptr @@ -17959,7 +17960,8 @@ def cudaDeviceReset(): If a non-primary :py:obj:`~.CUcontext` is current to the thread, :py:obj:`~.cudaDeviceReset()` will destroy only the internal CUDA RT state for that :py:obj:`~.CUcontext`. 
""" - err = cyruntime.cudaDeviceReset() + with nogil: + err = cyruntime.cudaDeviceReset() return (_dict_cudaError_t[err],) {{endif}} @@ -17987,7 +17989,6 @@ def cudaDeviceSynchronize(): """ with nogil: err = cyruntime.cudaDeviceSynchronize() - return (_dict_cudaError_t[err],) {{endif}} @@ -18089,7 +18090,8 @@ def cudaDeviceSetLimit(limit not None : cudaLimit, size_t value): :py:obj:`~.cudaDeviceGetLimit`, :py:obj:`~.cuCtxSetLimit` """ cdef cyruntime.cudaLimit cylimit = limit.value - err = cyruntime.cudaDeviceSetLimit(cylimit, value) + with nogil: + err = cyruntime.cudaDeviceSetLimit(cylimit, value) return (_dict_cudaError_t[err],) {{endif}} @@ -18146,7 +18148,8 @@ def cudaDeviceGetLimit(limit not None : cudaLimit): """ cdef size_t pValue = 0 cdef cyruntime.cudaLimit cylimit = limit.value - err = cyruntime.cudaDeviceGetLimit(&pValue, cylimit) + with nogil: + err = cyruntime.cudaDeviceGetLimit(&pValue, cylimit) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pValue) @@ -18183,7 +18186,8 @@ def cudaDeviceGetTexture1DLinearMaxWidth(fmtDesc : Optional[cudaChannelFormatDes """ cdef size_t maxWidthInElements = 0 cdef cyruntime.cudaChannelFormatDesc* cyfmtDesc_ptr = fmtDesc._pvt_ptr if fmtDesc != None else NULL - err = cyruntime.cudaDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, cyfmtDesc_ptr, device) + with nogil: + err = cyruntime.cudaDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, cyfmtDesc_ptr, device) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], maxWidthInElements) @@ -18232,7 +18236,8 @@ def cudaDeviceGetCacheConfig(): :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cuCtxGetCacheConfig` """ cdef cyruntime.cudaFuncCache pCacheConfig - err = cyruntime.cudaDeviceGetCacheConfig(&pCacheConfig) + with nogil: + err = cyruntime.cudaDeviceGetCacheConfig(&pCacheConfig) if err 
!= cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], cudaFuncCache(pCacheConfig)) @@ -18278,7 +18283,8 @@ def cudaDeviceGetStreamPriorityRange(): """ cdef int leastPriority = 0 cdef int greatestPriority = 0 - err = cyruntime.cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority) + with nogil: + err = cyruntime.cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None, None) return (_dict_cudaError_t[err], leastPriority, greatestPriority) @@ -18337,7 +18343,8 @@ def cudaDeviceSetCacheConfig(cacheConfig not None : cudaFuncCache): :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cuCtxSetCacheConfig` """ cdef cyruntime.cudaFuncCache cycacheConfig = cacheConfig.value - err = cyruntime.cudaDeviceSetCacheConfig(cycacheConfig) + with nogil: + err = cyruntime.cudaDeviceSetCacheConfig(cycacheConfig) return (_dict_cudaError_t[err],) {{endif}} @@ -18369,7 +18376,8 @@ def cudaDeviceGetByPCIBusId(char* pciBusId): :py:obj:`~.cudaDeviceGetPCIBusId`, :py:obj:`~.cuDeviceGetByPCIBusId` """ cdef int device = 0 - err = cyruntime.cudaDeviceGetByPCIBusId(&device, pciBusId) + with nogil: + err = cyruntime.cudaDeviceGetByPCIBusId(&device, pciBusId) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], device) @@ -18409,7 +18417,8 @@ def cudaDeviceGetPCIBusId(int length, int device): """ pypciBusId = b" " * length cdef char* pciBusId = pypciBusId - err = cyruntime.cudaDeviceGetPCIBusId(pciBusId, length, device) + with nogil: + err = cyruntime.cudaDeviceGetPCIBusId(pciBusId, length, device) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pypciBusId) @@ -18469,7 +18478,8 @@ def cudaIpcGetEventHandle(event): pevent = int(cudaEvent_t(event)) cyevent = pevent cdef 
cudaIpcEventHandle_t handle = cudaIpcEventHandle_t() - err = cyruntime.cudaIpcGetEventHandle(handle._pvt_ptr, cyevent) + with nogil: + err = cyruntime.cudaIpcGetEventHandle(handle._pvt_ptr, cyevent) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], handle) @@ -18515,7 +18525,8 @@ def cudaIpcOpenEventHandle(handle not None : cudaIpcEventHandle_t): :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcOpenEventHandle` """ cdef cudaEvent_t event = cudaEvent_t() - err = cyruntime.cudaIpcOpenEventHandle(event._pvt_ptr, handle._pvt_ptr[0]) + with nogil: + err = cyruntime.cudaIpcOpenEventHandle(event._pvt_ptr, handle._pvt_ptr[0]) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], event) @@ -18564,7 +18575,8 @@ def cudaIpcGetMemHandle(devPtr): cdef cudaIpcMemHandle_t handle = cudaIpcMemHandle_t() cydevPtr = utils.HelperInputVoidPtr(devPtr) cdef void* cydevPtr_ptr = cydevPtr.cptr - err = cyruntime.cudaIpcGetMemHandle(handle._pvt_ptr, cydevPtr_ptr) + with nogil: + err = cyruntime.cudaIpcGetMemHandle(handle._pvt_ptr, cydevPtr_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], handle) @@ -18636,7 +18648,8 @@ def cudaIpcOpenMemHandle(handle not None : cudaIpcMemHandle_t, unsigned int flag In particular, multiple processes may not receive the same address for the same `handle`. 
""" cdef void_ptr devPtr = 0 - err = cyruntime.cudaIpcOpenMemHandle(&devPtr, handle._pvt_ptr[0], flags) + with nogil: + err = cyruntime.cudaIpcOpenMemHandle(&devPtr, handle._pvt_ptr[0], flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], devPtr) @@ -18680,7 +18693,8 @@ def cudaIpcCloseMemHandle(devPtr): """ cydevPtr = utils.HelperInputVoidPtr(devPtr) cdef void* cydevPtr_ptr = cydevPtr.cptr - err = cyruntime.cudaIpcCloseMemHandle(cydevPtr_ptr) + with nogil: + err = cyruntime.cudaIpcCloseMemHandle(cydevPtr_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -18722,7 +18736,8 @@ def cudaDeviceFlushGPUDirectRDMAWrites(target not None : cudaFlushGPUDirectRDMAW """ cdef cyruntime.cudaFlushGPUDirectRDMAWritesTarget cytarget = target.value cdef cyruntime.cudaFlushGPUDirectRDMAWritesScope cyscope = scope.value - err = cyruntime.cudaDeviceFlushGPUDirectRDMAWrites(cytarget, cyscope) + with nogil: + err = cyruntime.cudaDeviceFlushGPUDirectRDMAWrites(cytarget, cyscope) return (_dict_cudaError_t[err],) {{endif}} @@ -18804,7 +18819,6 @@ def cudaDeviceRegisterAsyncNotification(int device, callbackFunc, userData): cdef cudaAsyncCallbackHandle_t callback = cudaAsyncCallbackHandle_t() with nogil: err = cyruntime.cudaDeviceRegisterAsyncNotification(device, cudaAsyncNotificationCallbackWrapper, cbData, callback._pvt_ptr) - if err != cyruntime.cudaSuccess: free(cbData) else: @@ -18848,7 +18862,8 @@ def cudaDeviceUnregisterAsyncNotification(int device, callback): else: pcallback = int(cudaAsyncCallbackHandle_t(callback)) cycallback = pcallback - err = cyruntime.cudaDeviceUnregisterAsyncNotification(device, cycallback) + with nogil: + err = cyruntime.cudaDeviceUnregisterAsyncNotification(device, cycallback) if err == cyruntime.cudaSuccess: free(m_global._allocated[pcallback]) m_global._allocated.erase(pcallback) @@ -18892,7 +18907,8 @@ def cudaDeviceGetSharedMemConfig(): :py:obj:`~.cudaDeviceSetCacheConfig`, 
:py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaDeviceSetSharedMemConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuCtxGetSharedMemConfig` """ cdef cyruntime.cudaSharedMemConfig pConfig - err = cyruntime.cudaDeviceGetSharedMemConfig(&pConfig) + with nogil: + err = cyruntime.cudaDeviceGetSharedMemConfig(&pConfig) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], cudaSharedMemConfig(pConfig)) @@ -18950,7 +18966,8 @@ def cudaDeviceSetSharedMemConfig(config not None : cudaSharedMemConfig): :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaDeviceGetSharedMemConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuCtxSetSharedMemConfig` """ cdef cyruntime.cudaSharedMemConfig cyconfig = config.value - err = cyruntime.cudaDeviceSetSharedMemConfig(cyconfig) + with nogil: + err = cyruntime.cudaDeviceSetSharedMemConfig(cyconfig) return (_dict_cudaError_t[err],) {{endif}} @@ -18977,7 +18994,8 @@ def cudaGetLastError(): -------- :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaError` """ - err = cyruntime.cudaGetLastError() + with nogil: + err = cyruntime.cudaGetLastError() return (_dict_cudaError_t[err],) {{endif}} @@ -19005,7 +19023,8 @@ def cudaPeekAtLastError(): -------- :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaError` """ - err = cyruntime.cudaPeekAtLastError() + with nogil: + err = cyruntime.cudaPeekAtLastError() return (_dict_cudaError_t[err],) {{endif}} @@ -19036,7 +19055,8 @@ def cudaGetErrorName(error not None : cudaError_t): :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaError`, :py:obj:`~.cuGetErrorName` """ cdef cyruntime.cudaError_t cyerror = error.value - err = cyruntime.cudaGetErrorName(cyerror) + with nogil: + err = cyruntime.cudaGetErrorName(cyerror) return 
(cudaError_t.cudaSuccess, err) {{endif}} @@ -19066,7 +19086,8 @@ def cudaGetErrorString(error not None : cudaError_t): :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaError`, :py:obj:`~.cuGetErrorString` """ cdef cyruntime.cudaError_t cyerror = error.value - err = cyruntime.cudaGetErrorString(cyerror) + with nogil: + err = cyruntime.cudaGetErrorString(cyerror) return (cudaError_t.cudaSuccess, err) {{endif}} @@ -19092,7 +19113,8 @@ def cudaGetDeviceCount(): :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetCount` """ cdef int count = 0 - err = cyruntime.cudaGetDeviceCount(&count) + with nogil: + err = cyruntime.cudaGetDeviceCount(&count) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], count) @@ -19420,7 +19442,8 @@ def cudaGetDeviceProperties(int device): None """ cdef cudaDeviceProp prop = cudaDeviceProp() - err = cyruntime.cudaGetDeviceProperties(prop._pvt_ptr, device) + with nogil: + err = cyruntime.cudaGetDeviceProperties(prop._pvt_ptr, device) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], prop) @@ -19798,7 +19821,8 @@ def cudaDeviceGetAttribute(attr not None : cudaDeviceAttr, int device): """ cdef int value = 0 cdef cyruntime.cudaDeviceAttr cyattr = attr.value - err = cyruntime.cudaDeviceGetAttribute(&value, cyattr, device) + with nogil: + err = cyruntime.cudaDeviceGetAttribute(&value, cyattr, device) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], value) @@ -19832,7 +19856,6 @@ def cudaDeviceGetDefaultMemPool(int device): cdef cudaMemPool_t memPool = cudaMemPool_t() with nogil: err = cyruntime.cudaDeviceGetDefaultMemPool(memPool._pvt_ptr, device) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) 
return (_dict_cudaError_t[err], memPool) @@ -19880,7 +19903,6 @@ def cudaDeviceSetMemPool(int device, memPool): cymemPool = pmemPool with nogil: err = cyruntime.cudaDeviceSetMemPool(device, cymemPool) - return (_dict_cudaError_t[err],) {{endif}} @@ -19916,7 +19938,6 @@ def cudaDeviceGetMemPool(int device): cdef cudaMemPool_t memPool = cudaMemPool_t() with nogil: err = cyruntime.cudaDeviceGetMemPool(memPool._pvt_ptr, device) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], memPool) @@ -20003,7 +20024,8 @@ def cudaDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, int device, int flags): """ cynvSciSyncAttrList = utils.HelperInputVoidPtr(nvSciSyncAttrList) cdef void* cynvSciSyncAttrList_ptr = cynvSciSyncAttrList.cptr - err = cyruntime.cudaDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList_ptr, device, flags) + with nogil: + err = cyruntime.cudaDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList_ptr, device, flags) return (_dict_cudaError_t[err],) {{endif}} @@ -20060,7 +20082,8 @@ def cudaDeviceGetP2PAttribute(attr not None : cudaDeviceP2PAttr, int srcDevice, """ cdef int value = 0 cdef cyruntime.cudaDeviceP2PAttr cyattr = attr.value - err = cyruntime.cudaDeviceGetP2PAttribute(&value, cyattr, srcDevice, dstDevice) + with nogil: + err = cyruntime.cudaDeviceGetP2PAttribute(&value, cyattr, srcDevice, dstDevice) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], value) @@ -20093,7 +20116,8 @@ def cudaChooseDevice(prop : Optional[cudaDeviceProp]): """ cdef int device = 0 cdef cyruntime.cudaDeviceProp* cyprop_ptr = prop._pvt_ptr if prop != None else NULL - err = cyruntime.cudaChooseDevice(&device, cyprop_ptr) + with nogil: + err = cyruntime.cudaChooseDevice(&device, cyprop_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], device) @@ -20137,7 +20161,8 @@ def cudaInitDevice(int device, unsigned int deviceFlags, unsigned 
int flags): -------- :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaSetDevice` :py:obj:`~.cuCtxSetCurrent` """ - err = cyruntime.cudaInitDevice(device, deviceFlags, flags) + with nogil: + err = cyruntime.cudaInitDevice(device, deviceFlags, flags) return (_dict_cudaError_t[err],) {{endif}} @@ -20192,7 +20217,8 @@ def cudaSetDevice(int device): -------- :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuCtxSetCurrent` """ - err = cyruntime.cudaSetDevice(device) + with nogil: + err = cyruntime.cudaSetDevice(device) return (_dict_cudaError_t[err],) {{endif}} @@ -20217,7 +20243,8 @@ def cudaGetDevice(): :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cuCtxGetCurrent` """ cdef int device = 0 - err = cyruntime.cudaGetDevice(&device) + with nogil: + err = cyruntime.cudaGetDevice(&device) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], device) @@ -20305,7 +20332,8 @@ def cudaSetDeviceFlags(unsigned int flags): -------- :py:obj:`~.cudaGetDeviceFlags`, :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaSetValidDevices`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cuDevicePrimaryCtxSetFlags` """ - err = cyruntime.cudaSetDeviceFlags(flags) + with nogil: + err = cyruntime.cudaSetDeviceFlags(flags) return (_dict_cudaError_t[err],) {{endif}} @@ -20352,7 +20380,8 @@ def cudaGetDeviceFlags(): :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaSetDeviceFlags`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuDevicePrimaryCtxGetState` """ cdef unsigned 
int flags = 0 - err = cyruntime.cudaGetDeviceFlags(&flags) + with nogil: + err = cyruntime.cudaGetDeviceFlags(&flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], flags) @@ -20381,7 +20410,8 @@ def cudaStreamCreate(): :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreate` """ cdef cudaStream_t pStream = cudaStream_t() - err = cyruntime.cudaStreamCreate(pStream._pvt_ptr) + with nogil: + err = cyruntime.cudaStreamCreate(pStream._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pStream) @@ -20424,7 +20454,8 @@ def cudaStreamCreateWithFlags(unsigned int flags): :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreate` """ cdef cudaStream_t pStream = cudaStream_t() - err = cyruntime.cudaStreamCreateWithFlags(pStream._pvt_ptr, flags) + with nogil: + err = cyruntime.cudaStreamCreateWithFlags(pStream._pvt_ptr, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pStream) @@ -20484,7 +20515,8 @@ def cudaStreamCreateWithPriority(unsigned int flags, int priority): In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations. 
""" cdef cudaStream_t pStream = cudaStream_t() - err = cyruntime.cudaStreamCreateWithPriority(pStream._pvt_ptr, flags, priority) + with nogil: + err = cyruntime.cudaStreamCreateWithPriority(pStream._pvt_ptr, flags, priority) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pStream) @@ -20529,7 +20561,8 @@ def cudaStreamGetPriority(hStream): phStream = int(cudaStream_t(hStream)) cyhStream = phStream cdef int priority = 0 - err = cyruntime.cudaStreamGetPriority(cyhStream, &priority) + with nogil: + err = cyruntime.cudaStreamGetPriority(cyhStream, &priority) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], priority) @@ -20570,7 +20603,8 @@ def cudaStreamGetFlags(hStream): phStream = int(cudaStream_t(hStream)) cyhStream = phStream cdef unsigned int flags = 0 - err = cyruntime.cudaStreamGetFlags(cyhStream, &flags) + with nogil: + err = cyruntime.cudaStreamGetFlags(cyhStream, &flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], flags) @@ -20625,7 +20659,8 @@ def cudaStreamGetId(hStream): phStream = int(cudaStream_t(hStream)) cyhStream = phStream cdef unsigned long long streamId = 0 - err = cyruntime.cudaStreamGetId(cyhStream, &streamId) + with nogil: + err = cyruntime.cudaStreamGetId(cyhStream, &streamId) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], streamId) @@ -20664,7 +20699,8 @@ def cudaStreamGetDevice(hStream): phStream = int(cudaStream_t(hStream)) cyhStream = phStream cdef int device = 0 - err = cyruntime.cudaStreamGetDevice(cyhStream, &device) + with nogil: + err = cyruntime.cudaStreamGetDevice(cyhStream, &device) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], device) @@ -20688,7 +20724,8 @@ def cudaCtxResetPersistingL2Cache(): -------- :py:obj:`~.cudaAccessPolicyWindow` """ - err = 
cyruntime.cudaCtxResetPersistingL2Cache() + with nogil: + err = cyruntime.cudaCtxResetPersistingL2Cache() return (_dict_cudaError_t[err],) {{endif}} @@ -20733,7 +20770,8 @@ def cudaStreamCopyAttributes(dst, src): else: pdst = int(cudaStream_t(dst)) cydst = pdst - err = cyruntime.cudaStreamCopyAttributes(cydst, cysrc) + with nogil: + err = cyruntime.cudaStreamCopyAttributes(cydst, cysrc) return (_dict_cudaError_t[err],) {{endif}} @@ -20774,7 +20812,8 @@ def cudaStreamGetAttribute(hStream, attr not None : cudaStreamAttrID): cyhStream = phStream cdef cyruntime.cudaStreamAttrID cyattr = attr.value cdef cudaStreamAttrValue value_out = cudaStreamAttrValue() - err = cyruntime.cudaStreamGetAttribute(cyhStream, cyattr, value_out._pvt_ptr) + with nogil: + err = cyruntime.cudaStreamGetAttribute(cyhStream, cyattr, value_out._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], value_out) @@ -20818,7 +20857,8 @@ def cudaStreamSetAttribute(hStream, attr not None : cudaStreamAttrID, value : Op cyhStream = phStream cdef cyruntime.cudaStreamAttrID cyattr = attr.value cdef cyruntime.cudaStreamAttrValue* cyvalue_ptr = value._pvt_ptr if value != None else NULL - err = cyruntime.cudaStreamSetAttribute(cyhStream, cyattr, cyvalue_ptr) + with nogil: + err = cyruntime.cudaStreamSetAttribute(cyhStream, cyattr, cyvalue_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -20857,7 +20897,8 @@ def cudaStreamDestroy(stream): else: pstream = int(cudaStream_t(stream)) cystream = pstream - err = cyruntime.cudaStreamDestroy(cystream) + with nogil: + err = cyruntime.cudaStreamDestroy(cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -20916,7 +20957,6 @@ def cudaStreamWaitEvent(stream, event, unsigned int flags): cystream = pstream with nogil: err = cyruntime.cudaStreamWaitEvent(cystream, cyevent, flags) - return (_dict_cudaError_t[err],) {{endif}} @@ -21032,7 +21072,6 @@ def cudaStreamAddCallback(stream, callback, userData, unsigned int 
flags): with nogil: err = cyruntime.cudaStreamAddCallback(cystream, cudaStreamRtCallbackWrapper, cbData, flags) - if err != cyruntime.cudaSuccess: free(cbData) return (_dict_cudaError_t[err],) @@ -21073,7 +21112,6 @@ def cudaStreamSynchronize(stream): cystream = pstream with nogil: err = cyruntime.cudaStreamSynchronize(cystream) - return (_dict_cudaError_t[err],) {{endif}} @@ -21112,7 +21150,8 @@ def cudaStreamQuery(stream): else: pstream = int(cudaStream_t(stream)) cystream = pstream - err = cyruntime.cudaStreamQuery(cystream) + with nogil: + err = cyruntime.cudaStreamQuery(cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -21222,7 +21261,8 @@ def cudaStreamAttachMemAsync(stream, devPtr, size_t length, unsigned int flags): cystream = pstream cydevPtr = utils.HelperInputVoidPtr(devPtr) cdef void* cydevPtr_ptr = cydevPtr.cptr - err = cyruntime.cudaStreamAttachMemAsync(cystream, cydevPtr_ptr, length, flags) + with nogil: + err = cyruntime.cudaStreamAttachMemAsync(cystream, cydevPtr_ptr, length, flags) return (_dict_cudaError_t[err],) {{endif}} @@ -21278,7 +21318,8 @@ def cudaStreamBeginCapture(stream, mode not None : cudaStreamCaptureMode): pstream = int(cudaStream_t(stream)) cystream = pstream cdef cyruntime.cudaStreamCaptureMode cymode = mode.value - err = cyruntime.cudaStreamBeginCapture(cystream, cymode) + with nogil: + err = cyruntime.cudaStreamBeginCapture(cystream, cymode) return (_dict_cudaError_t[err],) {{endif}} @@ -21358,26 +21399,31 @@ def cudaStreamBeginCaptureToGraph(stream, graph, dependencies : Optional[Tuple[c pstream = int(cudaStream_t(stream)) cystream = pstream cdef cyruntime.cudaGraphNode_t* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in 
range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 0: + if len(dependencyData) > 1: cydependencyData = calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData)) if cydependencyData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) for idx in range(len(dependencyData)): string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) cdef cyruntime.cudaStreamCaptureMode cymode = mode.value - err = cyruntime.cudaStreamBeginCaptureToGraph(cystream, cygraph, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, (dependencyData[0])._pvt_ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, cymode) - if cydependencies is not NULL: + with nogil: + err = cyruntime.cudaStreamBeginCaptureToGraph(cystream, cygraph, cydependencies, cydependencyData, numDependencies, cymode) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) - if cydependencyData is not NULL: + if len(dependencyData) > 1 and cydependencyData is not NULL: free(cydependencyData) return (_dict_cudaError_t[err],) {{endif}} @@ -21449,7 +21495,8 @@ def cudaThreadExchangeStreamCaptureMode(mode not None : cudaStreamCaptureMode): :py:obj:`~.cudaStreamBeginCapture` """ cdef cyruntime.cudaStreamCaptureMode cymode = mode.value - err = cyruntime.cudaThreadExchangeStreamCaptureMode(&cymode) + with nogil: + err = cyruntime.cudaThreadExchangeStreamCaptureMode(&cymode) if err != cyruntime.cudaSuccess: return 
(_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], cudaStreamCaptureMode(cymode)) @@ -21496,7 +21543,8 @@ def cudaStreamEndCapture(stream): pstream = int(cudaStream_t(stream)) cystream = pstream cdef cudaGraph_t pGraph = cudaGraph_t() - err = cyruntime.cudaStreamEndCapture(cystream, pGraph._pvt_ptr) + with nogil: + err = cyruntime.cudaStreamEndCapture(cystream, pGraph._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pGraph) @@ -21557,7 +21605,8 @@ def cudaStreamIsCapturing(stream): pstream = int(cudaStream_t(stream)) cystream = pstream cdef cyruntime.cudaStreamCaptureStatus pCaptureStatus - err = cyruntime.cudaStreamIsCapturing(cystream, &pCaptureStatus) + with nogil: + err = cyruntime.cudaStreamIsCapturing(cystream, &pCaptureStatus) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], cudaStreamCaptureStatus(pCaptureStatus)) @@ -21638,7 +21687,8 @@ def cudaStreamGetCaptureInfo(stream): cdef const cyruntime.cudaGraphNode_t* cydependencies_out = NULL pydependencies_out = [] cdef size_t numDependencies_out = 0 - err = cyruntime.cudaStreamGetCaptureInfo(cystream, &captureStatus_out, &id_out, graph_out._pvt_ptr, &cydependencies_out, &numDependencies_out) + with nogil: + err = cyruntime.cudaStreamGetCaptureInfo(cystream, &captureStatus_out, &id_out, graph_out._pvt_ptr, &cydependencies_out, &numDependencies_out) if cudaError_t(err) == cudaError_t(0): pydependencies_out = [cudaGraphNode_t(init_value=cydependencies_out[idx]) for idx in range(numDependencies_out)] if err != cyruntime.cudaSuccess: @@ -21735,7 +21785,8 @@ def cudaStreamGetCaptureInfo_v3(stream): cdef const cyruntime.cudaGraphEdgeData* cyedgeData_out = NULL pyedgeData_out = [] cdef size_t numDependencies_out = 0 - err = cyruntime.cudaStreamGetCaptureInfo_v3(cystream, &captureStatus_out, &id_out, graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out) + with 
nogil: + err = cyruntime.cudaStreamGetCaptureInfo_v3(cystream, &captureStatus_out, &id_out, graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out) if cudaError_t(err) == cudaError_t(0): pydependencies_out = [cudaGraphNode_t(init_value=cydependencies_out[idx]) for idx in range(numDependencies_out)] if cudaError_t(err) == cudaError_t(0): @@ -21804,16 +21855,19 @@ def cudaStreamUpdateCaptureDependencies(stream, dependencies : Optional[Tuple[cu pstream = int(cudaStream_t(stream)) cystream = pstream cdef cyruntime.cudaGraphNode_t* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t)) if cydependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - err = cyruntime.cudaStreamUpdateCaptureDependencies(cystream, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, numDependencies, flags) - if cydependencies is not NULL: + with nogil: + err = cyruntime.cudaStreamUpdateCaptureDependencies(cystream, cydependencies, numDependencies, flags) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) return (_dict_cudaError_t[err],) {{endif}} @@ -21878,24 +21932,29 @@ def cudaStreamUpdateCaptureDependencies_v2(stream, dependencies : Optional[Tuple pstream = int(cudaStream_t(stream)) cystream = pstream cdef cyruntime.cudaGraphNode_t* cydependencies = NULL - if len(dependencies) > 0: + if len(dependencies) > 1: cydependencies = calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t)) if cydependencies is NULL: 
raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(dependencies)): cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 0: + if len(dependencyData) > 1: cydependencyData = calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData)) if cydependencyData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) for idx in range(len(dependencyData)): string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) - err = cyruntime.cudaStreamUpdateCaptureDependencies_v2(cystream, (dependencies[0])._pvt_ptr if len(dependencies) == 1 else cydependencies, (dependencyData[0])._pvt_ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, flags) - if cydependencies is not NULL: + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr + with nogil: + err = cyruntime.cudaStreamUpdateCaptureDependencies_v2(cystream, cydependencies, cydependencyData, numDependencies, flags) + if len(dependencies) > 1 and cydependencies is not NULL: free(cydependencies) - if cydependencyData is not NULL: + if len(dependencyData) > 1 and cydependencyData is not NULL: free(cydependencyData) return (_dict_cudaError_t[err],) {{endif}} @@ -21921,7 +21980,8 @@ def cudaEventCreate(): cudaEventCreate (C++ API), :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuEventCreate` """ cdef cudaEvent_t event = cudaEvent_t() - err = cyruntime.cudaEventCreate(event._pvt_ptr) + with 
nogil: + err = cyruntime.cudaEventCreate(event._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], event) @@ -21972,7 +22032,8 @@ def cudaEventCreateWithFlags(unsigned int flags): :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuEventCreate` """ cdef cudaEvent_t event = cudaEvent_t() - err = cyruntime.cudaEventCreateWithFlags(event._pvt_ptr, flags) + with nogil: + err = cyruntime.cudaEventCreateWithFlags(event._pvt_ptr, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], event) @@ -22032,7 +22093,8 @@ def cudaEventRecord(event, stream): else: pevent = int(cudaEvent_t(event)) cyevent = pevent - err = cyruntime.cudaEventRecord(cyevent, cystream) + with nogil: + err = cyruntime.cudaEventRecord(cyevent, cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -22099,7 +22161,8 @@ def cudaEventRecordWithFlags(event, stream, unsigned int flags): else: pevent = int(cudaEvent_t(event)) cyevent = pevent - err = cyruntime.cudaEventRecordWithFlags(cyevent, cystream, flags) + with nogil: + err = cyruntime.cudaEventRecordWithFlags(cyevent, cystream, flags) return (_dict_cudaError_t[err],) {{endif}} @@ -22143,7 +22206,8 @@ def cudaEventQuery(event): else: pevent = int(cudaEvent_t(event)) cyevent = pevent - err = cyruntime.cudaEventQuery(cyevent) + with nogil: + err = cyruntime.cudaEventQuery(cyevent) return (_dict_cudaError_t[err],) {{endif}} @@ -22186,7 +22250,8 @@ def cudaEventSynchronize(event): else: pevent = int(cudaEvent_t(event)) cyevent = pevent - err = cyruntime.cudaEventSynchronize(cyevent) + with nogil: + err = cyruntime.cudaEventSynchronize(cyevent) return (_dict_cudaError_t[err],) {{endif}} @@ -22226,7 +22291,8 @@ def cudaEventDestroy(event): else: pevent = int(cudaEvent_t(event)) cyevent = pevent - err = 
cyruntime.cudaEventDestroy(cyevent) + with nogil: + err = cyruntime.cudaEventDestroy(cyevent) return (_dict_cudaError_t[err],) {{endif}} @@ -22292,7 +22358,8 @@ def cudaEventElapsedTime(start, end): pstart = int(cudaEvent_t(start)) cystart = pstart cdef float ms = 0 - err = cyruntime.cudaEventElapsedTime(&ms, cystart, cyend) + with nogil: + err = cyruntime.cudaEventElapsedTime(&ms, cystart, cyend) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], ms) @@ -22364,7 +22431,8 @@ def cudaEventElapsedTime_v2(start, end): pstart = int(cudaEvent_t(start)) cystart = pstart cdef float ms = 0 - err = cyruntime.cudaEventElapsedTime_v2(&ms, cystart, cyend) + with nogil: + err = cyruntime.cudaEventElapsedTime_v2(&ms, cystart, cyend) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], ms) @@ -22517,7 +22585,8 @@ def cudaImportExternalMemory(memHandleDesc : Optional[cudaExternalMemoryHandleDe """ cdef cudaExternalMemory_t extMem_out = cudaExternalMemory_t() cdef cyruntime.cudaExternalMemoryHandleDesc* cymemHandleDesc_ptr = memHandleDesc._pvt_ptr if memHandleDesc != None else NULL - err = cyruntime.cudaImportExternalMemory(extMem_out._pvt_ptr, cymemHandleDesc_ptr) + with nogil: + err = cyruntime.cudaImportExternalMemory(extMem_out._pvt_ptr, cymemHandleDesc_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], extMem_out) @@ -22584,7 +22653,8 @@ def cudaExternalMemoryGetMappedBuffer(extMem, bufferDesc : Optional[cudaExternal cyextMem = pextMem cdef void_ptr devPtr = 0 cdef cyruntime.cudaExternalMemoryBufferDesc* cybufferDesc_ptr = bufferDesc._pvt_ptr if bufferDesc != None else NULL - err = cyruntime.cudaExternalMemoryGetMappedBuffer(&devPtr, cyextMem, cybufferDesc_ptr) + with nogil: + err = cyruntime.cudaExternalMemoryGetMappedBuffer(&devPtr, cyextMem, cybufferDesc_ptr) if err != cyruntime.cudaSuccess: return 
(_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], devPtr) @@ -22655,7 +22725,8 @@ def cudaExternalMemoryGetMappedMipmappedArray(extMem, mipmapDesc : Optional[cuda cyextMem = pextMem cdef cudaMipmappedArray_t mipmap = cudaMipmappedArray_t() cdef cyruntime.cudaExternalMemoryMipmappedArrayDesc* cymipmapDesc_ptr = mipmapDesc._pvt_ptr if mipmapDesc != None else NULL - err = cyruntime.cudaExternalMemoryGetMappedMipmappedArray(mipmap._pvt_ptr, cyextMem, cymipmapDesc_ptr) + with nogil: + err = cyruntime.cudaExternalMemoryGetMappedMipmappedArray(mipmap._pvt_ptr, cyextMem, cymipmapDesc_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], mipmap) @@ -22694,7 +22765,8 @@ def cudaDestroyExternalMemory(extMem): else: pextMem = int(cudaExternalMemory_t(extMem)) cyextMem = pextMem - err = cyruntime.cudaDestroyExternalMemory(cyextMem) + with nogil: + err = cyruntime.cudaDestroyExternalMemory(cyextMem) return (_dict_cudaError_t[err],) {{endif}} @@ -22840,7 +22912,8 @@ def cudaImportExternalSemaphore(semHandleDesc : Optional[cudaExternalSemaphoreHa """ cdef cudaExternalSemaphore_t extSem_out = cudaExternalSemaphore_t() cdef cyruntime.cudaExternalSemaphoreHandleDesc* cysemHandleDesc_ptr = semHandleDesc._pvt_ptr if semHandleDesc != None else NULL - err = cyruntime.cudaImportExternalSemaphore(extSem_out._pvt_ptr, cysemHandleDesc_ptr) + with nogil: + err = cyruntime.cudaImportExternalSemaphore(extSem_out._pvt_ptr, cysemHandleDesc_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], extSem_out) @@ -22957,26 +23030,31 @@ def cudaSignalExternalSemaphoresAsync(extSemArray : Optional[Tuple[cudaExternalS if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray): raise TypeError("Argument 'extSemArray' is not instance of type (expected Tuple[cyruntime.cudaExternalSemaphore_t,] or List[cyruntime.cudaExternalSemaphore_t,]") cdef 
cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL - if len(extSemArray) > 0: + if len(extSemArray) > 1: cyextSemArray = calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t)) if cyextSemArray is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t))) else: for idx in range(len(extSemArray)): cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] + elif len(extSemArray) == 1: + cyextSemArray = (extSemArray[0])._pvt_ptr cdef cyruntime.cudaExternalSemaphoreSignalParams* cyparamsArray = NULL - if len(paramsArray) > 0: + if len(paramsArray) > 1: cyparamsArray = calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreSignalParams)) if cyparamsArray is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams))) for idx in range(len(paramsArray)): string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams)) + elif len(paramsArray) == 1: + cyparamsArray = (paramsArray[0])._pvt_ptr if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) - err = cyruntime.cudaSignalExternalSemaphoresAsync((extSemArray[0])._pvt_ptr if len(extSemArray) == 1 else cyextSemArray, (paramsArray[0])._pvt_ptr if len(paramsArray) == 1 else cyparamsArray, numExtSems, cystream) - if cyextSemArray is not NULL: + with nogil: + err = cyruntime.cudaSignalExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) + if len(extSemArray) > 1 and cyextSemArray is not NULL: free(cyextSemArray) - if cyparamsArray is not NULL: + if len(paramsArray) > 1 and cyparamsArray is not NULL: free(cyparamsArray) return (_dict_cudaError_t[err],) 
{{endif}} @@ -23081,26 +23159,31 @@ def cudaWaitExternalSemaphoresAsync(extSemArray : Optional[Tuple[cudaExternalSem if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray): raise TypeError("Argument 'extSemArray' is not instance of type (expected Tuple[cyruntime.cudaExternalSemaphore_t,] or List[cyruntime.cudaExternalSemaphore_t,]") cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL - if len(extSemArray) > 0: + if len(extSemArray) > 1: cyextSemArray = calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t)) if cyextSemArray is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t))) else: for idx in range(len(extSemArray)): cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] + elif len(extSemArray) == 1: + cyextSemArray = (extSemArray[0])._pvt_ptr cdef cyruntime.cudaExternalSemaphoreWaitParams* cyparamsArray = NULL - if len(paramsArray) > 0: + if len(paramsArray) > 1: cyparamsArray = calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreWaitParams)) if cyparamsArray is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams))) for idx in range(len(paramsArray)): string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams)) + elif len(paramsArray) == 1: + cyparamsArray = (paramsArray[0])._pvt_ptr if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) - err = cyruntime.cudaWaitExternalSemaphoresAsync((extSemArray[0])._pvt_ptr if len(extSemArray) == 1 else cyextSemArray, (paramsArray[0])._pvt_ptr if len(paramsArray) == 1 else cyparamsArray, numExtSems, cystream) - if cyextSemArray is not 
NULL: + with nogil: + err = cyruntime.cudaWaitExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) + if len(extSemArray) > 1 and cyextSemArray is not NULL: free(cyextSemArray) - if cyparamsArray is not NULL: + if len(paramsArray) > 1 and cyparamsArray is not NULL: free(cyparamsArray) return (_dict_cudaError_t[err],) {{endif}} @@ -23137,7 +23220,8 @@ def cudaDestroyExternalSemaphore(extSem): else: pextSem = int(cudaExternalSemaphore_t(extSem)) cyextSem = pextSem - err = cyruntime.cudaDestroyExternalSemaphore(cyextSem) + with nogil: + err = cyruntime.cudaDestroyExternalSemaphore(cyextSem) return (_dict_cudaError_t[err],) {{endif}} @@ -23203,7 +23287,8 @@ def cudaFuncSetCacheConfig(func, cacheConfig not None : cudaFuncCache): cyfunc = utils.HelperInputVoidPtr(func) cdef void* cyfunc_ptr = cyfunc.cptr cdef cyruntime.cudaFuncCache cycacheConfig = cacheConfig.value - err = cyruntime.cudaFuncSetCacheConfig(cyfunc_ptr, cycacheConfig) + with nogil: + err = cyruntime.cudaFuncSetCacheConfig(cyfunc_ptr, cycacheConfig) return (_dict_cudaError_t[err],) {{endif}} @@ -23244,7 +23329,8 @@ def cudaFuncGetAttributes(func): cdef cudaFuncAttributes attr = cudaFuncAttributes() cyfunc = utils.HelperInputVoidPtr(func) cdef void* cyfunc_ptr = cyfunc.cptr - err = cyruntime.cudaFuncGetAttributes(attr._pvt_ptr, cyfunc_ptr) + with nogil: + err = cyruntime.cudaFuncGetAttributes(attr._pvt_ptr, cyfunc_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], attr) @@ -23331,7 +23417,8 @@ def cudaFuncSetAttribute(func, attr not None : cudaFuncAttribute, int value): cyfunc = utils.HelperInputVoidPtr(func) cdef void* cyfunc_ptr = cyfunc.cptr cdef cyruntime.cudaFuncAttribute cyattr = attr.value - err = cyruntime.cudaFuncSetAttribute(cyfunc_ptr, cyattr, value) + with nogil: + err = cyruntime.cudaFuncSetAttribute(cyfunc_ptr, cyattr, value) return (_dict_cudaError_t[err],) {{endif}} @@ -23440,7 +23527,6 @@ def 
cudaLaunchHostFunc(stream, fn, userData): with nogil: err = cyruntime.cudaLaunchHostFunc(cystream, cudaStreamRtHostCallbackWrapper, cbData) - if err != cyruntime.cudaSuccess: free(cbData) return (_dict_cudaError_t[err],) @@ -23509,7 +23595,8 @@ def cudaFuncSetSharedMemConfig(func, config not None : cudaSharedMemConfig): cyfunc = utils.HelperInputVoidPtr(func) cdef void* cyfunc_ptr = cyfunc.cptr cdef cyruntime.cudaSharedMemConfig cyconfig = config.value - err = cyruntime.cudaFuncSetSharedMemConfig(cyfunc_ptr, cyconfig) + with nogil: + err = cyruntime.cudaFuncSetSharedMemConfig(cyfunc_ptr, cyconfig) return (_dict_cudaError_t[err],) {{endif}} @@ -23545,7 +23632,8 @@ def cudaOccupancyMaxActiveBlocksPerMultiprocessor(func, int blockSize, size_t dy cdef int numBlocks = 0 cyfunc = utils.HelperInputVoidPtr(func) cdef void* cyfunc_ptr = cyfunc.cptr - err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, cyfunc_ptr, blockSize, dynamicSMemSize) + with nogil: + err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, cyfunc_ptr, blockSize, dynamicSMemSize) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], numBlocks) @@ -23583,7 +23671,8 @@ def cudaOccupancyAvailableDynamicSMemPerBlock(func, int numBlocks, int blockSize cdef size_t dynamicSmemSize = 0 cyfunc = utils.HelperInputVoidPtr(func) cdef void* cyfunc_ptr = cyfunc.cptr - err = cyruntime.cudaOccupancyAvailableDynamicSMemPerBlock(&dynamicSmemSize, cyfunc_ptr, numBlocks, blockSize) + with nogil: + err = cyruntime.cudaOccupancyAvailableDynamicSMemPerBlock(&dynamicSmemSize, cyfunc_ptr, numBlocks, blockSize) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], dynamicSmemSize) @@ -23638,7 +23727,8 @@ def cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(func, int blockSize, cdef int numBlocks = 0 cyfunc = utils.HelperInputVoidPtr(func) cdef void* cyfunc_ptr = cyfunc.cptr - err = 
cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, cyfunc_ptr, blockSize, dynamicSMemSize, flags) + with nogil: + err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, cyfunc_ptr, blockSize, dynamicSMemSize, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], numBlocks) @@ -23773,7 +23863,6 @@ def cudaMallocManaged(size_t size, unsigned int flags): cdef void_ptr devPtr = 0 with nogil: err = cyruntime.cudaMallocManaged(&devPtr, size, flags) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], devPtr) @@ -23813,7 +23902,6 @@ def cudaMalloc(size_t size): cdef void_ptr devPtr = 0 with nogil: err = cyruntime.cudaMalloc(&devPtr, size) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], devPtr) @@ -23860,7 +23948,8 @@ def cudaMallocHost(size_t size): :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, cudaMallocHost (C++ API), :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemAllocHost` """ cdef void_ptr ptr = 0 - err = cyruntime.cudaMallocHost(&ptr, size) + with nogil: + err = cyruntime.cudaMallocHost(&ptr, size) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], ptr) @@ -23915,7 +24004,8 @@ def cudaMallocPitch(size_t width, size_t height): """ cdef void_ptr devPtr = 0 cdef size_t pitch = 0 - err = cyruntime.cudaMallocPitch(&devPtr, &pitch, width, height) + with nogil: + err = cyruntime.cudaMallocPitch(&devPtr, &pitch, width, height) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None, None) return (_dict_cudaError_t[err], devPtr, pitch) @@ -23993,7 +24083,6 @@ def cudaMallocArray(desc : 
Optional[cudaChannelFormatDesc], size_t width, size_t cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc != None else NULL with nogil: err = cyruntime.cudaMallocArray(array._pvt_ptr, cydesc_ptr, width, height, flags) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], array) @@ -24046,7 +24135,6 @@ def cudaFree(devPtr): cdef void* cydevPtr_ptr = cydevPtr.cptr with nogil: err = cyruntime.cudaFree(cydevPtr_ptr) - return (_dict_cudaError_t[err],) {{endif}} @@ -24078,7 +24166,6 @@ def cudaFreeHost(ptr): cdef void* cyptr_ptr = cyptr.cptr with nogil: err = cyruntime.cudaFreeHost(cyptr_ptr) - return (_dict_cudaError_t[err],) {{endif}} @@ -24116,7 +24203,6 @@ def cudaFreeArray(array): cyarray = parray with nogil: err = cyruntime.cudaFreeArray(cyarray) - return (_dict_cudaError_t[err],) {{endif}} @@ -24152,7 +24238,8 @@ def cudaFreeMipmappedArray(mipmappedArray): else: pmipmappedArray = int(cudaMipmappedArray_t(mipmappedArray)) cymipmappedArray = pmipmappedArray - err = cyruntime.cudaFreeMipmappedArray(cymipmappedArray) + with nogil: + err = cyruntime.cudaFreeMipmappedArray(cymipmappedArray) return (_dict_cudaError_t[err],) {{endif}} @@ -24236,7 +24323,6 @@ def cudaHostAlloc(size_t size, unsigned int flags): cdef void_ptr pHost = 0 with nogil: err = cyruntime.cudaHostAlloc(&pHost, size, flags) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pHost) @@ -24356,7 +24442,6 @@ def cudaHostRegister(ptr, size_t size, unsigned int flags): cdef void* cyptr_ptr = cyptr.cptr with nogil: err = cyruntime.cudaHostRegister(cyptr_ptr, size, flags) - return (_dict_cudaError_t[err],) {{endif}} @@ -24390,7 +24475,6 @@ def cudaHostUnregister(ptr): cdef void* cyptr_ptr = cyptr.cptr with nogil: err = cyruntime.cudaHostUnregister(cyptr_ptr) - return (_dict_cudaError_t[err],) {{endif}} @@ -24449,7 +24533,8 @@ def cudaHostGetDevicePointer(pHost, unsigned int flags): 
cdef void_ptr pDevice = 0 cypHost = utils.HelperInputVoidPtr(pHost) cdef void* cypHost_ptr = cypHost.cptr - err = cyruntime.cudaHostGetDevicePointer(&pDevice, cypHost_ptr, flags) + with nogil: + err = cyruntime.cudaHostGetDevicePointer(&pDevice, cypHost_ptr, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pDevice) @@ -24483,7 +24568,8 @@ def cudaHostGetFlags(pHost): cdef unsigned int pFlags = 0 cypHost = utils.HelperInputVoidPtr(pHost) cdef void* cypHost_ptr = cypHost.cptr - err = cyruntime.cudaHostGetFlags(&pFlags, cypHost_ptr) + with nogil: + err = cyruntime.cudaHostGetFlags(&pFlags, cypHost_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pFlags) @@ -24531,7 +24617,8 @@ def cudaMalloc3D(extent not None : cudaExtent): :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMemAllocPitch` """ cdef cudaPitchedPtr pitchedDevPtr = cudaPitchedPtr() - err = cyruntime.cudaMalloc3D(pitchedDevPtr._pvt_ptr, extent._pvt_ptr[0]) + with nogil: + err = cyruntime.cudaMalloc3D(pitchedDevPtr._pvt_ptr, extent._pvt_ptr[0]) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pitchedDevPtr) @@ -24655,7 +24742,6 @@ def cudaMalloc3DArray(desc : Optional[cudaChannelFormatDesc], extent not None : cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc != None else NULL with nogil: err = cyruntime.cudaMalloc3DArray(array._pvt_ptr, cydesc_ptr, extent._pvt_ptr[0], flags) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], array) @@ -24780,7 +24866,8 @@ def 
cudaMallocMipmappedArray(desc : Optional[cudaChannelFormatDesc], extent not """ cdef cudaMipmappedArray_t mipmappedArray = cudaMipmappedArray_t() cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc != None else NULL - err = cyruntime.cudaMallocMipmappedArray(mipmappedArray._pvt_ptr, cydesc_ptr, extent._pvt_ptr[0], numLevels, flags) + with nogil: + err = cyruntime.cudaMallocMipmappedArray(mipmappedArray._pvt_ptr, cydesc_ptr, extent._pvt_ptr[0], numLevels, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], mipmappedArray) @@ -24828,7 +24915,8 @@ def cudaGetMipmappedArrayLevel(mipmappedArray, unsigned int level): pmipmappedArray = int(cudaMipmappedArray_const_t(mipmappedArray)) cymipmappedArray = pmipmappedArray cdef cudaArray_t levelArray = cudaArray_t() - err = cyruntime.cudaGetMipmappedArrayLevel(levelArray._pvt_ptr, cymipmappedArray, level) + with nogil: + err = cyruntime.cudaGetMipmappedArrayLevel(levelArray._pvt_ptr, cymipmappedArray, level) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], levelArray) @@ -24915,7 +25003,6 @@ def cudaMemcpy3D(p : Optional[cudaMemcpy3DParms]): cdef cyruntime.cudaMemcpy3DParms* cyp_ptr = p._pvt_ptr if p != None else NULL with nogil: err = cyruntime.cudaMemcpy3D(cyp_ptr) - return (_dict_cudaError_t[err],) {{endif}} @@ -24951,7 +25038,8 @@ def cudaMemcpy3DPeer(p : Optional[cudaMemcpy3DPeerParms]): :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpy3DPeer` """ cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr = p._pvt_ptr if p != None else NULL - err = cyruntime.cudaMemcpy3DPeer(cyp_ptr) + with nogil: + err = cyruntime.cudaMemcpy3DPeer(cyp_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -25057,7 +25145,6 @@ def cudaMemcpy3DAsync(p : Optional[cudaMemcpy3DParms], stream): cdef 
cyruntime.cudaMemcpy3DParms* cyp_ptr = p._pvt_ptr if p != None else NULL with nogil: err = cyruntime.cudaMemcpy3DAsync(cyp_ptr, cystream) - return (_dict_cudaError_t[err],) {{endif}} @@ -25096,7 +25183,8 @@ def cudaMemcpy3DPeerAsync(p : Optional[cudaMemcpy3DPeerParms], stream): pstream = int(cudaStream_t(stream)) cystream = pstream cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr = p._pvt_ptr if p != None else NULL - err = cyruntime.cudaMemcpy3DPeerAsync(cyp_ptr, cystream) + with nogil: + err = cyruntime.cudaMemcpy3DPeerAsync(cyp_ptr, cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -25138,7 +25226,8 @@ def cudaMemGetInfo(): """ cdef size_t free = 0 cdef size_t total = 0 - err = cyruntime.cudaMemGetInfo(&free, &total) + with nogil: + err = cyruntime.cudaMemGetInfo(&free, &total) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None, None) return (_dict_cudaError_t[err], free, total) @@ -25186,7 +25275,8 @@ def cudaArrayGetInfo(array): cdef cudaChannelFormatDesc desc = cudaChannelFormatDesc() cdef cudaExtent extent = cudaExtent() cdef unsigned int flags = 0 - err = cyruntime.cudaArrayGetInfo(desc._pvt_ptr, extent._pvt_ptr, &flags, cyarray) + with nogil: + err = cyruntime.cudaArrayGetInfo(desc._pvt_ptr, extent._pvt_ptr, &flags, cyarray) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None, None, None) return (_dict_cudaError_t[err], desc, extent, flags) @@ -25241,7 +25331,8 @@ def cudaArrayGetPlane(hArray, unsigned int planeIdx): phArray = int(cudaArray_t(hArray)) cyhArray = phArray cdef cudaArray_t pPlaneArray = cudaArray_t() - err = cyruntime.cudaArrayGetPlane(pPlaneArray._pvt_ptr, cyhArray, planeIdx) + with nogil: + err = cyruntime.cudaArrayGetPlane(pPlaneArray._pvt_ptr, cyhArray, planeIdx) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pPlaneArray) @@ -25290,7 +25381,8 @@ def cudaArrayGetMemoryRequirements(array, int device): parray = int(cudaArray_t(array)) cyarray = 
parray cdef cudaArrayMemoryRequirements memoryRequirements = cudaArrayMemoryRequirements() - err = cyruntime.cudaArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cyarray, device) + with nogil: + err = cyruntime.cudaArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cyarray, device) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], memoryRequirements) @@ -25339,7 +25431,8 @@ def cudaMipmappedArrayGetMemoryRequirements(mipmap, int device): pmipmap = int(cudaMipmappedArray_t(mipmap)) cymipmap = pmipmap cdef cudaArrayMemoryRequirements memoryRequirements = cudaArrayMemoryRequirements() - err = cyruntime.cudaMipmappedArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cymipmap, device) + with nogil: + err = cyruntime.cudaMipmappedArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cymipmap, device) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], memoryRequirements) @@ -25394,7 +25487,8 @@ def cudaArrayGetSparseProperties(array): parray = int(cudaArray_t(array)) cyarray = parray cdef cudaArraySparseProperties sparseProperties = cudaArraySparseProperties() - err = cyruntime.cudaArrayGetSparseProperties(sparseProperties._pvt_ptr, cyarray) + with nogil: + err = cyruntime.cudaArrayGetSparseProperties(sparseProperties._pvt_ptr, cyarray) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], sparseProperties) @@ -25449,7 +25543,8 @@ def cudaMipmappedArrayGetSparseProperties(mipmap): pmipmap = int(cudaMipmappedArray_t(mipmap)) cymipmap = pmipmap cdef cudaArraySparseProperties sparseProperties = cudaArraySparseProperties() - err = cyruntime.cudaMipmappedArrayGetSparseProperties(sparseProperties._pvt_ptr, cymipmap) + with nogil: + err = cyruntime.cudaMipmappedArrayGetSparseProperties(sparseProperties._pvt_ptr, cymipmap) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return 
(_dict_cudaError_t[err], sparseProperties) @@ -25502,7 +25597,6 @@ def cudaMemcpy(dst, src, size_t count, kind not None : cudaMemcpyKind): cdef cyruntime.cudaMemcpyKind cykind = kind.value with nogil: err = cyruntime.cudaMemcpy(cydst_ptr, cysrc_ptr, count, cykind) - return (_dict_cudaError_t[err],) {{endif}} @@ -25551,7 +25645,6 @@ def cudaMemcpyPeer(dst, int dstDevice, src, int srcDevice, size_t count): cdef void* cysrc_ptr = cysrc.cptr with nogil: err = cyruntime.cudaMemcpyPeer(cydst_ptr, dstDevice, cysrc_ptr, srcDevice, count) - return (_dict_cudaError_t[err],) {{endif}} @@ -25612,7 +25705,6 @@ def cudaMemcpy2D(dst, size_t dpitch, src, size_t spitch, size_t width, size_t he cdef cyruntime.cudaMemcpyKind cykind = kind.value with nogil: err = cyruntime.cudaMemcpy2D(cydst_ptr, dpitch, cysrc_ptr, spitch, width, height, cykind) - return (_dict_cudaError_t[err],) {{endif}} @@ -25680,7 +25772,6 @@ def cudaMemcpy2DToArray(dst, size_t wOffset, size_t hOffset, src, size_t spitch, cdef cyruntime.cudaMemcpyKind cykind = kind.value with nogil: err = cyruntime.cudaMemcpy2DToArray(cydst, wOffset, hOffset, cysrc_ptr, spitch, width, height, cykind) - return (_dict_cudaError_t[err],) {{endif}} @@ -25748,7 +25839,6 @@ def cudaMemcpy2DFromArray(dst, size_t dpitch, src, size_t wOffset, size_t hOffse cdef cyruntime.cudaMemcpyKind cykind = kind.value with nogil: err = cyruntime.cudaMemcpy2DFromArray(cydst_ptr, dpitch, cysrc, wOffset, hOffset, width, height, cykind) - return (_dict_cudaError_t[err],) {{endif}} @@ -25820,7 +25910,8 @@ def cudaMemcpy2DArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, siz pdst = int(cudaArray_t(dst)) cydst = pdst cdef cyruntime.cudaMemcpyKind cykind = kind.value - err = cyruntime.cudaMemcpy2DArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, width, height, cykind) + with nogil: + err = cyruntime.cudaMemcpy2DArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, width, height, cykind) return 
(_dict_cudaError_t[err],) {{endif}} @@ -25891,7 +25982,6 @@ def cudaMemcpyAsync(dst, src, size_t count, kind not None : cudaMemcpyKind, stre cdef cyruntime.cudaMemcpyKind cykind = kind.value with nogil: err = cyruntime.cudaMemcpyAsync(cydst_ptr, cysrc_ptr, count, cykind, cystream) - return (_dict_cudaError_t[err],) {{endif}} @@ -25948,7 +26038,6 @@ def cudaMemcpyPeerAsync(dst, int dstDevice, src, int srcDevice, size_t count, st cdef void* cysrc_ptr = cysrc.cptr with nogil: err = cyruntime.cudaMemcpyPeerAsync(cydst_ptr, dstDevice, cysrc_ptr, srcDevice, count, cystream) - return (_dict_cudaError_t[err],) {{endif}} @@ -26083,25 +26172,30 @@ def cudaMemcpyBatchAsync(dsts : Optional[Tuple[Any] | List[Any]], srcs : Optiona dsts = [] if dsts is None else dsts pylist = [utils.HelperInputVoidPtr(pydsts) for pydsts in dsts] cdef utils.InputVoidPtrPtrHelper voidStarHelperdsts = utils.InputVoidPtrPtrHelper(pylist) + cdef void** cydsts_ptr = voidStarHelperdsts.cptr pylist = [utils.HelperInputVoidPtr(pysrcs) for pysrcs in srcs] cdef utils.InputVoidPtrPtrHelper voidStarHelpersrcs = utils.InputVoidPtrPtrHelper(pylist) + cdef void** cysrcs_ptr = voidStarHelpersrcs.cptr cdef vector[size_t] cysizes = sizes if count > len(dsts): raise RuntimeError("List is too small: " + str(len(dsts)) + " < " + str(count)) if count > len(srcs): raise RuntimeError("List is too small: " + str(len(srcs)) + " < " + str(count)) if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) cdef cyruntime.cudaMemcpyAttributes* cyattrs = NULL - if len(attrs) > 0: + if len(attrs) > 1: cyattrs = calloc(len(attrs), sizeof(cyruntime.cudaMemcpyAttributes)) if cyattrs is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(attrs)) + 'x' + str(sizeof(cyruntime.cudaMemcpyAttributes))) for idx in range(len(attrs)): string.memcpy(&cyattrs[idx], (attrs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemcpyAttributes)) + elif len(attrs) == 1: + cyattrs = 
(attrs[0])._pvt_ptr cdef vector[size_t] cyattrsIdxs = attrsIdxs if numAttrs > len(attrs): raise RuntimeError("List is too small: " + str(len(attrs)) + " < " + str(numAttrs)) if numAttrs > len(attrsIdxs): raise RuntimeError("List is too small: " + str(len(attrsIdxs)) + " < " + str(numAttrs)) cdef size_t failIdx = 0 - err = cyruntime.cudaMemcpyBatchAsync(voidStarHelperdsts.cptr, voidStarHelpersrcs.cptr, cysizes.data(), count, (attrs[0])._pvt_ptr if len(attrs) == 1 else cyattrs, cyattrsIdxs.data(), numAttrs, &failIdx, cystream) - if cyattrs is not NULL: + with nogil: + err = cyruntime.cudaMemcpyBatchAsync(cydsts_ptr, cysrcs_ptr, cysizes.data(), count, cyattrs, cyattrsIdxs.data(), numAttrs, &failIdx, cystream) + if len(attrs) > 1 and cyattrs is not NULL: free(cyattrs) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -26231,15 +26325,18 @@ def cudaMemcpy3DBatchAsync(size_t numOps, opList : Optional[Tuple[cudaMemcpy3DBa raise TypeError("Argument 'opList' is not instance of type (expected Tuple[cyruntime.cudaMemcpy3DBatchOp,] or List[cyruntime.cudaMemcpy3DBatchOp,]") if numOps > len(opList): raise RuntimeError("List is too small: " + str(len(opList)) + " < " + str(numOps)) cdef cyruntime.cudaMemcpy3DBatchOp* cyopList = NULL - if len(opList) > 0: + if len(opList) > 1: cyopList = calloc(len(opList), sizeof(cyruntime.cudaMemcpy3DBatchOp)) if cyopList is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(opList)) + 'x' + str(sizeof(cyruntime.cudaMemcpy3DBatchOp))) for idx in range(len(opList)): string.memcpy(&cyopList[idx], (opList[idx])._pvt_ptr, sizeof(cyruntime.cudaMemcpy3DBatchOp)) + elif len(opList) == 1: + cyopList = (opList[0])._pvt_ptr cdef size_t failIdx = 0 - err = cyruntime.cudaMemcpy3DBatchAsync(numOps, (opList[0])._pvt_ptr if len(opList) == 1 else cyopList, &failIdx, flags, cystream) - if cyopList is not NULL: + with nogil: + err = cyruntime.cudaMemcpy3DBatchAsync(numOps, cyopList, &failIdx, flags, cystream) + 
if len(opList) > 1 and cyopList is not NULL: free(cyopList) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -26324,7 +26421,6 @@ def cudaMemcpy2DAsync(dst, size_t dpitch, src, size_t spitch, size_t width, size cdef cyruntime.cudaMemcpyKind cykind = kind.value with nogil: err = cyruntime.cudaMemcpy2DAsync(cydst_ptr, dpitch, cysrc_ptr, spitch, width, height, cykind, cystream) - return (_dict_cudaError_t[err],) {{endif}} @@ -26413,7 +26509,6 @@ def cudaMemcpy2DToArrayAsync(dst, size_t wOffset, size_t hOffset, src, size_t sp cdef cyruntime.cudaMemcpyKind cykind = kind.value with nogil: err = cyruntime.cudaMemcpy2DToArrayAsync(cydst, wOffset, hOffset, cysrc_ptr, spitch, width, height, cykind, cystream) - return (_dict_cudaError_t[err],) {{endif}} @@ -26501,7 +26596,6 @@ def cudaMemcpy2DFromArrayAsync(dst, size_t dpitch, src, size_t wOffset, size_t h cdef cyruntime.cudaMemcpyKind cykind = kind.value with nogil: err = cyruntime.cudaMemcpy2DFromArrayAsync(cydst_ptr, dpitch, cysrc, wOffset, hOffset, width, height, cykind, cystream) - return (_dict_cudaError_t[err],) {{endif}} @@ -26539,7 +26633,6 @@ def cudaMemset(devPtr, int value, size_t count): cdef void* cydevPtr_ptr = cydevPtr.cptr with nogil: err = cyruntime.cudaMemset(cydevPtr_ptr, value, count) - return (_dict_cudaError_t[err],) {{endif}} @@ -26582,7 +26675,8 @@ def cudaMemset2D(devPtr, size_t pitch, int value, size_t width, size_t height): """ cydevPtr = utils.HelperInputVoidPtr(devPtr) cdef void* cydevPtr_ptr = cydevPtr.cptr - err = cyruntime.cudaMemset2D(cydevPtr_ptr, pitch, value, width, height) + with nogil: + err = cyruntime.cudaMemset2D(cydevPtr_ptr, pitch, value, width, height) return (_dict_cudaError_t[err],) {{endif}} @@ -26635,7 +26729,8 @@ def cudaMemset3D(pitchedDevPtr not None : cudaPitchedPtr, int value, extent not -------- :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, 
:py:obj:`~.cudaMalloc3D`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent` """ - err = cyruntime.cudaMemset3D(pitchedDevPtr._pvt_ptr[0], value, extent._pvt_ptr[0]) + with nogil: + err = cyruntime.cudaMemset3D(pitchedDevPtr._pvt_ptr[0], value, extent._pvt_ptr[0]) return (_dict_cudaError_t[err],) {{endif}} @@ -26689,7 +26784,6 @@ def cudaMemsetAsync(devPtr, int value, size_t count, stream): cdef void* cydevPtr_ptr = cydevPtr.cptr with nogil: err = cyruntime.cudaMemsetAsync(cydevPtr_ptr, value, count, cystream) - return (_dict_cudaError_t[err],) {{endif}} @@ -26748,7 +26842,8 @@ def cudaMemset2DAsync(devPtr, size_t pitch, int value, size_t width, size_t heig cystream = pstream cydevPtr = utils.HelperInputVoidPtr(devPtr) cdef void* cydevPtr_ptr = cydevPtr.cptr - err = cyruntime.cudaMemset2DAsync(cydevPtr_ptr, pitch, value, width, height, cystream) + with nogil: + err = cyruntime.cudaMemset2DAsync(cydevPtr_ptr, pitch, value, width, height, cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -26817,7 +26912,8 @@ def cudaMemset3DAsync(pitchedDevPtr not None : cudaPitchedPtr, int value, extent else: pstream = int(cudaStream_t(stream)) cystream = pstream - err = cyruntime.cudaMemset3DAsync(pitchedDevPtr._pvt_ptr[0], value, extent._pvt_ptr[0], cystream) + with nogil: + err = cyruntime.cudaMemset3DAsync(pitchedDevPtr._pvt_ptr[0], value, extent._pvt_ptr[0], cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -26917,7 +27013,6 @@ def cudaMemPrefetchAsync(devPtr, size_t count, int dstDevice, stream): cdef void* cydevPtr_ptr = cydevPtr.cptr with nogil: err = cyruntime.cudaMemPrefetchAsync(cydevPtr_ptr, count, dstDevice, cystream) - return (_dict_cudaError_t[err],) {{endif}} @@ -27035,7 +27130,6 @@ def cudaMemPrefetchAsync_v2(devPtr, size_t count, location not None : cudaMemLoc cdef void* cydevPtr_ptr = cydevPtr.cptr with nogil: err = cyruntime.cudaMemPrefetchAsync_v2(cydevPtr_ptr, count, location._pvt_ptr[0], flags, cystream) - return (_dict_cudaError_t[err],) 
{{endif}} @@ -27202,7 +27296,6 @@ def cudaMemAdvise(devPtr, size_t count, advice not None : cudaMemoryAdvise, int cdef cyruntime.cudaMemoryAdvise cyadvice = advice.value with nogil: err = cyruntime.cudaMemAdvise(cydevPtr_ptr, count, cyadvice, device) - return (_dict_cudaError_t[err],) {{endif}} @@ -27400,7 +27493,6 @@ def cudaMemAdvise_v2(devPtr, size_t count, advice not None : cudaMemoryAdvise, l cdef cyruntime.cudaMemoryAdvise cyadvice = advice.value with nogil: err = cyruntime.cudaMemAdvise_v2(cydevPtr_ptr, count, cyadvice, location._pvt_ptr[0]) - return (_dict_cudaError_t[err],) {{endif}} @@ -27547,7 +27639,8 @@ def cudaMemRangeGetAttribute(size_t dataSize, attribute not None : cudaMemRangeA cdef cyruntime.cudaMemRangeAttribute cyattribute = attribute.value cydevPtr = utils.HelperInputVoidPtr(devPtr) cdef void* cydevPtr_ptr = cydevPtr.cptr - err = cyruntime.cudaMemRangeGetAttribute(cydata_ptr, dataSize, cyattribute, cydevPtr_ptr, count) + with nogil: + err = cyruntime.cudaMemRangeGetAttribute(cydata_ptr, dataSize, cyattribute, cydevPtr_ptr, count) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], cydata.pyObj()) @@ -27627,7 +27720,8 @@ def cudaMemRangeGetAttributes(dataSizes : Tuple[int] | List[int], attributes : O if numAttributes > len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes)) cydevPtr = utils.HelperInputVoidPtr(devPtr) cdef void* cydevPtr_ptr = cydevPtr.cptr - err = cyruntime.cudaMemRangeGetAttributes(cyvoidStarHelper_ptr, cydataSizes.data(), cyattributes.data(), numAttributes, cydevPtr_ptr, count) + with nogil: + err = cyruntime.cudaMemRangeGetAttributes(cyvoidStarHelper_ptr, cydataSizes.data(), cyattributes.data(), numAttributes, cydevPtr_ptr, count) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], [obj.pyObj() for obj in pylist]) @@ -27687,7 +27781,8 @@ def cudaMemcpyToArray(dst, size_t 
wOffset, size_t hOffset, src, size_t count, ki cysrc = utils.HelperInputVoidPtr(src) cdef void* cysrc_ptr = cysrc.cptr cdef cyruntime.cudaMemcpyKind cykind = kind.value - err = cyruntime.cudaMemcpyToArray(cydst, wOffset, hOffset, cysrc_ptr, count, cykind) + with nogil: + err = cyruntime.cudaMemcpyToArray(cydst, wOffset, hOffset, cysrc_ptr, count, cykind) return (_dict_cudaError_t[err],) {{endif}} @@ -27745,7 +27840,8 @@ def cudaMemcpyFromArray(dst, src, size_t wOffset, size_t hOffset, size_t count, cydst = utils.HelperInputVoidPtr(dst) cdef void* cydst_ptr = cydst.cptr cdef cyruntime.cudaMemcpyKind cykind = kind.value - err = cyruntime.cudaMemcpyFromArray(cydst_ptr, cysrc, wOffset, hOffset, count, cykind) + with nogil: + err = cyruntime.cudaMemcpyFromArray(cydst_ptr, cysrc, wOffset, hOffset, count, cykind) return (_dict_cudaError_t[err],) {{endif}} @@ -27814,7 +27910,8 @@ def cudaMemcpyArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, size_ pdst = int(cudaArray_t(dst)) cydst = pdst cdef cyruntime.cudaMemcpyKind cykind = kind.value - err = cyruntime.cudaMemcpyArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, count, cykind) + with nogil: + err = cyruntime.cudaMemcpyArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, count, cykind) return (_dict_cudaError_t[err],) {{endif}} @@ -27889,7 +27986,8 @@ def cudaMemcpyToArrayAsync(dst, size_t wOffset, size_t hOffset, src, size_t coun cysrc = utils.HelperInputVoidPtr(src) cdef void* cysrc_ptr = cysrc.cptr cdef cyruntime.cudaMemcpyKind cykind = kind.value - err = cyruntime.cudaMemcpyToArrayAsync(cydst, wOffset, hOffset, cysrc_ptr, count, cykind, cystream) + with nogil: + err = cyruntime.cudaMemcpyToArrayAsync(cydst, wOffset, hOffset, cysrc_ptr, count, cykind, cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -27964,7 +28062,8 @@ def cudaMemcpyFromArrayAsync(dst, src, size_t wOffset, size_t hOffset, size_t co cydst = utils.HelperInputVoidPtr(dst) cdef void* 
cydst_ptr = cydst.cptr cdef cyruntime.cudaMemcpyKind cykind = kind.value - err = cyruntime.cudaMemcpyFromArrayAsync(cydst_ptr, cysrc, wOffset, hOffset, count, cykind, cystream) + with nogil: + err = cyruntime.cudaMemcpyFromArrayAsync(cydst_ptr, cysrc, wOffset, hOffset, count, cykind, cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -28018,7 +28117,6 @@ def cudaMallocAsync(size_t size, hStream): cdef void_ptr devPtr = 0 with nogil: err = cyruntime.cudaMallocAsync(&devPtr, size, cyhStream) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], devPtr) @@ -28067,7 +28165,6 @@ def cudaFreeAsync(devPtr, hStream): cdef void* cydevPtr_ptr = cydevPtr.cptr with nogil: err = cyruntime.cudaFreeAsync(cydevPtr_ptr, cyhStream) - return (_dict_cudaError_t[err],) {{endif}} @@ -28118,7 +28215,6 @@ def cudaMemPoolTrimTo(memPool, size_t minBytesToKeep): cymemPool = pmemPool with nogil: err = cyruntime.cudaMemPoolTrimTo(cymemPool, minBytesToKeep) - return (_dict_cudaError_t[err],) {{endif}} @@ -28195,7 +28291,6 @@ def cudaMemPoolSetAttribute(memPool, attr not None : cudaMemPoolAttr, value): cdef void* cyvalue_ptr = cyvalue.cptr with nogil: err = cyruntime.cudaMemPoolSetAttribute(cymemPool, cyattr, cyvalue_ptr) - return (_dict_cudaError_t[err],) {{endif}} @@ -28278,7 +28373,6 @@ def cudaMemPoolGetAttribute(memPool, attr not None : cudaMemPoolAttr): cdef void* cyvalue_ptr = cyvalue.cptr with nogil: err = cyruntime.cudaMemPoolGetAttribute(cymemPool, cyattr, cyvalue_ptr) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], cyvalue.pyObj()) @@ -28321,15 +28415,18 @@ def cudaMemPoolSetAccess(memPool, descList : Optional[Tuple[cudaMemAccessDesc] | pmemPool = int(cudaMemPool_t(memPool)) cymemPool = pmemPool cdef cyruntime.cudaMemAccessDesc* cydescList = NULL - if len(descList) > 0: + if len(descList) > 1: cydescList = calloc(len(descList), sizeof(cyruntime.cudaMemAccessDesc)) if 
cydescList is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(descList)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc))) for idx in range(len(descList)): string.memcpy(&cydescList[idx], (descList[idx])._pvt_ptr, sizeof(cyruntime.cudaMemAccessDesc)) + elif len(descList) == 1: + cydescList = (descList[0])._pvt_ptr if count > len(descList): raise RuntimeError("List is too small: " + str(len(descList)) + " < " + str(count)) - err = cyruntime.cudaMemPoolSetAccess(cymemPool, (descList[0])._pvt_ptr if len(descList) == 1 else cydescList, count) - if cydescList is not NULL: + with nogil: + err = cyruntime.cudaMemPoolSetAccess(cymemPool, cydescList, count) + if len(descList) > 1 and cydescList is not NULL: free(cydescList) return (_dict_cudaError_t[err],) {{endif}} @@ -28371,7 +28468,8 @@ def cudaMemPoolGetAccess(memPool, location : Optional[cudaMemLocation]): cymemPool = pmemPool cdef cyruntime.cudaMemAccessFlags flags cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location != None else NULL - err = cyruntime.cudaMemPoolGetAccess(&flags, cymemPool, cylocation_ptr) + with nogil: + err = cyruntime.cudaMemPoolGetAccess(&flags, cymemPool, cylocation_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], cudaMemAccessFlags(flags)) @@ -28446,7 +28544,8 @@ def cudaMemPoolCreate(poolProps : Optional[cudaMemPoolProps]): """ cdef cudaMemPool_t memPool = cudaMemPool_t() cdef cyruntime.cudaMemPoolProps* cypoolProps_ptr = poolProps._pvt_ptr if poolProps != None else NULL - err = cyruntime.cudaMemPoolCreate(memPool._pvt_ptr, cypoolProps_ptr) + with nogil: + err = cyruntime.cudaMemPoolCreate(memPool._pvt_ptr, cypoolProps_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], memPool) @@ -28493,7 +28592,8 @@ def cudaMemPoolDestroy(memPool): else: pmemPool = int(cudaMemPool_t(memPool)) cymemPool = pmemPool - err = 
cyruntime.cudaMemPoolDestroy(cymemPool) + with nogil: + err = cyruntime.cudaMemPoolDestroy(cymemPool) return (_dict_cudaError_t[err],) {{endif}} @@ -28549,7 +28649,8 @@ def cudaMallocFromPoolAsync(size_t size, memPool, stream): pmemPool = int(cudaMemPool_t(memPool)) cymemPool = pmemPool cdef void_ptr ptr = 0 - err = cyruntime.cudaMallocFromPoolAsync(&ptr, size, cymemPool, cystream) + with nogil: + err = cyruntime.cudaMallocFromPoolAsync(&ptr, size, cymemPool, cystream) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], ptr) @@ -28605,7 +28706,8 @@ def cudaMemPoolExportToShareableHandle(memPool, handleType not None : cudaMemAll cdef utils.HelperCUmemAllocationHandleType cyshareableHandle = utils.HelperCUmemAllocationHandleType(handleType) cdef void* cyshareableHandle_ptr = cyshareableHandle.cptr cdef cyruntime.cudaMemAllocationHandleType cyhandleType = handleType.value - err = cyruntime.cudaMemPoolExportToShareableHandle(cyshareableHandle_ptr, cymemPool, cyhandleType, flags) + with nogil: + err = cyruntime.cudaMemPoolExportToShareableHandle(cyshareableHandle_ptr, cymemPool, cyhandleType, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], cyshareableHandle.pyObj()) @@ -28648,7 +28750,8 @@ def cudaMemPoolImportFromShareableHandle(shareableHandle, handleType not None : cyshareableHandle = utils.HelperInputVoidPtr(shareableHandle) cdef void* cyshareableHandle_ptr = cyshareableHandle.cptr cdef cyruntime.cudaMemAllocationHandleType cyhandleType = handleType.value - err = cyruntime.cudaMemPoolImportFromShareableHandle(memPool._pvt_ptr, cyshareableHandle_ptr, cyhandleType, flags) + with nogil: + err = cyruntime.cudaMemPoolImportFromShareableHandle(memPool._pvt_ptr, cyshareableHandle_ptr, cyhandleType, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], memPool) @@ -28684,7 +28787,8 @@ def 
cudaMemPoolExportPointer(ptr): cdef cudaMemPoolPtrExportData exportData = cudaMemPoolPtrExportData() cyptr = utils.HelperInputVoidPtr(ptr) cdef void* cyptr_ptr = cyptr.cptr - err = cyruntime.cudaMemPoolExportPointer(exportData._pvt_ptr, cyptr_ptr) + with nogil: + err = cyruntime.cudaMemPoolExportPointer(exportData._pvt_ptr, cyptr_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], exportData) @@ -28736,7 +28840,8 @@ def cudaMemPoolImportPointer(memPool, exportData : Optional[cudaMemPoolPtrExport cymemPool = pmemPool cdef void_ptr ptr = 0 cdef cyruntime.cudaMemPoolPtrExportData* cyexportData_ptr = exportData._pvt_ptr if exportData != None else NULL - err = cyruntime.cudaMemPoolImportPointer(&ptr, cymemPool, cyexportData_ptr) + with nogil: + err = cyruntime.cudaMemPoolImportPointer(&ptr, cymemPool, cyexportData_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], ptr) @@ -28807,7 +28912,8 @@ def cudaPointerGetAttributes(ptr): cdef cudaPointerAttributes attributes = cudaPointerAttributes() cyptr = utils.HelperInputVoidPtr(ptr) cdef void* cyptr_ptr = cyptr.cptr - err = cyruntime.cudaPointerGetAttributes(attributes._pvt_ptr, cyptr_ptr) + with nogil: + err = cyruntime.cudaPointerGetAttributes(attributes._pvt_ptr, cyptr_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], attributes) @@ -28845,7 +28951,8 @@ def cudaDeviceCanAccessPeer(int device, int peerDevice): :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cuDeviceCanAccessPeer` """ cdef int canAccessPeer = 0 - err = cyruntime.cudaDeviceCanAccessPeer(&canAccessPeer, device, peerDevice) + with nogil: + err = cyruntime.cudaDeviceCanAccessPeer(&canAccessPeer, device, peerDevice) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], canAccessPeer) @@ -28897,7 +29004,8 
@@ def cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags): -------- :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cuCtxEnablePeerAccess` """ - err = cyruntime.cudaDeviceEnablePeerAccess(peerDevice, flags) + with nogil: + err = cyruntime.cudaDeviceEnablePeerAccess(peerDevice, flags) return (_dict_cudaError_t[err],) {{endif}} @@ -28925,7 +29033,8 @@ def cudaDeviceDisablePeerAccess(int peerDevice): -------- :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cuCtxDisablePeerAccess` """ - err = cyruntime.cudaDeviceDisablePeerAccess(peerDevice) + with nogil: + err = cyruntime.cudaDeviceDisablePeerAccess(peerDevice) return (_dict_cudaError_t[err],) {{endif}} @@ -28963,7 +29072,8 @@ def cudaGraphicsUnregisterResource(resource): else: presource = int(cudaGraphicsResource_t(resource)) cyresource = presource - err = cyruntime.cudaGraphicsUnregisterResource(cyresource) + with nogil: + err = cyruntime.cudaGraphicsUnregisterResource(cyresource) return (_dict_cudaError_t[err],) {{endif}} @@ -29018,7 +29128,8 @@ def cudaGraphicsResourceSetMapFlags(resource, unsigned int flags): else: presource = int(cudaGraphicsResource_t(resource)) cyresource = presource - err = cyruntime.cudaGraphicsResourceSetMapFlags(cyresource, flags) + with nogil: + err = cyruntime.cudaGraphicsResourceSetMapFlags(cyresource, flags) return (_dict_cudaError_t[err],) {{endif}} @@ -29080,7 +29191,8 @@ def cudaGraphicsMapResources(int count, resources, stream): cyresources = resources else: raise TypeError("Argument 'resources' is not instance of type (expected , found " + str(type(resources))) - err = cyruntime.cudaGraphicsMapResources(count, cyresources, cystream) + with nogil: + err = cyruntime.cudaGraphicsMapResources(count, cyresources, cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -29140,7 +29252,8 @@ def cudaGraphicsUnmapResources(int count, resources, stream): cyresources = resources else: raise 
TypeError("Argument 'resources' is not instance of type (expected , found " + str(type(resources))) - err = cyruntime.cudaGraphicsUnmapResources(count, cyresources, cystream) + with nogil: + err = cyruntime.cudaGraphicsUnmapResources(count, cyresources, cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -29183,7 +29296,8 @@ def cudaGraphicsResourceGetMappedPointer(resource): cyresource = presource cdef void_ptr devPtr = 0 cdef size_t size = 0 - err = cyruntime.cudaGraphicsResourceGetMappedPointer(&devPtr, &size, cyresource) + with nogil: + err = cyruntime.cudaGraphicsResourceGetMappedPointer(&devPtr, &size, cyresource) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None, None) return (_dict_cudaError_t[err], devPtr, size) @@ -29239,7 +29353,8 @@ def cudaGraphicsSubResourceGetMappedArray(resource, unsigned int arrayIndex, uns presource = int(cudaGraphicsResource_t(resource)) cyresource = presource cdef cudaArray_t array = cudaArray_t() - err = cyruntime.cudaGraphicsSubResourceGetMappedArray(array._pvt_ptr, cyresource, arrayIndex, mipLevel) + with nogil: + err = cyruntime.cudaGraphicsSubResourceGetMappedArray(array._pvt_ptr, cyresource, arrayIndex, mipLevel) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], array) @@ -29284,7 +29399,8 @@ def cudaGraphicsResourceGetMappedMipmappedArray(resource): presource = int(cudaGraphicsResource_t(resource)) cyresource = presource cdef cudaMipmappedArray_t mipmappedArray = cudaMipmappedArray_t() - err = cyruntime.cudaGraphicsResourceGetMappedMipmappedArray(mipmappedArray._pvt_ptr, cyresource) + with nogil: + err = cyruntime.cudaGraphicsResourceGetMappedMipmappedArray(mipmappedArray._pvt_ptr, cyresource) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], mipmappedArray) @@ -29325,7 +29441,6 @@ def cudaGetChannelDesc(array): cdef cudaChannelFormatDesc desc = cudaChannelFormatDesc() with nogil: err = 
cyruntime.cudaGetChannelDesc(desc._pvt_ptr, cyarray) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], desc) @@ -29373,7 +29488,8 @@ def cudaCreateChannelDesc(int x, int y, int z, int w, f not None : cudaChannelFo cudaCreateChannelDesc (C++ API), :py:obj:`~.cudaGetChannelDesc`, :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cudaCreateSurfaceObject` """ cdef cyruntime.cudaChannelFormatKind cyf = f.value - err = cyruntime.cudaCreateChannelDesc(x, y, z, w, cyf) + with nogil: + err = cyruntime.cudaCreateChannelDesc(x, y, z, w, cyf) cdef cudaChannelFormatDesc wrapper = cudaChannelFormatDesc() wrapper._pvt_ptr[0] = err return (cudaError_t.cudaSuccess, wrapper) @@ -29617,7 +29733,8 @@ def cudaCreateTextureObject(pResDesc : Optional[cudaResourceDesc], pTexDesc : Op cdef cyruntime.cudaResourceDesc* cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc != None else NULL cdef cyruntime.cudaTextureDesc* cypTexDesc_ptr = pTexDesc._pvt_ptr if pTexDesc != None else NULL cdef cyruntime.cudaResourceViewDesc* cypResViewDesc_ptr = pResViewDesc._pvt_ptr if pResViewDesc != None else NULL - err = cyruntime.cudaCreateTextureObject(pTexObject._pvt_ptr, cypResDesc_ptr, cypTexDesc_ptr, cypResViewDesc_ptr) + with nogil: + err = cyruntime.cudaCreateTextureObject(pTexObject._pvt_ptr, cypResDesc_ptr, cypTexDesc_ptr, cypResViewDesc_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pTexObject) @@ -29655,7 +29772,6 @@ def cudaDestroyTextureObject(texObject): cytexObject = ptexObject with nogil: err = cyruntime.cudaDestroyTextureObject(cytexObject) - return (_dict_cudaError_t[err],) {{endif}} @@ -29695,7 +29811,6 @@ def cudaGetTextureObjectResourceDesc(texObject): cdef cudaResourceDesc pResDesc = cudaResourceDesc() with nogil: err = cyruntime.cudaGetTextureObjectResourceDesc(pResDesc._pvt_ptr, cytexObject) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return 
(_dict_cudaError_t[err], pResDesc) @@ -29737,7 +29852,6 @@ def cudaGetTextureObjectTextureDesc(texObject): cdef cudaTextureDesc pTexDesc = cudaTextureDesc() with nogil: err = cyruntime.cudaGetTextureObjectTextureDesc(pTexDesc._pvt_ptr, cytexObject) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pTexDesc) @@ -29778,7 +29892,8 @@ def cudaGetTextureObjectResourceViewDesc(texObject): ptexObject = int(cudaTextureObject_t(texObject)) cytexObject = ptexObject cdef cudaResourceViewDesc pResViewDesc = cudaResourceViewDesc() - err = cyruntime.cudaGetTextureObjectResourceViewDesc(pResViewDesc._pvt_ptr, cytexObject) + with nogil: + err = cyruntime.cudaGetTextureObjectResourceViewDesc(pResViewDesc._pvt_ptr, cytexObject) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pResViewDesc) @@ -29821,7 +29936,6 @@ def cudaCreateSurfaceObject(pResDesc : Optional[cudaResourceDesc]): cdef cyruntime.cudaResourceDesc* cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc != None else NULL with nogil: err = cyruntime.cudaCreateSurfaceObject(pSurfObject._pvt_ptr, cypResDesc_ptr) - if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pSurfObject) @@ -29859,7 +29973,6 @@ def cudaDestroySurfaceObject(surfObject): cysurfObject = psurfObject with nogil: err = cyruntime.cudaDestroySurfaceObject(cysurfObject) - return (_dict_cudaError_t[err],) {{endif}} @@ -29894,7 +30007,8 @@ def cudaGetSurfaceObjectResourceDesc(surfObject): psurfObject = int(cudaSurfaceObject_t(surfObject)) cysurfObject = psurfObject cdef cudaResourceDesc pResDesc = cudaResourceDesc() - err = cyruntime.cudaGetSurfaceObjectResourceDesc(pResDesc._pvt_ptr, cysurfObject) + with nogil: + err = cyruntime.cudaGetSurfaceObjectResourceDesc(pResDesc._pvt_ptr, cysurfObject) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pResDesc) @@ 
-29926,7 +30040,8 @@ def cudaDriverGetVersion(): :py:obj:`~.cudaRuntimeGetVersion`, :py:obj:`~.cuDriverGetVersion` """ cdef int driverVersion = 0 - err = cyruntime.cudaDriverGetVersion(&driverVersion) + with nogil: + err = cyruntime.cudaDriverGetVersion(&driverVersion) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], driverVersion) @@ -29961,7 +30076,8 @@ def cudaRuntimeGetVersion(): :py:obj:`~.cudaDriverGetVersion`, :py:obj:`~.cuDriverGetVersion` """ cdef int runtimeVersion = 0 - err = cyruntime.cudaRuntimeGetVersion(&runtimeVersion) + with nogil: + err = cyruntime.cudaRuntimeGetVersion(&runtimeVersion) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], runtimeVersion) @@ -29992,7 +30108,8 @@ def cudaGraphCreate(unsigned int flags): :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphDestroy`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphClone` """ cdef cudaGraph_t pGraph = cudaGraph_t() - err = cyruntime.cudaGraphCreate(pGraph._pvt_ptr, flags) + with nogil: + err = cyruntime.cudaGraphCreate(pGraph._pvt_ptr, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pGraph) @@ -30100,17 +30217,20 @@ def cudaGraphAddKernelNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' 
+ str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams != None else NULL - err = cyruntime.cudaGraphAddKernelNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cypNodeParams_ptr) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddKernelNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypNodeParams_ptr) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -30159,7 +30279,8 @@ def cudaGraphKernelNodeGetParams(node): pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cudaKernelNodeParams pNodeParams = cudaKernelNodeParams() - err = cyruntime.cudaGraphKernelNodeGetParams(cynode, pNodeParams._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphKernelNodeGetParams(cynode, pNodeParams._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pNodeParams) @@ -30198,7 +30319,8 @@ def cudaGraphKernelNodeSetParams(node, pNodeParams : Optional[cudaKernelNodePara pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams != None else NULL - err = cyruntime.cudaGraphKernelNodeSetParams(cynode, cypNodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphKernelNodeSetParams(cynode, cypNodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -30244,7 +30366,8 @@ def 
cudaGraphKernelNodeCopyAttributes(hSrc, hDst): else: phSrc = int(cudaGraphNode_t(hSrc)) cyhSrc = phSrc - err = cyruntime.cudaGraphKernelNodeCopyAttributes(cyhSrc, cyhDst) + with nogil: + err = cyruntime.cudaGraphKernelNodeCopyAttributes(cyhSrc, cyhDst) return (_dict_cudaError_t[err],) {{endif}} @@ -30285,7 +30408,8 @@ def cudaGraphKernelNodeGetAttribute(hNode, attr not None : cudaKernelNodeAttrID) cyhNode = phNode cdef cyruntime.cudaKernelNodeAttrID cyattr = attr.value cdef cudaKernelNodeAttrValue value_out = cudaKernelNodeAttrValue() - err = cyruntime.cudaGraphKernelNodeGetAttribute(cyhNode, cyattr, value_out._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphKernelNodeGetAttribute(cyhNode, cyattr, value_out._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], value_out) @@ -30328,7 +30452,8 @@ def cudaGraphKernelNodeSetAttribute(hNode, attr not None : cudaKernelNodeAttrID, cyhNode = phNode cdef cyruntime.cudaKernelNodeAttrID cyattr = attr.value cdef cyruntime.cudaKernelNodeAttrValue* cyvalue_ptr = value._pvt_ptr if value != None else NULL - err = cyruntime.cudaGraphKernelNodeSetAttribute(cyhNode, cyattr, cyvalue_ptr) + with nogil: + err = cyruntime.cudaGraphKernelNodeSetAttribute(cyhNode, cyattr, cyvalue_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -30388,17 +30513,20 @@ def cudaGraphAddMemcpyNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: 
+ cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) cdef cyruntime.cudaMemcpy3DParms* cypCopyParams_ptr = pCopyParams._pvt_ptr if pCopyParams != None else NULL - err = cyruntime.cudaGraphAddMemcpyNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cypCopyParams_ptr) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddMemcpyNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypCopyParams_ptr) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -30477,20 +30605,23 @@ def cudaGraphAddMemcpyNode1D(graph, pDependencies : Optional[Tuple[cudaGraphNode cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr cydst = utils.HelperInputVoidPtr(dst) cdef void* cydst_ptr = cydst.cptr cysrc = utils.HelperInputVoidPtr(src) cdef void* cysrc_ptr = cysrc.cptr cdef cyruntime.cudaMemcpyKind cykind = kind.value - err = cyruntime.cudaGraphAddMemcpyNode1D(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cydst_ptr, cysrc_ptr, count, cykind) - if cypDependencies is not NULL: + with nogil: + err = 
cyruntime.cudaGraphAddMemcpyNode1D(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cydst_ptr, cysrc_ptr, count, cykind) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -30530,7 +30661,8 @@ def cudaGraphMemcpyNodeGetParams(node): pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cudaMemcpy3DParms pNodeParams = cudaMemcpy3DParms() - err = cyruntime.cudaGraphMemcpyNodeGetParams(cynode, pNodeParams._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphMemcpyNodeGetParams(cynode, pNodeParams._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pNodeParams) @@ -30569,7 +30701,8 @@ def cudaGraphMemcpyNodeSetParams(node, pNodeParams : Optional[cudaMemcpy3DParms] pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cyruntime.cudaMemcpy3DParms* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams != None else NULL - err = cyruntime.cudaGraphMemcpyNodeSetParams(cynode, cypNodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphMemcpyNodeSetParams(cynode, cypNodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -30630,7 +30763,8 @@ def cudaGraphMemcpyNodeSetParams1D(node, dst, src, size_t count, kind not None : cysrc = utils.HelperInputVoidPtr(src) cdef void* cysrc_ptr = cysrc.cptr cdef cyruntime.cudaMemcpyKind cykind = kind.value - err = cyruntime.cudaGraphMemcpyNodeSetParams1D(cynode, cydst_ptr, cysrc_ptr, count, cykind) + with nogil: + err = cyruntime.cudaGraphMemcpyNodeSetParams1D(cynode, cydst_ptr, cysrc_ptr, count, cykind) return (_dict_cudaError_t[err],) {{endif}} @@ -30684,17 +30818,20 @@ def cudaGraphAddMemsetNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = 
calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) cdef cyruntime.cudaMemsetParams* cypMemsetParams_ptr = pMemsetParams._pvt_ptr if pMemsetParams != None else NULL - err = cyruntime.cudaGraphAddMemsetNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cypMemsetParams_ptr) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddMemsetNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypMemsetParams_ptr) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -30734,7 +30871,8 @@ def cudaGraphMemsetNodeGetParams(node): pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cudaMemsetParams pNodeParams = cudaMemsetParams() - err = cyruntime.cudaGraphMemsetNodeGetParams(cynode, pNodeParams._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphMemsetNodeGetParams(cynode, pNodeParams._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pNodeParams) @@ -30773,7 +30911,8 @@ def cudaGraphMemsetNodeSetParams(node, pNodeParams : Optional[cudaMemsetParams]) pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams != None else NULL - err = cyruntime.cudaGraphMemsetNodeSetParams(cynode, cypNodeParams_ptr) + with nogil: + err = 
cyruntime.cudaGraphMemsetNodeSetParams(cynode, cypNodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -30828,17 +30967,20 @@ def cudaGraphAddHostNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams != None else NULL - err = cyruntime.cudaGraphAddHostNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cypNodeParams_ptr) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddHostNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypNodeParams_ptr) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -30878,7 +31020,8 @@ def cudaGraphHostNodeGetParams(node): pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cudaHostNodeParams pNodeParams = cudaHostNodeParams() - err = cyruntime.cudaGraphHostNodeGetParams(cynode, pNodeParams._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphHostNodeGetParams(cynode, pNodeParams._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return 
(_dict_cudaError_t[err], pNodeParams) @@ -30917,7 +31060,8 @@ def cudaGraphHostNodeSetParams(node, pNodeParams : Optional[cudaHostNodeParams]) pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams != None else NULL - err = cyruntime.cudaGraphHostNodeSetParams(cynode, cypNodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphHostNodeSetParams(cynode, cypNodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -30983,16 +31127,19 @@ def cudaGraphAddChildGraphNode(graph, pDependencies : Optional[Tuple[cudaGraphNo cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - err = cyruntime.cudaGraphAddChildGraphNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cychildGraph) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddChildGraphNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cychildGraph) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -31037,7 +31184,8 @@ def cudaGraphChildGraphNodeGetGraph(node): pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cudaGraph_t 
pGraph = cudaGraph_t() - err = cyruntime.cudaGraphChildGraphNodeGetGraph(cynode, pGraph._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphChildGraphNodeGetGraph(cynode, pGraph._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pGraph) @@ -31095,16 +31243,19 @@ def cudaGraphAddEmptyNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - err = cyruntime.cudaGraphAddEmptyNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddEmptyNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -31172,16 +31323,19 @@ def cudaGraphAddEventRecordNode(graph, pDependencies : Optional[Tuple[cudaGraphN cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if 
cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - err = cyruntime.cudaGraphAddEventRecordNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cyevent) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddEventRecordNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cyevent) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -31221,7 +31375,8 @@ def cudaGraphEventRecordNodeGetEvent(node): pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cudaEvent_t event_out = cudaEvent_t() - err = cyruntime.cudaGraphEventRecordNodeGetEvent(cynode, event_out._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphEventRecordNodeGetEvent(cynode, event_out._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], event_out) @@ -31267,7 +31422,8 @@ def cudaGraphEventRecordNodeSetEvent(node, event): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - err = cyruntime.cudaGraphEventRecordNodeSetEvent(cynode, cyevent) + with nogil: + err = cyruntime.cudaGraphEventRecordNodeSetEvent(cynode, cyevent) return (_dict_cudaError_t[err],) {{endif}} @@ -31335,16 +31491,19 @@ def cudaGraphAddEventWaitNode(graph, pDependencies : Optional[Tuple[cudaGraphNod cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if 
len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - err = cyruntime.cudaGraphAddEventWaitNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cyevent) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddEventWaitNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cyevent) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -31384,7 +31543,8 @@ def cudaGraphEventWaitNodeGetEvent(node): pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cudaEvent_t event_out = cudaEvent_t() - err = cyruntime.cudaGraphEventWaitNodeGetEvent(cynode, event_out._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphEventWaitNodeGetEvent(cynode, event_out._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], event_out) @@ -31430,7 +31590,8 @@ def cudaGraphEventWaitNodeSetEvent(node, event): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - err = cyruntime.cudaGraphEventWaitNodeSetEvent(cynode, cyevent) + with nogil: + err = cyruntime.cudaGraphEventWaitNodeSetEvent(cynode, cyevent) return (_dict_cudaError_t[err],) {{endif}} @@ -31486,17 +31647,20 @@ def cudaGraphAddExternalSemaphoresSignalNode(graph, pDependencies : Optional[Tup cygraph = pgraph 
cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cyruntime.cudaGraphAddExternalSemaphoresSignalNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cynodeParams_ptr) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddExternalSemaphoresSignalNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -31542,7 +31706,8 @@ def cudaGraphExternalSemaphoresSignalNodeGetParams(hNode): phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode cdef cudaExternalSemaphoreSignalNodeParams params_out = cudaExternalSemaphoreSignalNodeParams() - err = cyruntime.cudaGraphExternalSemaphoresSignalNodeGetParams(cyhNode, params_out._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphExternalSemaphoresSignalNodeGetParams(cyhNode, params_out._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], params_out) @@ -31582,7 +31747,8 @@ def 
cudaGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams : Optional[ phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cyruntime.cudaGraphExternalSemaphoresSignalNodeSetParams(cyhNode, cynodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphExternalSemaphoresSignalNodeSetParams(cyhNode, cynodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -31638,17 +31804,20 @@ def cudaGraphAddExternalSemaphoresWaitNode(graph, pDependencies : Optional[Tuple cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cyruntime.cudaGraphAddExternalSemaphoresWaitNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cynodeParams_ptr) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddExternalSemaphoresWaitNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return 
(_dict_cudaError_t[err], None) @@ -31694,7 +31863,8 @@ def cudaGraphExternalSemaphoresWaitNodeGetParams(hNode): phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode cdef cudaExternalSemaphoreWaitNodeParams params_out = cudaExternalSemaphoreWaitNodeParams() - err = cyruntime.cudaGraphExternalSemaphoresWaitNodeGetParams(cyhNode, params_out._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphExternalSemaphoresWaitNodeGetParams(cyhNode, params_out._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], params_out) @@ -31734,7 +31904,8 @@ def cudaGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams : Optional[cu phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cyruntime.cudaGraphExternalSemaphoresWaitNodeSetParams(cyhNode, cynodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphExternalSemaphoresWaitNodeSetParams(cyhNode, cynodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -31829,17 +32000,20 @@ def cudaGraphAddMemAllocNode(graph, pDependencies : Optional[Tuple[cudaGraphNode cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) cdef cyruntime.cudaMemAllocNodeParams* 
cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cyruntime.cudaGraphAddMemAllocNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cynodeParams_ptr) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddMemAllocNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -31882,7 +32056,8 @@ def cudaGraphMemAllocNodeGetParams(node): pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cudaMemAllocNodeParams params_out = cudaMemAllocNodeParams() - err = cyruntime.cudaGraphMemAllocNodeGetParams(cynode, params_out._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphMemAllocNodeGetParams(cynode, params_out._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], params_out) @@ -31957,18 +32132,21 @@ def cudaGraphAddMemFreeNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_ cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) cydptr = utils.HelperInputVoidPtr(dptr) cdef void* cydptr_ptr = cydptr.cptr - err = 
cyruntime.cudaGraphAddMemFreeNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cydptr_ptr) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddMemFreeNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cydptr_ptr) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -32009,7 +32187,8 @@ def cudaGraphMemFreeNodeGetParams(node): cynode = pnode cdef void_ptr dptr_out = 0 cdef void* cydptr_out_ptr = &dptr_out - err = cyruntime.cudaGraphMemFreeNodeGetParams(cynode, cydptr_out_ptr) + with nogil: + err = cyruntime.cudaGraphMemFreeNodeGetParams(cynode, cydptr_out_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], dptr_out) @@ -32039,7 +32218,8 @@ def cudaDeviceGraphMemTrim(int device): -------- :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync` """ - err = cyruntime.cudaDeviceGraphMemTrim(device) + with nogil: + err = cyruntime.cudaDeviceGraphMemTrim(device) return (_dict_cudaError_t[err],) {{endif}} @@ -32087,7 +32267,8 @@ def cudaDeviceGetGraphMemAttribute(int device, attr not None : cudaGraphMemAttri cdef cyruntime.cudaGraphMemAttributeType cyattr = attr.value cdef utils.HelperCUgraphMem_attribute cyvalue = utils.HelperCUgraphMem_attribute(attr, 0, is_getter=True) cdef void* cyvalue_ptr = cyvalue.cptr - err = cyruntime.cudaDeviceGetGraphMemAttribute(device, cyattr, cyvalue_ptr) + with nogil: + err = cyruntime.cudaDeviceGetGraphMemAttribute(device, cyattr, cyvalue_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], cyvalue.pyObj()) @@ -32130,7 +32311,8 @@ def 
cudaDeviceSetGraphMemAttribute(int device, attr not None : cudaGraphMemAttri cdef cyruntime.cudaGraphMemAttributeType cyattr = attr.value cdef utils.HelperCUgraphMem_attribute cyvalue = utils.HelperCUgraphMem_attribute(attr, value, is_getter=False) cdef void* cyvalue_ptr = cyvalue.cptr - err = cyruntime.cudaDeviceSetGraphMemAttribute(device, cyattr, cyvalue_ptr) + with nogil: + err = cyruntime.cudaDeviceSetGraphMemAttribute(device, cyattr, cyvalue_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -32177,7 +32359,8 @@ def cudaGraphClone(originalGraph): poriginalGraph = int(cudaGraph_t(originalGraph)) cyoriginalGraph = poriginalGraph cdef cudaGraph_t pGraphClone = cudaGraph_t() - err = cyruntime.cudaGraphClone(pGraphClone._pvt_ptr, cyoriginalGraph) + with nogil: + err = cyruntime.cudaGraphClone(pGraphClone._pvt_ptr, cyoriginalGraph) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pGraphClone) @@ -32233,7 +32416,8 @@ def cudaGraphNodeFindInClone(originalNode, clonedGraph): poriginalNode = int(cudaGraphNode_t(originalNode)) cyoriginalNode = poriginalNode cdef cudaGraphNode_t pNode = cudaGraphNode_t() - err = cyruntime.cudaGraphNodeFindInClone(pNode._pvt_ptr, cyoriginalNode, cyclonedGraph) + with nogil: + err = cyruntime.cudaGraphNodeFindInClone(pNode._pvt_ptr, cyoriginalNode, cyclonedGraph) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pNode) @@ -32272,7 +32456,8 @@ def cudaGraphNodeGetType(node): pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cyruntime.cudaGraphNodeType pType - err = cyruntime.cudaGraphNodeGetType(cynode, &pType) + with nogil: + err = cyruntime.cudaGraphNodeGetType(cynode, &pType) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], cudaGraphNodeType(pType)) @@ -32326,7 +32511,8 @@ def cudaGraphGetNodes(graph, size_t numNodes = 0): cynodes = calloc(_graph_length, 
sizeof(cyruntime.cudaGraphNode_t)) if cynodes is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - err = cyruntime.cudaGraphGetNodes(cygraph, cynodes, &numNodes) + with nogil: + err = cyruntime.cudaGraphGetNodes(cygraph, cynodes, &numNodes) if cudaError_t(err) == cudaError_t(0): pynodes = [cudaGraphNode_t(init_value=cynodes[idx]) for idx in range(_graph_length)] if cynodes is not NULL: @@ -32384,7 +32570,8 @@ def cudaGraphGetRootNodes(graph, size_t pNumRootNodes = 0): cypRootNodes = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) if cypRootNodes is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - err = cyruntime.cudaGraphGetRootNodes(cygraph, cypRootNodes, &pNumRootNodes) + with nogil: + err = cyruntime.cudaGraphGetRootNodes(cygraph, cypRootNodes, &pNumRootNodes) if cudaError_t(err) == cudaError_t(0): pypRootNodes = [cudaGraphNode_t(init_value=cypRootNodes[idx]) for idx in range(_graph_length)] if cypRootNodes is not NULL: @@ -32452,7 +32639,8 @@ def cudaGraphGetEdges(graph, size_t numEdges = 0): cyto = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) if cyto is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - err = cyruntime.cudaGraphGetEdges(cygraph, cyfrom_, cyto, &numEdges) + with nogil: + err = cyruntime.cudaGraphGetEdges(cygraph, cyfrom_, cyto, &numEdges) if cudaError_t(err) == cudaError_t(0): pyfrom_ = [cudaGraphNode_t(init_value=cyfrom_[idx]) for idx in range(_graph_length)] if cyfrom_ is not NULL: @@ -32537,7 +32725,8 @@ def cudaGraphGetEdges_v2(graph, size_t numEdges = 0): cyedgeData = calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData)) if cyedgeData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + 
str(sizeof(cyruntime.cudaGraphEdgeData))) - err = cyruntime.cudaGraphGetEdges_v2(cygraph, cyfrom_, cyto, cyedgeData, &numEdges) + with nogil: + err = cyruntime.cudaGraphGetEdges_v2(cygraph, cyfrom_, cyto, cyedgeData, &numEdges) if cudaError_t(err) == cudaError_t(0): pyfrom_ = [cudaGraphNode_t(init_value=cyfrom_[idx]) for idx in range(_graph_length)] if cyfrom_ is not NULL: @@ -32604,7 +32793,8 @@ def cudaGraphNodeGetDependencies(node, size_t pNumDependencies = 0): cypDependencies = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - err = cyruntime.cudaGraphNodeGetDependencies(cynode, cypDependencies, &pNumDependencies) + with nogil: + err = cyruntime.cudaGraphNodeGetDependencies(cynode, cypDependencies, &pNumDependencies) if cudaError_t(err) == cudaError_t(0): pypDependencies = [cudaGraphNode_t(init_value=cypDependencies[idx]) for idx in range(_graph_length)] if cypDependencies is not NULL: @@ -32676,7 +32866,8 @@ def cudaGraphNodeGetDependencies_v2(node, size_t pNumDependencies = 0): cyedgeData = calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData)) if cyedgeData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) - err = cyruntime.cudaGraphNodeGetDependencies_v2(cynode, cypDependencies, cyedgeData, &pNumDependencies) + with nogil: + err = cyruntime.cudaGraphNodeGetDependencies_v2(cynode, cypDependencies, cyedgeData, &pNumDependencies) if cudaError_t(err) == cudaError_t(0): pypDependencies = [cudaGraphNode_t(init_value=cypDependencies[idx]) for idx in range(_graph_length)] if cypDependencies is not NULL: @@ -32739,7 +32930,8 @@ def cudaGraphNodeGetDependentNodes(node, size_t pNumDependentNodes = 0): cypDependentNodes = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) if cypDependentNodes is NULL: 
raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - err = cyruntime.cudaGraphNodeGetDependentNodes(cynode, cypDependentNodes, &pNumDependentNodes) + with nogil: + err = cyruntime.cudaGraphNodeGetDependentNodes(cynode, cypDependentNodes, &pNumDependentNodes) if cudaError_t(err) == cudaError_t(0): pypDependentNodes = [cudaGraphNode_t(init_value=cypDependentNodes[idx]) for idx in range(_graph_length)] if cypDependentNodes is not NULL: @@ -32811,7 +33003,8 @@ def cudaGraphNodeGetDependentNodes_v2(node, size_t pNumDependentNodes = 0): cyedgeData = calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData)) if cyedgeData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) - err = cyruntime.cudaGraphNodeGetDependentNodes_v2(cynode, cypDependentNodes, cyedgeData, &pNumDependentNodes) + with nogil: + err = cyruntime.cudaGraphNodeGetDependentNodes_v2(cynode, cypDependentNodes, cyedgeData, &pNumDependentNodes) if cudaError_t(err) == cudaError_t(0): pypDependentNodes = [cudaGraphNode_t(init_value=cypDependentNodes[idx]) for idx in range(_graph_length)] if cypDependentNodes is not NULL: @@ -32873,27 +33066,32 @@ def cudaGraphAddDependencies(graph, from_ : Optional[Tuple[cudaGraphNode_t] | Li pgraph = int(cudaGraph_t(graph)) cygraph = pgraph cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL - if len(from_) > 0: + if len(from_) > 1: cyfrom_ = calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t)) if cyfrom_ is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(from_)): cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr cdef cyruntime.cudaGraphNode_t* cyto = NULL - if len(to) > 0: + if len(to) > 1: cyto = calloc(len(to), sizeof(cyruntime.cudaGraphNode_t)) if cyto is 
NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(to)): cyto[idx] = (to[idx])._pvt_ptr[0] + elif len(to) == 1: + cyto = (to[0])._pvt_ptr if numDependencies > len(from_): raise RuntimeError("List is too small: " + str(len(from_)) + " < " + str(numDependencies)) if numDependencies > len(to): raise RuntimeError("List is too small: " + str(len(to)) + " < " + str(numDependencies)) - err = cyruntime.cudaGraphAddDependencies(cygraph, (from_[0])._pvt_ptr if len(from_) == 1 else cyfrom_, (to[0])._pvt_ptr if len(to) == 1 else cyto, numDependencies) - if cyfrom_ is not NULL: + with nogil: + err = cyruntime.cudaGraphAddDependencies(cygraph, cyfrom_, cyto, numDependencies) + if len(from_) > 1 and cyfrom_ is not NULL: free(cyfrom_) - if cyto is not NULL: + if len(to) > 1 and cyto is not NULL: free(cyto) return (_dict_cudaError_t[err],) {{endif}} @@ -32952,34 +33150,41 @@ def cudaGraphAddDependencies_v2(graph, from_ : Optional[Tuple[cudaGraphNode_t] | pgraph = int(cudaGraph_t(graph)) cygraph = pgraph cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL - if len(from_) > 0: + if len(from_) > 1: cyfrom_ = calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t)) if cyfrom_ is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(from_)): cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr cdef cyruntime.cudaGraphNode_t* cyto = NULL - if len(to) > 0: + if len(to) > 1: cyto = calloc(len(to), sizeof(cyruntime.cudaGraphNode_t)) if cyto is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(to)): cyto[idx] = (to[idx])._pvt_ptr[0] + elif len(to) == 1: + cyto = (to[0])._pvt_ptr cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL - if 
len(edgeData) > 0: + if len(edgeData) > 1: cyedgeData = calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData)) if cyedgeData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) for idx in range(len(edgeData)): string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) - err = cyruntime.cudaGraphAddDependencies_v2(cygraph, (from_[0])._pvt_ptr if len(from_) == 1 else cyfrom_, (to[0])._pvt_ptr if len(to) == 1 else cyto, (edgeData[0])._pvt_ptr if len(edgeData) == 1 else cyedgeData, numDependencies) - if cyfrom_ is not NULL: + elif len(edgeData) == 1: + cyedgeData = (edgeData[0])._pvt_ptr + with nogil: + err = cyruntime.cudaGraphAddDependencies_v2(cygraph, cyfrom_, cyto, cyedgeData, numDependencies) + if len(from_) > 1 and cyfrom_ is not NULL: free(cyfrom_) - if cyto is not NULL: + if len(to) > 1 and cyto is not NULL: free(cyto) - if cyedgeData is not NULL: + if len(edgeData) > 1 and cyedgeData is not NULL: free(cyedgeData) return (_dict_cudaError_t[err],) {{endif}} @@ -33033,27 +33238,32 @@ def cudaGraphRemoveDependencies(graph, from_ : Optional[Tuple[cudaGraphNode_t] | pgraph = int(cudaGraph_t(graph)) cygraph = pgraph cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL - if len(from_) > 0: + if len(from_) > 1: cyfrom_ = calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t)) if cyfrom_ is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(from_)): cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr cdef cyruntime.cudaGraphNode_t* cyto = NULL - if len(to) > 0: + if len(to) > 1: cyto = calloc(len(to), sizeof(cyruntime.cudaGraphNode_t)) if cyto is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in 
range(len(to)): cyto[idx] = (to[idx])._pvt_ptr[0] + elif len(to) == 1: + cyto = (to[0])._pvt_ptr if numDependencies > len(from_): raise RuntimeError("List is too small: " + str(len(from_)) + " < " + str(numDependencies)) if numDependencies > len(to): raise RuntimeError("List is too small: " + str(len(to)) + " < " + str(numDependencies)) - err = cyruntime.cudaGraphRemoveDependencies(cygraph, (from_[0])._pvt_ptr if len(from_) == 1 else cyfrom_, (to[0])._pvt_ptr if len(to) == 1 else cyto, numDependencies) - if cyfrom_ is not NULL: + with nogil: + err = cyruntime.cudaGraphRemoveDependencies(cygraph, cyfrom_, cyto, numDependencies) + if len(from_) > 1 and cyfrom_ is not NULL: free(cyfrom_) - if cyto is not NULL: + if len(to) > 1 and cyto is not NULL: free(cyto) return (_dict_cudaError_t[err],) {{endif}} @@ -33115,34 +33325,41 @@ def cudaGraphRemoveDependencies_v2(graph, from_ : Optional[Tuple[cudaGraphNode_t pgraph = int(cudaGraph_t(graph)) cygraph = pgraph cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL - if len(from_) > 0: + if len(from_) > 1: cyfrom_ = calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t)) if cyfrom_ is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(from_)): cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr cdef cyruntime.cudaGraphNode_t* cyto = NULL - if len(to) > 0: + if len(to) > 1: cyto = calloc(len(to), sizeof(cyruntime.cudaGraphNode_t)) if cyto is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(to)): cyto[idx] = (to[idx])._pvt_ptr[0] + elif len(to) == 1: + cyto = (to[0])._pvt_ptr cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL - if len(edgeData) > 0: + if len(edgeData) > 1: cyedgeData = calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData)) if cyedgeData is NULL: raise 
MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) for idx in range(len(edgeData)): string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) - err = cyruntime.cudaGraphRemoveDependencies_v2(cygraph, (from_[0])._pvt_ptr if len(from_) == 1 else cyfrom_, (to[0])._pvt_ptr if len(to) == 1 else cyto, (edgeData[0])._pvt_ptr if len(edgeData) == 1 else cyedgeData, numDependencies) - if cyfrom_ is not NULL: + elif len(edgeData) == 1: + cyedgeData = (edgeData[0])._pvt_ptr + with nogil: + err = cyruntime.cudaGraphRemoveDependencies_v2(cygraph, cyfrom_, cyto, cyedgeData, numDependencies) + if len(from_) > 1 and cyfrom_ is not NULL: free(cyfrom_) - if cyto is not NULL: + if len(to) > 1 and cyto is not NULL: free(cyto) - if cyedgeData is not NULL: + if len(edgeData) > 1 and cyedgeData is not NULL: free(cyedgeData) return (_dict_cudaError_t[err],) {{endif}} @@ -33181,7 +33398,8 @@ def cudaGraphDestroyNode(node): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - err = cyruntime.cudaGraphDestroyNode(cynode) + with nogil: + err = cyruntime.cudaGraphDestroyNode(cynode) return (_dict_cudaError_t[err],) {{endif}} @@ -33282,7 +33500,8 @@ def cudaGraphInstantiate(graph, unsigned long long flags): pgraph = int(cudaGraph_t(graph)) cygraph = pgraph cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t() - err = cyruntime.cudaGraphInstantiate(pGraphExec._pvt_ptr, cygraph, flags) + with nogil: + err = cyruntime.cudaGraphInstantiate(pGraphExec._pvt_ptr, cygraph, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pGraphExec) @@ -33387,7 +33606,8 @@ def cudaGraphInstantiateWithFlags(graph, unsigned long long flags): pgraph = int(cudaGraph_t(graph)) cygraph = pgraph cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t() - err = cyruntime.cudaGraphInstantiateWithFlags(pGraphExec._pvt_ptr, cygraph, flags) + with nogil: + err = 
cyruntime.cudaGraphInstantiateWithFlags(pGraphExec._pvt_ptr, cygraph, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pGraphExec) @@ -33533,7 +33753,8 @@ def cudaGraphInstantiateWithParams(graph, instantiateParams : Optional[cudaGraph cygraph = pgraph cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t() cdef cyruntime.cudaGraphInstantiateParams* cyinstantiateParams_ptr = instantiateParams._pvt_ptr if instantiateParams != None else NULL - err = cyruntime.cudaGraphInstantiateWithParams(pGraphExec._pvt_ptr, cygraph, cyinstantiateParams_ptr) + with nogil: + err = cyruntime.cudaGraphInstantiateWithParams(pGraphExec._pvt_ptr, cygraph, cyinstantiateParams_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pGraphExec) @@ -33575,7 +33796,8 @@ def cudaGraphExecGetFlags(graphExec): pgraphExec = int(cudaGraphExec_t(graphExec)) cygraphExec = pgraphExec cdef unsigned long long flags = 0 - err = cyruntime.cudaGraphExecGetFlags(cygraphExec, &flags) + with nogil: + err = cyruntime.cudaGraphExecGetFlags(cygraphExec, &flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], flags) @@ -33658,7 +33880,8 @@ def cudaGraphExecKernelNodeSetParams(hGraphExec, node, pNodeParams : Optional[cu phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams != None else NULL - err = cyruntime.cudaGraphExecKernelNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphExecKernelNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -33722,7 +33945,8 @@ def cudaGraphExecMemcpyNodeSetParams(hGraphExec, node, pNodeParams : Optional[cu phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec cdef cyruntime.cudaMemcpy3DParms* 
cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams != None else NULL - err = cyruntime.cudaGraphExecMemcpyNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphExecMemcpyNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -33794,7 +34018,8 @@ def cudaGraphExecMemcpyNodeSetParams1D(hGraphExec, node, dst, src, size_t count, cysrc = utils.HelperInputVoidPtr(src) cdef void* cysrc_ptr = cysrc.cptr cdef cyruntime.cudaMemcpyKind cykind = kind.value - err = cyruntime.cudaGraphExecMemcpyNodeSetParams1D(cyhGraphExec, cynode, cydst_ptr, cysrc_ptr, count, cykind) + with nogil: + err = cyruntime.cudaGraphExecMemcpyNodeSetParams1D(cyhGraphExec, cynode, cydst_ptr, cysrc_ptr, count, cykind) return (_dict_cudaError_t[err],) {{endif}} @@ -33863,7 +34088,8 @@ def cudaGraphExecMemsetNodeSetParams(hGraphExec, node, pNodeParams : Optional[cu phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams != None else NULL - err = cyruntime.cudaGraphExecMemsetNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphExecMemsetNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -33917,7 +34143,8 @@ def cudaGraphExecHostNodeSetParams(hGraphExec, node, pNodeParams : Optional[cuda phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams != None else NULL - err = cyruntime.cudaGraphExecHostNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphExecHostNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -33986,7 +34213,8 @@ def cudaGraphExecChildGraphNodeSetParams(hGraphExec, node, childGraph): else: phGraphExec = 
int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - err = cyruntime.cudaGraphExecChildGraphNodeSetParams(cyhGraphExec, cynode, cychildGraph) + with nogil: + err = cyruntime.cudaGraphExecChildGraphNodeSetParams(cyhGraphExec, cynode, cychildGraph) return (_dict_cudaError_t[err],) {{endif}} @@ -34048,7 +34276,8 @@ def cudaGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event): else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - err = cyruntime.cudaGraphExecEventRecordNodeSetEvent(cyhGraphExec, cyhNode, cyevent) + with nogil: + err = cyruntime.cudaGraphExecEventRecordNodeSetEvent(cyhGraphExec, cyhNode, cyevent) return (_dict_cudaError_t[err],) {{endif}} @@ -34110,7 +34339,8 @@ def cudaGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event): else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - err = cyruntime.cudaGraphExecEventWaitNodeSetEvent(cyhGraphExec, cyhNode, cyevent) + with nogil: + err = cyruntime.cudaGraphExecEventWaitNodeSetEvent(cyhGraphExec, cyhNode, cyevent) return (_dict_cudaError_t[err],) {{endif}} @@ -34169,7 +34399,8 @@ def cudaGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodePa phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cyruntime.cudaGraphExecExternalSemaphoresSignalNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphExecExternalSemaphoresSignalNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -34228,7 +34459,8 @@ def cudaGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodePara phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = 
cyruntime.cudaGraphExecExternalSemaphoresWaitNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphExecExternalSemaphoresWaitNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -34290,7 +34522,8 @@ def cudaGraphNodeSetEnabled(hGraphExec, hNode, unsigned int isEnabled): else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - err = cyruntime.cudaGraphNodeSetEnabled(cyhGraphExec, cyhNode, isEnabled) + with nogil: + err = cyruntime.cudaGraphNodeSetEnabled(cyhGraphExec, cyhNode, isEnabled) return (_dict_cudaError_t[err],) {{endif}} @@ -34346,7 +34579,8 @@ def cudaGraphNodeGetEnabled(hGraphExec, hNode): phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec cdef unsigned int isEnabled = 0 - err = cyruntime.cudaGraphNodeGetEnabled(cyhGraphExec, cyhNode, &isEnabled) + with nogil: + err = cyruntime.cudaGraphNodeGetEnabled(cyhGraphExec, cyhNode, &isEnabled) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], isEnabled) @@ -34521,7 +34755,8 @@ def cudaGraphExecUpdate(hGraphExec, hGraph): phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec cdef cudaGraphExecUpdateResultInfo resultInfo = cudaGraphExecUpdateResultInfo() - err = cyruntime.cudaGraphExecUpdate(cyhGraphExec, cyhGraph, resultInfo._pvt_ptr) + with nogil: + err = cyruntime.cudaGraphExecUpdate(cyhGraphExec, cyhGraph, resultInfo._pvt_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], resultInfo) @@ -34571,7 +34806,8 @@ def cudaGraphUpload(graphExec, stream): else: pgraphExec = int(cudaGraphExec_t(graphExec)) cygraphExec = pgraphExec - err = cyruntime.cudaGraphUpload(cygraphExec, cystream) + with nogil: + err = cyruntime.cudaGraphUpload(cygraphExec, cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -34624,7 +34860,8 @@ def cudaGraphLaunch(graphExec, 
stream): else: pgraphExec = int(cudaGraphExec_t(graphExec)) cygraphExec = pgraphExec - err = cyruntime.cudaGraphLaunch(cygraphExec, cystream) + with nogil: + err = cyruntime.cudaGraphLaunch(cygraphExec, cystream) return (_dict_cudaError_t[err],) {{endif}} @@ -34658,7 +34895,8 @@ def cudaGraphExecDestroy(graphExec): else: pgraphExec = int(cudaGraphExec_t(graphExec)) cygraphExec = pgraphExec - err = cyruntime.cudaGraphExecDestroy(cygraphExec) + with nogil: + err = cyruntime.cudaGraphExecDestroy(cygraphExec) return (_dict_cudaError_t[err],) {{endif}} @@ -34692,7 +34930,8 @@ def cudaGraphDestroy(graph): else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - err = cyruntime.cudaGraphDestroy(cygraph) + with nogil: + err = cyruntime.cudaGraphDestroy(cygraph) return (_dict_cudaError_t[err],) {{endif}} @@ -34731,7 +34970,8 @@ def cudaGraphDebugDotPrint(graph, char* path, unsigned int flags): else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - err = cyruntime.cudaGraphDebugDotPrint(cygraph, path, flags) + with nogil: + err = cyruntime.cudaGraphDebugDotPrint(cygraph, path, flags) return (_dict_cudaError_t[err],) {{endif}} @@ -34790,7 +35030,8 @@ def cudaUserObjectCreate(ptr, destroy, unsigned int initialRefcount, unsigned in cdef cudaUserObject_t object_out = cudaUserObject_t() cyptr = utils.HelperInputVoidPtr(ptr) cdef void* cyptr_ptr = cyptr.cptr - err = cyruntime.cudaUserObjectCreate(object_out._pvt_ptr, cyptr_ptr, cydestroy, initialRefcount, flags) + with nogil: + err = cyruntime.cudaUserObjectCreate(object_out._pvt_ptr, cyptr_ptr, cydestroy, initialRefcount, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], object_out) @@ -34833,7 +35074,8 @@ def cudaUserObjectRetain(object, unsigned int count): else: pobject = int(cudaUserObject_t(object)) cyobject = pobject - err = cyruntime.cudaUserObjectRetain(cyobject, count) + with nogil: + err = cyruntime.cudaUserObjectRetain(cyobject, count) return 
(_dict_cudaError_t[err],) {{endif}} @@ -34877,7 +35119,8 @@ def cudaUserObjectRelease(object, unsigned int count): else: pobject = int(cudaUserObject_t(object)) cyobject = pobject - err = cyruntime.cudaUserObjectRelease(cyobject, count) + with nogil: + err = cyruntime.cudaUserObjectRelease(cyobject, count) return (_dict_cudaError_t[err],) {{endif}} @@ -34932,7 +35175,8 @@ def cudaGraphRetainUserObject(graph, object, unsigned int count, unsigned int fl else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - err = cyruntime.cudaGraphRetainUserObject(cygraph, cyobject, count, flags) + with nogil: + err = cyruntime.cudaGraphRetainUserObject(cygraph, cyobject, count, flags) return (_dict_cudaError_t[err],) {{endif}} @@ -34982,7 +35226,8 @@ def cudaGraphReleaseUserObject(graph, object, unsigned int count): else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - err = cyruntime.cudaGraphReleaseUserObject(cygraph, cyobject, count) + with nogil: + err = cyruntime.cudaGraphReleaseUserObject(cygraph, cyobject, count) return (_dict_cudaError_t[err],) {{endif}} @@ -35046,17 +35291,20 @@ def cudaGraphAddNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | Li cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if 
nodeParams != None else NULL - err = cyruntime.cudaGraphAddNode(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cynodeParams_ptr) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -35129,27 +35377,32 @@ def cudaGraphAddNode_v2(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | cygraph = pgraph cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 0: + if len(pDependencies) > 1: cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) else: for idx in range(len(pDependencies)): cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 0: + if len(dependencyData) > 1: cydependencyData = calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData)) if cydependencyData is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) for idx in range(len(dependencyData)): string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) if numDependencies > 
len(dependencyData): raise RuntimeError("List is too small: " + str(len(dependencyData)) + " < " + str(numDependencies)) cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cyruntime.cudaGraphAddNode_v2(pGraphNode._pvt_ptr, cygraph, (pDependencies[0])._pvt_ptr if len(pDependencies) == 1 else cypDependencies, (dependencyData[0])._pvt_ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, cynodeParams_ptr) - if cypDependencies is not NULL: + with nogil: + err = cyruntime.cudaGraphAddNode_v2(pGraphNode._pvt_ptr, cygraph, cypDependencies, cydependencyData, numDependencies, cynodeParams_ptr) + if len(pDependencies) > 1 and cypDependencies is not NULL: free(cypDependencies) - if cydependencyData is not NULL: + if len(dependencyData) > 1 and cydependencyData is not NULL: free(cydependencyData) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) @@ -35195,7 +35448,8 @@ def cudaGraphNodeSetParams(node, nodeParams : Optional[cudaGraphNodeParams]): pnode = int(cudaGraphNode_t(node)) cynode = pnode cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cyruntime.cudaGraphNodeSetParams(cynode, cynodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphNodeSetParams(cynode, cynodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -35254,7 +35508,8 @@ def cudaGraphExecNodeSetParams(graphExec, node, nodeParams : Optional[cudaGraphN pgraphExec = int(cudaGraphExec_t(graphExec)) cygraphExec = pgraphExec cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams != None else NULL - err = cyruntime.cudaGraphExecNodeSetParams(cygraphExec, cynode, cynodeParams_ptr) + with nogil: + err = cyruntime.cudaGraphExecNodeSetParams(cygraphExec, cynode, cynodeParams_ptr) return (_dict_cudaError_t[err],) {{endif}} @@ -35303,7 +35558,8 @@ def cudaGraphConditionalHandleCreate(graph, unsigned int 
defaultLaunchValue, uns pgraph = int(cudaGraph_t(graph)) cygraph = pgraph cdef cudaGraphConditionalHandle pHandle_out = cudaGraphConditionalHandle() - err = cyruntime.cudaGraphConditionalHandleCreate(pHandle_out._pvt_ptr, cygraph, defaultLaunchValue, flags) + with nogil: + err = cyruntime.cudaGraphConditionalHandleCreate(pHandle_out._pvt_ptr, cygraph, defaultLaunchValue, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pHandle_out) @@ -35400,7 +35656,8 @@ def cudaGetDriverEntryPoint(char* symbol, unsigned long long flags): """ cdef void_ptr funcPtr = 0 cdef cyruntime.cudaDriverEntryPointQueryResult driverStatus - err = cyruntime.cudaGetDriverEntryPoint(symbol, &funcPtr, flags, &driverStatus) + with nogil: + err = cyruntime.cudaGetDriverEntryPoint(symbol, &funcPtr, flags, &driverStatus) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None, None) return (_dict_cudaError_t[err], funcPtr, cudaDriverEntryPointQueryResult(driverStatus)) @@ -35505,7 +35762,8 @@ def cudaGetDriverEntryPointByVersion(char* symbol, unsigned int cudaVersion, uns """ cdef void_ptr funcPtr = 0 cdef cyruntime.cudaDriverEntryPointQueryResult driverStatus - err = cyruntime.cudaGetDriverEntryPointByVersion(symbol, &funcPtr, cudaVersion, flags, &driverStatus) + with nogil: + err = cyruntime.cudaGetDriverEntryPointByVersion(symbol, &funcPtr, cudaVersion, flags, &driverStatus) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None, None) return (_dict_cudaError_t[err], funcPtr, cudaDriverEntryPointQueryResult(driverStatus)) @@ -35594,14 +35852,17 @@ def cudaLibraryLoadData(code, jitOptions : Optional[Tuple[cudaJitOption] | List[ cdef vector[cyruntime.cudaJitOption] cyjitOptions = [pyjitOptions.value for pyjitOptions in (jitOptions)] pylist = [utils.HelperCudaJitOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] cdef utils.InputVoidPtrPtrHelper 
voidStarHelperjitOptionsValues = utils.InputVoidPtrPtrHelper(pylist) + cdef void** cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions = [pylibraryOptions.value for pylibraryOptions in (libraryOptions)] pylist = [utils.HelperCudaLibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] cdef utils.InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = utils.InputVoidPtrPtrHelper(pylist) + cdef void** cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions)) if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions)) - err = cyruntime.cudaLibraryLoadData(library._pvt_ptr, cycode_ptr, cyjitOptions.data(), voidStarHelperjitOptionsValues.cptr, numJitOptions, cylibraryOptions.data(), voidStarHelperlibraryOptionValues.cptr, numLibraryOptions) + with nogil: + err = cyruntime.cudaLibraryLoadData(library._pvt_ptr, cycode_ptr, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], library) @@ -35688,14 +35949,17 @@ def cudaLibraryLoadFromFile(char* fileName, jitOptions : Optional[Tuple[cudaJitO cdef vector[cyruntime.cudaJitOption] cyjitOptions = [pyjitOptions.value for pyjitOptions in (jitOptions)] pylist = [utils.HelperCudaJitOption(pyoptions, pyoptionValues) 
for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] cdef utils.InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = utils.InputVoidPtrPtrHelper(pylist) + cdef void** cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions = [pylibraryOptions.value for pylibraryOptions in (libraryOptions)] pylist = [utils.HelperCudaLibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] cdef utils.InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = utils.InputVoidPtrPtrHelper(pylist) + cdef void** cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions)) if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions)) - err = cyruntime.cudaLibraryLoadFromFile(library._pvt_ptr, fileName, cyjitOptions.data(), voidStarHelperjitOptionsValues.cptr, numJitOptions, cylibraryOptions.data(), voidStarHelperlibraryOptionValues.cptr, numLibraryOptions) + with nogil: + err = cyruntime.cudaLibraryLoadFromFile(library._pvt_ptr, fileName, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], library) @@ -35731,7 +35995,8 @@ def cudaLibraryUnload(library): else: plibrary = int(cudaLibrary_t(library)) cylibrary = plibrary - err = cyruntime.cudaLibraryUnload(cylibrary) + 
with nogil: + err = cyruntime.cudaLibraryUnload(cylibrary) return (_dict_cudaError_t[err],) {{endif}} @@ -35772,7 +36037,8 @@ def cudaLibraryGetKernel(library, char* name): plibrary = int(cudaLibrary_t(library)) cylibrary = plibrary cdef cudaKernel_t pKernel = cudaKernel_t() - err = cyruntime.cudaLibraryGetKernel(pKernel._pvt_ptr, cylibrary, name) + with nogil: + err = cyruntime.cudaLibraryGetKernel(pKernel._pvt_ptr, cylibrary, name) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pKernel) @@ -35823,7 +36089,8 @@ def cudaLibraryGetGlobal(library, char* name): cylibrary = plibrary cdef void_ptr dptr = 0 cdef size_t numbytes = 0 - err = cyruntime.cudaLibraryGetGlobal(&dptr, &numbytes, cylibrary, name) + with nogil: + err = cyruntime.cudaLibraryGetGlobal(&dptr, &numbytes, cylibrary, name) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None, None) return (_dict_cudaError_t[err], dptr, numbytes) @@ -35876,7 +36143,8 @@ def cudaLibraryGetManaged(library, char* name): cylibrary = plibrary cdef void_ptr dptr = 0 cdef size_t numbytes = 0 - err = cyruntime.cudaLibraryGetManaged(&dptr, &numbytes, cylibrary, name) + with nogil: + err = cyruntime.cudaLibraryGetManaged(&dptr, &numbytes, cylibrary, name) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None, None) return (_dict_cudaError_t[err], dptr, numbytes) @@ -35921,7 +36189,8 @@ def cudaLibraryGetUnifiedFunction(library, char* symbol): plibrary = int(cudaLibrary_t(library)) cylibrary = plibrary cdef void_ptr fptr = 0 - err = cyruntime.cudaLibraryGetUnifiedFunction(&fptr, cylibrary, symbol) + with nogil: + err = cyruntime.cudaLibraryGetUnifiedFunction(&fptr, cylibrary, symbol) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], fptr) @@ -35960,7 +36229,8 @@ def cudaLibraryGetKernelCount(lib): plib = int(cudaLibrary_t(lib)) cylib = plib cdef unsigned int count = 0 - err = 
cyruntime.cudaLibraryGetKernelCount(&count, cylib) + with nogil: + err = cyruntime.cudaLibraryGetKernelCount(&count, cylib) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], count) @@ -36008,7 +36278,8 @@ def cudaLibraryEnumerateKernels(unsigned int numKernels, lib): cykernels = calloc(numKernels, sizeof(cyruntime.cudaKernel_t)) if cykernels is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(numKernels) + 'x' + str(sizeof(cyruntime.cudaKernel_t))) - err = cyruntime.cudaLibraryEnumerateKernels(cykernels, numKernels, cylib) + with nogil: + err = cyruntime.cudaLibraryEnumerateKernels(cykernels, numKernels, cylib) if cudaError_t(err) == cudaError_t(0): pykernels = [cudaKernel_t(init_value=cykernels[idx]) for idx in range(numKernels)] if cykernels is not NULL: @@ -36119,7 +36390,8 @@ def cudaKernelSetAttributeForDevice(kernel, attr not None : cudaFuncAttribute, i pkernel = int(cudaKernel_t(kernel)) cykernel = pkernel cdef cyruntime.cudaFuncAttribute cyattr = attr.value - err = cyruntime.cudaKernelSetAttributeForDevice(cykernel, cyattr, value, device) + with nogil: + err = cyruntime.cudaKernelSetAttributeForDevice(cykernel, cyattr, value, device) return (_dict_cudaError_t[err],) {{endif}} @@ -36130,7 +36402,8 @@ def cudaGetExportTable(pExportTableId : Optional[cudaUUID_t]): """""" cdef void_ptr ppExportTable = 0 cdef cyruntime.cudaUUID_t* cypExportTableId_ptr = pExportTableId._pvt_ptr if pExportTableId != None else NULL - err = cyruntime.cudaGetExportTable(&ppExportTable, cypExportTableId_ptr) + with nogil: + err = cyruntime.cudaGetExportTable(&ppExportTable, cypExportTableId_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], ppExportTable) @@ -36173,7 +36446,8 @@ def cudaGetKernel(entryFuncAddr): cdef cudaKernel_t kernelPtr = cudaKernel_t() cyentryFuncAddr = utils.HelperInputVoidPtr(entryFuncAddr) cdef void* cyentryFuncAddr_ptr = 
cyentryFuncAddr.cptr - err = cyruntime.cudaGetKernel(kernelPtr._pvt_ptr, cyentryFuncAddr_ptr) + with nogil: + err = cyruntime.cudaGetKernel(kernelPtr._pvt_ptr, cyentryFuncAddr_ptr) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], kernelPtr) @@ -36212,7 +36486,8 @@ def make_cudaPitchedPtr(d, size_t p, size_t xsz, size_t ysz): """ cyd = utils.HelperInputVoidPtr(d) cdef void* cyd_ptr = cyd.cptr - err = cyruntime.make_cudaPitchedPtr(cyd_ptr, p, xsz, ysz) + with nogil: + err = cyruntime.make_cudaPitchedPtr(cyd_ptr, p, xsz, ysz) cdef cudaPitchedPtr wrapper = cudaPitchedPtr() wrapper._pvt_ptr[0] = err return wrapper @@ -36247,7 +36522,8 @@ def make_cudaPos(size_t x, size_t y, size_t z): -------- make_cudaExtent, make_cudaPitchedPtr """ - err = cyruntime.make_cudaPos(x, y, z) + with nogil: + err = cyruntime.make_cudaPos(x, y, z) cdef cudaPos wrapper = cudaPos() wrapper._pvt_ptr[0] = err return wrapper @@ -36283,7 +36559,8 @@ def make_cudaExtent(size_t w, size_t h, size_t d): -------- make_cudaPitchedPtr, make_cudaPos """ - err = cyruntime.make_cudaExtent(w, h, d) + with nogil: + err = cyruntime.make_cudaExtent(w, h, d) cdef cudaExtent wrapper = cudaExtent() wrapper._pvt_ptr[0] = err return wrapper @@ -36358,7 +36635,8 @@ def cudaGraphicsEGLRegisterImage(image, unsigned int flags): pimage = int(EGLImageKHR(image)) cyimage = pimage cdef cudaGraphicsResource_t pCudaResource = cudaGraphicsResource_t() - err = cyruntime.cudaGraphicsEGLRegisterImage(pCudaResource._pvt_ptr, cyimage, flags) + with nogil: + err = cyruntime.cudaGraphicsEGLRegisterImage(pCudaResource._pvt_ptr, cyimage, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], pCudaResource) @@ -36400,7 +36678,8 @@ def cudaEGLStreamConsumerConnect(eglStream): peglStream = int(EGLStreamKHR(eglStream)) cyeglStream = peglStream cdef cudaEglStreamConnection conn = cudaEglStreamConnection() - err = 
cyruntime.cudaEGLStreamConsumerConnect(conn._pvt_ptr, cyeglStream) + with nogil: + err = cyruntime.cudaEGLStreamConsumerConnect(conn._pvt_ptr, cyeglStream) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], conn) @@ -36446,7 +36725,8 @@ def cudaEGLStreamConsumerConnectWithFlags(eglStream, unsigned int flags): peglStream = int(EGLStreamKHR(eglStream)) cyeglStream = peglStream cdef cudaEglStreamConnection conn = cudaEglStreamConnection() - err = cyruntime.cudaEGLStreamConsumerConnectWithFlags(conn._pvt_ptr, cyeglStream, flags) + with nogil: + err = cyruntime.cudaEGLStreamConsumerConnectWithFlags(conn._pvt_ptr, cyeglStream, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], conn) @@ -36484,7 +36764,8 @@ def cudaEGLStreamConsumerDisconnect(conn): cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - err = cyruntime.cudaEGLStreamConsumerDisconnect(cyconn) + with nogil: + err = cyruntime.cudaEGLStreamConsumerDisconnect(cyconn) return (_dict_cudaError_t[err],) {{endif}} @@ -36549,7 +36830,8 @@ def cudaEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, unsigned int cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - err = cyruntime.cudaEGLStreamConsumerAcquireFrame(cyconn, cypCudaResource, cypStream, timeout) + with nogil: + err = cyruntime.cudaEGLStreamConsumerAcquireFrame(cyconn, cypCudaResource, cypStream, timeout) return (_dict_cudaError_t[err],) {{endif}} @@ -36608,7 +36890,8 @@ def cudaEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream): cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - err = cyruntime.cudaEGLStreamConsumerReleaseFrame(cyconn, cypCudaResource, cypStream) + with nogil: + err = cyruntime.cudaEGLStreamConsumerReleaseFrame(cyconn, 
cypCudaResource, cypStream) return (_dict_cudaError_t[err],) {{endif}} @@ -36668,7 +36951,8 @@ def cudaEGLStreamProducerConnect(eglStream, width, height): peglStream = int(EGLStreamKHR(eglStream)) cyeglStream = peglStream cdef cudaEglStreamConnection conn = cudaEglStreamConnection() - err = cyruntime.cudaEGLStreamProducerConnect(conn._pvt_ptr, cyeglStream, cywidth, cyheight) + with nogil: + err = cyruntime.cudaEGLStreamProducerConnect(conn._pvt_ptr, cyeglStream, cywidth, cyheight) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], conn) @@ -36706,7 +36990,8 @@ def cudaEGLStreamProducerDisconnect(conn): cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - err = cyruntime.cudaEGLStreamProducerDisconnect(cyconn) + with nogil: + err = cyruntime.cudaEGLStreamProducerDisconnect(cyconn) return (_dict_cudaError_t[err],) {{endif}} @@ -36765,7 +37050,8 @@ def cudaEGLStreamProducerPresentFrame(conn, eglframe not None : cudaEglFrame, pS cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - err = cyruntime.cudaEGLStreamProducerPresentFrame(cyconn, eglframe._pvt_ptr[0], cypStream) + with nogil: + err = cyruntime.cudaEGLStreamProducerPresentFrame(cyconn, eglframe._pvt_ptr[0], cypStream) return (_dict_cudaError_t[err],) {{endif}} @@ -36819,7 +37105,8 @@ def cudaEGLStreamProducerReturnFrame(conn, eglframe : Optional[cudaEglFrame], pS else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) cdef cyruntime.cudaEglFrame* cyeglframe_ptr = eglframe._pvt_ptr if eglframe != None else NULL - err = cyruntime.cudaEGLStreamProducerReturnFrame(cyconn, cyeglframe_ptr, cypStream) + with nogil: + err = cyruntime.cudaEGLStreamProducerReturnFrame(cyconn, cyeglframe_ptr, cypStream) return (_dict_cudaError_t[err],) {{endif}} @@ -36870,7 +37157,8 @@ def 
cudaGraphicsResourceGetMappedEglFrame(resource, unsigned int index, unsigned presource = int(cudaGraphicsResource_t(resource)) cyresource = presource cdef cudaEglFrame eglFrame = cudaEglFrame() - err = cyruntime.cudaGraphicsResourceGetMappedEglFrame(eglFrame._pvt_ptr, cyresource, index, mipLevel) + with nogil: + err = cyruntime.cudaGraphicsResourceGetMappedEglFrame(eglFrame._pvt_ptr, cyresource, index, mipLevel) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], eglFrame) @@ -36925,7 +37213,8 @@ def cudaEventCreateFromEGLSync(eglSync, unsigned int flags): peglSync = int(EGLSyncKHR(eglSync)) cyeglSync = peglSync cdef cudaEvent_t phEvent = cudaEvent_t() - err = cyruntime.cudaEventCreateFromEGLSync(phEvent._pvt_ptr, cyeglSync, flags) + with nogil: + err = cyruntime.cudaEventCreateFromEGLSync(phEvent._pvt_ptr, cyeglSync, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], phEvent) @@ -36954,7 +37243,8 @@ def cudaProfilerStart(): -------- :py:obj:`~.cudaProfilerStop`, :py:obj:`~.cuProfilerStart` """ - err = cyruntime.cudaProfilerStart() + with nogil: + err = cyruntime.cudaProfilerStart() return (_dict_cudaError_t[err],) {{endif}} @@ -36981,7 +37271,8 @@ def cudaProfilerStop(): -------- :py:obj:`~.cudaProfilerStart`, :py:obj:`~.cuProfilerStop` """ - err = cyruntime.cudaProfilerStop() + with nogil: + err = cyruntime.cudaProfilerStop() return (_dict_cudaError_t[err],) {{endif}} @@ -37042,7 +37333,8 @@ def cudaGLGetDevices(unsigned int cudaDeviceCount, deviceList not None : cudaGLD if cypCudaDevices is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(cudaDeviceCount) + 'x' + str(sizeof(int))) cdef cyruntime.cudaGLDeviceList cydeviceList = deviceList.value - err = cyruntime.cudaGLGetDevices(&pCudaDeviceCount, cypCudaDevices, cudaDeviceCount, cydeviceList) + with nogil: + err = cyruntime.cudaGLGetDevices(&pCudaDeviceCount, cypCudaDevices, 
cudaDeviceCount, cydeviceList) if cudaError_t(err) == cudaError_t(0): pypCudaDevices = [cypCudaDevices[idx] for idx in range(cudaDeviceCount)] if cypCudaDevices is not NULL: @@ -37145,7 +37437,8 @@ def cudaGraphicsGLRegisterImage(image, target, unsigned int flags): pimage = int(GLuint(image)) cyimage = pimage cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t() - err = cyruntime.cudaGraphicsGLRegisterImage(resource._pvt_ptr, cyimage, cytarget, flags) + with nogil: + err = cyruntime.cudaGraphicsGLRegisterImage(resource._pvt_ptr, cyimage, cytarget, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], resource) @@ -37201,7 +37494,8 @@ def cudaGraphicsGLRegisterBuffer(buffer, unsigned int flags): pbuffer = int(GLuint(buffer)) cybuffer = pbuffer cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t() - err = cyruntime.cudaGraphicsGLRegisterBuffer(resource._pvt_ptr, cybuffer, flags) + with nogil: + err = cyruntime.cudaGraphicsGLRegisterBuffer(resource._pvt_ptr, cybuffer, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], resource) @@ -37253,7 +37547,8 @@ def cudaVDPAUGetDevice(vdpDevice, vdpGetProcAddress): pvdpDevice = int(VdpDevice(vdpDevice)) cyvdpDevice = pvdpDevice cdef int device = 0 - err = cyruntime.cudaVDPAUGetDevice(&device, cyvdpDevice, cyvdpGetProcAddress) + with nogil: + err = cyruntime.cudaVDPAUGetDevice(&device, cyvdpDevice, cyvdpGetProcAddress) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], device) @@ -37313,7 +37608,8 @@ def cudaVDPAUSetVDPAUDevice(int device, vdpDevice, vdpGetProcAddress): else: pvdpDevice = int(VdpDevice(vdpDevice)) cyvdpDevice = pvdpDevice - err = cyruntime.cudaVDPAUSetVDPAUDevice(device, cyvdpDevice, cyvdpGetProcAddress) + with nogil: + err = cyruntime.cudaVDPAUSetVDPAUDevice(device, cyvdpDevice, cyvdpGetProcAddress) return 
(_dict_cudaError_t[err],) {{endif}} @@ -37367,7 +37663,8 @@ def cudaGraphicsVDPAURegisterVideoSurface(vdpSurface, unsigned int flags): pvdpSurface = int(VdpVideoSurface(vdpSurface)) cyvdpSurface = pvdpSurface cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t() - err = cyruntime.cudaGraphicsVDPAURegisterVideoSurface(resource._pvt_ptr, cyvdpSurface, flags) + with nogil: + err = cyruntime.cudaGraphicsVDPAURegisterVideoSurface(resource._pvt_ptr, cyvdpSurface, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], resource) @@ -37423,7 +37720,8 @@ def cudaGraphicsVDPAURegisterOutputSurface(vdpSurface, unsigned int flags): pvdpSurface = int(VdpOutputSurface(vdpSurface)) cyvdpSurface = pvdpSurface cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t() - err = cyruntime.cudaGraphicsVDPAURegisterOutputSurface(resource._pvt_ptr, cyvdpSurface, flags) + with nogil: + err = cyruntime.cudaGraphicsVDPAURegisterOutputSurface(resource._pvt_ptr, cyvdpSurface, flags) if err != cyruntime.cudaSuccess: return (_dict_cudaError_t[err], None) return (_dict_cudaError_t[err], resource) diff --git a/cuda_bindings/docs/source/module/driver.rst b/cuda_bindings/docs/source/module/driver.rst index 8adc8272d..f0abf24a7 100644 --- a/cuda_bindings/docs/source/module/driver.rst +++ b/cuda_bindings/docs/source/module/driver.rst @@ -1,4 +1,4 @@ -.. SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. .. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE ------ diff --git a/cuda_bindings/docs/source/module/nvrtc.rst b/cuda_bindings/docs/source/module/nvrtc.rst index 391125896..324db4f05 100644 --- a/cuda_bindings/docs/source/module/nvrtc.rst +++ b/cuda_bindings/docs/source/module/nvrtc.rst @@ -1,4 +1,4 @@ -.. 
SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. .. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE ----- @@ -243,7 +243,7 @@ Enable device code optimization. When specified along with ``-G``\ , enables lim - ``--Ofast-compile={0|min|mid|max}``\ (``-Ofc``\ ) -Specify level to prefer device code compilation speed, where 'max' focuses only on the fastest compilation speed, 'mid' balances compile time and runtime, 'min' has a more minimal impact on both, and 0 (default) is normal compilation +Specify the fast-compile level for device code, which controls the tradeoff between compilation speed and runtime performance by disabling certain optimizations at varying levels. diff --git a/cuda_bindings/docs/source/module/runtime.rst b/cuda_bindings/docs/source/module/runtime.rst index 87c276051..5795c5249 100644 --- a/cuda_bindings/docs/source/module/runtime.rst +++ b/cuda_bindings/docs/source/module/runtime.rst @@ -1,4 +1,4 @@ -.. SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. .. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE ------- diff --git a/cuda_bindings/docs/source/release/12.X.Y-notes.rst b/cuda_bindings/docs/source/release/12.X.Y-notes.rst index 4ac1f4da6..f6989f686 100644 --- a/cuda_bindings/docs/source/release/12.X.Y-notes.rst +++ b/cuda_bindings/docs/source/release/12.X.Y-notes.rst @@ -16,6 +16,9 @@ Highlights * Currently using this module requires NumPy to be present. Any recent NumPy 1.x or 2.x should work. +* Python bindings in every module, including ``driver``, ``runtime``, and ``nvrtc``, now have the GIL + released before calling the underlying C APIs. + Bug fixes ---------