diff --git a/cuda_bindings/cuda/bindings/_internal/cufile.pxd b/cuda_bindings/cuda/bindings/_internal/cufile.pxd index ac9c3b702..cdbb776fd 100644 --- a/cuda_bindings/cuda/bindings/_internal/cufile.pxd +++ b/cuda_bindings/cuda/bindings/_internal/cufile.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.2. Do not modify it directly. +# This code was automatically generated across versions from 12.9.1 to 13.0.2. Do not modify it directly. from ..cycufile cimport * @@ -18,6 +18,7 @@ cdef CUfileError_t _cuFileBufDeregister(const void* bufPtr_base) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t _cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileDriverClose_v2() except?CUFILE_LOADING_ERROR nogil cdef long _cuFileUseCount() except* nogil cdef CUfileError_t _cuFileDriverGetProperties(CUfileDrvProps_t* props) except?CUFILE_LOADING_ERROR nogil @@ -41,7 +42,6 @@ cdef CUfileError_t _cuFileGetParameterString(CUFileStringConfigParameter_t param cdef CUfileError_t _cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileSetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileSetParameterString(CUFileStringConfigParameter_t param, const char* desc_str) except?CUFILE_LOADING_ERROR nogil -cdef CUfileError_t _cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileGetStatsLevel(int* level) except?CUFILE_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx index 797aa6934..33b638464 100644 --- a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.2. Do not modify it directly. +# This code was automatically generated across versions from 12.9.1 to 13.0.2. Do not modify it directly. 
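For context: the `_cuFileDriverClose` declaration above has only moved next to `_cuFileDriverClose_v2` (the generator was re-run against the 12.9.1 baseline); the symbol is still resolved lazily by name in `cufile_linux.pyx` below, first from the already-loaded process image and then from an explicitly loaded libcufile. A rough ctypes sketch of that lookup pattern, offered purely for illustration (the helper name and the use of `ctypes` are mine, not part of the generated bindings):

    # Illustrative only: mirrors the dlsym(RTLD_DEFAULT, name) lookup with a fallback
    # to an explicit load of libcufile, as the generated __check_or_init_cufile() does.
    import ctypes
    import ctypes.util

    def _resolve_cufile_symbol(name="cuFileDriverClose"):
        try:
            # CDLL(None) searches symbols already loaded into the current process.
            return getattr(ctypes.CDLL(None), name)
        except (AttributeError, OSError):
            pass
        path = ctypes.util.find_library("cufile")  # e.g. libcufile.so.*
        if path is None:
            raise OSError("libcufile not found")
        return getattr(ctypes.CDLL(path), name)
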
from libc.stdint cimport intptr_t, uintptr_t import threading @@ -68,6 +68,7 @@ cdef void* __cuFileBufDeregister = NULL cdef void* __cuFileRead = NULL cdef void* __cuFileWrite = NULL cdef void* __cuFileDriverOpen = NULL +cdef void* __cuFileDriverClose = NULL cdef void* __cuFileDriverClose_v2 = NULL cdef void* __cuFileUseCount = NULL cdef void* __cuFileDriverGetProperties = NULL @@ -91,7 +92,6 @@ cdef void* __cuFileGetParameterString = NULL cdef void* __cuFileSetParameterSizeT = NULL cdef void* __cuFileSetParameterBool = NULL cdef void* __cuFileSetParameterString = NULL -cdef void* __cuFileDriverClose = NULL cdef void* __cuFileGetParameterMinMaxValue = NULL cdef void* __cuFileSetStatsLevel = NULL cdef void* __cuFileGetStatsLevel = NULL @@ -167,6 +167,13 @@ cdef int __check_or_init_cufile() except -1 nogil: handle = load_library() __cuFileDriverOpen = dlsym(handle, 'cuFileDriverOpen') + global __cuFileDriverClose + __cuFileDriverClose = dlsym(RTLD_DEFAULT, 'cuFileDriverClose') + if __cuFileDriverClose == NULL: + if handle == NULL: + handle = load_library() + __cuFileDriverClose = dlsym(handle, 'cuFileDriverClose') + global __cuFileDriverClose_v2 __cuFileDriverClose_v2 = dlsym(RTLD_DEFAULT, 'cuFileDriverClose_v2') if __cuFileDriverClose_v2 == NULL: @@ -328,13 +335,6 @@ cdef int __check_or_init_cufile() except -1 nogil: handle = load_library() __cuFileSetParameterString = dlsym(handle, 'cuFileSetParameterString') - global __cuFileDriverClose - __cuFileDriverClose = dlsym(RTLD_DEFAULT, 'cuFileDriverClose') - if __cuFileDriverClose == NULL: - if handle == NULL: - handle = load_library() - __cuFileDriverClose = dlsym(handle, 'cuFileDriverClose') - global __cuFileGetParameterMinMaxValue __cuFileGetParameterMinMaxValue = dlsym(RTLD_DEFAULT, 'cuFileGetParameterMinMaxValue') if __cuFileGetParameterMinMaxValue == NULL: @@ -462,6 +462,9 @@ cpdef dict _inspect_function_pointers(): global __cuFileDriverOpen data["__cuFileDriverOpen"] = __cuFileDriverOpen + global __cuFileDriverClose + data["__cuFileDriverClose"] = __cuFileDriverClose + global __cuFileDriverClose_v2 data["__cuFileDriverClose_v2"] = __cuFileDriverClose_v2 @@ -531,9 +534,6 @@ cpdef dict _inspect_function_pointers(): global __cuFileSetParameterString data["__cuFileSetParameterString"] = __cuFileSetParameterString - global __cuFileDriverClose - data["__cuFileDriverClose"] = __cuFileDriverClose - global __cuFileGetParameterMinMaxValue data["__cuFileGetParameterMinMaxValue"] = __cuFileGetParameterMinMaxValue @@ -656,6 +656,16 @@ cdef CUfileError_t _cuFileDriverOpen() except?CUFILE_LOADING_ERRO ) +cdef CUfileError_t _cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil: + global __cuFileDriverClose + _check_or_init_cufile() + if __cuFileDriverClose == NULL: + with gil: + raise FunctionNotFoundError("function cuFileDriverClose is not found") + return (__cuFileDriverClose)( + ) + + cdef CUfileError_t _cuFileDriverClose_v2() except?CUFILE_LOADING_ERROR nogil: global __cuFileDriverClose_v2 _check_or_init_cufile() @@ -887,16 +897,6 @@ cdef CUfileError_t _cuFileSetParameterString(CUFileStringConfigParameter_t param param, desc_str) -cdef CUfileError_t _cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil: - global __cuFileDriverClose - _check_or_init_cufile() - if __cuFileDriverClose == NULL: - with gil: - raise FunctionNotFoundError("function cuFileDriverClose is not found") - return (__cuFileDriverClose)( - ) - - cdef CUfileError_t _cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) 
except?CUFILE_LOADING_ERROR nogil: global __cuFileGetParameterMinMaxValue _check_or_init_cufile() diff --git a/cuda_bindings/cuda/bindings/cufile.pxd b/cuda_bindings/cuda/bindings/cufile.pxd index a5503f1d9..13fbb68cb 100644 --- a/cuda_bindings/cuda/bindings/cufile.pxd +++ b/cuda_bindings/cuda/bindings/cufile.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.2. Do not modify it directly. +# This code was automatically generated across versions from 12.9.1 to 13.0.2. Do not modify it directly. from libc.stdint cimport intptr_t @@ -18,12 +18,7 @@ ctypedef CUfileBatchHandle_t BatchHandle ctypedef CUfileError_t Error ctypedef cufileRDMAInfo_t RDMAInfo ctypedef CUfileFSOps_t FSOps -ctypedef CUfileOpCounter_t OpCounter -ctypedef CUfilePerGpuStats_t PerGpuStats ctypedef CUfileDrvProps_t DrvProps -ctypedef CUfileStatsLevel1_t StatsLevel1 -ctypedef CUfileStatsLevel2_t StatsLevel2 -ctypedef CUfileStatsLevel3_t StatsLevel3 ############################################################################### @@ -77,3 +72,15 @@ cpdef str get_parameter_string(int param, int len) cpdef set_parameter_size_t(int param, size_t value) cpdef set_parameter_bool(int param, bint value) cpdef set_parameter_string(int param, intptr_t desc_str) +cpdef tuple get_parameter_min_max_value(int param) +cpdef set_stats_level(int level) +cpdef int get_stats_level() except? 0 +cpdef stats_start() +cpdef stats_stop() +cpdef stats_reset() +cpdef get_stats_l1(intptr_t stats) +cpdef get_stats_l2(intptr_t stats) +cpdef get_stats_l3(intptr_t stats) +cpdef size_t get_bar_size_in_kb(int gpu_ind_ex) except? 0 +cpdef set_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len) +cpdef get_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index 9fc6f9644..8f4ab7b11 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.2. Do not modify it directly. +# This code was automatically generated across versions from 12.9.1 to 13.0.2. Do not modify it directly. cimport cython # NOQA from libc cimport errno @@ -393,6 +393,488 @@ cdef class IOEvents: return obj +op_counter_dtype = _numpy.dtype([ + ("ok", _numpy.uint64, ), + ("err", _numpy.uint64, ), + ], align=True) + + +cdef class OpCounter: + """Empty-initialize an instance of `CUfileOpCounter_t`. + + + .. 
seealso:: `CUfileOpCounter_t` + """ + cdef: + readonly object _data + + def __init__(self): + arr = _numpy.empty(1, dtype=op_counter_dtype) + self._data = arr.view(_numpy.recarray) + assert self._data.itemsize == sizeof(CUfileOpCounter_t), \ + f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfileOpCounter_t)}" + + def __repr__(self): + return f"<{__name__}.OpCounter object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return self._data.ctypes.data + + def __int__(self): + return self._data.ctypes.data + + def __eq__(self, other): + if not isinstance(other, OpCounter): + return False + if self._data.size != other._data.size: + return False + if self._data.dtype != other._data.dtype: + return False + return bool((self._data == other._data).all()) + + @property + def ok(self): + """int: """ + return int(self._data.ok[0]) + + @ok.setter + def ok(self, val): + self._data.ok = val + + @property + def err(self): + """int: """ + return int(self._data.err[0]) + + @err.setter + def err(self, val): + self._data.err = val + + def __setitem__(self, key, val): + self._data[key] = val + + @staticmethod + def from_data(data): + """Create an OpCounter instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a 1D array of dtype `op_counter_dtype` holding the data. + """ + cdef OpCounter obj = OpCounter.__new__(OpCounter) + if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): + raise TypeError("data argument must be a NumPy ndarray") + if data.ndim != 1: + raise ValueError("data array must be 1D") + if data.dtype != op_counter_dtype: + raise ValueError("data array must be of dtype op_counter_dtype") + obj._data = data.view(_numpy.recarray) + + return obj + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False): + """Create an OpCounter instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + readonly (bool): whether the data is read-only (to the user). default is `False`. 
+ """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef OpCounter obj = OpCounter.__new__(OpCounter) + cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE + cdef object buf = PyMemoryView_FromMemory( + ptr, sizeof(CUfileOpCounter_t), flag) + data = _numpy.ndarray((1,), buffer=buf, + dtype=op_counter_dtype) + obj._data = data.view(_numpy.recarray) + + return obj + + +per_gpu_stats_dtype = _numpy.dtype([ + ("uuid", _numpy.int8, (16,)), + ("read_bytes", _numpy.uint64, ), + ("read_bw_bytes_per_sec", _numpy.uint64, ), + ("read_utilization", _numpy.uint64, ), + ("read_duration_us", _numpy.uint64, ), + ("n_total_reads", _numpy.uint64, ), + ("n_p2p_reads", _numpy.uint64, ), + ("n_nvfs_reads", _numpy.uint64, ), + ("n_posix_reads", _numpy.uint64, ), + ("n_unaligned_reads", _numpy.uint64, ), + ("n_dr_reads", _numpy.uint64, ), + ("n_sparse_regions", _numpy.uint64, ), + ("n_inline_regions", _numpy.uint64, ), + ("n_reads_err", _numpy.uint64, ), + ("writes_bytes", _numpy.uint64, ), + ("write_bw_bytes_per_sec", _numpy.uint64, ), + ("write_utilization", _numpy.uint64, ), + ("write_duration_us", _numpy.uint64, ), + ("n_total_writes", _numpy.uint64, ), + ("n_p2p_writes", _numpy.uint64, ), + ("n_nvfs_writes", _numpy.uint64, ), + ("n_posix_writes", _numpy.uint64, ), + ("n_unaligned_writes", _numpy.uint64, ), + ("n_dr_writes", _numpy.uint64, ), + ("n_writes_err", _numpy.uint64, ), + ("n_mmap", _numpy.uint64, ), + ("n_mmap_ok", _numpy.uint64, ), + ("n_mmap_err", _numpy.uint64, ), + ("n_mmap_free", _numpy.uint64, ), + ("reg_bytes", _numpy.uint64, ), + ], align=True) + + +cdef class PerGpuStats: + """Empty-initialize an instance of `CUfilePerGpuStats_t`. + + + .. seealso:: `CUfilePerGpuStats_t` + """ + cdef: + readonly object _data + + def __init__(self): + arr = _numpy.empty(1, dtype=per_gpu_stats_dtype) + self._data = arr.view(_numpy.recarray) + assert self._data.itemsize == sizeof(CUfilePerGpuStats_t), \ + f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfilePerGpuStats_t)}" + + def __repr__(self): + return f"<{__name__}.PerGpuStats object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return self._data.ctypes.data + + def __int__(self): + return self._data.ctypes.data + + def __eq__(self, other): + if not isinstance(other, PerGpuStats): + return False + if self._data.size != other._data.size: + return False + if self._data.dtype != other._data.dtype: + return False + return bool((self._data == other._data).all()) + + @property + def uuid(self): + """~_numpy.int8: (array of length 16).""" + return self._data.uuid + + @uuid.setter + def uuid(self, val): + self._data.uuid = val + + @property + def read_bytes(self): + """int: """ + return int(self._data.read_bytes[0]) + + @read_bytes.setter + def read_bytes(self, val): + self._data.read_bytes = val + + @property + def read_bw_bytes_per_sec(self): + """int: """ + return int(self._data.read_bw_bytes_per_sec[0]) + + @read_bw_bytes_per_sec.setter + def read_bw_bytes_per_sec(self, val): + self._data.read_bw_bytes_per_sec = val + + @property + def read_utilization(self): + """int: """ + return int(self._data.read_utilization[0]) + + @read_utilization.setter + def read_utilization(self, val): + self._data.read_utilization = val + + @property + def read_duration_us(self): + """int: """ + return int(self._data.read_duration_us[0]) + + @read_duration_us.setter + def read_duration_us(self, val): + self._data.read_duration_us = val + + @property + def 
n_total_reads(self): + """int: """ + return int(self._data.n_total_reads[0]) + + @n_total_reads.setter + def n_total_reads(self, val): + self._data.n_total_reads = val + + @property + def n_p2p_reads(self): + """int: """ + return int(self._data.n_p2p_reads[0]) + + @n_p2p_reads.setter + def n_p2p_reads(self, val): + self._data.n_p2p_reads = val + + @property + def n_nvfs_reads(self): + """int: """ + return int(self._data.n_nvfs_reads[0]) + + @n_nvfs_reads.setter + def n_nvfs_reads(self, val): + self._data.n_nvfs_reads = val + + @property + def n_posix_reads(self): + """int: """ + return int(self._data.n_posix_reads[0]) + + @n_posix_reads.setter + def n_posix_reads(self, val): + self._data.n_posix_reads = val + + @property + def n_unaligned_reads(self): + """int: """ + return int(self._data.n_unaligned_reads[0]) + + @n_unaligned_reads.setter + def n_unaligned_reads(self, val): + self._data.n_unaligned_reads = val + + @property + def n_dr_reads(self): + """int: """ + return int(self._data.n_dr_reads[0]) + + @n_dr_reads.setter + def n_dr_reads(self, val): + self._data.n_dr_reads = val + + @property + def n_sparse_regions(self): + """int: """ + return int(self._data.n_sparse_regions[0]) + + @n_sparse_regions.setter + def n_sparse_regions(self, val): + self._data.n_sparse_regions = val + + @property + def n_inline_regions(self): + """int: """ + return int(self._data.n_inline_regions[0]) + + @n_inline_regions.setter + def n_inline_regions(self, val): + self._data.n_inline_regions = val + + @property + def n_reads_err(self): + """int: """ + return int(self._data.n_reads_err[0]) + + @n_reads_err.setter + def n_reads_err(self, val): + self._data.n_reads_err = val + + @property + def writes_bytes(self): + """int: """ + return int(self._data.writes_bytes[0]) + + @writes_bytes.setter + def writes_bytes(self, val): + self._data.writes_bytes = val + + @property + def write_bw_bytes_per_sec(self): + """int: """ + return int(self._data.write_bw_bytes_per_sec[0]) + + @write_bw_bytes_per_sec.setter + def write_bw_bytes_per_sec(self, val): + self._data.write_bw_bytes_per_sec = val + + @property + def write_utilization(self): + """int: """ + return int(self._data.write_utilization[0]) + + @write_utilization.setter + def write_utilization(self, val): + self._data.write_utilization = val + + @property + def write_duration_us(self): + """int: """ + return int(self._data.write_duration_us[0]) + + @write_duration_us.setter + def write_duration_us(self, val): + self._data.write_duration_us = val + + @property + def n_total_writes(self): + """int: """ + return int(self._data.n_total_writes[0]) + + @n_total_writes.setter + def n_total_writes(self, val): + self._data.n_total_writes = val + + @property + def n_p2p_writes(self): + """int: """ + return int(self._data.n_p2p_writes[0]) + + @n_p2p_writes.setter + def n_p2p_writes(self, val): + self._data.n_p2p_writes = val + + @property + def n_nvfs_writes(self): + """int: """ + return int(self._data.n_nvfs_writes[0]) + + @n_nvfs_writes.setter + def n_nvfs_writes(self, val): + self._data.n_nvfs_writes = val + + @property + def n_posix_writes(self): + """int: """ + return int(self._data.n_posix_writes[0]) + + @n_posix_writes.setter + def n_posix_writes(self, val): + self._data.n_posix_writes = val + + @property + def n_unaligned_writes(self): + """int: """ + return int(self._data.n_unaligned_writes[0]) + + @n_unaligned_writes.setter + def n_unaligned_writes(self, val): + self._data.n_unaligned_writes = val + + @property + def n_dr_writes(self): + """int: """ + return 
int(self._data.n_dr_writes[0]) + + @n_dr_writes.setter + def n_dr_writes(self, val): + self._data.n_dr_writes = val + + @property + def n_writes_err(self): + """int: """ + return int(self._data.n_writes_err[0]) + + @n_writes_err.setter + def n_writes_err(self, val): + self._data.n_writes_err = val + + @property + def n_mmap(self): + """int: """ + return int(self._data.n_mmap[0]) + + @n_mmap.setter + def n_mmap(self, val): + self._data.n_mmap = val + + @property + def n_mmap_ok(self): + """int: """ + return int(self._data.n_mmap_ok[0]) + + @n_mmap_ok.setter + def n_mmap_ok(self, val): + self._data.n_mmap_ok = val + + @property + def n_mmap_err(self): + """int: """ + return int(self._data.n_mmap_err[0]) + + @n_mmap_err.setter + def n_mmap_err(self, val): + self._data.n_mmap_err = val + + @property + def n_mmap_free(self): + """int: """ + return int(self._data.n_mmap_free[0]) + + @n_mmap_free.setter + def n_mmap_free(self, val): + self._data.n_mmap_free = val + + @property + def reg_bytes(self): + """int: """ + return int(self._data.reg_bytes[0]) + + @reg_bytes.setter + def reg_bytes(self, val): + self._data.reg_bytes = val + + def __setitem__(self, key, val): + self._data[key] = val + + @staticmethod + def from_data(data): + """Create an PerGpuStats instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a 1D array of dtype `per_gpu_stats_dtype` holding the data. + """ + cdef PerGpuStats obj = PerGpuStats.__new__(PerGpuStats) + if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): + raise TypeError("data argument must be a NumPy ndarray") + if data.ndim != 1: + raise ValueError("data array must be 1D") + if data.dtype != per_gpu_stats_dtype: + raise ValueError("data array must be of dtype per_gpu_stats_dtype") + obj._data = data.view(_numpy.recarray) + + return obj + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False): + """Create an PerGpuStats instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + readonly (bool): whether the data is read-only (to the user). default is `False`. + """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef PerGpuStats obj = PerGpuStats.__new__(PerGpuStats) + cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE + cdef object buf = PyMemoryView_FromMemory( + ptr, sizeof(CUfilePerGpuStats_t), flag) + data = _numpy.ndarray((1,), buffer=buf, + dtype=per_gpu_stats_dtype) + obj._data = data.view(_numpy.recarray) + + return obj + + descr_dtype = _numpy.dtype([ ("type", _numpy.int32, ), ("handle", _py_anon_pod1_dtype, ), @@ -583,36 +1065,549 @@ cdef class _py_anon_pod2: return bool((self._data == other._data).all()) @property - def batch(self): - """_py_anon_pod3: """ - return self._batch + def batch(self): + """_py_anon_pod3: """ + return self._batch + + def __setitem__(self, key, val): + self._data[key] = val + + @staticmethod + def from_data(data): + """Create an _py_anon_pod2 instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a 1D array of dtype `_py_anon_pod2_dtype` holding the data. 
+ """ + cdef _py_anon_pod2 obj = _py_anon_pod2.__new__(_py_anon_pod2) + if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): + raise TypeError("data argument must be a NumPy ndarray") + if data.ndim != 1: + raise ValueError("data array must be 1D") + if data.dtype != _py_anon_pod2_dtype: + raise ValueError("data array must be of dtype _py_anon_pod2_dtype") + obj._data = data.view(_numpy.recarray) + + batch_addr = obj._data.batch[0].__array_interface__['data'][0] + obj._batch = _py_anon_pod3.from_ptr(batch_addr) + return obj + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False): + """Create an _py_anon_pod2 instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + readonly (bool): whether the data is read-only (to the user). default is `False`. + """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef _py_anon_pod2 obj = _py_anon_pod2.__new__(_py_anon_pod2) + cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE + cdef object buf = PyMemoryView_FromMemory( + ptr, sizeof((NULL).u), flag) + data = _numpy.ndarray((1,), buffer=buf, + dtype=_py_anon_pod2_dtype) + obj._data = data.view(_numpy.recarray) + + batch_addr = obj._data.batch[0].__array_interface__['data'][0] + obj._batch = _py_anon_pod3.from_ptr(batch_addr) + return obj + + +stats_level1_dtype = _numpy.dtype([ + ("read_ops", op_counter_dtype, ), + ("write_ops", op_counter_dtype, ), + ("hdl_register_ops", op_counter_dtype, ), + ("hdl_deregister_ops", op_counter_dtype, ), + ("buf_register_ops", op_counter_dtype, ), + ("buf_deregister_ops", op_counter_dtype, ), + ("read_bytes", _numpy.uint64, ), + ("write_bytes", _numpy.uint64, ), + ("read_bw_bytes_per_sec", _numpy.uint64, ), + ("write_bw_bytes_per_sec", _numpy.uint64, ), + ("read_lat_avg_us", _numpy.uint64, ), + ("write_lat_avg_us", _numpy.uint64, ), + ("read_ops_per_sec", _numpy.uint64, ), + ("write_ops_per_sec", _numpy.uint64, ), + ("read_lat_sum_us", _numpy.uint64, ), + ("write_lat_sum_us", _numpy.uint64, ), + ("batch_submit_ops", op_counter_dtype, ), + ("batch_complete_ops", op_counter_dtype, ), + ("batch_setup_ops", op_counter_dtype, ), + ("batch_cancel_ops", op_counter_dtype, ), + ("batch_destroy_ops", op_counter_dtype, ), + ("batch_enqueued_ops", op_counter_dtype, ), + ("batch_posix_enqueued_ops", op_counter_dtype, ), + ("batch_processed_ops", op_counter_dtype, ), + ("batch_posix_processed_ops", op_counter_dtype, ), + ("batch_nvfs_submit_ops", op_counter_dtype, ), + ("batch_p2p_submit_ops", op_counter_dtype, ), + ("batch_aio_submit_ops", op_counter_dtype, ), + ("batch_iouring_submit_ops", op_counter_dtype, ), + ("batch_mixed_io_submit_ops", op_counter_dtype, ), + ("batch_total_submit_ops", op_counter_dtype, ), + ("batch_read_bytes", _numpy.uint64, ), + ("batch_write_bytes", _numpy.uint64, ), + ("batch_read_bw_bytes", _numpy.uint64, ), + ("batch_write_bw_bytes", _numpy.uint64, ), + ("batch_submit_lat_avg_us", _numpy.uint64, ), + ("batch_completion_lat_avg_us", _numpy.uint64, ), + ("batch_submit_ops_per_sec", _numpy.uint64, ), + ("batch_complete_ops_per_sec", _numpy.uint64, ), + ("batch_submit_lat_sum_us", _numpy.uint64, ), + ("batch_completion_lat_sum_us", _numpy.uint64, ), + ("last_batch_read_bytes", _numpy.uint64, ), + ("last_batch_write_bytes", _numpy.uint64, ), + ], align=True) + + +cdef class StatsLevel1: + """Empty-initialize an instance of `CUfileStatsLevel1_t`. + + + .. 
seealso:: `CUfileStatsLevel1_t` + """ + cdef: + readonly object _data + + def __init__(self): + arr = _numpy.empty(1, dtype=stats_level1_dtype) + self._data = arr.view(_numpy.recarray) + assert self._data.itemsize == sizeof(CUfileStatsLevel1_t), \ + f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfileStatsLevel1_t)}" + + def __repr__(self): + return f"<{__name__}.StatsLevel1 object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return self._data.ctypes.data + + def __int__(self): + return self._data.ctypes.data + + def __eq__(self, other): + if not isinstance(other, StatsLevel1): + return False + if self._data.size != other._data.size: + return False + if self._data.dtype != other._data.dtype: + return False + return bool((self._data == other._data).all()) + + @property + def read_ops(self): + """: """ + return self._data.read_ops + + @read_ops.setter + def read_ops(self, val): + self._data.read_ops = val + + @property + def write_ops(self): + """: """ + return self._data.write_ops + + @write_ops.setter + def write_ops(self, val): + self._data.write_ops = val + + @property + def hdl_register_ops(self): + """: """ + return self._data.hdl_register_ops + + @hdl_register_ops.setter + def hdl_register_ops(self, val): + self._data.hdl_register_ops = val + + @property + def hdl_deregister_ops(self): + """: """ + return self._data.hdl_deregister_ops + + @hdl_deregister_ops.setter + def hdl_deregister_ops(self, val): + self._data.hdl_deregister_ops = val + + @property + def buf_register_ops(self): + """: """ + return self._data.buf_register_ops + + @buf_register_ops.setter + def buf_register_ops(self, val): + self._data.buf_register_ops = val + + @property + def buf_deregister_ops(self): + """: """ + return self._data.buf_deregister_ops + + @buf_deregister_ops.setter + def buf_deregister_ops(self, val): + self._data.buf_deregister_ops = val + + @property + def read_bytes(self): + """int: """ + return int(self._data.read_bytes[0]) + + @read_bytes.setter + def read_bytes(self, val): + self._data.read_bytes = val + + @property + def write_bytes(self): + """int: """ + return int(self._data.write_bytes[0]) + + @write_bytes.setter + def write_bytes(self, val): + self._data.write_bytes = val + + @property + def read_bw_bytes_per_sec(self): + """int: """ + return int(self._data.read_bw_bytes_per_sec[0]) + + @read_bw_bytes_per_sec.setter + def read_bw_bytes_per_sec(self, val): + self._data.read_bw_bytes_per_sec = val + + @property + def write_bw_bytes_per_sec(self): + """int: """ + return int(self._data.write_bw_bytes_per_sec[0]) + + @write_bw_bytes_per_sec.setter + def write_bw_bytes_per_sec(self, val): + self._data.write_bw_bytes_per_sec = val + + @property + def read_lat_avg_us(self): + """int: """ + return int(self._data.read_lat_avg_us[0]) + + @read_lat_avg_us.setter + def read_lat_avg_us(self, val): + self._data.read_lat_avg_us = val + + @property + def write_lat_avg_us(self): + """int: """ + return int(self._data.write_lat_avg_us[0]) + + @write_lat_avg_us.setter + def write_lat_avg_us(self, val): + self._data.write_lat_avg_us = val + + @property + def read_ops_per_sec(self): + """int: """ + return int(self._data.read_ops_per_sec[0]) + + @read_ops_per_sec.setter + def read_ops_per_sec(self, val): + self._data.read_ops_per_sec = val + + @property + def write_ops_per_sec(self): + """int: """ + return int(self._data.write_ops_per_sec[0]) + + @write_ops_per_sec.setter + def write_ops_per_sec(self, val): + 
self._data.write_ops_per_sec = val + + @property + def read_lat_sum_us(self): + """int: """ + return int(self._data.read_lat_sum_us[0]) + + @read_lat_sum_us.setter + def read_lat_sum_us(self, val): + self._data.read_lat_sum_us = val + + @property + def write_lat_sum_us(self): + """int: """ + return int(self._data.write_lat_sum_us[0]) + + @write_lat_sum_us.setter + def write_lat_sum_us(self, val): + self._data.write_lat_sum_us = val + + @property + def batch_submit_ops(self): + """: """ + return self._data.batch_submit_ops + + @batch_submit_ops.setter + def batch_submit_ops(self, val): + self._data.batch_submit_ops = val + + @property + def batch_complete_ops(self): + """: """ + return self._data.batch_complete_ops + + @batch_complete_ops.setter + def batch_complete_ops(self, val): + self._data.batch_complete_ops = val + + @property + def batch_setup_ops(self): + """: """ + return self._data.batch_setup_ops + + @batch_setup_ops.setter + def batch_setup_ops(self, val): + self._data.batch_setup_ops = val + + @property + def batch_cancel_ops(self): + """: """ + return self._data.batch_cancel_ops + + @batch_cancel_ops.setter + def batch_cancel_ops(self, val): + self._data.batch_cancel_ops = val + + @property + def batch_destroy_ops(self): + """: """ + return self._data.batch_destroy_ops + + @batch_destroy_ops.setter + def batch_destroy_ops(self, val): + self._data.batch_destroy_ops = val + + @property + def batch_enqueued_ops(self): + """: """ + return self._data.batch_enqueued_ops + + @batch_enqueued_ops.setter + def batch_enqueued_ops(self, val): + self._data.batch_enqueued_ops = val + + @property + def batch_posix_enqueued_ops(self): + """: """ + return self._data.batch_posix_enqueued_ops + + @batch_posix_enqueued_ops.setter + def batch_posix_enqueued_ops(self, val): + self._data.batch_posix_enqueued_ops = val + + @property + def batch_processed_ops(self): + """: """ + return self._data.batch_processed_ops + + @batch_processed_ops.setter + def batch_processed_ops(self, val): + self._data.batch_processed_ops = val + + @property + def batch_posix_processed_ops(self): + """: """ + return self._data.batch_posix_processed_ops + + @batch_posix_processed_ops.setter + def batch_posix_processed_ops(self, val): + self._data.batch_posix_processed_ops = val + + @property + def batch_nvfs_submit_ops(self): + """: """ + return self._data.batch_nvfs_submit_ops + + @batch_nvfs_submit_ops.setter + def batch_nvfs_submit_ops(self, val): + self._data.batch_nvfs_submit_ops = val + + @property + def batch_p2p_submit_ops(self): + """: """ + return self._data.batch_p2p_submit_ops + + @batch_p2p_submit_ops.setter + def batch_p2p_submit_ops(self, val): + self._data.batch_p2p_submit_ops = val + + @property + def batch_aio_submit_ops(self): + """: """ + return self._data.batch_aio_submit_ops + + @batch_aio_submit_ops.setter + def batch_aio_submit_ops(self, val): + self._data.batch_aio_submit_ops = val + + @property + def batch_iouring_submit_ops(self): + """: """ + return self._data.batch_iouring_submit_ops + + @batch_iouring_submit_ops.setter + def batch_iouring_submit_ops(self, val): + self._data.batch_iouring_submit_ops = val + + @property + def batch_mixed_io_submit_ops(self): + """: """ + return self._data.batch_mixed_io_submit_ops + + @batch_mixed_io_submit_ops.setter + def batch_mixed_io_submit_ops(self, val): + self._data.batch_mixed_io_submit_ops = val + + @property + def batch_total_submit_ops(self): + """: """ + return self._data.batch_total_submit_ops + + @batch_total_submit_ops.setter + def 
batch_total_submit_ops(self, val): + self._data.batch_total_submit_ops = val + + @property + def batch_read_bytes(self): + """int: """ + return int(self._data.batch_read_bytes[0]) + + @batch_read_bytes.setter + def batch_read_bytes(self, val): + self._data.batch_read_bytes = val + + @property + def batch_write_bytes(self): + """int: """ + return int(self._data.batch_write_bytes[0]) + + @batch_write_bytes.setter + def batch_write_bytes(self, val): + self._data.batch_write_bytes = val + + @property + def batch_read_bw_bytes(self): + """int: """ + return int(self._data.batch_read_bw_bytes[0]) + + @batch_read_bw_bytes.setter + def batch_read_bw_bytes(self, val): + self._data.batch_read_bw_bytes = val + + @property + def batch_write_bw_bytes(self): + """int: """ + return int(self._data.batch_write_bw_bytes[0]) + + @batch_write_bw_bytes.setter + def batch_write_bw_bytes(self, val): + self._data.batch_write_bw_bytes = val + + @property + def batch_submit_lat_avg_us(self): + """int: """ + return int(self._data.batch_submit_lat_avg_us[0]) + + @batch_submit_lat_avg_us.setter + def batch_submit_lat_avg_us(self, val): + self._data.batch_submit_lat_avg_us = val + + @property + def batch_completion_lat_avg_us(self): + """int: """ + return int(self._data.batch_completion_lat_avg_us[0]) + + @batch_completion_lat_avg_us.setter + def batch_completion_lat_avg_us(self, val): + self._data.batch_completion_lat_avg_us = val + + @property + def batch_submit_ops_per_sec(self): + """int: """ + return int(self._data.batch_submit_ops_per_sec[0]) + + @batch_submit_ops_per_sec.setter + def batch_submit_ops_per_sec(self, val): + self._data.batch_submit_ops_per_sec = val + + @property + def batch_complete_ops_per_sec(self): + """int: """ + return int(self._data.batch_complete_ops_per_sec[0]) + + @batch_complete_ops_per_sec.setter + def batch_complete_ops_per_sec(self, val): + self._data.batch_complete_ops_per_sec = val + + @property + def batch_submit_lat_sum_us(self): + """int: """ + return int(self._data.batch_submit_lat_sum_us[0]) + + @batch_submit_lat_sum_us.setter + def batch_submit_lat_sum_us(self, val): + self._data.batch_submit_lat_sum_us = val + + @property + def batch_completion_lat_sum_us(self): + """int: """ + return int(self._data.batch_completion_lat_sum_us[0]) + + @batch_completion_lat_sum_us.setter + def batch_completion_lat_sum_us(self, val): + self._data.batch_completion_lat_sum_us = val + + @property + def last_batch_read_bytes(self): + """int: """ + return int(self._data.last_batch_read_bytes[0]) + + @last_batch_read_bytes.setter + def last_batch_read_bytes(self, val): + self._data.last_batch_read_bytes = val + + @property + def last_batch_write_bytes(self): + """int: """ + return int(self._data.last_batch_write_bytes[0]) + + @last_batch_write_bytes.setter + def last_batch_write_bytes(self, val): + self._data.last_batch_write_bytes = val def __setitem__(self, key, val): self._data[key] = val @staticmethod def from_data(data): - """Create an _py_anon_pod2 instance wrapping the given NumPy array. + """Create an StatsLevel1 instance wrapping the given NumPy array. Args: - data (_numpy.ndarray): a 1D array of dtype `_py_anon_pod2_dtype` holding the data. + data (_numpy.ndarray): a 1D array of dtype `stats_level1_dtype` holding the data. 
""" - cdef _py_anon_pod2 obj = _py_anon_pod2.__new__(_py_anon_pod2) + cdef StatsLevel1 obj = StatsLevel1.__new__(StatsLevel1) if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): raise TypeError("data argument must be a NumPy ndarray") if data.ndim != 1: raise ValueError("data array must be 1D") - if data.dtype != _py_anon_pod2_dtype: - raise ValueError("data array must be of dtype _py_anon_pod2_dtype") + if data.dtype != stats_level1_dtype: + raise ValueError("data array must be of dtype stats_level1_dtype") obj._data = data.view(_numpy.recarray) - batch_addr = obj._data.batch[0].__array_interface__['data'][0] - obj._batch = _py_anon_pod3.from_ptr(batch_addr) return obj @staticmethod def from_ptr(intptr_t ptr, bint readonly=False): - """Create an _py_anon_pod2 instance wrapping the given pointer. + """Create an StatsLevel1 instance wrapping the given pointer. Args: ptr (intptr_t): pointer address as Python :class:`int` to the data. @@ -620,16 +1615,14 @@ cdef class _py_anon_pod2: """ if ptr == 0: raise ValueError("ptr must not be null (0)") - cdef _py_anon_pod2 obj = _py_anon_pod2.__new__(_py_anon_pod2) + cdef StatsLevel1 obj = StatsLevel1.__new__(StatsLevel1) cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE cdef object buf = PyMemoryView_FromMemory( - ptr, sizeof((NULL).u), flag) + ptr, sizeof(CUfileStatsLevel1_t), flag) data = _numpy.ndarray((1,), buffer=buf, - dtype=_py_anon_pod2_dtype) + dtype=stats_level1_dtype) obj._data = data.view(_numpy.recarray) - batch_addr = obj._data.batch[0].__array_interface__['data'][0] - obj._batch = _py_anon_pod3.from_ptr(batch_addr) return obj @@ -801,6 +1794,228 @@ cdef class IOParams: return obj +stats_level2_dtype = _numpy.dtype([ + ("basic", stats_level1_dtype, ), + ("read_size_kb_hist", _numpy.uint64, (32,)), + ("write_size_kb_hist", _numpy.uint64, (32,)), + ], align=True) + + +cdef class StatsLevel2: + """Empty-initialize an instance of `CUfileStatsLevel2_t`. + + + .. 
seealso:: `CUfileStatsLevel2_t` + """ + cdef: + readonly object _data + + def __init__(self): + arr = _numpy.empty(1, dtype=stats_level2_dtype) + self._data = arr.view(_numpy.recarray) + assert self._data.itemsize == sizeof(CUfileStatsLevel2_t), \ + f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfileStatsLevel2_t)}" + + def __repr__(self): + return f"<{__name__}.StatsLevel2 object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return self._data.ctypes.data + + def __int__(self): + return self._data.ctypes.data + + def __eq__(self, other): + if not isinstance(other, StatsLevel2): + return False + if self._data.size != other._data.size: + return False + if self._data.dtype != other._data.dtype: + return False + return bool((self._data == other._data).all()) + + @property + def basic(self): + """: """ + return self._data.basic + + @basic.setter + def basic(self, val): + self._data.basic = val + + @property + def read_size_kb_hist(self): + """~_numpy.uint64: (array of length 32).""" + return self._data.read_size_kb_hist + + @read_size_kb_hist.setter + def read_size_kb_hist(self, val): + self._data.read_size_kb_hist = val + + @property + def write_size_kb_hist(self): + """~_numpy.uint64: (array of length 32).""" + return self._data.write_size_kb_hist + + @write_size_kb_hist.setter + def write_size_kb_hist(self, val): + self._data.write_size_kb_hist = val + + def __setitem__(self, key, val): + self._data[key] = val + + @staticmethod + def from_data(data): + """Create an StatsLevel2 instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a 1D array of dtype `stats_level2_dtype` holding the data. + """ + cdef StatsLevel2 obj = StatsLevel2.__new__(StatsLevel2) + if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): + raise TypeError("data argument must be a NumPy ndarray") + if data.ndim != 1: + raise ValueError("data array must be 1D") + if data.dtype != stats_level2_dtype: + raise ValueError("data array must be of dtype stats_level2_dtype") + obj._data = data.view(_numpy.recarray) + + return obj + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False): + """Create an StatsLevel2 instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + readonly (bool): whether the data is read-only (to the user). default is `False`. + """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef StatsLevel2 obj = StatsLevel2.__new__(StatsLevel2) + cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE + cdef object buf = PyMemoryView_FromMemory( + ptr, sizeof(CUfileStatsLevel2_t), flag) + data = _numpy.ndarray((1,), buffer=buf, + dtype=stats_level2_dtype) + obj._data = data.view(_numpy.recarray) + + return obj + + +stats_level3_dtype = _numpy.dtype([ + ("detailed", stats_level2_dtype, ), + ("num_gpus", _numpy.uint32, ), + ("per_gpu_stats", per_gpu_stats_dtype, (16,)), + ], align=True) + + +cdef class StatsLevel3: + """Empty-initialize an instance of `CUfileStatsLevel3_t`. + + + .. 
seealso:: `CUfileStatsLevel3_t` + """ + cdef: + readonly object _data + + def __init__(self): + arr = _numpy.empty(1, dtype=stats_level3_dtype) + self._data = arr.view(_numpy.recarray) + assert self._data.itemsize == sizeof(CUfileStatsLevel3_t), \ + f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfileStatsLevel3_t)}" + + def __repr__(self): + return f"<{__name__}.StatsLevel3 object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return self._data.ctypes.data + + def __int__(self): + return self._data.ctypes.data + + def __eq__(self, other): + if not isinstance(other, StatsLevel3): + return False + if self._data.size != other._data.size: + return False + if self._data.dtype != other._data.dtype: + return False + return bool((self._data == other._data).all()) + + @property + def detailed(self): + """: """ + return self._data.detailed + + @detailed.setter + def detailed(self, val): + self._data.detailed = val + + @property + def num_gpus(self): + """int: """ + return int(self._data.num_gpus[0]) + + @num_gpus.setter + def num_gpus(self, val): + self._data.num_gpus = val + + @property + def per_gpu_stats(self): + """per_gpu_stats_dtype: (array of length 16).""" + return self._data.per_gpu_stats + + @per_gpu_stats.setter + def per_gpu_stats(self, val): + self._data.per_gpu_stats = val + + def __setitem__(self, key, val): + self._data[key] = val + + @staticmethod + def from_data(data): + """Create an StatsLevel3 instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a 1D array of dtype `stats_level3_dtype` holding the data. + """ + cdef StatsLevel3 obj = StatsLevel3.__new__(StatsLevel3) + if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): + raise TypeError("data argument must be a NumPy ndarray") + if data.ndim != 1: + raise ValueError("data array must be 1D") + if data.dtype != stats_level3_dtype: + raise ValueError("data array must be of dtype stats_level3_dtype") + obj._data = data.view(_numpy.recarray) + + return obj + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False): + """Create an StatsLevel3 instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + readonly (bool): whether the data is read-only (to the user). default is `False`. + """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef StatsLevel3 obj = StatsLevel3.__new__(StatsLevel3) + cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE + cdef object buf = PyMemoryView_FromMemory( + ptr, sizeof(CUfileStatsLevel3_t), flag) + data = _numpy.ndarray((1,), buffer=buf, + dtype=stats_level3_dtype) + obj._data = data.view(_numpy.recarray) + + return obj + + # Hack: Overwrite the generated descr_dtype, which NumPy deduced the offset wrong. descr_dtype = _numpy.dtype({ "names": ['type', 'handle', 'fs_ops'], @@ -1298,6 +2513,163 @@ cpdef set_parameter_string(int param, intptr_t desc_str): check_status(__status__) +cpdef tuple get_parameter_min_max_value(int param): + """Get both the minimum and maximum settable values for a given size_t parameter in a single call. + + Args: + param (SizeTConfigParameter): CUfile SizeT configuration parameter. + + Returns: + A 2-tuple containing: + + - size_t: Pointer to store the minimum value. + - size_t: Pointer to store the maximum value. + + .. 
seealso:: `cuFileGetParameterMinMaxValue` + """ + cdef size_t min_value + cdef size_t max_value + with nogil: + __status__ = cuFileGetParameterMinMaxValue(<_SizeTConfigParameter>param, &min_value, &max_value) + check_status(__status__) + return (min_value, max_value) + + +cpdef set_stats_level(int level): + """Set the level of statistics collection for cuFile operations. This will override the cufile.json settings for stats. + + Args: + level (int): Statistics level (0 = disabled, 1 = basic, 2 = detailed, 3 = verbose). + + .. seealso:: `cuFileSetStatsLevel` + """ + with nogil: + __status__ = cuFileSetStatsLevel(level) + check_status(__status__) + + +cpdef int get_stats_level() except? 0: + """Get the current level of statistics collection for cuFile operations. + + Returns: + int: Pointer to store the current statistics level. + + .. seealso:: `cuFileGetStatsLevel` + """ + cdef int level + with nogil: + __status__ = cuFileGetStatsLevel(&level) + check_status(__status__) + return level + + +cpdef stats_start(): + """Start collecting cuFile statistics. + + .. seealso:: `cuFileStatsStart` + """ + with nogil: + __status__ = cuFileStatsStart() + check_status(__status__) + + +cpdef stats_stop(): + """Stop collecting cuFile statistics. + + .. seealso:: `cuFileStatsStop` + """ + with nogil: + __status__ = cuFileStatsStop() + check_status(__status__) + + +cpdef stats_reset(): + """Reset all cuFile statistics counters. + + .. seealso:: `cuFileStatsReset` + """ + with nogil: + __status__ = cuFileStatsReset() + check_status(__status__) + + +cpdef get_stats_l1(intptr_t stats): + """Get Level 1 cuFile statistics. + + Args: + stats (intptr_t): Pointer to CUfileStatsLevel1_t structure to be filled. + + .. seealso:: `cuFileGetStatsL1` + """ + with nogil: + __status__ = cuFileGetStatsL1(stats) + check_status(__status__) + + +cpdef get_stats_l2(intptr_t stats): + """Get Level 2 cuFile statistics. + + Args: + stats (intptr_t): Pointer to CUfileStatsLevel2_t structure to be filled. + + .. seealso:: `cuFileGetStatsL2` + """ + with nogil: + __status__ = cuFileGetStatsL2(stats) + check_status(__status__) + + +cpdef get_stats_l3(intptr_t stats): + """Get Level 3 cuFile statistics. + + Args: + stats (intptr_t): Pointer to CUfileStatsLevel3_t structure to be filled. + + .. seealso:: `cuFileGetStatsL3` + """ + with nogil: + __status__ = cuFileGetStatsL3(stats) + check_status(__status__) + + +cpdef size_t get_bar_size_in_kb(int gpu_ind_ex) except? 0: + cdef size_t bar_size + with nogil: + __status__ = cuFileGetBARSizeInKB(gpu_ind_ex, &bar_size) + check_status(__status__) + return bar_size + + +cpdef set_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len): + """Set both POSIX pool slab size and count parameters as a pair. + + Args: + size_values (intptr_t): Array of slab sizes in KB. + count_values (intptr_t): Array of slab counts. + len (int): Length of both arrays (must be the same). + + .. seealso:: `cuFileSetParameterPosixPoolSlabArray` + """ + with nogil: + __status__ = cuFileSetParameterPosixPoolSlabArray(size_values, count_values, len) + check_status(__status__) + + +cpdef get_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len): + """Get both POSIX pool slab size and count parameters as a pair. + + Args: + size_values (intptr_t): Buffer to receive slab sizes in KB. + count_values (intptr_t): Buffer to receive slab counts. + len (int): Buffer size (must match the actual parameter length). + + .. 
seealso:: `cuFileGetParameterPosixPoolSlabArray` + """ + with nogil: + __status__ = cuFileGetParameterPosixPoolSlabArray(size_values, count_values, len) + check_status(__status__) + + cpdef str op_status_error(int status): """cufileop status string. diff --git a/cuda_bindings/cuda/bindings/cycufile.pxd b/cuda_bindings/cuda/bindings/cycufile.pxd index 8c4906f84..c57b18b95 100644 --- a/cuda_bindings/cuda/bindings/cycufile.pxd +++ b/cuda_bindings/cuda/bindings/cycufile.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.2. Do not modify it directly. +# This code was automatically generated across versions from 12.9.1 to 13.0.2. Do not modify it directly. from libc.stdint cimport uint32_t, uint64_t from libc.time cimport time_t @@ -333,6 +333,7 @@ cdef CUfileError_t cuFileBufDeregister(const void* bufPtr_base) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileDriverClose_v2() except?CUFILE_LOADING_ERROR nogil cdef long cuFileUseCount() except* nogil cdef CUfileError_t cuFileDriverGetProperties(CUfileDrvProps_t* props) except?CUFILE_LOADING_ERROR nogil @@ -356,7 +357,6 @@ cdef CUfileError_t cuFileGetParameterString(CUFileStringConfigParameter_t param, cdef CUfileError_t cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileSetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileSetParameterString(CUFileStringConfigParameter_t param, const char* desc_str) except?CUFILE_LOADING_ERROR nogil -cdef CUfileError_t cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileGetStatsLevel(int* level) except?CUFILE_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/cycufile.pyx b/cuda_bindings/cuda/bindings/cycufile.pyx index ee033eb25..f1589fcd1 100644 --- a/cuda_bindings/cuda/bindings/cycufile.pyx +++ b/cuda_bindings/cuda/bindings/cycufile.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.2. Do not modify it directly. +# This code was automatically generated across versions from 12.9.1 to 13.0.2. Do not modify it directly. 
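For context: the new `cpdef` wrappers added above in `cufile.pyx` (`set_stats_level`, `stats_start`/`stats_stop`/`stats_reset`, `get_stats_l1`..`get_stats_l3`, `get_parameter_min_max_value`, ...) take raw pointer addresses for the statistics structs, which is exactly what the NumPy-backed `StatsLevel1`/`StatsLevel2`/`StatsLevel3` wrapper classes introduced earlier in this patch expose via their `.ptr` property. A minimal usage sketch, assuming an initialized CUDA context and an open cuFile driver (error handling omitted; illustrative only, not part of the patch):

    from cuda.bindings import cufile

    cufile.set_stats_level(1)          # level 1 = basic statistics
    cufile.stats_start()
    # ... GPUDirect Storage reads/writes happen here ...
    stats = cufile.StatsLevel1()       # empty NumPy-backed struct from this patch
    cufile.get_stats_l1(stats.ptr)     # pass the struct's address to be filled in
    print(stats.read_bytes, stats.write_bytes)
    cufile.stats_stop()
    cufile.stats_reset()
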
from ._internal cimport cufile as _cufile @@ -41,6 +41,10 @@ cdef CUfileError_t cuFileDriverOpen() except?CUFILE_LOADING_ERROR return _cufile._cuFileDriverOpen() +cdef CUfileError_t cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverClose() + + cdef CUfileError_t cuFileDriverClose_v2() except?CUFILE_LOADING_ERROR nogil: return _cufile._cuFileDriverClose_v2() @@ -134,10 +138,6 @@ cdef CUfileError_t cuFileSetParameterString(CUFileStringConfigParameter_t param, return _cufile._cuFileSetParameterString(param, desc_str) -cdef CUfileError_t cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil: - return _cufile._cuFileDriverClose() - - cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil: return _cufile._cuFileGetParameterMinMaxValue(param, min_value, max_value) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index c9a910f5e..6e7d9883a 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -50,13 +50,14 @@ def cufile_env_json(): logging.info(f"Using cuFile config: {config_path}") assert os.path.isfile(config_path) os.environ["CUFILE_ENV_PATH_JSON"] = config_path + yield # Restore original value or remove if it wasn't set if original_value is not None: os.environ["CUFILE_ENV_PATH_JSON"] = original_value else: - os.environ.pop("CUFILE_ENV_PATH_JSON", None) + del os.environ["CUFILE_ENV_PATH_JSON"] @cache @@ -1419,6 +1420,7 @@ def test_batch_io_cancel(): @pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") def test_batch_io_large_operations(): """Test batch IO with large buffer operations.""" + # Initialize CUDA (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -1477,11 +1479,11 @@ def test_batch_io_large_operations(): handle = cufile.handle_register(descr.ptr) # Set up batch IO - batch_handle = cufile.batch_io_set_up(num_operations * 2) # 2 writes + 2 reads + batch_handle = cufile.batch_io_set_up(num_operations) # Only for writes # Create IOParams array for batch operations - io_params = cufile.IOParams(num_operations * 2) - io_events = cufile.IOEvents(num_operations * 2) + io_params = cufile.IOParams(num_operations) + io_events = cufile.IOEvents(num_operations) # Prepare test data test_strings = [ @@ -1498,7 +1500,7 @@ def test_batch_io_large_operations(): test_data = test_data[:buf_size] host_buf = ctypes.create_string_buffer(test_data, buf_size) cuda.cuMemcpyHtoDAsync(write_buffers[i], host_buf, buf_size, 0) - cuda.cuStreamSynchronize(0) + cuda.cuStreamSynchronize(0) # Set up write operations for i in range(num_operations): @@ -1511,47 +1513,65 @@ def test_batch_io_large_operations(): io_params[i].u.batch.dev_ptr_offset = 0 io_params[i].u.batch.size_ = buf_size - # Set up read operations - for i in range(num_operations): - idx = i + num_operations - io_params[idx].mode = cufile.BatchMode.BATCH # Batch mode - io_params[idx].fh = handle - io_params[idx].opcode = cufile.Opcode.READ # Read opcode - io_params[idx].cookie = i + 100 - io_params[idx].u.batch.dev_ptr_base = int(read_buffers[i]) - io_params[idx].u.batch.file_offset = i * buf_size - io_params[idx].u.batch.dev_ptr_offset = 0 - io_params[idx].u.batch.size_ = buf_size + # Submit writes + cufile.batch_io_submit(batch_handle, num_operations, io_params.ptr, 0) - # Submit batch operations - cufile.batch_io_submit(batch_handle, num_operations * 2, io_params.ptr, 
0) + # Wait for writes to complete + nr_completed_writes = ctypes.c_uint(num_operations) + timeout = ctypes.c_int(10000) + cufile.batch_io_get_status( + batch_handle, + num_operations, + ctypes.addressof(nr_completed_writes), + io_events.ptr, + ctypes.addressof(timeout), + ) - # Get batch status - min_nr = num_operations * 2 # Wait for all operations to complete - nr_completed = ctypes.c_uint(num_operations * 2) # Initialize to max operations posted - timeout = ctypes.c_int(10000) # 10 second timeout for large operations + # Clean up write batch + cufile.batch_io_destroy(batch_handle) + # Now submit reads separately + read_batch_handle = cufile.batch_io_set_up(num_operations) + read_io_params = cufile.IOParams(num_operations) + read_io_events = cufile.IOEvents(num_operations) + + # Set up read operations + for i in range(num_operations): + read_io_params[i].mode = cufile.BatchMode.BATCH + read_io_params[i].fh = handle + read_io_params[i].opcode = cufile.Opcode.READ + read_io_params[i].cookie = i + 100 + read_io_params[i].u.batch.dev_ptr_base = int(read_buffers[i]) + read_io_params[i].u.batch.file_offset = i * buf_size + read_io_params[i].u.batch.dev_ptr_offset = 0 + read_io_params[i].u.batch.size_ = buf_size + + # Submit reads + cufile.batch_io_submit(read_batch_handle, num_operations, read_io_params.ptr, 0) + + # Wait for reads + nr_completed = ctypes.c_uint(num_operations) cufile.batch_io_get_status( - batch_handle, min_nr, ctypes.addressof(nr_completed), io_events.ptr, ctypes.addressof(timeout) + read_batch_handle, + num_operations, + ctypes.addressof(nr_completed), + read_io_events.ptr, + ctypes.addressof(timeout), ) # Verify all operations completed successfully - assert nr_completed.value == num_operations * 2, ( - f"Expected {num_operations * 2} operations, got {nr_completed.value}" - ) + assert nr_completed.value == num_operations, f"Expected {num_operations} operations, got {nr_completed.value}" # Collect all returned cookies returned_cookies = set() - for i in range(num_operations * 2): - assert io_events[i].status == cufile.Status.COMPLETE, ( - f"Operation {i} failed with status {io_events[i].status}" + for i in range(num_operations): + assert read_io_events[i].status == cufile.Status.COMPLETE, ( + f"Operation {i} failed with status {read_io_events[i].status}" ) - returned_cookies.add(io_events[i].cookie) + returned_cookies.add(read_io_events[i].cookie) # Verify all expected cookies are present - expected_cookies = set(range(num_operations)) | set( - range(100, 100 + num_operations) - ) # write cookies 0,1 + read cookies 100,101 + expected_cookies = set(range(100, 100 + num_operations)) assert returned_cookies == expected_cookies, ( f"Cookie mismatch. 
Expected {expected_cookies}, got {returned_cookies}" ) @@ -1579,7 +1599,7 @@ def test_batch_io_large_operations(): ) # Clean up batch IO - cufile.batch_io_destroy(batch_handle) + cufile.batch_io_destroy(read_batch_handle) # Deregister file handle cufile.handle_deregister(handle) @@ -1624,104 +1644,500 @@ def test_set_get_parameter_size_t(): (err,) = cuda.cuCtxSetCurrent(ctx) assert err == cuda.CUresult.CUDA_SUCCESS + param_val_pairs = ( + (cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB, 64), # 64KB threshold + (cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB, 1024), # 1MB max direct IO size + (cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB, 512), # 512KB max cache size + (cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB, 128), # 128KB per buffer cache + (cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB, 2048), # 2MB max pinned memory + (cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE, 16), # 16 operations per batch + (cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS, 5000), # 5 second timeout + (cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH, 32), # Max 32 operations in queue + (cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS, 8), # Max 8 IO threads + (cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB, 4), # 4KB minimum IO threshold + (cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM, 4), # Max 4 parallel requests + ) + + def test_param(param, val): + orig_val = cufile.get_parameter_size_t(param) + cufile.set_parameter_size_t(param, val) + retrieved_val = cufile.get_parameter_size_t(param) + assert retrieved_val == val + cufile.set_parameter_size_t(param, orig_val) + try: # Test setting and getting various size_t parameters + for param, val in param_val_pairs: + test_param(param, val) + finally: + cuda.cuDevicePrimaryCtxRelease(device) - # Test poll threshold size (in KB) - poll_threshold_kb = 64 # 64KB threshold - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB, poll_threshold_kb) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB) - assert retrieved_value == poll_threshold_kb, ( - f"Poll threshold mismatch: set {poll_threshold_kb}, got {retrieved_value}" - ) - # Test max direct IO size (in KB) - max_direct_io_kb = 1024 # 1MB max direct IO size - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB, max_direct_io_kb) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB) - assert retrieved_value == max_direct_io_kb, ( - f"Max direct IO size mismatch: set {max_direct_io_kb}, got {retrieved_value}" - ) +@pytest.mark.skipif( + cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" +) +def test_set_get_parameter_bool(): + """Test setting and getting boolean parameters with cuFile validation.""" - # Test max device cache size (in KB) - max_cache_kb = 512 # 512KB max cache size - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB, max_cache_kb) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB) - assert retrieved_value == max_cache_kb, f"Max cache size mismatch: set {max_cache_kb}, got {retrieved_value}" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS - # Test per buffer cache size (in KB) - per_buffer_cache_kb = 128 
# 128KB per buffer cache - cufile.set_parameter_size_t( - cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB, per_buffer_cache_kb - ) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB) - assert retrieved_value == per_buffer_cache_kb, ( - f"Per buffer cache size mismatch: set {per_buffer_cache_kb}, got {retrieved_value}" - ) + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS - # Test max device pinned memory size (in KB) - max_pinned_kb = 2048 # 2MB max pinned memory - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB, max_pinned_kb) - retrieved_value = cufile.get_parameter_size_t( - cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB - ) - assert retrieved_value == max_pinned_kb, ( - f"Max pinned memory size mismatch: set {max_pinned_kb}, got {retrieved_value}" - ) + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS - # Test IO batch size - batch_size = 16 # 16 operations per batch - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE, batch_size) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE) - assert retrieved_value == batch_size, f"IO batch size mismatch: set {batch_size}, got {retrieved_value}" - - # Test batch IO timeout (in milliseconds) - timeout_ms = 5000 # 5 second timeout - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS, timeout_ms) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS) - assert retrieved_value == timeout_ms, f"Batch IO timeout mismatch: set {timeout_ms}, got {retrieved_value}" - - # Test execution parameters - max_io_queue_depth = 32 # Max 32 operations in queue - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH, max_io_queue_depth) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH) - assert retrieved_value == max_io_queue_depth, ( - f"Max IO queue depth mismatch: set {max_io_queue_depth}, got {retrieved_value}" - ) + param_val_pairs = ( + (cufile.BoolConfigParameter.PROPERTIES_USE_POLL_MODE, True), + (cufile.BoolConfigParameter.PROPERTIES_ALLOW_COMPAT_MODE, False), + (cufile.BoolConfigParameter.FORCE_COMPAT_MODE, False), + (cufile.BoolConfigParameter.FS_MISC_API_CHECK_AGGRESSIVE, True), + (cufile.BoolConfigParameter.EXECUTION_PARALLEL_IO, True), + (cufile.BoolConfigParameter.PROFILE_NVTX, False), + (cufile.BoolConfigParameter.PROPERTIES_ALLOW_SYSTEM_MEMORY, True), + (cufile.BoolConfigParameter.USE_PCIP2PDMA, True), + (cufile.BoolConfigParameter.PREFER_IO_URING, False), + (cufile.BoolConfigParameter.FORCE_ODIRECT_MODE, True), + (cufile.BoolConfigParameter.SKIP_TOPOLOGY_DETECTION, False), + (cufile.BoolConfigParameter.STREAM_MEMOPS_BYPASS, True), + ) + + def test_param(param, val): + orig_val = cufile.get_parameter_bool(param) + cufile.set_parameter_bool(param, val) + retrieved_val = cufile.get_parameter_bool(param) + assert retrieved_val is val + cufile.set_parameter_bool(param, orig_val) - max_io_threads = 8 # Max 8 IO threads - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS, max_io_threads) - retrieved_value = 
cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS) - assert retrieved_value == max_io_threads, ( - f"Max IO threads mismatch: set {max_io_threads}, got {retrieved_value}" - ) + try: + # Test setting and getting various boolean parameters + for param, val in param_val_pairs: + test_param(param, val) + finally: + cuda.cuDevicePrimaryCtxRelease(device) - min_io_threshold_kb = 4 # 4KB minimum IO threshold - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB, min_io_threshold_kb) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB) - assert retrieved_value == min_io_threshold_kb, ( - f"Min IO threshold mismatch: set {min_io_threshold_kb}, got {retrieved_value}" - ) - max_request_parallelism = 4 # Max 4 parallel requests - cufile.set_parameter_size_t( - cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM, max_request_parallelism - ) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM) - assert retrieved_value == max_request_parallelism, ( - f"Max request parallelism mismatch: set {max_request_parallelism}, got {retrieved_value}" +@pytest.mark.skipif( + cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" +) +def test_set_get_parameter_string(tmp_path): + """Test setting and getting string parameters with cuFile validation.""" + + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + temp_dir = tempfile.gettempdir() + # must be set to avoid getter error when testing ENV_LOGFILE_PATH... + os.environ["CUFILE_LOGFILE_PATH"] = "" + + param_val_pairs = ( + (cufile.StringConfigParameter.LOGGING_LEVEL, "INFO", "DEBUG"), # Test logging level + ( + cufile.StringConfigParameter.ENV_LOGFILE_PATH, + os.path.join(temp_dir, "cufile.log"), + str(tmp_path / "cufile.log"), + ), # Test environment log file path + ( + cufile.StringConfigParameter.LOG_DIR, + os.path.join(temp_dir, "cufile_logs"), + str(tmp_path), + ), # Test log directory + ) + + def test_param(param, val, default_val): + orig_val = cufile.get_parameter_string(param, 256) + # Use safe_decode_string to handle null terminators and padding + orig_val = safe_decode_string(orig_val.encode("utf-8")) + + val_b = val.encode("utf-8") + val_buf = ctypes.create_string_buffer(val_b) + default_val_b = default_val.encode("utf-8") + default_val_buf = ctypes.create_string_buffer(default_val_b) + orig_val_b = orig_val.encode("utf-8") + orig_val_buf = ctypes.create_string_buffer(orig_val_b) + + # Round-trip test + cufile.set_parameter_string(param, int(ctypes.addressof(val_buf))) + retrieved_val = cufile.get_parameter_string(param, 256) + retrieved_val = safe_decode_string(retrieved_val.encode("utf-8")) + assert retrieved_val == val + + # Restore + try: + # Currently this line will raise, see below. + cufile.set_parameter_string(param, int(ctypes.addressof(orig_val_buf))) + except Exception: + # This block will always be reached because cuFile can start with a garbage default (empty string) + # that cannot be restored. In other words, cuFile does not honor the common expectation that a + # getter/setter pair should be round-trippable.
+ cufile.set_parameter_string(param, int(ctypes.addressof(default_val_buf))) + + try: + # Test setting and getting various string parameters + # Note: String parameter tests may have issues with the current implementation + for param, val, default_val in param_val_pairs: + test_param(param, val, default_val) + finally: + del os.environ["CUFILE_LOGFILE_PATH"] + cuda.cuDevicePrimaryCtxRelease(device) + + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_set_stats_level(): + """Test cuFile statistics level configuration.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + old_level = cufile.get_stats_level() + + # Test setting different statistics levels + valid_levels = [0, 1, 2, 3] # 0=disabled, 1=basic, 2=detailed, 3=verbose + + for level in valid_levels: + cufile.set_stats_level(level) + + # Verify the level was set correctly + current_level = cufile.get_stats_level() + assert current_level == level, f"Expected stats level {level}, but got {current_level}" + + logging.info(f"Successfully set and verified stats level {level}") + + # Test invalid level (should raise an error) + try: + cufile.set_stats_level(-1) # Invalid negative level + except Exception as e: + logging.info(f"Correctly caught error for invalid stats level: {e}") + + try: + cufile.set_stats_level(4) # Invalid level > 3 + except Exception as e: + logging.info(f"Correctly caught error for invalid stats level: {e}") + + finally: + # Reset cuFile statistics to clear all counters + cufile.stats_reset() + cufile.set_stats_level(old_level) + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_get_parameter_min_max_value(): + """Test getting minimum and maximum values for size_t parameters.""" + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + cufile.driver_open() + + try: + # Test with poll threshold parameter + param = cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB + + # Get min/max values + min_value, max_value = cufile.get_parameter_min_max_value(param) + + # Verify that min <= max and both are reasonable values + assert min_value >= 0, f"Invalid min value: {min_value}" + assert max_value >= min_value, f"Max value {max_value} < min value {min_value}" + assert max_value > 0, f"Invalid max value: {max_value}" + + logging.info(f"POLLTHRESHOLD_SIZE_KB: min={min_value}, max={max_value}") + + finally: + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_stats_start_stop(): + """Test cuFile statistics collection start and stop.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) +
assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + old_level = cufile.get_stats_level() + + # Set statistics level first (required before starting stats) + cufile.set_stats_level(1) # Level 1 = basic statistics + # Start collecting cuFile statistics first + cufile.stats_start() + + # Stop collecting cuFile statistics + cufile.stats_stop() + + # Verify statistics collection is stopped + logging.info("cuFile statistics collection stopped successfully") + + finally: + # Reset cuFile statistics to clear all counters + cufile.stats_reset() + cufile.set_stats_level(old_level) + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") +def test_get_stats_l1(): + """Test cuFile L1 statistics retrieval with file operations.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + # Create test file directly with O_DIRECT + file_path = "test_stats_l1.bin" + fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600) + + try: + old_level = cufile.get_stats_level() + + cufile.set_stats_level(1) # L1 = basic operation counts + # Start collecting cuFile statistics + cufile.stats_start() + + # Create and initialize the descriptor + descr = cufile.Descr() + descr.type = cufile.FileHandleType.OPAQUE_FD + descr.handle.fd = fd + descr.fs_ops = 0 + + # Register the handle + handle = cufile.handle_register(descr.ptr) + + # Allocate CUDA memory + buffer_size = 4096 # 4KB, aligned to 4096 bytes + err, buf_ptr = cuda.cuMemAlloc(buffer_size) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Register the buffer with cuFile + buf_ptr_int = int(buf_ptr) + cufile.buf_register(buf_ptr_int, buffer_size, 0) + + # Prepare test data and copy to GPU buffer + test_data = b"cuFile L1 stats test data" * 100 # Fill buffer + test_data = test_data[:buffer_size] + host_buf = ctypes.create_string_buffer(test_data, buffer_size) + cuda.cuMemcpyHtoD(buf_ptr, host_buf, len(test_data)) + + # Perform cuFile operations to generate L1 statistics + cufile.write(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.read(handle, buf_ptr_int, buffer_size, 0, 0) + + # Use the exposed StatsLevel1 class from cufile module + stats = cufile.StatsLevel1() + + # Get L1 statistics (basic operation counts) + cufile.get_stats_l1(stats.ptr) + + # Verify actual field values using OpCounter class for cleaner access + read_ops = cufile.OpCounter.from_data(stats.read_ops) + write_ops = cufile.OpCounter.from_data(stats.write_ops) + read_bytes = int(stats.read_bytes) + write_bytes = int(stats.write_bytes) + + assert read_ops.ok > 0, f"Expected read operations, got {read_ops.ok}" + assert write_ops.ok > 0, f"Expected write operations, got 
{write_ops.ok}" + assert read_bytes > 0, f"Expected read bytes, got {read_bytes}" + assert write_bytes > 0, f"Expected write bytes, got {write_bytes}" + + logging.info( + f"Stats: reads={read_ops.ok}, writes={write_ops.ok}, read_bytes={read_bytes}, write_bytes={write_bytes}" ) + # Stop statistics collection + cufile.stats_stop() + + # Clean up cuFile resources + cufile.buf_deregister(buf_ptr_int) + cufile.handle_deregister(handle) + cuda.cuMemFree(buf_ptr) + finally: + cufile.stats_reset() + cufile.set_stats_level(old_level) + os.close(fd) + with suppress(OSError): + os.unlink(file_path) + cufile.driver_close() cuda.cuDevicePrimaryCtxRelease(device) @pytest.mark.skipif( - cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" ) -def test_set_get_parameter_bool(): - """Test setting and getting boolean parameters with cuFile validation.""" +@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") +def test_get_stats_l2(): + """Test cuFile L2 statistics retrieval with file operations.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + # Create test file directly with O_DIRECT + file_path = "test_stats_l2.bin" + fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600) + + try: + old_level = cufile.get_stats_level() + + cufile.set_stats_level(2) # L2 = detailed performance metrics + + # Start collecting cuFile statistics + cufile.stats_start() + + # Create and initialize the descriptor + descr = cufile.Descr() + descr.type = cufile.FileHandleType.OPAQUE_FD + descr.handle.fd = fd + descr.fs_ops = 0 + + # Register the handle + handle = cufile.handle_register(descr.ptr) + + # Allocate CUDA memory + buffer_size = 8192 # 8KB for more detailed stats + err, buf_ptr = cuda.cuMemAlloc(buffer_size) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Register the buffer with cuFile + buf_ptr_int = int(buf_ptr) + cufile.buf_register(buf_ptr_int, buffer_size, 0) + + # Prepare test data and copy to GPU buffer + test_data = b"cuFile L2 detailed stats test data" * 150 # Fill buffer + test_data = test_data[:buffer_size] + host_buf = ctypes.create_string_buffer(test_data, buffer_size) + cuda.cuMemcpyHtoD(buf_ptr, host_buf, len(test_data)) + + # Perform multiple cuFile operations to generate detailed L2 statistics + cufile.write(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.read(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.write(handle, buf_ptr_int, buffer_size, buffer_size, 0) # Different offset + cufile.read(handle, buf_ptr_int, buffer_size, buffer_size, 0) + + # Use the exposed StatsLevel2 class from cufile module + stats = cufile.StatsLevel2() + + # Get L2 statistics (detailed performance metrics) + cufile.get_stats_l2(stats.ptr) + + # Verify L2 histogram fields contain data + # Access numpy array fields: histograms are numpy arrays + read_hist_total = int(stats.read_size_kb_hist.sum()) + write_hist_total = int(stats.write_size_kb_hist.sum()) + assert read_hist_total > 0 or write_hist_total > 0, "Expected L2 histogram data" + + # L2 also 
contains L1 basic stats - verify using OpCounter class + basic_stats = cufile.StatsLevel1.from_data(stats.basic) + read_ops = cufile.OpCounter.from_data(basic_stats.read_ops) + write_ops = cufile.OpCounter.from_data(basic_stats.write_ops) + + logging.info( + f"L2 Stats: read_hist_total={read_hist_total}, write_hist_total={write_hist_total}, " + f"basic_reads={read_ops.ok}, basic_writes={write_ops.ok}" + ) + + # Stop statistics collection + cufile.stats_stop() + + # Clean up cuFile resources + cufile.buf_deregister(buf_ptr_int) + cufile.handle_deregister(handle) + cuda.cuMemFree(buf_ptr) + finally: + cufile.stats_reset() + cufile.set_stats_level(old_level) + os.close(fd) + with suppress(OSError): + os.unlink(file_path) + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") +def test_get_stats_l3(): + """Test cuFile L3 statistics retrieval with file operations.""" # Initialize CUDA (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -1734,79 +2150,143 @@ def test_set_get_parameter_bool(): (err,) = cuda.cuCtxSetCurrent(ctx) assert err == cuda.CUresult.CUDA_SUCCESS + # Open cuFile driver + cufile.driver_open() + + # Create test file directly with O_DIRECT + file_path = "test_stats_l3.bin" + fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600) + try: - # Test setting and getting various boolean parameters + old_level = cufile.get_stats_level() + + cufile.set_stats_level(3) # L3 = comprehensive diagnostic data + + # Start collecting cuFile statistics + cufile.stats_start() + + # Create and initialize the descriptor + descr = cufile.Descr() + descr.type = cufile.FileHandleType.OPAQUE_FD + descr.handle.fd = fd + descr.fs_ops = 0 + + # Register the handle + handle = cufile.handle_register(descr.ptr) + + # Allocate CUDA memory + buffer_size = 16384 # 16KB for comprehensive stats testing + err, buf_ptr = cuda.cuMemAlloc(buffer_size) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Register the buffer with cuFile + buf_ptr_int = int(buf_ptr) + cufile.buf_register(buf_ptr_int, buffer_size, 0) + + # Prepare test data and copy to GPU buffer + test_data = b"cuFile L3 comprehensive stats test data" * 200 # Fill buffer + test_data = test_data[:buffer_size] + host_buf = ctypes.create_string_buffer(test_data, buffer_size) + cuda.cuMemcpyHtoD(buf_ptr, host_buf, len(test_data)) + + # Perform comprehensive cuFile operations to generate L3 statistics + # Multiple writes and reads at different offsets to generate rich stats + cufile.write(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.read(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.write(handle, buf_ptr_int, buffer_size, buffer_size, 0) # Different offset + cufile.read(handle, buf_ptr_int, buffer_size, buffer_size, 0) + cufile.write(handle, buf_ptr_int, buffer_size // 2, buffer_size * 2, 0) # Partial write + cufile.read(handle, buf_ptr_int, buffer_size // 2, buffer_size * 2, 0) # Partial read + + # Use the exposed StatsLevel3 class from cufile module + stats = cufile.StatsLevel3() + + # Get L3 statistics (comprehensive diagnostic data) + cufile.get_stats_l3(stats.ptr) + + # Verify L3-specific fields + num_gpus = int(stats.num_gpus) + assert num_gpus >= 0, f"Expected valid GPU count, got {num_gpus}" + + # Check if we have at least one GPU with stats using PerGpuStats 
class + gpu_with_data = False + for i in range(min(num_gpus, 16)): + # Access per-GPU stats using PerGpuStats class + # stats.per_gpu_stats has shape (1, 16), we need to get [0] first to get the (16,) array + # then slice [i:i+1] to get a 1-d array view (required by from_data) + per_gpu_array = stats.per_gpu_stats[0] # Get the (16,) array + gpu_stats = cufile.PerGpuStats.from_data(per_gpu_array[i : i + 1]) + if gpu_stats.n_total_reads > 0 or gpu_stats.read_bytes > 0: + gpu_with_data = True + break + + # L3 also contains L2 detailed stats (which includes L1 basic stats) + detailed_stats = cufile.StatsLevel2.from_data(stats.detailed) + read_hist_total = int(detailed_stats.read_size_kb_hist.sum()) + + logging.info( + f"L3 Stats: num_gpus={num_gpus}, gpu_with_data={gpu_with_data}, detailed_read_hist={read_hist_total}" + ) - # Test poll mode - cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_USE_POLL_MODE, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_USE_POLL_MODE) - assert retrieved_value is True, f"Poll mode mismatch: set True, got {retrieved_value}" - - # Test compatibility mode - cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_COMPAT_MODE, False) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_COMPAT_MODE) - assert retrieved_value is False, f"Compatibility mode mismatch: set False, got {retrieved_value}" - - # Test force compatibility mode - cufile.set_parameter_bool(cufile.BoolConfigParameter.FORCE_COMPAT_MODE, False) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FORCE_COMPAT_MODE) - assert retrieved_value is False, f"Force compatibility mode mismatch: set False, got {retrieved_value}" - - # Test aggressive API check - cufile.set_parameter_bool(cufile.BoolConfigParameter.FS_MISC_API_CHECK_AGGRESSIVE, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FS_MISC_API_CHECK_AGGRESSIVE) - assert retrieved_value is True, f"Aggressive API check mismatch: set True, got {retrieved_value}" - - # Test parallel IO - cufile.set_parameter_bool(cufile.BoolConfigParameter.EXECUTION_PARALLEL_IO, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.EXECUTION_PARALLEL_IO) - assert retrieved_value is True, f"Parallel IO mismatch: set True, got {retrieved_value}" - - # Test NVTX profiling - cufile.set_parameter_bool(cufile.BoolConfigParameter.PROFILE_NVTX, False) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROFILE_NVTX) - assert retrieved_value is False, f"NVTX profiling mismatch: set False, got {retrieved_value}" - - # Test system memory allowance - cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_SYSTEM_MEMORY, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_SYSTEM_MEMORY) - assert retrieved_value is True, f"System memory allowance mismatch: set True, got {retrieved_value}" - - # Test PCI P2P DMA - cufile.set_parameter_bool(cufile.BoolConfigParameter.USE_PCIP2PDMA, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.USE_PCIP2PDMA) - assert retrieved_value is True, f"PCI P2P DMA mismatch: set True, got {retrieved_value}" - - # Test IO uring preference - cufile.set_parameter_bool(cufile.BoolConfigParameter.PREFER_IO_URING, False) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PREFER_IO_URING) - assert retrieved_value is False, f"IO uring preference mismatch: set 
False, got {retrieved_value}" - - # Test force O_DIRECT mode - cufile.set_parameter_bool(cufile.BoolConfigParameter.FORCE_ODIRECT_MODE, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FORCE_ODIRECT_MODE) - assert retrieved_value is True, f"Force O_DIRECT mode mismatch: set True, got {retrieved_value}" - - # Test topology detection skip - cufile.set_parameter_bool(cufile.BoolConfigParameter.SKIP_TOPOLOGY_DETECTION, False) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.SKIP_TOPOLOGY_DETECTION) - assert retrieved_value is False, f"Topology detection skip mismatch: set False, got {retrieved_value}" - - # Test stream memops bypass - cufile.set_parameter_bool(cufile.BoolConfigParameter.STREAM_MEMOPS_BYPASS, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.STREAM_MEMOPS_BYPASS) - assert retrieved_value is True, f"Stream memops bypass mismatch: set True, got {retrieved_value}" + # Stop statistics collection + cufile.stats_stop() + + # Clean up cuFile resources + cufile.buf_deregister(buf_ptr_int) + cufile.handle_deregister(handle) + cuda.cuMemFree(buf_ptr) finally: + cufile.stats_reset() + cufile.set_stats_level(old_level) + os.close(fd) + with suppress(OSError): + os.unlink(file_path) + cufile.driver_close() cuda.cuDevicePrimaryCtxRelease(device) @pytest.mark.skipif( - cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" ) -def test_set_get_parameter_string(): - """Test setting and getting string parameters with cuFile validation.""" +def test_get_bar_size_in_kb(): + """Test cuFile BAR (Base Address Register) size retrieval.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + # Get BAR size in kilobytes + bar_size_kb = cufile.get_bar_size_in_kb(0) + + # Verify BAR size is a reasonable value + assert isinstance(bar_size_kb, int), "BAR size should be an integer" + assert bar_size_kb > 0, "BAR size should be positive" + + logging.info(f"GPU BAR size: {bar_size_kb} KB ({bar_size_kb / 1024 / 1024:.2f} GB)") + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_set_parameter_posix_pool_slab_array(): + """Test cuFile POSIX pool slab array configuration.""" # Initialize CUDA (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -1819,69 +2299,62 @@ def test_set_get_parameter_string(): (err,) = cuda.cuCtxSetCurrent(ctx) assert err == cuda.CUresult.CUDA_SUCCESS + # Define slab sizes for POSIX I/O pool (common I/O buffer sizes) - BEFORE driver open + import ctypes + + slab_sizes = [ + 4096, # 4KB - small files + 65536, # 64KB - medium files + 1048576, # 1MB - large files + 16777216, # 16MB - very large files + ] + + # Define counts for each slab size (number of buffers) + slab_counts = [ + 10, # 10 buffers of 4KB + 5, # 5 buffers of 64KB + 3, # 3 buffers of 1MB + 2, # 2 buffers of 16MB + ] + + # Convert to ctypes 
arrays + size_array_type = ctypes.c_size_t * len(slab_sizes) + count_array_type = ctypes.c_size_t * len(slab_counts) + size_array = size_array_type(*slab_sizes) + count_array = count_array_type(*slab_counts) + + # Set POSIX pool slab array configuration BEFORE opening driver + cufile.set_parameter_posix_pool_slab_array( + ctypes.addressof(size_array), ctypes.addressof(count_array), len(slab_sizes) + ) + + # Open cuFile driver AFTER setting parameters + cufile.driver_open() + try: - # Test setting and getting various string parameters - # Note: String parameter tests may have issues with the current implementation + # After setting parameters, retrieve them back to verify + retrieved_sizes = (ctypes.c_size_t * len(slab_sizes))() + retrieved_counts = (ctypes.c_size_t * len(slab_counts))() - # Test logging level - logging_level = "INFO" - try: - # Convert Python string to null-terminated C string - logging_level_bytes = logging_level.encode("utf-8") + b"\x00" - logging_level_buffer = ctypes.create_string_buffer(logging_level_bytes) - cufile.set_parameter_string( - cufile.StringConfigParameter.LOGGING_LEVEL, int(ctypes.addressof(logging_level_buffer)) - ) - retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOGGING_LEVEL, 256) - # Use safe_decode_string to handle null terminators and padding - retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8")) - logging.info(f"Logging level test: set {logging_level}, got {retrieved_value}") - # The retrieved value should be a string, so we can compare directly - assert retrieved_value == logging_level, ( - f"Logging level mismatch: set {logging_level}, got {retrieved_value}" - ) - except Exception as e: - logging.error(f"Logging level test failed: {e}") - # Re-raise the exception to make the test fail - raise + cufile.get_parameter_posix_pool_slab_array( + ctypes.addressof(retrieved_sizes), ctypes.addressof(retrieved_counts), len(slab_sizes) + ) - # Test environment log file path - logfile_path = tempfile.gettempdir() + "/cufile.log" - try: - # Convert Python string to null-terminated C string - logfile_path_bytes = logfile_path.encode("utf-8") + b"\x00" - logfile_buffer = ctypes.create_string_buffer(logfile_path_bytes) - cufile.set_parameter_string( - cufile.StringConfigParameter.ENV_LOGFILE_PATH, int(ctypes.addressof(logfile_buffer)) + # Verify they match what we set + for i in range(len(slab_sizes)): + assert retrieved_sizes[i] == slab_sizes[i], ( + f"Size mismatch at index {i}: expected {slab_sizes[i]}, got {retrieved_sizes[i]}" + ) + assert retrieved_counts[i] == slab_counts[i], ( + f"Count mismatch at index {i}: expected {slab_counts[i]}, got {retrieved_counts[i]}" ) - retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.ENV_LOGFILE_PATH, 256) - # Use safe_decode_string to handle null terminators and padding - retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8")) - logging.info(f"Log file path test: set {logfile_path}, got {retrieved_value}") - # The retrieved value should be a string, so we can compare directly - assert retrieved_value == logfile_path, f"Log file path mismatch: set {logfile_path}, got {retrieved_value}" - except Exception as e: - logging.error(f"Log file path test failed: {e}") - # Re-raise the exception to make the test fail - raise - # Test log directory - log_dir = tempfile.gettempdir() + "/cufile_logs" - try: - # Convert Python string to null-terminated C string - log_dir_bytes = log_dir.encode("utf-8") + b"\x00" - log_dir_buffer = 
ctypes.create_string_buffer(log_dir_bytes) - cufile.set_parameter_string(cufile.StringConfigParameter.LOG_DIR, int(ctypes.addressof(log_dir_buffer))) - retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOG_DIR, 256) - # Use safe_decode_string to handle null terminators and padding - retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8")) - logging.info(f"Log directory test: set {log_dir}, got {retrieved_value}") - # The retrieved value should be a string, so we can compare directly - assert retrieved_value == log_dir, f"Log directory mismatch: set {log_dir}, got {retrieved_value}" - except Exception as e: - logging.error(f"Log directory test failed: {e}") - # Re-raise the exception to make the test fail - raise + # Verify configuration was accepted successfully + logging.info(f"POSIX pool slab array configured with {len(slab_sizes)} slab sizes") + logging.info(f"Slab sizes: {[f'{size // 1024}KB' for size in slab_sizes]}") + logging.info("Round-trip verification successful: set and retrieved values match") finally: + # Close cuFile driver + cufile.driver_close() cuda.cuDevicePrimaryCtxRelease(device)
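Every parameter test added in this patch follows the same set/verify/restore round trip, so a failing assertion cannot leak a modified process-wide cuFile setting into later tests. A minimal standalone sketch of that round trip is shown below; it uses the cuda.bindings.cufile module and the size_t parameter APIs exercised by the patch, while the helper name, chosen parameter, and value are illustrative only and assume cuFile 1.14.0 or newer.

from cuda.bindings import cufile

def roundtrip_size_t(param, value):
    # Remember the current process-wide setting so it can be restored afterwards.
    original = cufile.get_parameter_size_t(param)
    try:
        cufile.set_parameter_size_t(param, value)
        # The getter is expected to report exactly what was just set.
        assert cufile.get_parameter_size_t(param) == value
    finally:
        # Restore the previous value so later callers see an unmodified configuration.
        cufile.set_parameter_size_t(param, original)

roundtrip_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB, 64)  # 64 KB, as in the test above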