Skip to content

Commit c404452

Browse files
Fix #1186: Fix segmentation fault when accessing StridedMemoryView (#1190)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
1 parent 259a554 commit c404452

File tree

4 files changed

+66
-16
lines changed

4 files changed

+66
-16
lines changed

cuda_core/cuda/core/experimental/_memoryview.pyx

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -122,16 +122,17 @@ cdef class StridedMemoryView:
122122

123123
@property
124124
def shape(self) -> tuple[int]:
125-
if self._shape is None and self.exporting_obj is not None:
126-
if self.dl_tensor != NULL:
127-
self._shape = cuda_utils.carray_int64_t_to_tuple(
128-
self.dl_tensor.shape,
129-
self.dl_tensor.ndim
130-
)
125+
if self._shape is None:
126+
if self.exporting_obj is not None:
127+
if self.dl_tensor != NULL:
128+
self._shape = cuda_utils.carray_int64_t_to_tuple(
129+
self.dl_tensor.shape,
130+
self.dl_tensor.ndim
131+
)
132+
else:
133+
self._shape = self.metadata["shape"]
131134
else:
132-
self._shape = self.metadata["shape"]
133-
else:
134-
self._shape = ()
135+
self._shape = ()
135136
return self._shape
136137

137138
@property
@@ -146,14 +147,12 @@ cdef class StridedMemoryView:
146147
self.dl_tensor.ndim
147148
)
148149
else:
150+
# This is a Python interface anyway, so there is not much point
151+
# in using the optimization in cuda_utils.carray_int64_t_to_tuple
149152
strides = self.metadata.get("strides")
150153
if strides is not None:
151154
itemsize = self.dtype.itemsize
152-
self._strides = cpython.PyTuple_New(len(strides))
153-
for i in range(len(strides)):
154-
cpython.PyTuple_SET_ITEM(
155-
self._strides, i, strides[i] // itemsize
156-
)
155+
self._strides = tuple(x // itemsize for x in strides)
157156
self._strides_init = True
158157
return self._strides
159158

cuda_core/cuda/core/experimental/_utils/cuda_utils.pxd

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
cimport cpython
6+
from cpython.object cimport PyObject
67
from libc.stdint cimport int64_t
78

89
from cuda.bindings cimport cydriver
@@ -32,9 +33,17 @@ cpdef int _check_nvrtc_error(error) except?-1
3233
cpdef check_or_create_options(type cls, options, str options_description=*, bint keep_none=*)
3334

3435

36+
# Create low-level externs so Cython won't "helpfully" handle reference counting
37+
# for us. They are prefixed with an underscore to distinguish them from the
38+
# definitions in cpython.long and cpython.tuple.
39+
cdef extern from "Python.h":
40+
PyObject *_PyLong_FromLongLong "PyLong_FromLongLong" (long long val) except NULL
41+
void _PyTuple_SET_ITEM "PyTuple_SET_ITEM" (object p, Py_ssize_t pos, PyObject *o)
42+
43+
3544
cdef inline tuple carray_int64_t_to_tuple(int64_t *ptr, int length):
3645
# Construct shape and strides tuples using the Python/C API for speed
37-
result = cpython.PyTuple_New(length)
46+
cdef tuple result = cpython.PyTuple_New(length)
3847
for i in range(length):
39-
cpython.PyTuple_SET_ITEM(result, i, cpython.PyLong_FromLongLong(ptr[i]))
48+
_PyTuple_SET_ITEM(result, i, _PyLong_FromLongLong(ptr[i]))
4049
return result
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
.. SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
.. SPDX-License-Identifier: Apache-2.0
3+
4+
.. currentmodule:: cuda.core.experimental
5+
6+
``cuda.core`` 0.4.X Release Notes
7+
=================================
8+
9+
10+
Highlights
11+
----------
12+
13+
14+
Breaking Changes
15+
----------------
16+
17+
18+
New features
19+
------------
20+
21+
22+
New examples
23+
------------
24+
25+
26+
Fixes and enhancements
27+
----------------------
28+
29+
- Fixed a segmentation fault when accessing :class:`StridedMemoryView` ``shape`` and ``strides`` members.

cuda_core/tests/test_memory.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,3 +612,16 @@ def test_strided_memory_view_leak():
612612
StridedMemoryView(arr, stream_ptr=-1)
613613
after = sys.getrefcount(arr)
614614
assert before == after
615+
616+
617+
def test_strided_memory_view_refcnt():
618+
# Use Fortran ordering so that strides are used
619+
a = np.zeros((64, 4), dtype=np.uint8, order="F")
620+
av = StridedMemoryView(a, stream_ptr=-1)
621+
# segfaults if refcnt is wrong
622+
assert av.shape[0] == 64
623+
assert sys.getrefcount(av.shape) >= 2
624+
625+
assert av.strides[0] == 1
626+
assert av.strides[1] == 64
627+
assert sys.getrefcount(av.strides) >= 2

0 commit comments

Comments
 (0)