Skip to content

Commit 3c98f14

Browse files
committed
reference compute capability instead of chip gen
1 parent 7d117f2 commit 3c98f14

File tree

3 files changed

+12
-4
lines changed

3 files changed

+12
-4
lines changed

cuda_core/cuda/core/experimental/_launcher.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ def __post_init__(self):
7272
if not _use_ex:
7373
raise CUDAError("thread block clusters require cuda.bindings & driver 11.8+")
7474
if Device().compute_capability < (9, 0):
75-
raise CUDAError("thread block clusters are not supported below Hopper")
75+
raise CUDAError("thread block clusters are not supported on devices with compute capability < 9.0")
7676
self.cluster = self._cast_to_3_tuple(self.cluster)
7777
# we handle "stream=None" in the launch API
7878
if self.stream is not None and not isinstance(self.stream, Stream):

cuda_core/cuda/core/experimental/_program.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,22 +39,27 @@ def close(self):
3939
self.handle = None
4040

4141
__slots__ = ("__weakref__", "_mnff", "_backend")
42-
_supported_code_type = ("c++",)
42+
_supported_code_type = ("c++", "ptx")
4343
_supported_target_type = ("ptx", "cubin", "ltoir")
4444

4545
def __init__(self, code, code_type):
4646
self._mnff = Program._MembersNeededForFinalize(self, None)
47+
code_type = code_type.lower()
4748

4849
if code_type not in self._supported_code_type:
4950
raise NotImplementedError
5051

51-
if code_type.lower() == "c++":
52+
if code_type == "c++":
5253
if not isinstance(code, str):
5354
raise TypeError
5455
# TODO: support pre-loaded headers & include names
5556
# TODO: allow tuples once NVIDIA/cuda-python#72 is resolved
5657
self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], []))
5758
self._backend = "nvrtc"
59+
60+
if code_type == "ptx":
61+
self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], []))
62+
self._backend = "nvrtc"
5863
else:
5964
raise NotImplementedError
6065

cuda_core/examples/thread_block_cluster.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,10 @@
3535
dev = Device()
3636
arch = dev.compute_capability
3737
if arch < (9, 0):
38-
print("this demo requires a Hopper GPU (since thread block cluster is a hardware feature)", file=sys.stderr)
38+
print(
39+
"this demo requires compute capability >= 9.0 (since thread block cluster is a hardware feature)",
40+
file=sys.stderr,
41+
)
3942
sys.exit(0)
4043
arch = "".join(f"{i}" for i in arch)
4144

0 commit comments

Comments
 (0)