5 changes: 5 additions & 0 deletions cuda_core/cuda/core/experimental/_device.py
@@ -1112,6 +1112,11 @@ def compute_capability(self) -> ComputeCapability:
        self.properties._cache["compute_capability"] = cc
        return cc

    @property
    def arch(self) -> str:
        """Return compute capability as a string (e.g., '75' for CC 7.5)."""
        return "".join(f"{i}" for i in self.compute_capability)

    @property
    def context(self) -> Context:
        """Return the current :obj:`~_context.Context` associated with this device.
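For reference, a minimal sketch of how the new property behaves, assuming a CUDA-capable GPU at device index 0 (the printed value depends on the hardware; '75' is only illustrative):

from cuda.core.experimental import Device

dev = Device(0)
dev.set_current()

# dev.arch (added in this PR) is the string concatenation of the
# compute capability tuple, e.g. ComputeCapability(major=7, minor=5) -> "75".
assert dev.arch == "".join(f"{i}" for i in dev.compute_capability)
print(dev.arch)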
2 changes: 1 addition & 1 deletion cuda_core/docs/source/release/0.X.Y-notes.rst
@@ -24,7 +24,7 @@ Breaking Changes
New features
------------

None.
- Added :attr:`Device.arch` property that returns the compute capability as a string (e.g., '75' for CC 7.5), providing a convenient alternative to manually concatenating the compute capability tuple.


New examples
3 changes: 1 addition & 2 deletions cuda_core/examples/cuda_graphs.py
@@ -53,8 +53,7 @@ def main():
cp.cuda.ExternalStream(int(stream.handle)).use()

# Compile the program
arch = "".join(f"{i}" for i in dev.compute_capability)
program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}")
program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}")
prog = Program(code, code_type="c++", options=program_options)
mod = prog.compile(
"cubin", name_expressions=("vector_add<float>", "vector_multiply<float>", "vector_subtract<float>")
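This and the remaining example updates below all make the same substitution. As a self-contained sketch of the pattern, assuming a GPU and a working NVRTC toolchain (the trivial kernel source and the c++17 flag here are illustrative, not taken from the PR):

from cuda.core.experimental import Device, Program, ProgramOptions

dev = Device()
dev.set_current()

# Previously: arch = "".join(f"{i}" for i in dev.compute_capability)
# Now the target architecture string comes straight from the device.
program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}")
code = 'extern "C" __global__ void noop() {}'
prog = Program(code, code_type="c++", options=program_options)
mod = prog.compile("cubin")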
3 changes: 1 addition & 2 deletions cuda_core/examples/memory_ops.py
@@ -54,8 +54,7 @@
cp.cuda.ExternalStream(int(stream.handle)).use()

# Compile kernel
arch = "".join(f"{i}" for i in dev.compute_capability)
program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}")
program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}")
prog = Program(code, code_type="c++", options=program_options)
mod = prog.compile("cubin")
kernel = mod.get_kernel("memory_ops")
3 changes: 1 addition & 2 deletions cuda_core/examples/pytorch_example.py
@@ -51,8 +51,7 @@ def __cuda_stream__(self):
s = dev.create_stream(PyTorchStreamWrapper(pt_stream))

# prepare program
arch = "".join(f"{i}" for i in dev.compute_capability)
program_options = ProgramOptions(std="c++11", arch=f"sm_{arch}")
program_options = ProgramOptions(std="c++11", arch=f"sm_{dev.arch}")
prog = Program(code, code_type="c++", options=program_options)
mod = prog.compile(
"cubin",
3 changes: 1 addition & 2 deletions cuda_core/examples/saxpy.py
@@ -38,8 +38,7 @@
s = dev.create_stream()

# prepare program
arch = "".join(f"{i}" for i in dev.compute_capability)
program_options = ProgramOptions(std="c++11", arch=f"sm_{arch}")
program_options = ProgramOptions(std="c++11", arch=f"sm_{dev.arch}")
prog = Program(code, code_type="c++", options=program_options)

# Note the use of the `name_expressions` argument to specify the template
6 changes: 2 additions & 4 deletions cuda_core/examples/simple_multi_gpu_example.py
@@ -40,8 +40,7 @@
}
}
"""
arch0 = "".join(f"{i}" for i in dev0.compute_capability)
prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch0}"})
prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{dev0.arch}"})
mod_add = prog_add.compile("cubin")
ker_add = mod_add.get_kernel("vector_add")

@@ -63,8 +62,7 @@
}
}
"""
arch1 = "".join(f"{i}" for i in dev1.compute_capability)
prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch1}"})
prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{dev1.arch}"})
mod_sub = prog_sub.compile("cubin")
ker_sub = mod_sub.get_kernel("vector_sub")

3 changes: 1 addition & 2 deletions cuda_core/examples/strided_memory_view_gpu.py
@@ -103,8 +103,7 @@ def run():
# To know the GPU's compute capability, we need to identify which GPU to use.
dev = Device(0)
dev.set_current()
arch = "".join(f"{i}" for i in dev.compute_capability)
gpu_prog = Program(gpu_code, code_type="c++", options=ProgramOptions(arch=f"sm_{arch}", std="c++11"))
gpu_prog = Program(gpu_code, code_type="c++", options=ProgramOptions(arch=f"sm_{dev.arch}", std="c++11"))
mod = gpu_prog.compile(target_type="cubin")
gpu_ker = mod.get_kernel(func_name)

3 changes: 1 addition & 2 deletions cuda_core/examples/vector_add.py
@@ -33,8 +33,7 @@
s = dev.create_stream()

# prepare program
arch = "".join(f"{i}" for i in dev.compute_capability)
program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}")
program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}")
prog = Program(code, code_type="c++", options=program_options)
mod = prog.compile("cubin", name_expressions=("vector_add<float>",))

12 changes: 12 additions & 0 deletions cuda_core/tests/test_device.py
@@ -105,6 +105,18 @@ def test_compute_capability():
    assert device.compute_capability == expected_cc


def test_arch():
    device = Device()
    # Test that arch returns the same as the old pattern
    expected_arch = "".join(f"{i}" for i in device.compute_capability)
    assert device.arch == expected_arch
    # Test that it's a string
    assert isinstance(device.arch, str)
    # Test that it matches the expected format (e.g., "75" for CC 7.5)
    cc = device.compute_capability
    assert device.arch == f"{cc.major}{cc.minor}"


cuda_base_properties = [
    ("max_threads_per_block", int),
    ("max_block_dim_x", int),