From e543174c1d2ebd1e53740e9e68d143a912f3daae Mon Sep 17 00:00:00 2001 From: Ryan Story Date: Fri, 16 Jan 2026 10:13:46 -0600 Subject: [PATCH] Fix CUDA vector multiply for RTX 5070 --- chapter-02/code/vecMul.py | 6 +++++- chapter-02/code/vecMulTorchTensor.cu | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/chapter-02/code/vecMul.py b/chapter-02/code/vecMul.py index e97d467..700441b 100644 --- a/chapter-02/code/vecMul.py +++ b/chapter-02/code/vecMul.py @@ -29,7 +29,11 @@ def compile_extension(): cuda_sources=cuda_source, functions=["vector_multiplication"], with_cuda=True, - # extra_cuda_cflags=["-O2"] + extra_cuda_cflags=[ + "-O2", + "-gencode=arch=compute_120,code=sm_120", + "-gencode=arch=compute_120,code=compute_120", + ], ) diff --git a/chapter-02/code/vecMulTorchTensor.cu b/chapter-02/code/vecMulTorchTensor.cu index 66d9cdc..c6edf4b 100644 --- a/chapter-02/code/vecMulTorchTensor.cu +++ b/chapter-02/code/vecMulTorchTensor.cu @@ -25,5 +25,5 @@ torch::Tensor vector_multiplication(torch::Tensor A, torch::Tensor B) { C10_CUDA_KERNEL_LAUNCH_CHECK(); - return A; + return C; }