diff --git a/chapter-02/code/vecMul.py b/chapter-02/code/vecMul.py index e97d467..700441b 100644 --- a/chapter-02/code/vecMul.py +++ b/chapter-02/code/vecMul.py @@ -29,7 +29,11 @@ def compile_extension(): cuda_sources=cuda_source, functions=["vector_multiplication"], with_cuda=True, - # extra_cuda_cflags=["-O2"] + extra_cuda_cflags=[ + "-O2", + "-gencode=arch=compute_120,code=sm_120", + "-gencode=arch=compute_120,code=compute_120", + ], ) diff --git a/chapter-02/code/vecMulTorchTensor.cu b/chapter-02/code/vecMulTorchTensor.cu index 66d9cdc..c6edf4b 100644 --- a/chapter-02/code/vecMulTorchTensor.cu +++ b/chapter-02/code/vecMulTorchTensor.cu @@ -25,5 +25,5 @@ torch::Tensor vector_multiplication(torch::Tensor A, torch::Tensor B) { C10_CUDA_KERNEL_LAUNCH_CHECK(); - return A; + return C; }