From b2495d3badba2c329e393af204f0f59730630980 Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Mon, 20 Jan 2025 16:00:47 -0800
Subject: [PATCH 1/3] Validate arch and NVRTC/driver versions in Program

---
 cuda_core/cuda/core/experimental/_program.py | 22 ++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index c10fd077..fde69a80 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -11,6 +11,7 @@
 from cuda.core.experimental._utils import (
     _handle_boolean_option,
     check_or_create_options,
+    driver,
     handle_return,
     is_nested_sequence,
     is_sequence,
@@ -413,6 +414,21 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
                 raise TypeError
             # TODO: support pre-loaded headers & include names
             # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved
+
+            supported_archs = handle_return(nvrtc.nvrtcGetSupportedArchs())
+
+            if options is not None:
+                arch_not_supported = options.arch is not None and options.arch not in supported_archs
+                default_arch_not_supported = (
+                    options.arch is None
+                    and 10 * Device().compute_capability[0] + Device().compute_capability[1] not in supported_archs
+                )
+
+                if arch_not_supported or default_arch_not_supported:
+                    raise ValueError(
+                        f"The provided arch, or default arch (that of the current device) "
+                        f"is not supported by the current backend. Supported architectures: {supported_archs}"
+                    )
             self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], []))
             self._backend = "nvrtc"
         else:
@@ -448,6 +464,12 @@ def compile(self, target_type, name_expressions=(), logs=None):
             raise NotImplementedError
 
         if self._backend == "nvrtc":
+            version = handle_return(nvrtc.nvrtcVersion())
+            if handle_return(driver.cuDriverGetVersion()) > version[0] * 1000 + version[1] * 10:
+                raise RuntimeError(
+                    "The CUDA driver version is newer than the NVRTC version. "
+                    "Please update your NVRTC library to match the CUDA driver version."
+                )
             if name_expressions:
                 for n in name_expressions:
                     handle_return(nvrtc.nvrtcAddNameExpression(self._mnff.handle, n.encode()), handle=self._mnff.handle)

From eb8a73f2c3b5a01bdaaa19c9c09d453ccc306d6e Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Mon, 20 Jan 2025 16:18:25 -0800
Subject: [PATCH 2/3] Fix arch check: compare numeric part of options.arch

---
 cuda_core/cuda/core/experimental/_program.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index fde69a80..235d5115 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -418,7 +418,9 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
             supported_archs = handle_return(nvrtc.nvrtcGetSupportedArchs())
 
             if options is not None:
-                arch_not_supported = options.arch is not None and options.arch not in supported_archs
+                arch_not_supported = (
+                    options.arch is not None and int(options.arch.split("_")[-1]) not in supported_archs
+                )
                 default_arch_not_supported = (
                     options.arch is None
                     and 10 * Device().compute_capability[0] + Device().compute_capability[1] not in supported_archs

From bdb06af0afec386ff1baf75f3c4d1505186d07df Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Mon, 20 Jan 2025 16:24:06 -0800
Subject: [PATCH 3/3] remove todo

---
 cuda_core/tests/test_program.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index 0f9b8e3b..3bf77528 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -67,7 +67,6 @@ def test_program_init_invalid_code_format():
         Program(code, "c++")
 
 
-# TODO: incorporate this check in Program
 # This is tested against the current device's arch
 @pytest.mark.xfail(not can_load_generated_ptx(), reason="PTX version too new")
 def test_program_compile_valid_target_type():