From b6ed7f6b5481af6f704525d44edfe0bb726acfda Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Tue, 10 Jun 2025 11:12:33 -0700
Subject: [PATCH 01/11] Add sm_* offload arch support to DPNP_TARGET_CUDA

---
 CMakeLists.txt | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2d1bd4fc4ae8..5cc9df99ff9c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,17 +68,21 @@ find_package(Dpctl REQUIRED)
 message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR})
 message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})
 
-option(DPNP_TARGET_CUDA
-    "Build DPNP to target CUDA devices"
-    OFF
-)
 option(DPNP_USE_ONEMKL_INTERFACES
     "Build DPNP with oneMKL Interfaces"
     OFF
 )
+set(DPNP_TARGET_CUDA
+    ""
+    CACHE STRING
+    "Build DPNP to target CUDA device. \
+Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \
+or to a specific architecture like sm_80."
+)
 set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")
 
 set(_dpnp_sycl_targets)
+set(_dpnp_cuda_arch)
 set(_use_onemkl_interfaces OFF)
 set(_use_onemkl_interfaces_cuda OFF)
 set(_use_onemkl_interfaces_hip OFF)
@@ -87,8 +91,18 @@ set(_dpnp_sycl_target_compile_options)
 set(_dpnp_sycl_target_link_options)
 
 if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
-    if(DPNP_TARGET_CUDA)
-        set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
+    if (DPNP_TARGET_CUDA)
+        if(DPNP_TARGET_CUDA MATCHES "^sm_")
+            set(_dpnp_cuda_arch ${DPNP_TARGET_CUDA})
+        elseif(DPNP_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$")
+            set(_dpnp_cuda_arch "sm_50")
+        else()
+            message(FATAL_ERROR
+                "Invalid value for DPNP_TARGET_CUDA: \"${DPNP_TARGET_CUDA}\". "
+                "Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'."
+            )
+        endif()
+        set(_dpnp_sycl_targets "nvidia_gpu_${_dpnp_cuda_arch},spir64-unknown-unknown")
         set(_use_onemkl_interfaces_cuda ON)
     endif()
 
@@ -104,7 +118,7 @@ if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
 else()
     set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})
 
-    if ("${DPNP_SYCL_TARGETS}" MATCHES "nvptx64-nvidia-cuda")
+    if("${DPNP_SYCL_TARGETS}" MATCHES "(nvidia_gpu_sm_|nvptx64-nvidia-cuda)")
         set(_use_onemkl_interfaces_cuda ON)
     endif()
 

From 25bf7b9787cfa0be1e286972a88903766a96b133 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Tue, 10 Jun 2025 11:51:21 -0700
Subject: [PATCH 02/11] Enable CUDA architecture selection via --target-cuda

---
 scripts/build_locally.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/scripts/build_locally.py b/scripts/build_locally.py
index 3403f98304eb..2ac4120998ad 100644
--- a/scripts/build_locally.py
+++ b/scripts/build_locally.py
@@ -38,7 +38,7 @@ def run(
     cmake_executable=None,
     verbose=False,
     cmake_opts="",
-    target="intel",
+    target_cuda=None,
     target_hip=None,
     onemkl_interfaces=False,
     onemkl_interfaces_dir=None,
@@ -98,12 +98,14 @@ def run(
         if "DPL_ROOT" in os.environ:
             os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"]
 
-    if not target.strip():
-        target = "intel"
-
-    if target == "cuda":
+    if target_cuda is not None:
+        if not target_cuda.strip():
+            raise ValueError(
+                "--target-cuda can not be an empty string. "
+                "Use --target-cuda=<arch> or --target-cuda"
+            )
         cmake_args += [
-            "-DDPNP_TARGET_CUDA=ON",
+            f"-DDPNP_TARGET_CUDA={target_cuda}",
         ]
         # Always builds using oneMKL interfaces for the cuda target
         onemkl_interfaces = True
@@ -186,10 +188,12 @@ def run(
         type=str,
     )
     driver.add_argument(
-        "--target",
-        help="Target backend for build",
-        dest="target",
-        default="intel",
+        "--target-cuda",
+        nargs="?",
+        const="ON",
+        help="Enable CUDA target for build; "
+        "optionally specify architecture (e.g., sm_80)",
+        default=None,
         type=str,
     )
     driver.add_argument(
@@ -265,7 +269,7 @@ def run(
         cmake_executable=args.cmake_executable,
         verbose=args.verbose,
         cmake_opts=args.cmake_opts,
-        target=args.target,
+        target_cuda=args.target_cuda,
         target_hip=args.target_hip,
         onemkl_interfaces=args.onemkl_interfaces,
         onemkl_interfaces_dir=args.onemkl_interfaces_dir,

From b0bd17cc1701697522fe78ca3012ae3e49afd7b8 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Tue, 10 Jun 2025 11:52:52 -0700
Subject: [PATCH 03/11] Raise RuntimeError if onemkl_interfaces_dir passed

---
 scripts/build_locally.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/build_locally.py b/scripts/build_locally.py
index 2ac4120998ad..de66651e6d6d 100644
--- a/scripts/build_locally.py
+++ b/scripts/build_locally.py
@@ -131,7 +131,7 @@ def run(
                 f"-DDPNP_ONEMKL_INTERFACES_DIR={onemkl_interfaces_dir}",
             ]
     elif onemkl_interfaces_dir:
-        RuntimeError("--onemkl-interfaces-dir option is not supported")
+        raise RuntimeError("--onemkl-interfaces-dir option is not supported")
 
     subprocess.check_call(
         cmake_args, shell=False, cwd=setup_dir, env=os.environ

From e0dae0edbc945525421d3774320721eff585be01 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Tue, 10 Jun 2025 11:55:21 -0700
Subject: [PATCH 04/11] Clarify --target-cuda help message

---
 scripts/build_locally.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/build_locally.py b/scripts/build_locally.py
index de66651e6d6d..3902e4eb4733 100644
--- a/scripts/build_locally.py
+++ b/scripts/build_locally.py
@@ -192,7 +192,7 @@ def run(
         nargs="?",
         const="ON",
         help="Enable CUDA target for build; "
-        "optionally specify architecture (e.g., sm_80)",
+        "optionally specify architecture (e.g., --target-cuda=sm_80)",
         default=None,
         type=str,
     )

From c670477f16877004fde6e96a962eacf92af5acb6 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Tue, 10 Jun 2025 12:17:06 -0700
Subject: [PATCH 05/11] Update CUDA build docs

---
 doc/quick_start_guide.rst | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst
index 0e6f9dca74e2..9c18e608f84e 100644
--- a/doc/quick_start_guide.rst
+++ b/doc/quick_start_guide.rst
@@ -144,13 +144,33 @@ installation layout of compatible version. The following plugins from CodePlay a
 Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets.
 <https://intelpython.github.io/dpctl/latest/beginners_guides/installation.html#building-for-custom-sycl-targets>`_
 
-``dpnp`` can be built for CUDA devices as follows:
+``dpnp`` can be built for CUDA devices using the ``--target-cuda`` argument.
+
+To target a specific architecture (e.g., ``sm_80``):
+
+.. code-block:: bash
+
+    python scripts/build_locally.py --target-cuda=sm_80
+
+To use the default architecture (``sm_50``), run:
 
 .. code-block:: bash
 
-    python scripts/build_locally.py --target=cuda
+    python scripts/build_locally.py --target-cuda
+
+Note that kernels are built for ``sm_50`` by default, allowing them to work on a wider
+range of architectures, but limiting the usage of more recent CUDA features.
+
+For reference, compute architecture strings like ``sm_80`` correspond to specific
+CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``).
+A complete mapping between NVIDIA GPU models and their respective
+Compute Capabilities can be found in the official
+`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_ documentation.
+
+A full list of available SYCL alias targets is available in the
+`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_.
 
-And for AMD devices:
+To build for AMD devices, use:
 
 .. code-block:: bash
 
@@ -179,7 +199,7 @@ architecture all at once:
 
 .. code-block:: bash
 
-    python scripts/build_locally.py --target=cuda --target-hip=gfx90a
+    python scripts/build_locally.py --target-cuda --target-hip=gfx90a
 
 
 Testing

From dbdd077b3347ae23b560f89994435d6980bbc140 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 11 Jun 2025 03:38:09 -0700
Subject: [PATCH 06/11] Add CUDA and AMD build subchapters to docs

---
 doc/quick_start_guide.rst | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst
index 9c18e608f84e..8aa8c6819b21 100644
--- a/doc/quick_start_guide.rst
+++ b/doc/quick_start_guide.rst
@@ -144,7 +144,10 @@ installation layout of compatible version. The following plugins from CodePlay a
 Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets.
 <https://intelpython.github.io/dpctl/latest/beginners_guides/installation.html#building-for-custom-sycl-targets>`_
 
-``dpnp`` can be built for CUDA devices using the ``--target-cuda`` argument.
+CUDA build
+~~~~~~~~~~
+
+To build for CUDA devices, use the ``--target-cuda`` argument.
 
 To target a specific architecture (e.g., ``sm_80``):
 
@@ -170,7 +173,10 @@ Compute Capabilities can be found in the official
 A full list of available SYCL alias targets is available in the
 `DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_.
 
-To build for AMD devices, use:
+AMD build
+~~~~~~~~~
+
+To build for AMD devices, use the ``--target-hip=<arch>`` argument:
 
 .. code-block:: bash
 
@@ -193,6 +199,8 @@ For example:
 .. code-block:: bash
     python scripts/build_locally.py --target-hip=gfx90a
 
+Multi-target build
+~~~~~~~~~~~~~~~~~~
 
 It is, however, possible to build for Intel devices, CUDA devices, and an AMD device
 architecture all at once:

From b08c0e5cad7539c92f93ec3ad7e750124ad3d871 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 11 Jun 2025 04:02:45 -0700
Subject: [PATCH 07/11] Clarify SYCL alias target usage

---
 doc/quick_start_guide.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst
index 8aa8c6819b21..59f33add03db 100644
--- a/doc/quick_start_guide.rst
+++ b/doc/quick_start_guide.rst
@@ -144,6 +144,10 @@ installation layout of compatible version. The following plugins from CodePlay a
 Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets.
 <https://intelpython.github.io/dpctl/latest/beginners_guides/installation.html#building-for-custom-sycl-targets>`_
 
+Builds for CUDA and AMD devices internally use SYCL alias targets that are passed to the compiler.
+A full list of SYCL alias targets is provided in the
+`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_.
+
 CUDA build
 ~~~~~~~~~~
 
@@ -170,9 +174,6 @@ A complete mapping between NVIDIA GPU models and their respective
 Compute Capabilities can be found in the official
 `CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_ documentation.
 
-A full list of available SYCL alias targets is available in the
-`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_.
-
 AMD build
 ~~~~~~~~~
 

From 117f6a5f4db4ca8d377a4cbeda541a9764588121 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 11 Jun 2025 04:03:22 -0700
Subject: [PATCH 08/11] Apply remarks

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5cc9df99ff9c..33bd9efd47cc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -82,7 +82,6 @@ or to a specific architecture like sm_80."
 set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")
 
 set(_dpnp_sycl_targets)
-set(_dpnp_cuda_arch)
 set(_use_onemkl_interfaces OFF)
 set(_use_onemkl_interfaces_cuda OFF)
 set(_use_onemkl_interfaces_hip OFF)
@@ -92,6 +91,7 @@ set(_dpnp_sycl_target_link_options)
 
 if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
     if (DPNP_TARGET_CUDA)
+        set(_dpnp_cuda_arch)
         if(DPNP_TARGET_CUDA MATCHES "^sm_")
             set(_dpnp_cuda_arch ${DPNP_TARGET_CUDA})
         elseif(DPNP_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$")

From 2a03eba7a5193207792c8fed4a51c1cab3751cdb Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 11 Jun 2025 04:16:28 -0700
Subject: [PATCH 09/11] Update changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c8524f180594..debdf09a756a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+* Added support for selecting CUDA architecture via the `--target-cuda` option using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/nvidia/home/) [#2478](https://github.com/IntelPython/dpnp/pull/2478)
+
 ### Changed
 
 ### Fixed

From 39b62e2449306c7f52dcc36a20902be22315767e Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 12 Jun 2025 03:40:25 -0700
Subject: [PATCH 10/11] Apply remarks

---
 CHANGELOG.md              | 2 +-
 doc/quick_start_guide.rst | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index debdf09a756a..dc2497489e54 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-* Added support for selecting CUDA architecture via the `--target-cuda` option using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/nvidia/home/) [#2478](https://github.com/IntelPython/dpnp/pull/2478)
+* Added `--target-cuda[=ARCH]` option to replace the removed `--target=cuda`, allowing users to build for CUDA devices with optional architecture selection using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/nvidia/home/) [#2478](https://github.com/IntelPython/dpnp/pull/2478)
 
 ### Changed
 
diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst
index 59f33add03db..27fbf9044ebe 100644
--- a/doc/quick_start_guide.rst
+++ b/doc/quick_start_guide.rst
@@ -165,8 +165,8 @@ To use the default architecture (``sm_50``), run:
 
     python scripts/build_locally.py --target-cuda
 
-Note that kernels are built for ``sm_50`` by default, allowing them to work on a wider
-range of architectures, but limiting the usage of more recent CUDA features.
+Note that kernels are built for the default architecture (``sm_50``), allowing them to work on a
+wider range of architectures, but limiting the usage of more recent CUDA features.
 
 For reference, compute architecture strings like ``sm_80`` correspond to specific
 CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``).
@@ -203,8 +203,8 @@ For example:
 Multi-target build
 ~~~~~~~~~~~~~~~~~~
 
-It is, however, possible to build for Intel devices, CUDA devices, and an AMD device
-architecture all at once:
+By default, building from source enables support for Intel devices.
+However, the build can be extended to support both CUDA and AMD devices simultaneously:
 
 .. code-block:: bash
 

From b335fa47ac00c6df3df12b02f606792881ef6077 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Mon, 16 Jun 2025 03:01:08 -0700
Subject: [PATCH 11/11] Update Multi-target build doc

---
 doc/quick_start_guide.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst
index 27fbf9044ebe..497ff7027f3e 100644
--- a/doc/quick_start_guide.rst
+++ b/doc/quick_start_guide.rst
@@ -203,8 +203,10 @@ For example:
 Multi-target build
 ~~~~~~~~~~~~~~~~~~
 
-By default, building from source enables support for Intel devices.
-However, the build can be extended to support both CUDA and AMD devices simultaneously:
+The default ``dpnp`` build from source supports Intel devices only.
+Building with a custom SYCL target additionally enables support for CUDA or AMD
+devices in ``dpnp``. Support can also be extended to both CUDA and AMD devices
+at the same time:
 
 .. code-block:: bash