From b6ed7f6b5481af6f704525d44edfe0bb726acfda Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 10 Jun 2025 11:12:33 -0700 Subject: [PATCH 01/11] Add sm_* offload arch support to DPNP_TARGET_CUDA --- CMakeLists.txt | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d1bd4fc4ae8..5cc9df99ff9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,17 +68,21 @@ find_package(Dpctl REQUIRED) message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR}) message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR}) -option(DPNP_TARGET_CUDA - "Build DPNP to target CUDA devices" - OFF -) option(DPNP_USE_ONEMKL_INTERFACES "Build DPNP with oneMKL Interfaces" OFF ) +set(DPNP_TARGET_CUDA + "" + CACHE STRING + "Build DPNP to target CUDA device. \ +Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \ +or to a specific architecture like sm_80." +) set(HIP_TARGETS "" CACHE STRING "HIP architecture for target") set(_dpnp_sycl_targets) +set(_dpnp_cuda_arch) set(_use_onemkl_interfaces OFF) set(_use_onemkl_interfaces_cuda OFF) set(_use_onemkl_interfaces_hip OFF) @@ -87,8 +91,18 @@ set(_dpnp_sycl_target_compile_options) set(_dpnp_sycl_target_link_options) if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x") - if(DPNP_TARGET_CUDA) - set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") + if (DPNP_TARGET_CUDA) + if(DPNP_TARGET_CUDA MATCHES "^sm_") + set(_dpnp_cuda_arch ${DPNP_TARGET_CUDA}) + elseif(DPNP_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$") + set(_dpnp_cuda_arch "sm_50") + else() + message(FATAL_ERROR + "Invalid value for DPNP_TARGET_CUDA: \"${DPNP_TARGET_CUDA}\". " + "Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'."
+ ) + endif() + set(_dpnp_sycl_targets "nvidia_gpu_${_dpnp_cuda_arch},spir64-unknown-unknown") set(_use_onemkl_interfaces_cuda ON) endif() @@ -104,7 +118,7 @@ if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x") else() set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS}) - if ("${DPNP_SYCL_TARGETS}" MATCHES "nvptx64-nvidia-cuda") + if("${DPNP_SYCL_TARGETS}" MATCHES "(nvidia_gpu_sm_|nvptx64-nvidia-cuda)") set(_use_onemkl_interfaces_cuda ON) endif() From 25bf7b9787cfa0be1e286972a88903766a96b133 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 10 Jun 2025 11:51:21 -0700 Subject: [PATCH 02/11] Enable CUDA architecture selection via --target-cuda --- scripts/build_locally.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/scripts/build_locally.py b/scripts/build_locally.py index 3403f98304eb..2ac4120998ad 100644 --- a/scripts/build_locally.py +++ b/scripts/build_locally.py @@ -38,7 +38,7 @@ def run( cmake_executable=None, verbose=False, cmake_opts="", - target="intel", + target_cuda=None, target_hip=None, onemkl_interfaces=False, onemkl_interfaces_dir=None, @@ -98,12 +98,14 @@ def run( if "DPL_ROOT" in os.environ: os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"] - if not target.strip(): - target = "intel" - - if target == "cuda": + if target_cuda is not None: + if not target_cuda.strip(): + raise ValueError( + "--target-cuda can not be an empty string. 
" + "Use --target-cuda= or --target-cuda" + ) cmake_args += [ - "-DDPNP_TARGET_CUDA=ON", + f"-DDPNP_TARGET_CUDA={target_cuda}", ] # Always builds using oneMKL interfaces for the cuda target onemkl_interfaces = True @@ -186,10 +188,12 @@ def run( type=str, ) driver.add_argument( - "--target", - help="Target backend for build", - dest="target", - default="intel", + "--target-cuda", + nargs="?", + const="ON", + help="Enable CUDA target for build; " + "optionally specify architecture (e.g., sm_80)", + default=None, type=str, ) driver.add_argument( @@ -265,7 +269,7 @@ def run( cmake_executable=args.cmake_executable, verbose=args.verbose, cmake_opts=args.cmake_opts, - target=args.target, + target_cuda=args.target_cuda, target_hip=args.target_hip, onemkl_interfaces=args.onemkl_interfaces, onemkl_interfaces_dir=args.onemkl_interfaces_dir, From b0bd17cc1701697522fe78ca3012ae3e49afd7b8 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 10 Jun 2025 11:52:52 -0700 Subject: [PATCH 03/11] Raise RuntimeError if onemkl_interfaces_dir passed --- scripts/build_locally.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_locally.py b/scripts/build_locally.py index 2ac4120998ad..de66651e6d6d 100644 --- a/scripts/build_locally.py +++ b/scripts/build_locally.py @@ -131,7 +131,7 @@ def run( f"-DDPNP_ONEMKL_INTERFACES_DIR={onemkl_interfaces_dir}", ] elif onemkl_interfaces_dir: - RuntimeError("--onemkl-interfaces-dir option is not supported") + raise RuntimeError("--onemkl-interfaces-dir option is not supported") subprocess.check_call( cmake_args, shell=False, cwd=setup_dir, env=os.environ From e0dae0edbc945525421d3774320721eff585be01 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 10 Jun 2025 11:55:21 -0700 Subject: [PATCH 04/11] Clarify --target-cuda help message --- scripts/build_locally.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_locally.py b/scripts/build_locally.py index 
de66651e6d6d..3902e4eb4733 100644 --- a/scripts/build_locally.py +++ b/scripts/build_locally.py @@ -192,7 +192,7 @@ def run( nargs="?", const="ON", help="Enable CUDA target for build; " - "optionally specify architecture (e.g., sm_80)", + "optionally specify architecture (e.g., --target-cuda=sm_80)", default=None, type=str, ) From c670477f16877004fde6e96a962eacf92af5acb6 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 10 Jun 2025 12:17:06 -0700 Subject: [PATCH 05/11] Update CUDA build docs --- doc/quick_start_guide.rst | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst index 0e6f9dca74e2..9c18e608f84e 100644 --- a/doc/quick_start_guide.rst +++ b/doc/quick_start_guide.rst @@ -144,13 +144,33 @@ installation layout of compatible version. The following plugins from CodePlay a Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets. `_ -``dpnp`` can be built for CUDA devices as follows: +``dpnp`` can be built for CUDA devices using the ``--target-cuda`` argument. + +To target a specific architecture (e.g., ``sm_80``): + +.. code-block:: bash + + python scripts/build_locally.py --target-cuda=sm_80 + +To use the default architecture (``sm_50``), run: .. code-block:: bash - python scripts/build_locally.py --target=cuda + python scripts/build_locally.py --target-cuda + +Note that kernels are built for ``sm_50`` by default, allowing them to work on a wider +range of architectures, but limiting the usage of more recent CUDA features. + +For reference, compute architecture strings like ``sm_80`` correspond to specific +CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``). +A complete mapping between NVIDIA GPU models and their respective +Compute Capabilities can be found in the official +`CUDA GPU Compute Capability `_ documentation. 
+ +A full list of available SYCL alias targets is available in the +`DPC++ Compiler User Manual `_. -And for AMD devices: +To build for AMD devices, use: .. code-block:: bash @@ -179,7 +199,7 @@ architecture all at once: .. code-block:: bash - python scripts/build_locally.py --target=cuda --target-hip=gfx90a + python scripts/build_locally.py --target-cuda --target-hip=gfx90a Testing From dbdd077b3347ae23b560f89994435d6980bbc140 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 11 Jun 2025 03:38:09 -0700 Subject: [PATCH 06/11] Add CUDA and AMD build subchapters to docs --- doc/quick_start_guide.rst | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst index 9c18e608f84e..8aa8c6819b21 100644 --- a/doc/quick_start_guide.rst +++ b/doc/quick_start_guide.rst @@ -144,7 +144,10 @@ installation layout of compatible version. The following plugins from CodePlay a Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets. `_ -``dpnp`` can be built for CUDA devices using the ``--target-cuda`` argument. +CUDA build +~~~~~~~~~~ + +To build for CUDA devices, use the ``--target-cuda`` argument. To target a specific architecture (e.g., ``sm_80``): @@ -170,7 +173,10 @@ Compute Capabilities can be found in the official A full list of available SYCL alias targets is available in the `DPC++ Compiler User Manual `_. -To build for AMD devices, use: +AMD build +~~~~~~~~~ + +To build for AMD devices, use the ``--target-hip=`` argument: .. code-block:: bash @@ -193,6 +199,8 @@ For example: .. 
code-block:: bash python scripts/build_locally.py --target-hip=gfx90a +Multi-target build +~~~~~~~~~~~~~~~~~~ It is, however, possible to build for Intel devices, CUDA devices, and an AMD device architecture all at once: From b08c0e5cad7539c92f93ec3ad7e750124ad3d871 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 11 Jun 2025 04:02:45 -0700 Subject: [PATCH 07/11] Clarify SYCL alias target usage --- doc/quick_start_guide.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst index 8aa8c6819b21..59f33add03db 100644 --- a/doc/quick_start_guide.rst +++ b/doc/quick_start_guide.rst @@ -144,6 +144,10 @@ installation layout of compatible version. The following plugins from CodePlay a Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets. `_ +Builds for CUDA and AMD devices internally use SYCL alias targets that are passed to the compiler. +A full list of available SYCL alias targets is available in the +`DPC++ Compiler User Manual `_. + CUDA build ~~~~~~~~~~ @@ -170,9 +174,6 @@ A complete mapping between NVIDIA GPU models and their respective Compute Capabilities can be found in the official `CUDA GPU Compute Capability `_ documentation. -A full list of available SYCL alias targets is available in the -`DPC++ Compiler User Manual `_. - AMD build ~~~~~~~~~ From 117f6a5f4db4ca8d377a4cbeda541a9764588121 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 11 Jun 2025 04:03:22 -0700 Subject: [PATCH 08/11] Apply remarks --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5cc9df99ff9c..33bd9efd47cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,7 +82,6 @@ or to a specific architecture like sm_80." 
set(HIP_TARGETS "" CACHE STRING "HIP architecture for target") set(_dpnp_sycl_targets) -set(_dpnp_cuda_arch) set(_use_onemkl_interfaces OFF) set(_use_onemkl_interfaces_cuda OFF) set(_use_onemkl_interfaces_hip OFF) @@ -92,6 +91,7 @@ set(_dpnp_sycl_target_link_options) if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x") if (DPNP_TARGET_CUDA) + set(_dpnp_cuda_arch) if(DPNP_TARGET_CUDA MATCHES "^sm_") set(_dpnp_cuda_arch ${DPNP_TARGET_CUDA}) elseif(DPNP_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$") From 2a03eba7a5193207792c8fed4a51c1cab3751cdb Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 11 Jun 2025 04:16:28 -0700 Subject: [PATCH 09/11] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c8524f180594..debdf09a756a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +* Added support for selecting CUDA architecture via the `--target-cuda` option using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/nvidia/home/) [#2478](https://github.com/IntelPython/dpnp/pull/2478) + ### Changed ### Fixed From 39b62e2449306c7f52dcc36a20902be22315767e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 12 Jun 2025 03:40:25 -0700 Subject: [PATCH 10/11] Apply remarks --- CHANGELOG.md | 2 +- doc/quick_start_guide.rst | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index debdf09a756a..dc2497489e54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -* Added support for selecting CUDA architecture via the `--target-cuda` option using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/nvidia/home/) [#2478](https://github.com/IntelPython/dpnp/pull/2478) +* Added `--target-cuda[=ARCH]` option to replace the 
deprecated `--target=cuda`, allowing users to build for CUDA devices with optional architecture selection using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/nvidia/home/) [#2478](https://github.com/IntelPython/dpnp/pull/2478) ### Changed diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst index 59f33add03db..27fbf9044ebe 100644 --- a/doc/quick_start_guide.rst +++ b/doc/quick_start_guide.rst @@ -165,8 +165,8 @@ To use the default architecture (``sm_50``), run: python scripts/build_locally.py --target-cuda -Note that kernels are built for ``sm_50`` by default, allowing them to work on a wider -range of architectures, but limiting the usage of more recent CUDA features. +Note that kernels are built for the default architecture (``sm_50``), allowing them to work on a +wider range of architectures, but limiting the usage of more recent CUDA features. For reference, compute architecture strings like ``sm_80`` correspond to specific CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``). @@ -203,8 +203,8 @@ For example: Multi-target build ~~~~~~~~~~~~~~~~~~ -It is, however, possible to build for Intel devices, CUDA devices, and an AMD device -architecture all at once: +By default, building from source enables support for Intel devices. +However, the build can be extended to support both CUDA and AMD devices simultaneously: .. 
code-block:: bash From b335fa47ac00c6df3df12b02f606792881ef6077 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 16 Jun 2025 03:01:08 -0700 Subject: [PATCH 11/11] Update Multi-target build doc --- doc/quick_start_guide.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst index 27fbf9044ebe..497ff7027f3e 100644 --- a/doc/quick_start_guide.rst +++ b/doc/quick_start_guide.rst @@ -203,8 +203,10 @@ For example: Multi-target build ~~~~~~~~~~~~~~~~~~ -By default, building from source enables support for Intel devices. -However, the build can be extended to support both CUDA and AMD devices simultaneously: +The default ``dpnp`` build from source enables support for Intel devices only. +Extending the build with a custom SYCL target additionally enables support for CUDA or AMD +devices in ``dpnp``. The build can also be extended to support both CUDA and AMD +devices at the same time: .. code-block:: bash