diff --git a/CHANGELOG.md b/CHANGELOG.md
index bf3b51e717a..26e9d091c0f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

+* Added `--target-cuda[=ARCH]` option to replace the deprecated `--target=cuda`, allowing users to build for CUDA devices with optional architecture selection using the [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/nvidia/home/) [#2478](https://github.com/IntelPython/dpnp/pull/2478)
+
 ### Changed

 * Adjusted the `pre-commit` configuration to run autoupdate weekly [#2479](https://github.com/IntelPython/dpnp/pull/2479)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2d1bd4fc4ae..33bd9efd47c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,14 +68,17 @@ find_package(Dpctl REQUIRED)
 message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR})
 message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})

-option(DPNP_TARGET_CUDA
-    "Build DPNP to target CUDA devices"
-    OFF
-)
 option(DPNP_USE_ONEMKL_INTERFACES
     "Build DPNP with oneMKL Interfaces"
     OFF
 )
+set(DPNP_TARGET_CUDA
+    ""
+    CACHE STRING
+    "Build DPNP to target CUDA devices. \
+Set to a truthy value (e.g., ON, TRUE) to use the default architecture (sm_50), \
+or to a specific architecture like sm_80."
+)
 set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")

 set(_dpnp_sycl_targets)
@@ -87,8 +90,19 @@
 set(_dpnp_sycl_target_compile_options)
 set(_dpnp_sycl_target_link_options)

 if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
-    if(DPNP_TARGET_CUDA)
-        set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
+    if (DPNP_TARGET_CUDA)
+        set(_dpnp_cuda_arch)
+        if(DPNP_TARGET_CUDA MATCHES "^sm_")
+            set(_dpnp_cuda_arch ${DPNP_TARGET_CUDA})
+        elseif(DPNP_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$")
+            set(_dpnp_cuda_arch "sm_50")
+        else()
+            message(FATAL_ERROR
+                "Invalid value for DPNP_TARGET_CUDA: \"${DPNP_TARGET_CUDA}\". "
+                "Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'."
+            )
+        endif()
+        set(_dpnp_sycl_targets "nvidia_gpu_${_dpnp_cuda_arch},spir64-unknown-unknown")
         set(_use_onemkl_interfaces_cuda ON)
     endif()
@@ -104,7 +118,7 @@ if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
 else()
     set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})

-    if ("${DPNP_SYCL_TARGETS}" MATCHES "nvptx64-nvidia-cuda")
+    if("${DPNP_SYCL_TARGETS}" MATCHES "(nvidia_gpu_sm_|nvptx64-nvidia-cuda)")
         set(_use_onemkl_interfaces_cuda ON)
     endif()
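The new `DPNP_TARGET_CUDA` cache variable folds two decisions into one value: whether to build for CUDA at all, and which architecture to compile for. Below is a minimal Python sketch of the same selection logic as the CMake branch above; the `resolve_cuda_target` helper is illustrative only and not part of the build system.

```python
import re

def resolve_cuda_target(value: str) -> str:
    """Mirror the CMake logic: map a DPNP_TARGET_CUDA value to a SYCL target list."""
    if re.match(r"^sm_", value):
        arch = value        # explicit architecture, e.g. "sm_80"
    elif re.match(r"^(ON|TRUE|YES|Y|1)$", value):
        arch = "sm_50"      # truthy value -> default architecture
    else:
        raise ValueError(
            f'Invalid value for DPNP_TARGET_CUDA: "{value}". '
            "Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'."
        )
    # CUDA alias target plus the default SPIR-V target for Intel devices
    return f"nvidia_gpu_{arch},spir64-unknown-unknown"

assert resolve_cuda_target("sm_80") == "nvidia_gpu_sm_80,spir64-unknown-unknown"
assert resolve_cuda_target("ON") == "nvidia_gpu_sm_50,spir64-unknown-unknown"
```

The `nvidia_gpu_sm_*` alias replaces the previous generic `nvptx64-nvidia-cuda` triple, which is why the `DPNP_SYCL_TARGETS` match above now accepts either spelling.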
diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst
index 0e6f9dca74e..497ff7027f3 100644
--- a/doc/quick_start_guide.rst
+++ b/doc/quick_start_guide.rst
@@ -144,13 +144,40 @@ installation layout of compatible version. The following plugins from CodePlay a
 Building ``dpnp`` also requires `building Data Parallel Control Library
 for custom SYCL targets. `_

-``dpnp`` can be built for CUDA devices as follows:
+Builds for CUDA and AMD devices internally use SYCL alias targets that are passed to the compiler.
+The full list of available SYCL alias targets is given in the
+`DPC++ Compiler User Manual `_.
+
+CUDA build
+~~~~~~~~~~
+
+To build for CUDA devices, use the ``--target-cuda`` argument.
+
+To target a specific architecture (e.g., ``sm_80``):
+
+.. code-block:: bash
+
+    python scripts/build_locally.py --target-cuda=sm_80
+
+To use the default architecture (``sm_50``), run:

 .. code-block:: bash

-    python scripts/build_locally.py --target=cuda
+    python scripts/build_locally.py --target-cuda
+
+Note that in this case kernels are built for the default architecture (``sm_50``), which allows
+them to run on a wider range of architectures but limits the use of more recent CUDA features.
+
+For reference, a compute architecture string like ``sm_80`` corresponds to a specific
+CUDA Compute Capability (e.g., Compute Capability 8.0 corresponds to ``sm_80``).
+A complete mapping between NVIDIA GPU models and their respective
+Compute Capabilities can be found in the official
+`CUDA GPU Compute Capability `_ documentation.
+
+AMD build
+~~~~~~~~~

-And for AMD devices:
+To build for AMD devices, use the ``--target-hip=<arch>`` argument:

 .. code-block:: bash

@@ -173,13 +200,17 @@ For example:

 .. code-block:: bash

     python scripts/build_locally.py --target-hip=gfx90a

+Multi-target build
+~~~~~~~~~~~~~~~~~~

-It is, however, possible to build for Intel devices, CUDA devices, and an AMD device
-architecture all at once:
+The default ``dpnp`` build from source enables support for Intel devices only.
+Extending the build with a custom SYCL target additionally enables support for CUDA or AMD
+devices in ``dpnp``. The support can also be extended to enable both CUDA and AMD
+devices at the same time:

 .. code-block:: bash

-    python scripts/build_locally.py --target=cuda --target-hip=gfx90a
+    python scripts/build_locally.py --target-cuda --target-hip=gfx90a

 Testing
diff --git a/scripts/build_locally.py b/scripts/build_locally.py
index 3403f98304e..3902e4eb473 100644
--- a/scripts/build_locally.py
+++ b/scripts/build_locally.py
@@ -38,7 +38,7 @@ def run(
     cmake_executable=None,
     verbose=False,
     cmake_opts="",
-    target="intel",
+    target_cuda=None,
     target_hip=None,
     onemkl_interfaces=False,
     onemkl_interfaces_dir=None,
@@ -98,12 +98,14 @@ def run(
     if "DPL_ROOT" in os.environ:
         os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"]

-    if not target.strip():
-        target = "intel"
-
-    if target == "cuda":
+    if target_cuda is not None:
+        if not target_cuda.strip():
+            raise ValueError(
+                "--target-cuda cannot be an empty string. "
+                "Use --target-cuda=<arch> or --target-cuda"
+            )
         cmake_args += [
-            "-DDPNP_TARGET_CUDA=ON",
+            f"-DDPNP_TARGET_CUDA={target_cuda}",
         ]
         # Always builds using oneMKL interfaces for the cuda target
         onemkl_interfaces = True
@@ -129,7 +131,7 @@
             f"-DDPNP_ONEMKL_INTERFACES_DIR={onemkl_interfaces_dir}",
         ]
     elif onemkl_interfaces_dir:
-        RuntimeError("--onemkl-interfaces-dir option is not supported")
+        raise RuntimeError("--onemkl-interfaces-dir option is not supported")

     subprocess.check_call(
         cmake_args, shell=False, cwd=setup_dir, env=os.environ
@@ -186,10 +188,12 @@
         type=str,
     )
     driver.add_argument(
-        "--target",
-        help="Target backend for build",
-        dest="target",
-        default="intel",
+        "--target-cuda",
+        nargs="?",
+        const="ON",
+        help="Enable CUDA target for build; "
+        "optionally specify architecture (e.g., --target-cuda=sm_80)",
+        default=None,
         type=str,
     )
     driver.add_argument(
@@ -265,7 +269,7 @@
         cmake_executable=args.cmake_executable,
         verbose=args.verbose,
         cmake_opts=args.cmake_opts,
-        target=args.target,
+        target_cuda=args.target_cuda,
         target_hip=args.target_hip,
         onemkl_interfaces=args.onemkl_interfaces,
         onemkl_interfaces_dir=args.onemkl_interfaces_dir,
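The `--target-cuda` flag shown above relies on standard argparse semantics: `nargs="?"` together with `const="ON"` and `default=None` distinguishes an absent flag, a bare flag, and a flag with an explicit architecture. A small self-contained sketch of just that wiring (not the full `build_locally.py` driver):

```python
import argparse

# Reproduce only the --target-cuda argument from the diff above.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--target-cuda",
    nargs="?",      # the value is optional
    const="ON",     # used when the flag is given without a value
    default=None,   # used when the flag is absent
    type=str,
    help="Enable CUDA target for build; "
    "optionally specify architecture (e.g., --target-cuda=sm_80)",
)

print(parser.parse_args([]).target_cuda)                       # None  -> CUDA build disabled
print(parser.parse_args(["--target-cuda"]).target_cuda)        # ON    -> default architecture (sm_50)
print(parser.parse_args(["--target-cuda=sm_80"]).target_cuda)  # sm_80 -> explicit architecture
```

The parsed value is then forwarded verbatim as `-DDPNP_TARGET_CUDA=...`; the Python driver only rejects an explicitly passed empty string, while any other invalid value is reported by the CMake check above.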