Merged
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

* Added `--target-cuda[=ARCH]` option to replace the deprecated `--target=cuda`, allowing users to build for CUDA devices with optional architecture selection using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/nvidia/home/) [#2478](https://github.com/IntelPython/dpnp/pull/2478)

### Changed

* Adjusted the `pre-commit` configuration to run autoupdate weekly [#2479](https://github.com/IntelPython/dpnp/pull/2479)
28 changes: 21 additions & 7 deletions CMakeLists.txt
@@ -68,14 +68,17 @@ find_package(Dpctl REQUIRED)
message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR})
message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})

option(DPNP_TARGET_CUDA
"Build DPNP to target CUDA devices"
OFF
)
option(DPNP_USE_ONEMKL_INTERFACES
"Build DPNP with oneMKL Interfaces"
OFF
)
set(DPNP_TARGET_CUDA
""
CACHE STRING
"Build DPNP to target CUDA device. \
Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \
or to a specific architecture like sm_80."
)
set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")

set(_dpnp_sycl_targets)
@@ -87,8 +90,19 @@ set(_dpnp_sycl_target_compile_options)
set(_dpnp_sycl_target_link_options)

if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
if(DPNP_TARGET_CUDA)
set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
if (DPNP_TARGET_CUDA)
Contributor

It is not OFF by default now. Should this be updated?

Suggested change:
- if (DPNP_TARGET_CUDA)
+ if (NOT "x${DPNP_TARGET_CUDA}" STREQUAL "x")

Collaborator Author

The empty string is false for the if (DPNP_TARGET_CUDA) check.
I added this check for the case when DPNP_TARGET_CUDA is passed as 0, OFF, NO, FALSE, or N via the cmake-opts argument.

Contributor

> for the case when DPNP_TARGET_CUDA is passed as 0, OFF, NO, FALSE, or N via the cmake-opts argument

That is not the case when DPNP_TARGET_CUDA is passed as an empty string, so it's still unclear to me.
Per my understanding, the string can't be empty due to the check.

Collaborator Author

You are right that --target-cuda= is checked in build_locally.py.
But if someone bypasses it via --cmake-opts="-DDPNP_TARGET_CUDA=", the empty string is still evaluated as FALSE in if(DPNP_TARGET_CUDA), so this condition safely handles both cases.
Using if (NOT "x${DPNP_TARGET_CUDA}" STREQUAL "x") would only check for a non-empty string but would still treat values like OFF or 0 as TRUE (see the standalone sketch after this thread).

Contributor

Yes, that sounds reasonable.
But what about the similar flag for the AMD build? Why don't we apply the same check there?

Collaborator Author

@antonwolfy it will be updated in the next PR.
Thank you
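A minimal standalone sketch (not part of this changeset; the file name is arbitrary) that reproduces the behavior discussed above, relying only on CMake's documented if(<variable>) rules, under which the empty string, 0, OFF, NO, FALSE, and N are false constants while other values (ON, TRUE, YES, Y, 1, sm_80, ...) are true:

# truthiness_demo.cmake -- run with: cmake -P truthiness_demo.cmake
# Prints which DPNP_TARGET_CUDA values would enter the branch guarded by
# if (DPNP_TARGET_CUDA) and which are treated as false constants.
function(check_cuda_flag _val)
    set(DPNP_TARGET_CUDA "${_val}")
    if(DPNP_TARGET_CUDA)
        message(STATUS "'${_val}' -> enters the CUDA branch")
    else()
        message(STATUS "'${_val}' -> CUDA branch skipped (false constant)")
    endif()
endfunction()

check_cuda_flag("")      # bypass case: --cmake-opts="-DDPNP_TARGET_CUDA="
check_cuda_flag("OFF")
check_cuda_flag("0")
check_cuda_flag("NO")
check_cuda_flag("ON")
check_cuda_flag("sm_80")

By contrast, a plain non-empty check such as if (NOT "x${DPNP_TARGET_CUDA}" STREQUAL "x") would send OFF and 0 into the CUDA branch, which is why the truthiness check was kept.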

set(_dpnp_cuda_arch)
if(DPNP_TARGET_CUDA MATCHES "^sm_")
set(_dpnp_cuda_arch ${DPNP_TARGET_CUDA})
elseif(DPNP_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$")
set(_dpnp_cuda_arch "sm_50")
else()
message(FATAL_ERROR
"Invalid value for DPNP_TARGET_CUDA: \"${DPNP_TARGET_CUDA}\". "
"Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'."
)
endif()
set(_dpnp_sycl_targets "nvidia_gpu_${_dpnp_cuda_arch},spir64-unknown-unknown")
set(_use_onemkl_interfaces_cuda ON)
endif()

@@ -104,7 +118,7 @@ if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
else()
set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})

if ("${DPNP_SYCL_TARGETS}" MATCHES "nvptx64-nvidia-cuda")
if("${DPNP_SYCL_TARGETS}" MATCHES "(nvidia_gpu_sm_|nvptx64-nvidia-cuda)")
set(_use_onemkl_interfaces_cuda ON)
endif()

43 changes: 37 additions & 6 deletions doc/quick_start_guide.rst
@@ -144,13 +144,40 @@ installation layout of compatible version. The following plugins from CodePlay a
Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets.
<https://intelpython.github.io/dpctl/latest/beginners_guides/installation.html#building-for-custom-sycl-targets>`_

``dpnp`` can be built for CUDA devices as follows:
Builds for CUDA and AMD devices internally use SYCL alias targets that are passed to the compiler.
A full list of available SYCL alias targets can be found in the
`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_.

CUDA build
~~~~~~~~~~

To build for CUDA devices, use the ``--target-cuda`` argument.

To target a specific architecture (e.g., ``sm_80``):

.. code-block:: bash
python scripts/build_locally.py --target-cuda=sm_80
To use the default architecture (``sm_50``), run:

.. code-block:: bash
python scripts/build_locally.py --target=cuda
python scripts/build_locally.py --target-cuda
Note that kernels are built for the default architecture (``sm_50``), allowing them to work on a
wider range of architectures, but limiting the usage of more recent CUDA features.

For reference, architecture strings like ``sm_80`` encode the device's CUDA Compute
Capability (e.g., ``sm_80`` corresponds to Compute Capability 8.0).
A complete mapping between NVIDIA GPU models and their respective
Compute Capabilities can be found in the official
`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_ documentation.

AMD build
~~~~~~~~~

And for AMD devices:
To build for AMD devices, use the ``--target-hip=<arch>`` argument:

.. code-block:: bash
@@ -173,13 +200,17 @@ For example:
.. code-block:: bash
python scripts/build_locally.py --target-hip=gfx90a
Multi-target build
~~~~~~~~~~~~~~~~~~

It is, however, possible to build for Intel devices, CUDA devices, and an AMD device
architecture all at once:
By default, ``dpnp`` built from source supports Intel devices only.
Extending the build with a custom SYCL target additionally enables support for CUDA or AMD
devices in ``dpnp``. The support can also be extended to enable both CUDA and AMD
devices at the same time:

.. code-block:: bash
python scripts/build_locally.py --target=cuda --target-hip=gfx90a
python scripts/build_locally.py --target-cuda --target-hip=gfx90a
Testing
28 changes: 16 additions & 12 deletions scripts/build_locally.py
@@ -38,7 +38,7 @@ def run(
cmake_executable=None,
verbose=False,
cmake_opts="",
target="intel",
target_cuda=None,
target_hip=None,
onemkl_interfaces=False,
onemkl_interfaces_dir=None,
@@ -98,12 +98,14 @@ def run(
if "DPL_ROOT" in os.environ:
os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"]

if not target.strip():
target = "intel"

if target == "cuda":
if target_cuda is not None:
if not target_cuda.strip():
raise ValueError(
"--target-cuda can not be an empty string. "
"Use --target-cuda=<arch> or --target-cuda"
)
cmake_args += [
"-DDPNP_TARGET_CUDA=ON",
f"-DDPNP_TARGET_CUDA={target_cuda}",
]
# Always builds using oneMKL interfaces for the cuda target
onemkl_interfaces = True
@@ -129,7 +131,7 @@ def run(
f"-DDPNP_ONEMKL_INTERFACES_DIR={onemkl_interfaces_dir}",
]
elif onemkl_interfaces_dir:
RuntimeError("--onemkl-interfaces-dir option is not supported")
raise RuntimeError("--onemkl-interfaces-dir option is not supported")

subprocess.check_call(
cmake_args, shell=False, cwd=setup_dir, env=os.environ
@@ -186,10 +188,12 @@ def run(
type=str,
)
driver.add_argument(
"--target",
help="Target backend for build",
dest="target",
default="intel",
"--target-cuda",
nargs="?",
const="ON",
help="Enable CUDA target for build; "
"optionally specify architecture (e.g., --target-cuda=sm_80)",
default=None,
type=str,
)
driver.add_argument(
@@ -265,7 +269,7 @@ def run(
cmake_executable=args.cmake_executable,
verbose=args.verbose,
cmake_opts=args.cmake_opts,
target=args.target,
target_cuda=args.target_cuda,
target_hip=args.target_hip,
onemkl_interfaces=args.onemkl_interfaces,
onemkl_interfaces_dir=args.onemkl_interfaces_dir,