Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 30 additions & 18 deletions .github/workflows/build-cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,53 @@ name: Build CUDA

on:
workflow_call:
inputs:
cuda-version:
description: 'CUDA version (e.g. 12.8, 13.1)'
required: true
type: string
python-version:
description: 'Python version (e.g. 3.10, 3.12)'
required: true
type: string
torch-spec:
description: 'PyTorch install spec (e.g. --pre torch --extra-index-url ...)'
required: true
type: string
artifact-name:
description: 'Name for the uploaded wheel artifact'
required: true
type: string
docker-image:
description: 'Docker image to use for the build'
required: true
type: string

concurrency:
group: build-cuda-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
# The group key must be unique per (CUDA version, Python version) pair.
# ci.yml calls this reusable workflow more than once with the same CUDA
# version (12.8 with py3.10 and with py3.12); without the Python version in
# the key those calls share one group and, because cancel-in-progress is
# true, one build cancels the other.
group: build-cuda-${{ inputs.cuda-version }}-py${{ inputs.python-version }}-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
cancel-in-progress: true

jobs:
build-cuda:
name: Build CUDA (cuda12.8)
name: Build CUDA (cuda${{ inputs.cuda-version }}-py${{ inputs.python-version }})
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
strategy:
fail-fast: true
matrix:
python-version: ['3.10', '3.12']
include:
- name: 4xlargegpu
runs-on: linux.g5.4xlarge.nvidia.gpu
torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu128'
gpu-arch-type: "cuda"
gpu-arch-version: "12.8"
with:
timeout: 60
runner: ${{ matrix.runs-on }}
gpu-arch-type: ${{ matrix.gpu-arch-type }}
gpu-arch-version: ${{ matrix.gpu-arch-version }}
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: cuda
gpu-arch-version: ${{ inputs.cuda-version }}
docker-image: ${{ inputs.docker-image }}
submodules: recursive
upload-artifact: monarch-cuda-${{ github.sha }}-py${{ matrix.python-version }}
upload-artifact: ${{ inputs.artifact-name }}
script: |
# Source common setup functions
source scripts/common-setup.sh

# Setup build environment (conda + system deps + rust + build deps)
setup_build_environment ${{ matrix.python-version }}
setup_build_environment ${{ inputs.python-version }}

# Install torch nightly
pip install ${{ matrix.torch-spec }}
pip install ${{ inputs.torch-spec }}
pip install -r build-requirements.txt

# Setup Tensor Engine
Expand Down
79 changes: 68 additions & 11 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,35 @@ concurrency:
cancel-in-progress: true

jobs:
build-cuda:
name: Build CUDA
# CUDA 12.8 / Python 3.10 wheel build.
# test-gpu-python lists this job in its `needs` and downloads the artifact
# named below, so the artifact-name pattern must stay in sync with that job.
build-cuda-12-8-py3-10:
name: Build CUDA 12.8 / py3.10
uses: ./.github/workflows/build-cuda.yml
with:
cuda-version: '12.8'
python-version: '3.10'
torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu128'
artifact-name: monarch-cuda12.8-${{ github.sha }}-py3.10
docker-image: 'pytorch/almalinux-builder:cuda12.8'

# CUDA 12.8 / Python 3.12 wheel build.
# build-docker depends on this job and consumes its py3.12 artifact.
build-cuda-12-8-py3-12:
name: Build CUDA 12.8 / py3.12
uses: ./.github/workflows/build-cuda.yml
with:
cuda-version: '12.8'
python-version: '3.12'
torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu128'
artifact-name: monarch-cuda12.8-${{ github.sha }}-py3.12
docker-image: 'pytorch/almalinux-builder:cuda12.8'

# CUDA 13.0 / Python 3.10 wheel build.
# test-gpu-python lists this job in its `needs`; torch comes from the cu130
# nightly index rather than cu128.
build-cuda-13-0-py3-10:
name: Build CUDA 13.0 / py3.10
uses: ./.github/workflows/build-cuda.yml
with:
cuda-version: '13.0'
python-version: '3.10'
torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu130'
artifact-name: monarch-cuda13.0-${{ github.sha }}-py3.10
docker-image: 'pytorch/almalinux-builder:cuda13.0'

build-rocm:
name: Build ROCm
Expand All @@ -35,11 +61,28 @@ jobs:
artifact-name: monarch-cpu-${{ github.sha }}-py3.10

test-gpu-python:
name: Test GPU Python
needs: build-cuda
name: Test GPU Python (cuda${{ matrix.cuda-version }}-py${{ matrix.python-version }})
needs: [build-cuda-12-8-py3-10, build-cuda-13-0-py3-10]
strategy:
fail-fast: true
matrix:
python-version: ['3.10']
cuda-version: ['12.8', '13.0']
include:
- cuda-version: '12.8'
torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu128'
docker-image: 'pytorch/almalinux-builder:cuda12.8'
- cuda-version: '13.0'
torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu130'
docker-image: 'pytorch/almalinux-builder:cuda13.0'
uses: ./.github/workflows/test-gpu-python.yml
with:
artifact-name: monarch-cuda-${{ github.sha }}-py3.10
artifact-name: monarch-cuda${{ matrix.cuda-version }}-${{ github.sha }}-py${{ matrix.python-version }}
torch-spec: ${{ matrix.torch-spec }}
gpu-arch-type: cuda
gpu-arch-version: ${{ matrix.cuda-version }}
python-version: ${{ matrix.python-version }}
docker-image: ${{ matrix.docker-image }}

test-cpu-rust:
name: Test CPU Rust
Expand All @@ -53,19 +96,33 @@ jobs:
docker-image: 'pytorch/manylinuxaarch64-builder:cuda12.8'

test-gpu-rust:
name: Test GPU Rust
needs: build-cuda
name: Test GPU Rust (cuda${{ matrix.cuda-version }}-py${{ matrix.python-version }})
strategy:
fail-fast: true
matrix:
python-version: ['3.10']
cuda-version: ['12.8', '13.0']
include:
- cuda-version: '12.8'
torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu128'
docker-image: 'pytorch/almalinux-builder:cuda12.8'
- cuda-version: '13.0'
torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu130'
docker-image: 'pytorch/almalinux-builder:cuda13.0'
uses: ./.github/workflows/test-gpu-rust.yml
with:
artifact-name: monarch-cuda-${{ github.sha }}-py3.10
torch-spec: ${{ matrix.torch-spec }}
gpu-arch-type: cuda
gpu-arch-version: ${{ matrix.cuda-version }}
python-version: ${{ matrix.python-version }}
docker-image: ${{ matrix.docker-image }}

build-docker:
name: Build Docker image
needs: build-cuda
needs: build-cuda-12-8-py3-12
uses: ./.github/workflows/build-docker.yml
with:
# Docker image requires python 3.12
artifact-name: monarch-cuda-${{ github.sha }}-py3.12
artifact-name: monarch-cuda12.8-${{ github.sha }}-py3.12

status-check:
name: Status Check
Expand Down
40 changes: 26 additions & 14 deletions .github/workflows/test-gpu-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,41 @@ on:
description: 'Wheel artifact name from build workflow'
required: true
type: string
torch-spec:
description: 'PyTorch install spec (e.g. --pre torch --extra-index-url ...)'
required: true
type: string
gpu-arch-type:
description: 'GPU architecture type (e.g. cuda)'
required: true
type: string
gpu-arch-version:
description: 'GPU architecture version (e.g. 12.8, 13.1)'
required: true
type: string
python-version:
description: 'Python version (e.g. 3.10, 3.12)'
required: true
type: string
docker-image:
description: 'Docker image to use for the test'
required: true
type: string

# This workflow is invoked once per matrix entry from ci.yml (one call per
# CUDA version). The group key therefore carries the GPU arch version and the
# Python version; otherwise every invocation lands in the same group and,
# with cancel-in-progress enabled, the invocations cancel one another.
concurrency:
group: test-gpu-python-${{ inputs.gpu-arch-version }}-py${{ inputs.python-version }}-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
cancel-in-progress: true

jobs:
test-gpu-python:
name: Test GPU Python (cuda12.8-py3.10)
name: Test GPU Python (cuda${{ inputs.gpu-arch-version }}-py${{ inputs.python-version }})
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
strategy:
fail-fast: true
matrix:
include:
- name: 4xlargegpu
runs-on: linux.g5.4xlarge.nvidia.gpu
torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu128'
gpu-arch-type: "cuda"
gpu-arch-version: "12.8"
with:
timeout: 120
runner: ${{ matrix.runs-on }}
gpu-arch-type: ${{ matrix.gpu-arch-type }}
gpu-arch-version: ${{ matrix.gpu-arch-version }}
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: ${{ inputs.gpu-arch-type }}
gpu-arch-version: ${{ inputs.gpu-arch-version }}
docker-image: ${{ inputs.docker-image }}
submodules: recursive
download-artifact: ${{ inputs.artifact-name }}
script: |
Expand All @@ -51,7 +63,7 @@ jobs:

# Install torch nightly before installing the wheel,
# so that we can test the wheel against the latest nightly
pip install ${{ matrix.torch-spec }}
pip install ${{ inputs.torch-spec }}

# Install the built wheel from artifact
install_wheel_from_artifact
Expand Down
41 changes: 24 additions & 17 deletions .github/workflows/test-gpu-rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,24 @@ name: Test GPU Rust
on:
workflow_call:
inputs:
artifact-name:
description: 'Wheel artifact name from build workflow'
torch-spec:
description: 'PyTorch install spec (e.g. --pre torch --extra-index-url ...)'
required: true
type: string
gpu-arch-type:
description: 'GPU architecture type (e.g. cuda)'
required: true
type: string
gpu-arch-version:
description: 'GPU architecture version (e.g. 12.8, 13.1)'
required: true
type: string
python-version:
description: 'Python version (e.g. 3.10, 3.12)'
required: true
type: string
docker-image:
description: 'Docker image to use for the test'
required: true
type: string

Expand All @@ -14,24 +30,15 @@ concurrency:

jobs:
test-gpu-rust:
name: Test GPU Rust (cuda12.8)
name: Test GPU Rust (cuda${{ inputs.gpu-arch-version }}-py${{ inputs.python-version }})
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
strategy:
fail-fast: true
matrix:
include:
- name: 4xlargegpu
runs-on: linux.g5.4xlarge.nvidia.gpu
torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu128'
gpu-arch-type: "cuda"
gpu-arch-version: "12.8"
with:
timeout: 120
runner: ${{ matrix.runs-on }}
gpu-arch-type: ${{ matrix.gpu-arch-type }}
gpu-arch-version: ${{ matrix.gpu-arch-version }}
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: ${{ inputs.gpu-arch-type }}
gpu-arch-version: ${{ inputs.gpu-arch-version }}
docker-image: ${{ inputs.docker-image }}
submodules: recursive
download-artifact: ${{ inputs.artifact-name }}
script: |
# Source common setup functions
source scripts/common-setup.sh
Expand All @@ -56,7 +63,7 @@ jobs:
setup_cuda_environment

# Setup PyTorch with C++ headers using common-setup utility
setup_pytorch_with_headers "${{ matrix.gpu-arch-version }}" "${{ matrix.torch-spec }}"
setup_pytorch_with_headers "${{ inputs.gpu-arch-version }}" "${{ inputs.torch-spec }}"

# Run GPU Rust tests
echo "Running OSS Rust tests..."
Expand Down
6 changes: 6 additions & 0 deletions rdmaxcel-sys/src/driver_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@
#define SYM_DEVICE_GET cuDeviceGet
#define SYM_DEVICE_GET_COUNT cuDeviceGetCount
#define SYM_DEVICE_GET_ATTRIBUTE cuDeviceGetAttribute
// CUDA 13.x removed cuCtxCreate_v2 from headers, but libcuda.so still
// exports it for backward compatibility. Provide our own declaration so
// decltype and STRINGIFY resolve correctly.
#if CUDA_VERSION >= 13000
CUresult CUDAAPI cuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
#endif
#define SYM_CTX_CREATE cuCtxCreate_v2
#define SYM_DEVICE_PRIMARY_CTX_RETAIN cuDevicePrimaryCtxRetain
#define SYM_CTX_SET_CURRENT cuCtxSetCurrent
Expand Down
19 changes: 12 additions & 7 deletions scripts/common-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -139,19 +139,24 @@ setup_pytorch_with_headers() {
local cuda_version_short=$(echo "${gpu_arch_version}" | tr -d '.')
local libtorch_url="https://download.pytorch.org/libtorch/nightly/cu${cuda_version_short}/libtorch-cxx11-abi-shared-with-deps-latest.zip"

# Install PyTorch Python package first (needed as fallback for headers)
echo "Installing PyTorch Python package with: ${torch_spec}"
pip install ${torch_spec}

echo "Downloading libtorch from: ${libtorch_url}"
wget -q "${libtorch_url}"
unzip -q "libtorch-cxx11-abi-shared-with-deps-latest.zip"
if wget -q "${libtorch_url}" && unzip -q "libtorch-cxx11-abi-shared-with-deps-latest.zip"; then
export LIBTORCH_ROOT="$PWD/libtorch"
else
# Libtorch zip not available (e.g. newer CUDA versions); fall back to
# the pip-installed torch package which includes C++ headers and libs.
echo "Libtorch download unavailable, using pip-installed torch for C++ headers"
export LIBTORCH_ROOT=$(python -c "import torch; print(torch.utils.cmake_prefix_path)")/../../
fi

# Set environment variables for libtorch
export LIBTORCH_ROOT="$PWD/libtorch"
export LD_LIBRARY_PATH="$LIBTORCH_ROOT/lib:${LD_LIBRARY_PATH:-}"
export CMAKE_PREFIX_PATH="$LIBTORCH_ROOT:${CMAKE_PREFIX_PATH:-}"

# Install PyTorch Python package using provided torch-spec
echo "Installing PyTorch Python package with: ${torch_spec}"
pip install ${torch_spec}

# Verify installation
echo "LibTorch C++ headers available at: $LIBTORCH_ROOT/include"
if [[ -d "$LIBTORCH_ROOT/include/torch/csrc/api/include/torch" ]]; then
Expand Down
Loading