Skip to content

Commit 5d743c1

Browse files
committed
ci: Workflows for wheel build
Signed-off-by: oliver könig <[email protected]>
1 parent 4bd5fe2 commit 5d743c1

File tree

3 files changed

+381
-0
lines changed

3 files changed

+381
-0
lines changed

.github/workflows/_build.yml

Lines changed: 221 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,221 @@
# Reusable (workflow_call) template that builds one CUDA wheel for a single
# (python, cuda, torch, cxx11_abi) combination, resumes interrupted builds via
# the actions cache, and optionally uploads the wheel to a GitHub release.
name: ~Build wheel template

on:
  workflow_call:
    inputs:
      runs-on:
        description: "The runner to use for the build"
        required: true
        type: string
      python-version:
        description: "The Python version to use for the build"
        required: true
        type: string
      cuda-version:
        description: "The CUDA version to use for the build"
        required: true
        type: string
      torch-version:
        description: "The PyTorch version to use for the build"
        required: true
        type: string
      cxx11_abi:
        description: "The C++11 ABI to use for the build"
        required: true
        type: string
      upload-to-release:
        description: "Upload wheel to this release"
        required: false
        type: boolean
        default: false
      release-version:
        description: "The release tag to check out and upload the wheel to"
        required: false
        type: string

defaults:
  run:
    # -x trace, -e fail fast, -u error on unset vars, pipefail on any pipe stage
    shell: bash -x -e -u -o pipefail {0}

jobs:
  build-wheel:
    runs-on: ${{ inputs.runs-on }}
    name: Build wheel (${{ inputs.release-version }}-${{ inputs.python-version }}-${{ inputs.cuda-version }}-${{ inputs.torch-version }}-${{ inputs.cxx11_abi }})
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.release-version }}
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ inputs.python-version }}

      # Derive compact version strings (e.g. "12.9.1" -> MATRIX_CUDA_VERSION=129,
      # WHEEL_CUDA_VERSION=12) used by the install/build/rename steps below.
      - name: Set CUDA and PyTorch versions
        run: |
          echo "MATRIX_CUDA_VERSION=$(echo ${{ inputs.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
          echo "MATRIX_TORCH_VERSION=$(echo ${{ inputs.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
          echo "WHEEL_CUDA_VERSION=$(echo ${{ inputs.cuda-version }} | awk -F \. {'print $1'})" >> $GITHUB_ENV
          echo "MATRIX_PYTHON_VERSION=$(echo ${{ inputs.python-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV

      - name: Free up disk space
        if: ${{ runner.os == 'Linux' }}
        # https://github.com/easimon/maximize-build-space/blob/master/action.yml
        # https://github.com/easimon/maximize-build-space/tree/test-report
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL

      - name: Set up swap space
        if: runner.os == 'Linux'
        # NOTE(review): the action version ref was mangled during extraction
        # ("pierotofy/[email protected]"); confirm the intended ref/tag.
        uses: pierotofy/set-swap-space@master
        with:
          swap-size-gb: 10

      - name: Install CUDA ${{ inputs.cuda-version }}
        if: ${{ inputs.cuda-version != 'cpu' }}
        # NOTE(review): the action version ref was mangled during extraction
        # ("Jimver/[email protected]"); confirm the intended ref/tag.
        uses: Jimver/cuda-toolkit@v0.2.24
        id: cuda-toolkit
        with:
          cuda: ${{ inputs.cuda-version }}
          linux-local-args: '["--toolkit"]'
          # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
          # method: ${{ (inputs.cuda-version == '11.8.0' || inputs.cuda-version == '12.1.0') && 'network' || 'local' }}
          method: "network"

      - name: Install additional CUDA libraries
        run: |
          CUDA_VERSION=$(echo ${{ inputs.cuda-version }} | awk -F \. {'print $1 "-" $2'})
          sudo apt-get update
          sudo apt-get install -y libcusparse-$CUDA_VERSION libcusolver-$CUDA_VERSION
          sudo apt-get clean

      - name: Install PyTorch ${{ inputs.torch-version }}+cu${{ inputs.cuda-version }}
        run: |
          pip install --upgrade pip
          # With python 3.13 and torch 2.5.1, unless we update typing-extensions, we get error
          # AttributeError: attribute '__default__' of 'typing.ParamSpec' objects is not writable
          pip install typing-extensions==4.12.2
          # We want to figure out the CUDA version to download pytorch
          # e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
          # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
          # This code is ugly, maybe there's a better way to do this.
          export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
          minv = {'2.4': 118, '2.5': 118, '2.6': 118, '2.7': 118, '2.8': 126}[env['MATRIX_TORCH_VERSION']]; \
          maxv = {'2.4': 124, '2.5': 124, '2.6': 126, '2.7': 128, '2.8': 129}[env['MATRIX_TORCH_VERSION']]; \
          print(minv if int(env['MATRIX_CUDA_VERSION']) < 120 else maxv)" \
          )
          if [[ ${{ inputs.torch-version }} == *"dev"* ]]; then
            # pip install --no-cache-dir --pre torch==${{ inputs.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
            # Can't use --no-deps because we need cudnn etc.
            # Hard-coding this version of pytorch-triton for torch 2.6.0.dev20241001
            pip install jinja2
            pip install https://download.pytorch.org/whl/nightly/pytorch_triton-3.1.0%2Bcf34004b8a-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl
            pip install --no-cache-dir --pre https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}/torch-${{ inputs.torch-version }}%2Bcu${TORCH_CUDA_VERSION}-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl
          else
            pip install --no-cache-dir torch==${{ inputs.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
          fi
          nvcc --version
          python --version
          python -c "import torch; print('PyTorch:', torch.__version__)"
          python -c "import torch; print('CUDA:', torch.version.cuda)"
          python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"

      # Restore a partial build tree saved by a previous, timed-out attempt so
      # the compilation can resume instead of starting from scratch.
      - name: Restore build cache
        uses: actions/cache/restore@v4
        with:
          path: build.tar
          key: build-${{ inputs.release-version }}-${{ inputs.python-version }}-${{ inputs.cuda-version }}-${{ inputs.torch-version }}-${{ inputs.cxx11_abi }}-${{ github.run_number }}-${{ github.run_attempt }}
          restore-keys: |
            build-${{ inputs.release-version }}-${{ inputs.python-version }}-${{ inputs.cuda-version }}-${{ inputs.torch-version }}-${{ inputs.cxx11_abi }}-

      - name: Unpack build cache
        run: |
          # Normalize timestamps so ninja/make don't consider cached objects stale.
          echo ::group::Adjust timestamps
          sudo find / -exec touch -t 197001010000 {} + || true
          echo ::endgroup::

          if [ -f build.tar ]; then
            find . -mindepth 1 -maxdepth 1 ! -name 'build.tar' -exec rm -rf {} +
            tar -xpvf build.tar -C .
          else
            echo "No build.tar found, skipping"
          fi

          ls -al ./
          ls -al build/ || true
          ls -al csrc/ || true

      - name: Build wheel
        id: build_wheel
        run: |
          # We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
          # https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
          # However this still fails so I'm using a newer version of setuptools
          pip install setuptools==75.8.0
          pip install ninja packaging wheel
          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
          # Limit MAX_JOBS otherwise the github runner goes OOM
          # nvcc 11.8 can compile with 2 jobs, but nvcc 12.3 goes OOM

          export MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "129" ] && echo 1 || echo 2)
          export NVCC_THREADS=2
          export TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX"

          # 5h timeout since GH allows max 6h and we want some buffer
          EXIT_CODE=0
          timeout 5h python setup.py bdist_wheel --dist-dir=dist || EXIT_CODE=$?

          if [ $EXIT_CODE -eq 0 ]; then
            # Embed cuda/torch/abi info into the wheel's local version tag,
            # e.g. pkg-1.0-... -> pkg-1.0+cu12torch2.8cxx11abiTRUE-...
            tmpname=cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ inputs.cxx11_abi }}
            wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
            ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
            echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
          fi

          # Expose the exit code as a step OUTPUT for the timeout-handling steps
          # below (read via steps.build_wheel.outputs.build_exit_code).
          echo "build_exit_code=$EXIT_CODE" | tee -a "$GITHUB_OUTPUT"

          # Fail the step on error or timeout (124); the cache-save steps below
          # still run because they are guarded with `if: always()`.
          exit $EXIT_CODE

      # Exit code 124 means `timeout` killed the build: pack the partial build
      # tree so the next attempt can resume from the cache.
      - name: Pack build directory after timeout
        if: always() && steps.build_wheel.outputs.build_exit_code == 124
        run: |
          ls -al ./
          tar -cvf build.tar . --atime-preserve=replace

      - name: Save build cache timeout
        if: always() && steps.build_wheel.outputs.build_exit_code == 124
        uses: actions/cache/save@v4
        with:
          key: build-${{ inputs.release-version }}-${{ inputs.python-version }}-${{ inputs.cuda-version }}-${{ inputs.torch-version }}-${{ inputs.cxx11_abi }}-${{ github.run_number }}-${{ github.run_attempt }}
          path: build.tar

      - name: Log Built Wheels
        run: |
          ls dist

      - name: Get Release with tag
        id: get_current_release
        uses: joutvhu/get-release@v1
        with:
          tag_name: ${{ inputs.release-version }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Upload Release Asset
        id: upload_release_asset
        if: inputs.upload-to-release
        # NOTE(review): actions/upload-release-asset is archived/unmaintained;
        # consider softprops/action-gh-release as a replacement.
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.get_current_release.outputs.upload_url }}
          asset_path: ./dist/${{ env.wheel_name }}
          asset_name: ${{ env.wheel_name }}
          asset_content_type: application/*

.github/workflows/build.yml

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,67 @@
# Entry-point workflow: builds the full wheel matrix on push, or a single
# caller-specified configuration via workflow_call. Delegates the actual build
# to the reusable _build.yml template.
name: Build wheels

on:
  workflow_call:
    inputs:
      runs-on:
        description: "The runner to use for the build"
        # Fixed: was `required: true`, which contradicts having a default —
        # a required input must always be supplied, so the default was dead.
        required: false
        type: string
        default: ubuntu-22.04
      python-version:
        description: "The Python version to use for the build"
        required: true
        type: string
      cuda-version:
        description: "The CUDA version to use for the build"
        required: true
        type: string
      torch-version:
        description: "The PyTorch version to use for the build"
        required: true
        type: string
      cxx11_abi:
        description: "Enable torch flag C++11 ABI (TRUE/FALSE)"
        required: true
        type: string
      upload-to-release:
        description: "Upload wheel to this release"
        required: false
        type: boolean
        default: false
      release-version:
        description: "The release tag to upload the wheel to"
        required: false
        type: string
  # On plain pushes no inputs exist; the matrix fallbacks below are used.
  push:

jobs:
  build-wheels:
    uses: ./.github/workflows/_build.yml
    strategy:
      fail-fast: false
      matrix:
        # Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
        # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
        os: [ubuntu-22.04]
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
        torch-version: ["2.4.0", "2.5.1", "2.6.0", "2.7.1", "2.8.0"]
        cuda-version: ["12.9.1"]
        # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
        # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
        # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
        # when building without C++11 ABI and using it on nvcr images.
        cxx11_abi: ["FALSE", "TRUE"]
        exclude:
          # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
          # Pytorch < 2.5 does not support Python 3.13
          - torch-version: "2.4.0"
            python-version: "3.13"
    with:
      runs-on: ${{ inputs.runs-on || matrix.os }}
      python-version: ${{ inputs.python-version || matrix.python-version }}
      cuda-version: ${{ inputs.cuda-version || matrix.cuda-version }}
      torch-version: ${{ inputs.torch-version || matrix.torch-version }}
      cxx11_abi: ${{ inputs.cxx11_abi || matrix.cxx11_abi }}
      # NOTE(review): `matrix.upload-to-release` / `matrix.release-version` are
      # not defined in the matrix above, so on push these fall through to
      # false / "" — presumably intentional (no upload on push); confirm.
      upload-to-release: ${{ inputs.upload-to-release || matrix.upload-to-release }}
      release-version: ${{ inputs.release-version || matrix.release-version }}

.github/workflows/publish.yml

Lines changed: 93 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,93 @@
# This workflow will:
# - Create a new Github release
# - Build wheels for supported architectures
# - Deploy the wheels to the Github release
# - Release the static code to PyPi
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

name: Build wheels and deploy

on:
  # Fixed: the original used the `create` event with a `tags:` filter, but the
  # `create` event does not support branch/tag filters, so the workflow would
  # also fire on every branch creation. `push` with a tag filter is the
  # documented way to trigger on new version tags, and GITHUB_REF is still
  # `refs/tags/v*` so the tag extraction below keeps working.
  push:
    tags:
      - v*

jobs:
  setup_release:
    name: Create Release
    runs-on: ubuntu-latest
    outputs:
      release-version: ${{ steps.extract_branch.outputs.branch }}
    steps:
      # Strip the "refs/tags/" prefix to get the bare tag, e.g. "v1.2.3".
      - name: Get the tag version
        id: extract_branch
        # Fixed: `::set-output` is deprecated; write to $GITHUB_OUTPUT instead.
        run: echo "branch=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
        shell: bash
      - name: Create Release
        id: create_release
        # NOTE(review): actions/create-release is archived/unmaintained;
        # consider softprops/action-gh-release as a replacement.
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.extract_branch.outputs.branch }}
          release_name: ${{ steps.extract_branch.outputs.branch }}

  build_wheels:
    name: Build Wheel
    needs: setup_release
    strategy:
      fail-fast: false
      # NOTE(review): this matrix duplicates the one in build.yml — keep the
      # two in sync when adding versions.
      matrix:
        # Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
        # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
        os: [ubuntu-22.04]
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
        torch-version: ["2.4.0", "2.5.1", "2.6.0", "2.7.1", "2.8.0"]
        cuda-version: ["12.9.1"]
        # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
        # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
        # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
        # when building without C++11 ABI and using it on nvcr images.
        cxx11_abi: ["FALSE", "TRUE"]
        exclude:
          # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
          # Pytorch < 2.5 does not support Python 3.13
          - torch-version: "2.4.0"
            python-version: "3.13"
    uses: ./.github/workflows/_build.yml
    with:
      runs-on: ${{ matrix.os }}
      python-version: ${{ matrix.python-version }}
      cuda-version: ${{ matrix.cuda-version }}
      torch-version: ${{ matrix.torch-version }}
      cxx11_abi: ${{ matrix.cxx11_abi }}
      release-version: ${{ needs.setup_release.outputs.release-version }}
      upload-to-release: true

  publish_package:
    name: Publish package
    needs: [build_wheels]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          pip install ninja packaging wheel twine
          # Install latest setuptools with support for pypi metadata 2.2 (improved compat w/ uv)
          pip install setuptools==75.8.0
          # We don't want to download anything CUDA-related here
          pip install torch --index-url https://download.pytorch.org/whl/cpu
      # Build a source distribution only; the CUDA extension build is skipped.
      - name: Build core package
        env:
          GROUPED_GEMM_SKIP_CUDA_BUILD: "TRUE"
        run: |
          python setup.py sdist --dist-dir=dist
      - name: Deploy
        env:
          TWINE_USERNAME: "__token__"
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          python -m twine upload dist/*

0 commit comments

Comments
 (0)