Skip to content

Commit 1e1148b

Browse files
CI: Add Windows GPU runner for tests (#444)
* add simple windows runner * try this * shrink build matrix for now * duplicate workflow for windows * fix conflicts * try to use default (power) shell on windows runner * use bash shell too * add driver update step + start rewriting to ps1 * custom driver installation + debug * update driver version for using on different VM * avoid Resolve-Path; add gh install step * restore job dependency * port the remaining steps to PS too * try to fix extras * avoid using our own fetch_ctk since it's using bash * fix typo * fix typo again * fixes * force evaluation * resume driver install * clean up and add MINI_CTK_DEPS * fix nvvm tests with local CTK * debug * it does not seem we need to escape * fix again... * clean up for review --------- Co-authored-by: Keenan Simpson <[email protected]>
1 parent 060af3e commit 1e1148b

File tree

4 files changed

+294
-5
lines changed

4 files changed

+294
-5
lines changed

.github/workflows/build-and-test.yml

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -202,16 +202,14 @@ jobs:
202202
run: |
203203
echo "CUDA_VERSION=${{ matrix.cuda-version }}" >> $GITHUB_OUTPUT
204204
205-
test:
205+
test-linux:
206206
strategy:
207207
fail-fast: false
208208
# TODO: add driver version here
209209
matrix:
210210
host-platform:
211211
- linux-64
212212
- linux-aarch64
213-
# TODO: enable testing once win-64 GPU runners are up
214-
# - win-64
215213
python-version:
216214
- "3.13"
217215
- "3.12"
@@ -246,7 +244,42 @@ jobs:
246244
- build
247245
secrets: inherit
248246
uses:
249-
./.github/workflows/test-wheel.yml
247+
./.github/workflows/test-wheel-linux.yml
248+
with:
249+
host-platform: ${{ matrix.host-platform }}
250+
python-version: ${{ matrix.python-version }}
251+
build-ctk-ver: ${{ needs.build.outputs.BUILD_CTK_VER }}
252+
cuda-version: ${{ matrix.cuda-version }}
253+
local-ctk: ${{ matrix.local-ctk}}
254+
runner: ${{ matrix.runner }}
255+
256+
test-windows:
257+
strategy:
258+
fail-fast: false
259+
# TODO: add driver version here
260+
matrix:
261+
host-platform:
262+
- win-64
263+
python-version:
264+
- "3.12"
265+
cuda-version:
266+
# Note: this is for test-time only.
267+
- "12.8.0"
268+
- "11.8.0"
269+
local-ctk:
270+
- 1 # use mini CTK
271+
- 0 # use CTK wheels
272+
runner:
273+
- default
274+
name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }})
275+
if: ${{ github.repository_owner == 'nvidia' }}
276+
permissions:
277+
contents: read # This is required for actions/checkout
278+
needs:
279+
- build
280+
secrets: inherit
281+
uses:
282+
./.github/workflows/test-wheel-windows.yml
250283
with:
251284
host-platform: ${{ matrix.host-platform }}
252285
python-version: ${{ matrix.python-version }}
@@ -277,7 +310,8 @@ jobs:
277310
checks: read
278311
needs:
279312
- build
280-
- test
313+
- test-linux
314+
- test-windows
281315
- doc
282316
secrets: inherit
283317
uses:
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#Requires -RunAsAdministrator
2+
3+
# Install the driver
4+
function Install-Driver {
    <#
    .SYNOPSIS
        Downloads the NVIDIA data-center driver installer and runs it silently.
    .DESCRIPTION
        Creates C:\NVIDIA-Driver if it is missing, downloads the 572.13 driver
        installer into it, then runs the installer with silent/no-reboot flags
        and waits for it to finish. A non-zero installer exit code is reported
        as a warning (not an error) so the caller's behavior is unchanged.
    #>

    # Set the correct URL, filename, and arguments to the installer
    # This driver is picked to support Windows 11 & CUDA 12.8
    $url = 'https://us.download.nvidia.com/tesla/572.13/572.13-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'
    $download_dir = 'C:\NVIDIA-Driver'
    $installer_path = Join-Path $download_dir '572.13-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'
    $install_args = '/s /noeula /noreboot'

    # Create the folder for the driver download
    if (!(Test-Path -Path $download_dir)) {
        New-Item -Path $download_dir -ItemType 'directory' | Out-Null
    }

    # Download the file to a specified directory
    # Disabling progress bar due to https://github.com/GoogleCloudPlatform/compute-gpu-installation/issues/29
    $ProgressPreference_tmp = $ProgressPreference
    $ProgressPreference = 'SilentlyContinue'
    Write-Output 'Downloading the driver installer...'
    Invoke-WebRequest $url -OutFile $installer_path
    $ProgressPreference = $ProgressPreference_tmp
    Write-Output 'Download complete!'

    # Run the downloaded installer and wait for it to exit. -PassThru returns
    # the process object so the exit code can be surfaced in the CI log instead
    # of being silently dropped.
    Write-Output 'Running the driver installer...'
    $installer = Start-Process -FilePath $installer_path -ArgumentList $install_args -Wait -PassThru
    if ($installer.ExitCode -ne 0) {
        # Deliberately non-fatal: NOTE(review) confirm which installer exit
        # codes (e.g. "reboot required") should be treated as hard failures.
        Write-Warning "Driver installer exited with code $($installer.ExitCode)"
    }
    Write-Output 'Done!'
}
31+
32+
# Run the functions
33+
Install-Driver
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
name: "CI: Test wheels"

# Reusable workflow: it is only triggered through `workflow_call`, and every
# input is a required string.
on:
  workflow_call:
    inputs:
      # Platform tag of the wheels under test (the caller passes win-64).
      host-platform:
        required: true
        type: string
      # Python version used for the test environment, e.g. "3.12".
      python-version:
        required: true
        type: string
      # CUDA Toolkit version the wheels were built against.
      build-ctk-ver:
        required: true
        type: string
      # CUDA version used at test time, e.g. "12.8.0".
      cuda-version:
        required: true
        type: string
      # '1' = test against a locally installed mini CTK; otherwise CTK wheels.
      local-ctk:
        required: true
        type: string
      # Runner selector; 'default' is the only value the job maps to a label.
      runner:
        required: true
        type: string
24+
25+
jobs:
26+
test:
27+
# The build stage could fail but we want the CI to keep moving.
28+
if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
29+
runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') }}
30+
steps:
31+
# fetch-depth: 0 asks actions/checkout for the full history, not a shallow clone.
- name: Checkout ${{ github.event.repository.name }}
  uses: actions/checkout@v4
  with:
    fetch-depth: 0
35+
36+
# Runs the driver installation script checked into the repository.
- name: Update driver
  run: |
    .github/workflows/install_gpu_driver.ps1
39+
40+
# Sanity check after the driver update: the step fails if nvidia-smi fails.
- name: Ensure GPU is working
  run: nvidia-smi
42+
43+
# Derives artifact names/paths and test switches from the workflow inputs and
# exports them through GITHUB_ENV for the later steps.
- name: Set environment variables
  run: |
    # Python version with the dot removed
    $pyTag = '${{ inputs.python-version }}' -replace '\.'
    $repoRoot = $PWD.Path

    # cuda.bindings tests are skipped when the build-time and test-time
    # CUDA major versions differ
    $buildMajor = ('${{ inputs.build-ctk-ver }}' -split '\.')[0]
    $testMajor = ('${{ inputs.cuda-version }}' -split '\.')[0]
    $skipBindingsTest = if ($buildMajor -ne $testMajor) { 1 } else { 0 }

    # Sub-packages for the mini CTK; stays empty when CTK wheels are used.
    # nvjitlink is only requested for CUDA 12.
    $miniCtkDeps = ''
    if ('${{ inputs.local-ctk }}' -eq '1') {
      $miniCtkDeps = if ($testMajor -eq '12') { '["nvcc", "nvrtc", "nvjitlink"]' } else { '["nvcc", "nvrtc"]' }
    }

    # Make outputs from the previous job as env vars
    $coreBase = "cuda-core-python${pyTag}-${{ inputs.host-platform }}"
    $bindingsBase = "cuda-bindings-python${pyTag}-cuda${{ inputs.build-ctk-ver }}-${{ inputs.host-platform }}"
    # The dist directories may not exist yet, so the paths are built without
    # going through Resolve-Path
    $coreDist = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath("$repoRoot\cuda_core\dist")
    $bindingsDist = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath("$repoRoot\cuda_bindings\dist")

    @(
      "PYTHON_VERSION_FORMATTED=${pyTag}"
      "CUDA_CORE_ARTIFACT_BASENAME=${coreBase}"
      "CUDA_CORE_ARTIFACT_NAME=${coreBase}-${{ github.sha }}"
      "CUDA_CORE_ARTIFACTS_DIR=${coreDist}"
      "CUDA_BINDINGS_ARTIFACT_BASENAME=${bindingsBase}"
      "CUDA_BINDINGS_ARTIFACT_NAME=${bindingsBase}-${{ github.sha }}"
      "CUDA_BINDINGS_ARTIFACTS_DIR=${bindingsDist}"
      "SKIP_CUDA_BINDINGS_TEST=${skipBindingsTest}"
      "MINI_CTK_DEPS=${miniCtkDeps}"
    ) | ForEach-Object { $_ >> $env:GITHUB_ENV }
76+
77+
# Only when build and test CUDA majors match; the wheel lands in the repo root.
- name: Download cuda-python build artifacts
  if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
  uses: actions/download-artifact@v4
  with:
    name: cuda-python-wheel
    path: .
83+
84+
# Only when build and test CUDA majors match; name and target directory come
# from the variables exported by the "Set environment variables" step.
- name: Download cuda.bindings build artifacts
  if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
  uses: actions/download-artifact@v4
  with:
    name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
90+
91+
# Downloads the GitHub CLI MSI, installs it silently, and puts its candidate
# install directories on PATH for this step and all later steps.
- name: Install gh cli
  # the GPU runner image does not have gh pre-installed...
  env:
    # doesn't seem there's an easy way to avoid hard-coding it?
    GH_MSI_URL: https://github.com/cli/cli/releases/download/v2.67.0/gh_2.67.0_windows_amd64.msi
  run: |
    Invoke-WebRequest -Uri "$env:GH_MSI_URL" -OutFile "gh_installer.msi"
    Start-Process msiexec.exe -Wait -Verbose -ArgumentList '/i "gh_installer.msi" /qn'
    # Backslash is not an escape character in PowerShell strings, so the paths
    # are written with single separators (single-quoted: no expansion needed).
    $GH_POSSIBLE_PATHS = 'C:\Program Files\GitHub CLI', 'C:\Program Files (x86)\GitHub CLI'
    foreach ($p in $GH_POSSIBLE_PATHS) {
      # GITHUB_PATH affects later steps; $env:Path makes gh usable right away
      echo "$p" >> $env:GITHUB_PATH
      $env:Path += ";$p"
    }
    gh --version
105+
106+
# When build and test CUDA majors differ, cuda.bindings / cuda-python wheels
# are taken from the latest completed "CI: Build and test" run on the branch
# named in .github/BACKPORT_BRANCH.
- name: Download cuda-python & cuda.bindings build artifacts from the prior branch
  if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '1'}}
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    $OLD_BRANCH = Get-Content .github/BACKPORT_BRANCH
    # Wildcard pattern: the CUDA version of the prior build is not known here
    $OLD_BASENAME = "cuda-bindings-python${env:PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*"
    $runData = gh run list -b $OLD_BRANCH -L 1 -w "CI: Build and test" -s completed -R NVIDIA/cuda-python --json databaseId | ConvertFrom-Json
    if (-not $runData -or $runData.Length -eq 0 -or -not $runData[0].databaseId -or [string]::IsNullOrEmpty($runData[0].databaseId)) {
      Write-Host "LATEST_PRIOR_RUN_ID not found!"
      exit 1
    }
    $LATEST_PRIOR_RUN_ID = $runData[0].databaseId

    gh run download $LATEST_PRIOR_RUN_ID -p $OLD_BASENAME -R NVIDIA/cuda-python
    Get-ChildItem -Path $OLD_BASENAME
    New-Item -Path "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" -ItemType Directory -Force
    Move-Item -Path "$OLD_BASENAME/*.whl" -Destination "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"
    # -Recurse: without it Remove-Item asks for confirmation if anything is
    # left in the directory, which cannot be answered on a CI runner
    Remove-Item -Path $OLD_BASENAME -Recurse -Force

    gh run download $LATEST_PRIOR_RUN_ID -p cuda-python-wheel -R NVIDIA/cuda-python
    Get-ChildItem -Path cuda-python-wheel
    Move-Item -Path "cuda-python-wheel/*.whl" -Destination .
    Remove-Item -Path cuda-python-wheel -Recurse -Force
130+
131+
# Debug aid: prints the working directory and a recursive listing of it.
- name: Display structure of downloaded cuda-python artifacts
  run: |
    Get-Location
    $entries = Get-ChildItem -Recurse -Force
    $entries | Select-Object Mode, LastWriteTime, Length, FullName
135+
136+
# Debug aid: recursive listing of the cuda.bindings artifact directory.
- name: Display structure of downloaded cuda.bindings artifacts
  run: |
    Get-Location
    $entries = Get-ChildItem -Recurse -Force $env:CUDA_BINDINGS_ARTIFACTS_DIR
    $entries | Select-Object Mode, LastWriteTime, Length, FullName
140+
141+
# Unconditional: cuda.core wheels always come from the current build; name and
# target directory are exported by the "Set environment variables" step.
- name: Download cuda.core build artifacts
  uses: actions/download-artifact@v4
  with:
    name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
146+
147+
# Debug aid: recursive listing of the cuda.core artifact directory.
- name: Display structure of downloaded cuda.core build artifacts
  run: |
    Get-Location
    $entries = Get-ChildItem -Recurse -Force $env:CUDA_CORE_ARTIFACTS_DIR
    $entries | Select-Object Mode, LastWriteTime, Length, FullName
151+
152+
# Installs the Python version requested by the caller.
- name: Set up Python ${{ inputs.python-version }}
  uses: actions/setup-python@v5
  with:
    python-version: ${{ inputs.python-version }}
156+
157+
# Installs only the CUDA Toolkit sub-packages listed in MINI_CTK_DEPS, at the
# test-time CUDA version, via the network installer.
- name: Set up mini CTK
  if: ${{ inputs.local-ctk == '1' }}
  # Note: The GH-hosted Windows GPU runner does not have Git for Windows pre-installed,
  # so we cannot use our own fetch_ctk action unfortunately...
  # NOTE(review): the action reference had been mangled into an invalid
  # "[email protected]" placeholder; restored to the Jimver/cuda-toolkit action.
  # Confirm the pinned tag (v0.2.21) against the upstream repository.
  uses: Jimver/cuda-toolkit@v0.2.21
  with:
    cuda: ${{ inputs.cuda-version }}
    method: 'network'
    sub-packages: ${{ env.MINI_CTK_DEPS }}
166+
167+
# Adds the CTK's nvvm\bin directory to PATH for all later steps.
- name: Update PATH
  if: ${{ inputs.local-ctk == '1' }}
  run: |
    # mimics actual CTK installation
    # Environment variables live on the env: drive in PowerShell; a bare $PATH
    # is an unset variable and would print nothing.
    echo $env:PATH
    # CUDA_PATH is presumably exported by the mini CTK step - TODO confirm
    echo "$env:CUDA_PATH\nvvm\bin" >> $env:GITHUB_PATH
173+
174+
# Installs the cuda.bindings wheel (with the [all] extra when CTK wheels are
# used instead of a local CTK) and runs its Python test suite.
- name: Run cuda.bindings tests
  if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
  run: |
    Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR
    if ('${{ inputs.local-ctk }}' -eq '1') {
      Get-ChildItem $env:CUDA_PATH
      # Environment variables live on the env: drive in PowerShell; a bare
      # $PATH is an unset variable and would print nothing.
      echo $env:PATH
      pip install (Get-ChildItem -Filter *.whl).FullName
    } else {
      pip install "$((Get-ChildItem -Filter *.whl).FullName)[all]"
    }
    Pop-Location

    Push-Location ./cuda_bindings
    pip install -r requirements.txt
    pytest -rxXs tests/
    # skip Cython tests for now
    Pop-Location
192+
193+
# Installs cuda.bindings if the previous step was skipped, then installs the
# cuda.core wheel with the extra for the test-time CUDA major and runs its tests.
- name: Run cuda.core tests
  run: |
    # If build/test majors match: cuda.bindings is installed in the previous step.
    # If mismatch: cuda.bindings is installed from the backport branch.
    if ($env:SKIP_CUDA_BINDINGS_TEST -eq '1') {
      Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR
      $bindingsWheel = (Get-ChildItem -Filter *.whl).FullName
      if ('${{ inputs.local-ctk }}' -ne '1') {
        # no local CTK: pull the CTK in through the wheel's [all] extra
        $bindingsWheel = "${bindingsWheel}[all]"
      }
      pip install $bindingsWheel
      Pop-Location
    }

    $cudaMajor = ('${{ inputs.cuda-version }}' -split '\.')[0]

    Push-Location $env:CUDA_CORE_ARTIFACTS_DIR
    $coreWheel = (Get-ChildItem -Filter *.whl).FullName
    pip install "${coreWheel}[cu${cudaMajor}]"
    Pop-Location

    Push-Location ./cuda_core
    pip install -r "tests/requirements-cu${cudaMajor}.txt"
    pytest -rxXs tests/
    Pop-Location
215+
216+
# Smoke test: the cuda-python wheel in the working directory must install,
# with the [all] extra when CTK wheels are used instead of a local CTK.
- name: Ensure cuda-python installable
  run: |
    $metaWheel = (Get-ChildItem -Filter cuda_python*.whl).FullName
    if ('${{ inputs.local-ctk }}' -ne '1') {
      $metaWheel = "${metaWheel}[all]"
    }
    pip install $metaWheel

0 commit comments

Comments
 (0)