From 41b52845e49e0c327ae08792131597da8be6586c Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Mon, 13 Oct 2025 11:23:49 -0700 Subject: [PATCH 01/10] Add temp override for testing same matrix as Linux. --- ci/matrix.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 9b9f9639149..cfd90471fb1 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -21,6 +21,7 @@ workflows: # args: '--preset libcudacxx-cpp20 --lit-tests "cuda/utility/basic_any.pass.cpp"' } # override: + - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], cxx: ['msvc'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4'} pull_request: # Old CTK: Oldest/newest supported host compilers: From 99582e031d0b26b0c494674111ae80fe8b3aabcc Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Thu, 2 Oct 2025 14:10:48 -0700 Subject: [PATCH 02/10] Implement initial Windows CI support for the Python cuda-cccl library. --- .../workflow-run-job-windows/action.yml | 5 + c/parallel/src/three_way_partition.cu | 4 +- ci/matrix.yaml | 2 +- ci/windows/build_common.psm1 | 7 + ci/windows/build_common_python.psm1 | 288 +++++++++++ ci/windows/build_cuda_cccl_python.ps1 | 475 ++++++++++++++++++ ci/windows/test_cuda_cccl_examples_python.ps1 | 27 + ci/windows/test_cuda_cccl_headers_python.ps1 | 27 + ci/windows/test_cuda_compute_python.ps1 | 29 ++ ci/windows/test_cuda_coop_python.ps1 | 25 + pyproject.toml | 4 +- python/cuda_cccl/pyproject.toml | 3 +- python/cuda_cccl/tests/test_examples.py | 5 +- 13 files changed, 892 insertions(+), 9 deletions(-) create mode 100644 ci/windows/build_common_python.psm1 create mode 100644 ci/windows/build_cuda_cccl_python.ps1 create mode 100644 ci/windows/test_cuda_cccl_examples_python.ps1 create mode 100644 ci/windows/test_cuda_cccl_headers_python.ps1 create mode 100644 ci/windows/test_cuda_compute_python.ps1 create mode 100644 ci/windows/test_cuda_coop_python.ps1 diff --git a/.github/actions/workflow-run-job-windows/action.yml 
b/.github/actions/workflow-run-job-windows/action.yml index cbbce4b9761..849673a2e35 100644 --- a/.github/actions/workflow-run-job-windows/action.yml +++ b/.github/actions/workflow-run-job-windows/action.yml @@ -188,6 +188,7 @@ runs: docker_exit=0 set +e docker run \ + -v //./pipe/docker_engine://./pipe/docker_engine \ --mount type=bind,source="${{steps.paths.outputs.HOST_REPO}}",target="${{steps.paths.outputs.MOUNT_REPO}}" \ --mount type=bind,source="${{ env.ARTIFACT_UPLOAD_STAGE_WIN }}",target="${{ env.ARTIFACT_UPLOAD_STAGE_WIN }}" \ --mount type=bind,source="${{ env.ARTIFACT_ARCHIVES_WIN }}",target="${{ env.ARTIFACT_ARCHIVES_WIN }}" \ @@ -197,14 +198,18 @@ runs: --isolation=process \ ${{ env.ENABLE_GPU }} \ --env COMMAND='& ${{inputs.command}}' \ + --env COMMAND='& ${{inputs.command}}' \ --env "ARTIFACT_ARCHIVES=${{env.ARTIFACT_ARCHIVES}}" \ --env "ARTIFACT_UPLOAD_REGISTERY=${{env.ARTIFACT_UPLOAD_REGISTERY}}" \ --env "ARTIFACT_UPLOAD_STAGE=${{env.ARTIFACT_UPLOAD_STAGE}}" \ --env "AWS_ACCESS_KEY_ID=${{env.AWS_ACCESS_KEY_ID}}" \ --env "AWS_SECRET_ACCESS_KEY=${{env.AWS_SECRET_ACCESS_KEY}}" \ --env "AWS_SESSION_TOKEN=${{env.AWS_SESSION_TOKEN}}" \ + --env "CONTAINER_WORKSPACE=${{steps.paths.outputs.MOUNT_REPO}}" \ --env "CI=true" \ --env "DEVCONTAINER_NAME=cuda${{inputs.cuda}}-${{inputs.host}}" \ + --env "DOCKER_HOST=npipe:////./pipe/docker_engine" \ + --env "HOST_WORKSPACE=${{steps.paths.outputs.HOST_REPO}}" \ --env "GH_TOKEN=$GH_TOKEN" \ --env "GITHUB_ACTIONS=$GITHUB_ACTIONS" \ --env "GITHUB_REF_NAME=$GITHUB_REF_NAME" \ diff --git a/c/parallel/src/three_way_partition.cu b/c/parallel/src/three_way_partition.cu index 9ad80a10c20..93ed631001b 100644 --- a/c/parallel/src/three_way_partition.cu +++ b/c/parallel/src/three_way_partition.cu @@ -99,12 +99,12 @@ std::string get_three_way_partition_kernel_name( std::string_view select_second_part_op_name) { std::string chained_policy_t; - check(nvrtcGetTypeName(&chained_policy_t)); + 
check(cccl_type_name_from_nvrtc(&chained_policy_t)); constexpr std::string_view scan_tile_state_t = "cub::detail::three_way_partition::ScanTileStateT"; std::string offset_t; - check(nvrtcGetTypeName(&offset_t)); + check(cccl_type_name_from_nvrtc(&offset_t)); const std::string streaming_context_t = std::format("cub::detail::three_way_partition::streaming_context_t<{0}>", offset_t); diff --git a/ci/matrix.yaml b/ci/matrix.yaml index cfd90471fb1..9d0fac63f61 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -501,7 +501,7 @@ projects: name: "Python" job_map: build: ['build_py_wheel'] - test: ['test_py_headers', 'test_py_coop', 'test_py_par', test_py_examples] + test: ['test_py_headers', 'test_py_coop', 'test_py_par', 'test_py_examples'] cccl_c_parallel: name: 'CCCL C Parallel' stds: [20] diff --git a/ci/windows/build_common.psm1 b/ci/windows/build_common.psm1 index 6badd9e920a..4fdb2fb4789 100644 --- a/ci/windows/build_common.psm1 +++ b/ci/windows/build_common.psm1 @@ -19,6 +19,13 @@ $ErrorActionPreference = "Stop" $script:HOST_COMPILER = (Get-Command "cl").source -replace '\\','/' $script:PARALLEL_LEVEL = $env:NUMBER_OF_PROCESSORS +Write-Host "=== Docker Container Resource Info ===" +Write-Host "Number of Processors: $script:PARALLEL_LEVEL" +Get-WmiObject Win32_OperatingSystem | ForEach-Object { + Write-Host ("Memory: total={0:N1} GB, free={1:N1} GB" -f ($_.TotalVisibleMemorySize / 1MB), ($_.FreePhysicalMemory / 1MB)) +} +Write-Host "======================================" + # Extract the CL version for export to build scripts: $script:CL_VERSION_STRING = & cl.exe /? 
if ($script:CL_VERSION_STRING -match "Version (\d+\.\d+)\.\d+") { diff --git a/ci/windows/build_common_python.psm1 b/ci/windows/build_common_python.psm1 new file mode 100644 index 00000000000..ba7af0b3679 --- /dev/null +++ b/ci/windows/build_common_python.psm1 @@ -0,0 +1,288 @@ +function Get-LatestPythonPatchVersionFromPyEnvWin { + <# + .SYNOPSIS + Resolves the latest patch version for a given Python major.minor (e.g. + '3.12') by parsing `pyenv install --list` output on Windows (pyenv-win). + .PARAMETER Version + A string in the form 'M.m' (e.g., '3.10', '3.11', '3.12'). + #> + [CmdletBinding()] + param( + [Parameter(Mandatory, Position = 0)] + [ValidatePattern('^\d+\.\d+$')] + [string]$Version + ) + + # Verify pyenv exists. + if (-not (Get-Command pyenv -ErrorAction SilentlyContinue)) { + throw [System.InvalidOperationException]::new( + 'pyenv-win ("pyenv") not found on PATH.' + ) + } + + $listOutput = & pyenv install --list 2>&1 + if ($LASTEXITCODE -ne 0 -or -not $listOutput) { + $joined = $listOutput -join "`n" + throw [System.InvalidOperationException]::new( + "Failed to run 'pyenv install --list'. Output:`n$joined" + ) + } + + # Build a list of patch numbers that match the requested minor version. + $versionPrefix = "$Version." + $patchNumbers = @() + foreach ($line in $listOutput) { + $candidate = $line.Trim() + if (-not $candidate) { continue } + + # Accept any major version; the StartsWith check guarantees we only + # keep the wanted minor. + if (-not $candidate.StartsWith($versionPrefix)) { continue } + if ($candidate -notmatch '^\d+\.\d+\.\d+$') { continue } + + $patchNumbers += [int]($candidate.Split('.')[2]) + } + + if ($patchNumbers.Count -eq 0) { + throw [System.InvalidOperationException]::new( + "No installable CPython versions found for prefix " + + "'$Version' in pyenv-win list." 
+ ) + } + + $latestPatch = ($patchNumbers | Sort-Object -Descending)[0] + return "$Version.$latestPatch" +} + +function Install-PythonViaPyEnvWin { + <# + .SYNOPSIS + Ensures a Python version for the given major.minor exists via + pyenv-win, activates it for the current shell, and returns the + path to python.exe. + .PARAMETER Version + A string in the form 'M.m' (e.g., '3.12'). + #> + param( + [Parameter(Mandatory, Position = 0)] + [ValidatePattern('^\d+\.\d+$')] + [string]$Version + ) + + $fullVersion = Get-LatestPythonPatchVersionFromPyEnvWin ` + -Version $Version + + Write-Host "Installing Python $fullVersion via pyenv..." + Write-Host "pyenv install $fullVersion" + ($null = & pyenv install $fullVersion | Out-Host) + if ($LASTEXITCODE -ne 0) { + throw [System.InvalidOperationException]::new( + "Failed to install Python $fullVersion via pyenv." + ) + } + Write-Host "Successfully installed Python $fullVersion via pyenv." + + ($null = & pyenv local $fullVersion | Out-Host) + if ($LASTEXITCODE -ne 0) { + throw [System.InvalidOperationException]::new( + "Failed to set Python $fullVersion as local via pyenv." + ) + } + Write-Host "Successfully set Python $fullVersion as local via pyenv." + + # Avoid the shim (i.e. shims/python.bat) because it will attempt to set + # a codepage via `chcp` that we probably won't have installed on our + # Server Core-based image. + $exe = (Resolve-Path -LiteralPath $(pyenv which python)).Path + Write-Host "python.exe path: $exe" + + # Add the root and Scripts directory to $Env:PATH. + $rootDir = $exe.Replace("\python.exe", "") + $scriptsDir = $exe.Replace("python.exe", "Scripts") + $pathPrefix = $rootDir + ";" + $scriptsDir + ";" + $Env:PATH = $pathPrefix + $Env:PATH + + # Upgrade pip using the found exe. This is necessary because some older + # versions of pip (e.g. 23.10) don't support arguments like `--wheeldir`. 
+ ($null = & $exe -m pip install --upgrade pip --no-cache-dir | Out-Host) + if ($LASTEXITCODE -ne 0) { + throw [System.InvalidOperationException]::new("pip upgrade failed") + } + + Write-Host "pip successfully upgraded, running pyenv rehash..." + ($null = & pyenv rehash | Out-Host) + if ($LASTEXITCODE -ne 0) { + throw [System.InvalidOperationException]::new("pyenv rehash failed") + } + Write-Host "Successfully ran pyenv rehash." + + return $exe +} + +function Get-Python { + <# + .SYNOPSIS + Returns the path of the Python interpreter satisfying the supplied + version, potentially installing it via pyenv-win if it's not already + installed. + #> + [CmdletBinding()] + param( + [Parameter(Mandatory, Position = 0)] + [ValidatePattern('^\d+\.\d+$')] + [string]$Version + ) + + # Look for a plain 'python.exe' already on the path. + try { + $candidate = (Get-Command python -ErrorAction Stop).Source + $foundVer = & $candidate -c "import sys; print(f'{sys.version_info[0]}.{sys.version_info[1]}')" 2>$null + if ($foundVer -eq $Version) { + Write-Host "Found matching Python $foundVer at $candidate." + return $candidate.Trim() + } + else { + Write-Host "Found python.exe but version $foundVer != requested version $Version." + } + } + catch { + Write-Host "Unable to query existing 'python' on PATH: $_" + } + + # If we reach here, we'll need to install the requested version via pyenv. + try { + $exe = Install-PythonViaPyEnvWin -Version $Version + return $exe.Trim() + } + catch { + throw [System.InvalidOperationException]::new( + "Requested Python $Version not found and installation " + + "via pyenv-win failed: $($_.Exception.Message)" + ) + } +} + +function Get-RepoRoot { + return (Resolve-Path "$PSScriptRoot/../..") +} + +function Get-CudaMajor { + <# + .SYNOPSIS + Gets the CUDA major version for this container instance (e.g. '12' or + '13'). Defaults to '13' if no match can be found. 
+ #> + if ($env:CUDA_PATH) { + $nvcc = Join-Path $env:CUDA_PATH "bin/nvcc.exe" + if (Test-Path $nvcc) { + $out = & $nvcc --version 2>&1 + $text = ($out -join "`n") + if ($text -match 'release\s+(\d+)\.') { return $Matches[1] } + } + # Fallback: parse major from CUDA_PATH like ...\v13.0 or ...\CUDA\13 + $pathMatch = [regex]::Match($env:CUDA_PATH, 'v?(\d+)(?:\.\d+)?') + if ($pathMatch.Success) { return $pathMatch.Groups[1].Value } + } + return '13' +} + +function Convert-ToUnixPath { + Param([Parameter(Mandatory = $true)][string]$p) + return ($p -replace "\\", "/") +} + +function Get-CudaCcclWheel { + <# + .SYNOPSIS + Returns the path of the cuda-cccl wheel artifact to use in the context + of a GitHub Actions CI test script. + #> + Param() + + $repoRoot = Get-RepoRoot + if ($env:GITHUB_ACTIONS) { + Push-Location $repoRoot + try { + $wheelArtifactName = (& bash -lc "ci/util/workflow/get_wheel_artifact_name.sh").Trim() + if (-not $wheelArtifactName) { throw 'Failed to resolve wheel artifact name' } + $repoRootPosix = Convert-ToUnixPath $repoRoot + # Ensure output from downloader goes to console, not function return pipeline + $null = (& bash -lc "ci/util/artifacts/download.sh $wheelArtifactName $repoRootPosix" 2>&1 | Out-Host) + if ($LASTEXITCODE -ne 0) { throw "Failed to download wheel artifact '$wheelArtifactName'" } + } + finally { Pop-Location } + } + + $wheelhouse = Join-Path $repoRoot 'wheelhouse' + $wheelPath = Get-OnePathMatch -Path $wheelhouse -Pattern '^cuda_cccl-.*\.whl' -File + return $wheelPath +} + +function Get-OnePathMatch { + <# + .SYNOPSIS + Returns a single path (file or directory) match for a given pattern, + throwing an error if there were no matches or more than one match. 
+ #> + [CmdletBinding(DefaultParameterSetName = 'FileSet')] + param( + [Parameter(Mandatory)] + [string] $Path, + + [Parameter(Mandatory)] + [string] $Pattern, + + [Parameter(Mandatory, ParameterSetName = 'FileSet')] + [switch] $File, + + [Parameter(Mandatory, ParameterSetName = 'DirSet')] + [switch] $Directory, + + [switch] $Recurse + ) + + if (-not (Test-Path -LiteralPath $Path -PathType Container)) { + throw "Path not found or not a directory: $Path" + } + + $gciArgs = @{ + LiteralPath = $Path + ErrorAction = 'SilentlyContinue' + } + + if ($Recurse) { $gciArgs['Recurse'] = $true } + if ($PSCmdlet.ParameterSetName -eq 'FileSet') { + $gciArgs['File'] = $true + } + else { + $gciArgs['Directory'] = $true + } + + $pathMatches = @( + Get-ChildItem @gciArgs | + Where-Object { $_.Name -match $Pattern } | + Select-Object -ExpandProperty FullName + ) + + if ($pathMatches.Count -ne 1) { + $kind = if ($PSCmdlet.ParameterSetName -eq 'FileSet') { 'file' } + else { 'directory' } + $indented = ($pathMatches | ForEach-Object { " $_" }) -join "`n" + + $msg = @" +Expected exactly one $kind name matching regex: + $Pattern +under: + $Path +Found: + $($pathMatches.Count) + +$indented +"@ + throw $msg + } + + return $pathMatches[0] +} + +Export-ModuleMember -Function Get-Python, Get-CudaMajor, Convert-ToUnixPath, Get-RepoRoot, Get-CudaCcclWheel, Get-OnePathMatch diff --git a/ci/windows/build_cuda_cccl_python.ps1 b/ci/windows/build_cuda_cccl_python.ps1 new file mode 100644 index 00000000000..164c6751820 --- /dev/null +++ b/ci/windows/build_cuda_cccl_python.ps1 @@ -0,0 +1,475 @@ +<# +.SYNOPSIS + Build Python cuda-cccl wheels for the cuda.compute and cuda.coop packages + on Windows. + +.DESCRIPTION + This script is the Windows analog to the Linux ../build_cuda_cccl_python.sh + script. It is responsible for building CUDA 12.x and CUDA 13.x wheels that + are then merged together into a singular cuda-cccl wheel. + + A single CUDA 12.9 builder image (i.e. 
Docker devcontainer) is used to + build each distinct Python/MSVC combo. Much like the Linux approach, this + script detects when launched via the outer 12.9 instance, builds a `cu12` + wheel, then dispatches an inner Docker instance (Docker-out-of-Docker) to + execute this script with `-OnlyCudaMajor 13 -SkipUpload` parameters, which + yields a `cu13` build. + + Upon completion of the `cu13` build, the outer 12.9 container merges both + `cu12` and `cu13` wheels into a single cuda-cccl wheel, and uploads that + via the standard CCCL CI artifact upload mechanisms. + +.PARAMETER PyVersion + **Required.** The Python version to use for building the wheel, expressed + as `<major>.<minor>` (e.g. `3.11`). + +.PARAMETER UseNinja + When present, uses Ninja instead of Visual Studio for the CMake generator + if the `ninja` executable can be found on the PATH. If Ninja is not + available the script falls back to the default Visual Studio generator + and continues without error. + +.PARAMETER OnlyCudaMajor + Optional. Restricts the build to a single CUDA major version (`12` or `13`). + When set, only that version is built and the *merge* step is skipped. + +.PARAMETER Cuda13Image + Optional. The Docker image name used for a nested build of the CUDA 13 + wheel when the outer container defaults to CUDA 12.9. The default value + matches the RAPIDS dev‑container image that contains the required + toolchain: `rapidsai/devcontainers:25.12-cuda13.0-cl14.44-windows2022`. + +.PARAMETER SkipUpload + When set, prevents the final wheel(s) from being uploaded as a GitHub + Actions artifact even when the script detects it is running inside an + Action. + +.EXAMPLE + # Build a single cuda-cccl wheel for Python 3.11 (consisting of both CUDA + # 12 and 13 versions), and, if in CI, upload the resulting wheel as an + # artifact.
+ .\build_cuda_cccl_python.ps1 -PyVersion 3.11 +#> + +[CmdletBinding()] +Param( + [Parameter(Mandatory = $true)] + [Alias("py-version")] + [ValidatePattern("^\d+\.\d+$")] + [string]$PyVersion, + + [Parameter(Mandatory = $false)] + [switch]$UseNinja, + + [Parameter(Mandatory = $false)] + [ValidateSet('12', '13')] + [string]$OnlyCudaMajor, + + [Parameter(Mandatory = $false)] + [string]$Cuda13Image = "rapidsai/devcontainers:25.12-cuda13.0-cl14.44-windows2022", + + [Parameter(Mandatory = $false)] + [switch]$SkipUpload +) + +$ErrorActionPreference = "Stop" + +# Import shared helpers. +Import-Module "$PSScriptRoot/build_common.psm1" +Import-Module "$PSScriptRoot/build_common_python.psm1" -Force + +# Resolve repo root from this script's location. +$RepoRoot = Resolve-Path "$PSScriptRoot/../.." +Write-Host "Repo root: $RepoRoot" + +# Get the full path to the python.exe for the version we need. +Write-Host "Looking for Python version $PyVersion..." +$PythonExe = Get-Python -Version $PyVersion +Write-Host "Using Python: $PythonExe" +& $PythonExe -m pip --version + +# Ensure MSVC is available. +$clPath = (Get-Command cl).Source +if (-not $clPath) { + throw "cl.exe not found in PATH. Run from a Developer PowerShell prompt." 
+} +Write-Host "Found cl.exe at: $clPath" + +function Resolve-CudaPathForMajor { + Param( + [Parameter(Mandatory = $true)] + [ValidateSet('12', '13')] + [string]$Major + ) + $candidates = @() + Get-ChildItem Env: | + Where-Object { $_.Name -match "^CUDA_PATH_V${Major}_(\d+)$" } | + ForEach-Object { + $minor = [int]([regex]::Match( + $_.Name, + "^CUDA_PATH_V${Major}_(\d+)$" + ).Groups[1].Value) + $candidates += [PSCustomObject]@{ + Minor = $minor; + Path = $_.Value + } + } + + if ($candidates.Count -gt 0) { + return ($candidates | Sort-Object -Property Minor -Descending | + Select-Object -First 1).Path + } + + if ($env:CUDA_PATH) { + $maybe = $env:CUDA_PATH + $nvcc = Join-Path $maybe 'bin/nvcc.exe' + if (Test-Path $nvcc) { + $out = & $nvcc --version 2>&1 + $text = ($out -join "`n") + if ($text -match 'release\s+(\d+)\.') { + if ($Matches[1] -eq $Major) { + return $maybe + } + } + } + } + + return $null +} + +# If $OnlyCudaMajor is present, it means we're being launched from a +# nested Docker container build (12.x launched a 13.x build via DooD). +if ($OnlyCudaMajor) { + $CudaMajorsToBuild = @($OnlyCudaMajor) +} +else { + $CudaMajorsToBuild = @('12', '13') +} +$DoMerge = -not [bool]$OnlyCudaMajor + +# Base pip/CMake options +$pipBaseConfigArgs = @( + '-C', 'cmake.define.CMAKE_C_COMPILER=cl.exe', + '-C', 'cmake.define.CMAKE_CXX_COMPILER=cl.exe' +) + +# Use Ninja if requested and available. +if ($UseNinja) { + if (Get-Command ninja -ErrorAction SilentlyContinue) { + $env:CMAKE_GENERATOR = "Ninja" + Write-Host "CMAKE_GENERATOR=Ninja" + } + else { + Write-Host "Ninja not found; proceeding with default generator" ` + -ForegroundColor Yellow + $UseNinja = $false + if ($env:CMAKE_GENERATOR -eq 'Ninja') { + Remove-Item Env:CMAKE_GENERATOR -ErrorAction SilentlyContinue + } + } +} + +# Remove the env vars VS complains about when not using Ninja. 
+if (-not $UseNinja) { + Remove-Item Env:CUDAHOSTCXX -ErrorAction SilentlyContinue + Remove-Item Env:CMAKE_CUDA_HOST_COMPILER -ErrorAction SilentlyContinue + if ($env:CMAKE_GENERATOR -eq 'Ninja') { + Remove-Item Env:CMAKE_GENERATOR -ErrorAction SilentlyContinue + } +} + +# Ensure wheelhouse directories exist. +$Wheelhouse = Join-Path $RepoRoot "wheelhouse" +New-Item -ItemType Directory -Path $Wheelhouse -Force | Out-Null +${null} = New-Item -ItemType Directory -Path (Join-Path $RepoRoot 'wheelhouse_cu12') -Force +${null} = New-Item -ItemType Directory -Path (Join-Path $RepoRoot 'wheelhouse_cu13') -Force + +function Invoke-Cuda13NestedBuild { + <# + .SYNOPSIS + Run the nested Docker build for CUDA 13 when we are already inside a + CUDA 12 builder image. + + .DESCRIPTION + This routine launches a Docker devcontainer CUDA 13 build for the given + Python version by way of Docker-out-of-Docker (DooD) facilities. + #> + [CmdletBinding()] + param ( + [Parameter(Mandatory)] [string] $Cuda13Image, + [Parameter(Mandatory)] [string] $PyVersion, + [bool] $UseNinja = $false, + + [ValidateNotNullOrEmpty()] [string] $HostWorkspace = $env:HOST_WORKSPACE, + [ValidateNotNullOrEmpty()] [string] $ContainerWorkspace = $env:CONTAINER_WORKSPACE + ) + + # Validate required environment variables. + if (-not $HostWorkspace) { + throw "HOST_WORKSPACE env var is not set; required for DooD " + + "nested docker mounts on Windows." + } + if (-not $ContainerWorkspace) { + throw "CONTAINER_WORKSPACE env var is not set; required for " + + "DooD nested docker mounts on Windows." + } + + # Validate Docker CLI availability. + if (-not (Get-Command docker -ErrorAction SilentlyContinue)) { + throw "docker CLI not found in the devcontainer image (required for DooD)." + } + + Write-Host "Checking DooD connectivity..." 
+ $dockerVersionOutput = & docker version 2>&1 + $dockerExitCode = $LASTEXITCODE + $dockerVersionOutput | Out-Host + if ($dockerExitCode -ne 0) { + throw "DooD connectivity check failed (exit code $dockerExitCode). See Docker output above." + } + Write-Host "DooD appears to be working, continuing..." + + # Detect outer‑container resources so we can set sensible limits. + $os = Get-WmiObject -Class Win32_OperatingSystem + $totalGB = [math]::Floor($os.TotalVisibleMemorySize / 1MB) # KB -> GB + $procCount = [Environment]::ProcessorCount + + # Leave a little head‑room so the outer container doesn't starve + $memLimitGB = [math]::Max(2, [int]([math]::Floor($totalGB * 0.9))) + $cpuCount = [math]::Max(2, $procCount) + + Write-Host "Launching nested Docker for CUDA 13 build using image: $Cuda13Image" + $targetFile = Join-Path $ContainerWorkspace 'ci\windows\build_cuda_cccl_python.ps1' + $dockerArgs = @( + 'run', '--rm', '-i', + '--cpu-count', "$cpuCount", + '--memory', "${memLimitGB}g", + '--workdir', $ContainerWorkspace, + '--mount', "type=bind,source=$HostWorkspace,target=$ContainerWorkspace", + '--env', "py_version=$PyVersion", + '--env', "GITHUB_ACTIONS=$($env:GITHUB_ACTIONS)", + '--env', "GITHUB_RUN_ID=$($env:GITHUB_RUN_ID)", + '--env', "JOB_ID=$($env:JOB_ID)", + $Cuda13Image, + 'PowerShell.exe', '-NoLogo', '-NoProfile', '-ExecutionPolicy', 'Bypass', + '-File', $targetFile, + '-py-version', $PyVersion, + '-OnlyCudaMajor', '13', + '-SkipUpload' + ) + if ($UseNinja) { $dockerArgs += '-UseNinja' } + + Write-Host ("About to invoke: docker " + ($dockerArgs -join ' ')) + & docker @dockerArgs + if ($LASTEXITCODE -ne 0) { + throw 'Nested CUDA 13 wheel build failed' + } +} + +function Build-CudaCcclWheel { + <# + .SYNOPSIS + Perform the regular wheel build for a given CUDA major version. + + .DESCRIPTION + This routine is used to build both CUDA 12 and CUDA 13 based wheels, + and is called from normal "outer" Docker containers, as well as the + "inner" nested ones. 
+ #> + [CmdletBinding()] + param ( + [Parameter(Mandatory)] [ValidateSet('12', '13')] [string] $Major, + [Parameter(Mandatory)] [string] $RepoRoot, + [Parameter(Mandatory)] [string] $PythonExe, + [Parameter(Mandatory)] [string[]] $PipBaseConfigArgs, + [bool] $UseNinja = $false + ) + + # Resolve CUDA toolkit location for the requested major version. + $CudaPathForMajor = Resolve-CudaPathForMajor -Major $Major + if (-not $CudaPathForMajor) { + throw "CUDA Toolkit $Major not found. Ensure CUDA_PATH_V${Major}_* " + + "is set or matching toolkit is installed." + } + + $NvccForMajor = Join-Path $CudaPathForMajor 'bin/nvcc.exe' + if (-not (Test-Path $NvccForMajor)) { + throw "nvcc not found at $NvccForMajor" + } + + # Convert Windows paths to Unix‑style for CMake + $NvccUnix = Convert-ToUnixPath $NvccForMajor + $CudaUnix = Convert-ToUnixPath $CudaPathForMajor + + # Build the pip configuration arguments that inject the CUDA toolchain. + $pipConfigArgs = $PipBaseConfigArgs + @( + '-C', "cmake.define.CMAKE_CUDA_COMPILER=$NvccUnix", + '-C', "cmake.define.CUDAToolkit_ROOT=$CudaUnix" + ) + + $extra = "cu$Major" + # Use separate output directories for 12 vs 13. + $outDir = Join-Path $RepoRoot "wheelhouse_$extra" + + Write-Host "Building cuda-cccl wheel for CUDA $Major at $CudaPathForMajor..." + + # Run pip wheel to build the wheel. + $pythonArgs = @( + '-m', 'pip', 'wheel', + '-w', $outDir, + ".[${extra}]", + '-v' + ) + $pipConfigArgs + + Write-Host ("python " + ($pythonArgs -join ' ')) + & $PythonExe @pythonArgs + if ($LASTEXITCODE -ne 0) { + throw "Wheel build failed for CUDA $Major" + } + + # Normalise the wheel filename (append .cu12/.cu13) and prune duplicates. 
+ $builtWheel = Get-OnePathMatch -Path $outDir ` + -Pattern '^cuda_cccl-.*\.whl' ` + -File + if (-not $builtWheel) { + throw "Failed to locate built wheel in $outDir for CUDA $Major" + } + + $builtName = [System.IO.Path]::GetFileName($builtWheel) + if ($builtName -notmatch ".cu$Major\.whl$") { + $newName = ([System.IO.Path]::GetFileNameWithoutExtension($builtName)) ` + + ".cu$Major.whl" + Write-Host "Renaming wheel to: $newName" + Rename-Item -Path $builtWheel -NewName $newName -Force + } + + # Remove any stray wheels that lack the .cuXX suffix. + Get-ChildItem -Path $outDir -Filter 'cuda_cccl-*.whl' | + Where-Object { $_.Name -notmatch "\.cu$Major\.whl$" } | + ForEach-Object { + Write-Host "Removing duplicate wheel: $($_.FullName)" + Remove-Item -Force $_.FullName + } +} + +# Main build entry code. +Push-Location (Join-Path $RepoRoot 'python/cuda_cccl') +try { + foreach ($major in $CudaMajorsToBuild) { + + # Nested Docker build for CUDA 13 for when we are currently inside a + # CUDA 12 image. + if (-not $OnlyCudaMajor -and $major -eq '13' -and $Cuda13Image) { + Invoke-Cuda13NestedBuild ` + -Cuda13Image $Cuda13Image ` + -PyVersion $PyVersion ` + -UseNinja $UseNinja + + continue + } + + # Perform a normal build for the current major version. This may + # be invoked from either an "outer" or inner "nested" image. + Build-CudaCcclWheel ` + -Major $major ` + -RepoRoot $RepoRoot ` + -PythonExe $PythonExe ` + -PipBaseConfigArgs $pipBaseConfigArgs ` + -UseNinja $UseNinja + } +} +finally { + Pop-Location +} + + +# Merge the two major‑version wheels (if both were built). This will fail if +# either wheel can't be found. This only runs on the outer (non-nested) +# container image. 
+if ($DoMerge) { + + $Cu12Wheel = Get-OnePathMatch ` + -Path (Join-Path $RepoRoot 'wheelhouse_cu12') ` + -Pattern '^cuda_cccl-.*\.cu12\.whl' ` + -File + + $Cu13Wheel = Get-OnePathMatch ` + -Path (Join-Path $RepoRoot 'wheelhouse_cu13') ` + -Pattern '^cuda_cccl-.*\.cu13\.whl' ` + -File + + Write-Host "Found CUDA 12 wheel: $Cu12Wheel" + Write-Host "Found CUDA 13 wheel: $Cu13Wheel" + + Write-Host 'Merging CUDA wheels...' + & $PythonExe -m pip install wheel | Write-Host + if ($LASTEXITCODE -ne 0) { + throw 'Failed to install wheel for merging' + } + + $WheelhouseMerged = Join-Path $RepoRoot 'wheelhouse_merged' + ${null} = New-Item -ItemType Directory -Path $WheelhouseMerged -Force + + $mergePy = Join-Path $RepoRoot 'python/cuda_cccl/merge_cuda_wheels.py' + & $PythonExe $mergePy $Cu12Wheel $Cu13Wheel --output-dir $WheelhouseMerged + if ($LASTEXITCODE -ne 0) { + throw 'Merging wheels failed' + } + + # Clean up the per‑major directories and move the merged wheel into the + # final location. + Get-ChildItem $Wheelhouse -Filter '*.whl' | + ForEach-Object { + Remove-Item -Force $_.FullName + } + $MergedWheel = Get-OnePathMatch ` + -Path $WheelhouseMerged ` + -Pattern '^cuda_cccl-.*\.whl' ` + -File + Move-Item -Force $MergedWheel $Wheelhouse + + Remove-Item $WheelhouseMerged -Recurse -Force -ErrorAction SilentlyContinue + Remove-Item (Join-Path $RepoRoot 'wheelhouse_cu12') ` + -Recurse -Force -ErrorAction SilentlyContinue + Remove-Item (Join-Path $RepoRoot 'wheelhouse_cu13') ` + -Recurse -Force -ErrorAction SilentlyContinue + + Write-Host 'Final wheels in wheelhouse:' + Get-ChildItem $Wheelhouse -Filter '*.whl' | + ForEach-Object { + Write-Host " - $($_.Name)" + } +} + +# If it turns out we need delvewheel, we'd handle it here, after the merging +# of wheels. The two DLLs that seem like they might be problematic are +# msvcp140.dll, and dbghelp.dll. The former comes from llvmlite, upon which +# we depend.
Dbghelp.dll ships in C:\Windows\System32, but that will often +# be a much older version compared to the one used by Visual Studio. We only +# use one symbol from Dbghelp.dll: UnDecorateSymbolName, which is used by +# nvrtc. If we encounter weird issues with c.parallel jit compilation and +# nvrtc in the wild on Windows, an out-of-date Dbghelp.dll could possibly be +# the culprit. +# +# For now, though, it doesn't appear to be necessary. + +# Optionally upload the wheel artifact. +if ($env:GITHUB_ACTIONS -and -not $SkipUpload) { + Push-Location $RepoRoot + try { + Write-Host 'GITHUB_ACTIONS detected; uploading wheel artifact' + $wheelArtifactName = (& bash -lc "ci/util/workflow/get_wheel_artifact_name.sh").Trim() + if (-not $wheelArtifactName) { + throw 'Failed to resolve wheel artifact name' + } + Write-Host "Wheel artifact name: $wheelArtifactName" + + $uploadCmd = "ci/util/artifacts/upload.sh $wheelArtifactName 'wheelhouse/.*'" + & bash -lc $uploadCmd + if ($LASTEXITCODE -ne 0) { + throw 'Wheel artifact upload failed' + } + } + finally { + Pop-Location + } +} diff --git a/ci/windows/test_cuda_cccl_examples_python.ps1 b/ci/windows/test_cuda_cccl_examples_python.ps1 new file mode 100644 index 00000000000..0c108328822 --- /dev/null +++ b/ci/windows/test_cuda_cccl_examples_python.ps1 @@ -0,0 +1,27 @@ +Param( + [Parameter(Mandatory = $true)] + [Alias("py-version")] + [ValidatePattern("^\d+\.\d+$")] + [string]$PyVersion +) + +$ErrorActionPreference = "Stop" + +# Import shared helpers +Import-Module "$PSScriptRoot/build_common.psm1" +Import-Module "$PSScriptRoot/build_common_python.psm1" + +$python = Get-Python -Version $PyVersion +$cudaMajor = Get-CudaMajor + +$repoRoot = Get-RepoRoot + +${wheelPath} = Get-CudaCcclWheel +& $python -m pip install -U pip pytest pytest-xdist +& $python -m pip install "${wheelPath}[test-cu$cudaMajor]" + +Push-Location (Join-Path $repoRoot "python/cuda_cccl/tests") +try { + & $python -m pytest -n 6 test_examples.py +} +finally { 
Pop-Location } diff --git a/ci/windows/test_cuda_cccl_headers_python.ps1 b/ci/windows/test_cuda_cccl_headers_python.ps1 new file mode 100644 index 00000000000..04a6adacc28 --- /dev/null +++ b/ci/windows/test_cuda_cccl_headers_python.ps1 @@ -0,0 +1,27 @@ +Param( + [Parameter(Mandatory = $true)] + [Alias("py-version")] + [ValidatePattern("^\d+\.\d+$")] + [string]$PyVersion +) + +$ErrorActionPreference = "Stop" + +# Import shared helpers +Import-Module "$PSScriptRoot/build_common.psm1" +Import-Module "$PSScriptRoot/build_common_python.psm1" + +$python = Get-Python -Version $PyVersion +$cudaMajor = Get-CudaMajor + +$repoRoot = Get-RepoRoot + +${wheelPath} = Get-CudaCcclWheel +& $python -m pip install -U pip pytest pytest-xdist +& $python -m pip install "${wheelPath}[test-cu$cudaMajor]" + +Push-Location (Join-Path $repoRoot "python/cuda_cccl/tests") +try { + & $python -m pytest -n auto -v headers/ +} +finally { Pop-Location } diff --git a/ci/windows/test_cuda_compute_python.ps1 b/ci/windows/test_cuda_compute_python.ps1 new file mode 100644 index 00000000000..796d5128141 --- /dev/null +++ b/ci/windows/test_cuda_compute_python.ps1 @@ -0,0 +1,29 @@ +Param( + [Parameter(Mandatory = $true)] + [Alias("py-version")] + [ValidatePattern("^\d+\.\d+$")] + [string]$PyVersion +) + +$ErrorActionPreference = "Stop" + +# Import shared helpers +Import-Module "$PSScriptRoot/build_common.psm1" +Import-Module "$PSScriptRoot/build_common_python.psm1" + +$python = Get-Python -Version $PyVersion +$cudaMajor = Get-CudaMajor + +$repoRoot = Get-RepoRoot + +$wheelPath = Get-CudaCcclWheel + +& $python -m pip install -U pip pytest pytest-xdist +& $python -m pip install "$wheelPath[test-cu$cudaMajor]" + +Push-Location (Join-Path $repoRoot "python/cuda_cccl/tests") +try { + & $python -m pytest -n 6 -v compute/ -m "not large" + & $python -m pytest -n 0 -v compute/ -m "large" +} +finally { Pop-Location } diff --git a/ci/windows/test_cuda_coop_python.ps1 b/ci/windows/test_cuda_coop_python.ps1 new file 
mode 100644 index 00000000000..7eaa9c470b3 --- /dev/null +++ b/ci/windows/test_cuda_coop_python.ps1 @@ -0,0 +1,25 @@ +Param( + [Parameter(Mandatory = $true)] + [Alias("py-version")] + [ValidatePattern("^\d+\.\d+$")] + [string]$PyVersion +) + +$ErrorActionPreference = "Stop" + +# Import shared helpers +Import-Module "$PSScriptRoot/build_common.psm1" +Import-Module "$PSScriptRoot/build_common_python.psm1" + +$python = Get-Python -Version $PyVersion +$cudaMajor = Get-CudaMajor + +${wheelPath} = Get-CudaCcclWheel +& $python -m pip install -U pip pytest pytest-xdist +& $python -m pip install "${wheelPath}[test-cu$cudaMajor]" + +Push-Location (Join-Path (Get-RepoRoot) "python/cuda_cccl/tests") +try { + & $python -m pytest -n auto -v coop/ +} +finally { Pop-Location } diff --git a/pyproject.toml b/pyproject.toml index c287a1fa933..af5145543a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
[tool.ruff] target-version = "py310" @@ -19,6 +19,6 @@ extend-select = ["I"] skip = "./.git,./build,./CITATION.md" # ignore short words, and typename parameters like OffsetT ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" -ignore-words-list = "inout,imovable,optionN,aCount,quitted,Invocable,countr,unexpect,numer,euclidian,couldn,OffsetT,FromM,Collet" +ignore-words-list = "inout,imovable,optionN,aCount,quitted,Invocable,countr,unexpect,numer,euclidian,couldn,OffsetT,FromM,Collet,NotIn" builtin = "clear" quiet-level = 3 diff --git a/python/cuda_cccl/pyproject.toml b/python/cuda_cccl/pyproject.toml index f3d846d0a94..dfee7e1c395 100644 --- a/python/cuda_cccl/pyproject.toml +++ b/python/cuda_cccl/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "cuda-pathfinder>=1.2.3", "cuda-core", "numba-cuda>=0.20.0", + "typing_extensions", ] dynamic = ["version"] @@ -46,7 +47,6 @@ test-cu12 = [ "pytest", "pytest-xdist", "cupy-cuda12x", - "typing_extensions", "pytest-benchmark", ] test-cu13 = [ @@ -54,7 +54,6 @@ test-cu13 = [ "pytest", "pytest-xdist", "cupy-cuda13x", - "typing_extensions", "pytest-benchmark", ] diff --git a/python/cuda_cccl/tests/test_examples.py b/python/cuda_cccl/tests/test_examples.py index 6981f905001..3648027b99a 100644 --- a/python/cuda_cccl/tests/test_examples.py +++ b/python/cuda_cccl/tests/test_examples.py @@ -41,8 +41,9 @@ def discover_examples(): # Calculate the relative path from the tests directory rel_path = python_file.relative_to(tests_dir) - # Convert path to module name (e.g., "coop/examples/block/reduce.py" -> "coop.examples.block.reduce") - module_name = str(rel_path.with_suffix("")).replace("/", ".") + # Convert path to module name (OS-agnostic) + # Example: coop/examples/block/reduce.py -> coop.examples.block.reduce + module_name = ".".join(rel_path.with_suffix("").parts) # Extract category info for display parts = rel_path.parts From f6ba0c0f4bcae6014d872759dd57331cbfd6f94c Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Tue, 14 Oct 2025 
07:12:53 -0700 Subject: [PATCH 03/10] Fetch tags to ensure correct version is generated for cuda-cccl. --- .github/actions/workflow-run-job-windows/action.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/actions/workflow-run-job-windows/action.yml b/.github/actions/workflow-run-job-windows/action.yml index 849673a2e35..e710f4fb4e1 100644 --- a/.github/actions/workflow-run-job-windows/action.yml +++ b/.github/actions/workflow-run-job-windows/action.yml @@ -30,6 +30,8 @@ runs: with: path: ${{github.event.repository.name}} persist-credentials: false + fetch-depth: 0 + fetch-tags: true - name: Define and log job details shell: bash --noprofile --norc -euo pipefail {0} From b47ca7c88df016822fa41452a81a08fc6a40cff6 Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Tue, 14 Oct 2025 07:59:31 -0700 Subject: [PATCH 04/10] Remove the CI override. --- ci/matrix.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 9d0fac63f61..00870b3ff87 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -21,7 +21,6 @@ workflows: # args: '--preset libcudacxx-cpp20 --lit-tests "cuda/utility/basic_any.pass.cpp"' } # override: - - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], cxx: ['msvc'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4'} pull_request: # Old CTK: Oldest/newest supported host compilers: From 18bd2b7078d6057e9a183fcbbdd4421823d9ef1c Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Tue, 14 Oct 2025 09:52:57 -0700 Subject: [PATCH 05/10] PR Feedback: remove duplicate --env COMMAND. 
--- .github/actions/workflow-run-job-windows/action.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/actions/workflow-run-job-windows/action.yml b/.github/actions/workflow-run-job-windows/action.yml index e710f4fb4e1..1e3d1044777 100644 --- a/.github/actions/workflow-run-job-windows/action.yml +++ b/.github/actions/workflow-run-job-windows/action.yml @@ -200,7 +200,6 @@ runs: --isolation=process \ ${{ env.ENABLE_GPU }} \ --env COMMAND='& ${{inputs.command}}' \ - --env COMMAND='& ${{inputs.command}}' \ --env "ARTIFACT_ARCHIVES=${{env.ARTIFACT_ARCHIVES}}" \ --env "ARTIFACT_UPLOAD_REGISTERY=${{env.ARTIFACT_UPLOAD_REGISTERY}}" \ --env "ARTIFACT_UPLOAD_STAGE=${{env.ARTIFACT_UPLOAD_STAGE}}" \ From 43e33671381d09dc37a8876a91e7cf05132fc0c0 Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Tue, 14 Oct 2025 17:02:13 -0700 Subject: [PATCH 06/10] Add 'msvc' to appropriate pr/pr-lite/nightly/weekly jobs. --- ci/matrix.yaml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 00870b3ff87..686889d4281 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -56,16 +56,16 @@ workflows: - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'} - {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all', gpu: 'rtx2080', sm: 'gpu'} - {jobs: ['verify_codegen'], project: 'libcudacxx'} - # c.parallel -- pinned to gcc13 to match python - - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} + # c.parallel -- pinned to gcc13 on Linux to match python + - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']} + - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080', 'l4', 'h100']} # c.experimental.stf-- pinned to gcc13 to 
match python - {jobs: ['test'], project: 'cccl_c_stf', ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: 'cccl_c_stf', ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} - # Python -- pinned to gcc13 for consistency across CTK images - - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.13'], gpu: 'l4', cxx: 'gcc13'} - - {jobs: ['test'], project: 'python', ctk: ['12.0', '13.X'], py_version: ['3.10'], gpu: 'l4', cxx: 'gcc13'} - - {jobs: ['test'], project: 'python', py_version: '3.13', gpu: 'h100', cxx: 'gcc13'} + # Python -- pinned to gcc13 on Linux for consistency across CTK images + - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.13'], gpu: 'l4', cxx: ['gcc13', 'msvc']} + - {jobs: ['test'], project: 'python', ctk: ['12.0', '13.X'], py_version: ['3.10'], gpu: 'l4', cxx: ['gcc13', 'msvc']} + - {jobs: ['test'], project: 'python', py_version: '3.13', gpu: 'h100', cxx: ['gcc13', 'msvc']} # CCCL packaging: - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} - {jobs: ['test'], project: 'packaging', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} @@ -106,8 +106,8 @@ workflows: # stdpar - {project: 'stdpar', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'} # Python + support - - {project: 'python', jobs: ['test'], ctk: '13.X', py_version: '3.13', gpu: 'l4', cxx: 'gcc13'} - - {project: 'cccl_c_parallel', jobs: ['test'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080'], sm: 'gpu'} + - {project: 'python', jobs: ['test'], ctk: '13.X', py_version: '3.13', gpu: 'l4', cxx: ['gcc13', 'msvc']} + - {project: 'cccl_c_parallel', jobs: ['test'], ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080'], sm: 'gpu'} - {project: 'cccl_c_stf', jobs: ['test'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080'], sm: 'gpu'} # Packaging / install - {project: 'packaging', jobs: ['test'], ctk: '13.X', cxx: ['gcc', 'clang'], gpu: 
'rtx2080', sm: 'gpu'} @@ -175,9 +175,9 @@ workflows: # c.experimental.stf -- pinned to gcc13 to match python - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} - # Python -- pinned to gcc13 for consistency across CTK images - - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: 'gcc13'} - - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: 'gcc13'} + # Python -- pinned to gcc13 on Linux for consistency across CTK images + - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: ['gcc13', 'msvc']} + - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: ['gcc13', 'msvc']} # CCCL packaging: - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} - {jobs: ['test'], project: 'packaging', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} @@ -270,8 +270,8 @@ workflows: - {jobs: ['compute_sanitizer'], project: 'cub', std: 'max', gpu: 'rtxa6000', sm: 'gpu', cmake_options: '-DCMAKE_CUDA_FLAGS=-lineinfo'} python-wheels: - - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: 'gcc13'} - - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: 'gcc13'} + - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: ['gcc13', 'msvc']} + - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: ['gcc13', 'msvc']} - {jobs: ['test'], project: 'python', cpu: 'arm64', ctk: ['12.X', '13.X'], py_version: 
['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: 'gcc13'} From dcb59dcca24d69940d557e9de35f38e23564c1fd Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Wed, 15 Oct 2025 18:35:08 +0000 Subject: [PATCH 07/10] Update matrix. --- ci/matrix.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 686889d4281..37de86e0329 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -21,6 +21,8 @@ workflows: # args: '--preset libcudacxx-cpp20 --lit-tests "cuda/utility/basic_any.pass.cpp"' } # override: + - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: ['msvc']} + - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: ['msvc']} pull_request: # Old CTK: Oldest/newest supported host compilers: @@ -63,9 +65,9 @@ workflows: - {jobs: ['test'], project: 'cccl_c_stf', ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: 'cccl_c_stf', ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} # Python -- pinned to gcc13 on Linux for consistency across CTK images - - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.13'], gpu: 'l4', cxx: ['gcc13', 'msvc']} - {jobs: ['test'], project: 'python', ctk: ['12.0', '13.X'], py_version: ['3.10'], gpu: 'l4', cxx: ['gcc13', 'msvc']} - - {jobs: ['test'], project: 'python', py_version: '3.13', gpu: 'h100', cxx: ['gcc13', 'msvc']} + - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.13'], gpu: 'l4', cxx: ['gcc13', 'msvc']} + - {jobs: ['test'], project: 'python', py_version: '3.13', gpu: 'h100', cxx: 'gcc13'} # CCCL packaging: - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} - {jobs: ['test'], project: 'packaging', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} @@ -177,7 +179,7 @@ workflows: - 
{jobs: ['test'], project: ['cccl_c_stf'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} # Python -- pinned to gcc13 on Linux for consistency across CTK images - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: ['gcc13', 'msvc']} - - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: ['gcc13', 'msvc']} + - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: 'gcc13'} # CCCL packaging: - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} - {jobs: ['test'], project: 'packaging', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} @@ -253,8 +255,8 @@ workflows: - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} # Python -- pinned to gcc13 for consistency across CTK images - - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: 'gcc13'} - - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: 'gcc13'} + - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: ['gcc13', 'msvc']} + - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: ['gcc13', 'msvc']} # CCCL packaging: - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} - {jobs: ['test'], project: 'packaging', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} From 997612e8b7003fa89d0cb5f41bc9cdb3fe8cc040 Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Thu, 16 Oct 2025 13:36:09 -0700 Subject: [PATCH 08/10] Update ci/matrix.yaml Co-authored-by: 
Allison Piper --- ci/matrix.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 37de86e0329..0af9e85fd12 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -60,7 +60,9 @@ workflows: - {jobs: ['verify_codegen'], project: 'libcudacxx'} # c.parallel -- pinned to gcc13 on Linux to match python - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']} - - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080', 'l4', 'h100']} + # c.parallel -- pinned to gcc13 to match python + - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} + - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} # c.experimental.stf-- pinned to gcc13 to match python - {jobs: ['test'], project: 'cccl_c_stf', ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: 'cccl_c_stf', ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} From 4cdb8e8adcb280e31441cb12c6008b3c8b2af966 Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Fri, 17 Oct 2025 09:05:30 -0700 Subject: [PATCH 09/10] Remove overrides from matrix.yaml. 
--- ci/matrix.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 0af9e85fd12..3a1b548d2c1 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -21,8 +21,6 @@ workflows: # args: '--preset libcudacxx-cpp20 --lit-tests "cuda/utility/basic_any.pass.cpp"' } # override: - - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: ['msvc']} - - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: ['msvc']} pull_request: # Old CTK: Oldest/newest supported host compilers: From ff2c504433d9b345b45618c9ad85c667e7f9b5b8 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Fri, 17 Oct 2025 18:44:20 +0000 Subject: [PATCH 10/10] Fixup matrix --- ci/matrix.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 3a1b548d2c1..78851db1c6e 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -56,8 +56,6 @@ workflows: - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'} - {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all', gpu: 'rtx2080', sm: 'gpu'} - {jobs: ['verify_codegen'], project: 'libcudacxx'} - # c.parallel -- pinned to gcc13 on Linux to match python - - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']} # c.parallel -- pinned to gcc13 to match python - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} @@ -108,9 +106,9 @@ workflows: # stdpar - {project: 'stdpar', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'} # Python + support + - {project: 'cccl_c_parallel', jobs: ['test'], ctk: '13.X', cxx: 'gcc13', gpu: 'rtx2080', sm: 'gpu'} + - {project: 'cccl_c_stf', jobs: ['test'], ctk: '13.X', cxx: 'gcc13', 
gpu: 'rtx2080', sm: 'gpu'} - {project: 'python', jobs: ['test'], ctk: '13.X', py_version: '3.13', gpu: 'l4', cxx: ['gcc13', 'msvc']} - - {project: 'cccl_c_parallel', jobs: ['test'], ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080'], sm: 'gpu'} - - {project: 'cccl_c_stf', jobs: ['test'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080'], sm: 'gpu'} # Packaging / install - {project: 'packaging', jobs: ['test'], ctk: '13.X', cxx: ['gcc', 'clang'], gpu: 'rtx2080', sm: 'gpu'} - {project: 'packaging', jobs: ['install']}