
Commit a7246da

q10 authored and facebook-github-bot committed
Disable GenAI builds against CUDA 11.8 (#4173)
Summary:
X-link: facebookresearch/FBGEMM#1255

- Disable GenAI builds against CUDA 11.8, since it is no longer possible to support GenAI builds against CUDA 11.8.0 as of #4138

Pull Request resolved: #4173

Reviewed By: jiawenliu64

Differential Revision: D75229752

Pulled By: q10

fbshipit-source-id: e9626799d371ee2671f9062df1933d3caea65087
1 parent daf6b8b commit a7246da

File tree: 1 file changed (+23, -15 lines)

.github/workflows/fbgemm_gpu_ci_cuda.yml

Lines changed: 23 additions & 15 deletions
@@ -64,18 +64,21 @@ jobs:
       BUILD_ENV: build_binary
       BUILD_TARGET: ${{ matrix.host-machine.build-target }}
       BUILD_VARIANT: cuda
-      BUILD_CUDA_VERSION: ${{ matrix.cuda-version }}
+      BUILD_CUDA_VERSION: ${{ matrix.host-machine.cuda-version }}
     continue-on-error: true
     strategy:
       # Don't fast-fail all the other builds if one of the them fails
       fail-fast: false
       matrix:
         host-machine: [
-          { arch: x86, instance: "linux.24xlarge", build-target: "default" },
-          { arch: x86, instance: "linux.8xlarge.memory", build-target: "genai" },
+          { arch: x86, instance: "linux.24xlarge", build-target: "default", cuda-version: "11.8.0" },
+          { arch: x86, instance: "linux.24xlarge", build-target: "default", cuda-version: "12.6.3" },
+          { arch: x86, instance: "linux.24xlarge", build-target: "default", cuda-version: "12.8.0" },
+          # GenAI is unable to support 11.8.0 anymore as of https://github.com/pytorch/FBGEMM/pull/4138
+          { arch: x86, instance: "linux.8xlarge.memory", build-target: "genai", cuda-version: "12.6.3" },
+          { arch: x86, instance: "linux.8xlarge.memory", build-target: "genai", cuda-version: "12.8.0" },
         ]
         python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
-        cuda-version: [ "11.8.0", "12.6.3", "12.8.0" ]
         compiler: [ "gcc", "clang" ]

     steps:
@@ -106,18 +109,18 @@ jobs:
         run: . $PRELUDE; install_build_tools $BUILD_ENV

       - name: Install CUDA
-        run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}
+        run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.host-machine.cuda-version }}

       # Install via PIP to avoid defaulting to the CPU variant if the GPU variant of the day is not ready
       - name: Install PyTorch Nightly
-        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch_channel_version) || 'nightly' }} cuda/${{ matrix.cuda-version }}
+        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch_channel_version) || 'nightly' }} cuda/${{ matrix.host-machine.cuda-version }}

       - name: Collect PyTorch Environment Info
         if: ${{ success() || failure() }}
         run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

       - name: Install cuDNN
-        run: . $PRELUDE; install_cudnn $BUILD_ENV "$(pwd)/build_only/cudnn" ${{ matrix.cuda-version }}
+        run: . $PRELUDE; install_cudnn $BUILD_ENV "$(pwd)/build_only/cudnn" ${{ matrix.host-machine.cuda-version }}

       - name: Prepare FBGEMM_GPU Build
         run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
@@ -128,7 +131,7 @@ jobs:
       - name: Upload Built Wheel as GHA Artifact
         uses: actions/upload-artifact@v4
         with:
-          name: fbgemm_${{ matrix.host-machine.build-target }}_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
+          name: fbgemm_${{ matrix.host-machine.build-target }}_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.host-machine.cuda-version }}.whl
           path: fbgemm_gpu/dist/*.whl
           if-no-files-found: error

@@ -147,7 +150,7 @@ jobs:
       BUILD_ENV: build_binary
       BUILD_TARGET: ${{ matrix.build-target }}
       BUILD_VARIANT: cuda
-      BUILD_CUDA_VERSION: ${{ matrix.cuda-version }}
+      BUILD_CUDA_VERSION: ${{ matrix.build.cuda-version }}
       ENFORCE_CUDA_DEVICE: 1
     strategy:
       fail-fast: false
@@ -158,9 +161,14 @@ jobs:
           # https://hud.pytorch.org/metrics
           # { arch: x86, instance: "linux.gcp.a100" },
         ]
-        build-target: [ "default", "genai" ]
+        build: [
+          { build-target: "default", cuda-version: "11.8.0" },
+          { build-target: "default", cuda-version: "12.6.3" },
+          { build-target: "default", cuda-version: "12.8.0" },
+          { build-target: "genai", cuda-version: "12.6.3" },
+          { build-target: "genai", cuda-version: "12.8.0" },
+        ]
         python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
-        cuda-version: [ "11.8.0", "12.6.3", "12.8.0" ]
         # Specify exactly ONE CUDA version for artifact publish
         cuda-version-publish: [ "12.6.3" ]
         compiler: [ "gcc", "clang" ]
@@ -177,7 +185,7 @@ jobs:
         # Cannot upgrade to actions/download-artifact@v4 yet because GLIBC on the instance is too old
         uses: actions/download-artifact@v4
         with:
-          name: fbgemm_${{ matrix.build-target }}_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
+          name: fbgemm_${{ matrix.build.build-target }}_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.build.cuda-version }}.whl

       # Use PyTorch test infrastructure action - https://github.com/pytorch/test-infra/blob/main/.github/actions/setup-nvidia/action.yml
       - name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
@@ -203,11 +211,11 @@ jobs:
         run: . $PRELUDE; install_cxx_compiler $BUILD_ENV gcc

       - name: Install CUDA
-        run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}
+        run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.build.cuda-version }}

       # Install via PIP to avoid defaulting to the CPU variant if the GPU variant of the day is not ready
       - name: Install PyTorch Nightly
-        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch_channel_version) || 'nightly' }} cuda/${{ matrix.cuda-version }}
+        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch_channel_version) || 'nightly' }} cuda/${{ matrix.build.cuda-version }}

       - name: Collect PyTorch Environment Info
         if: ${{ success() || failure() }}
@@ -224,7 +232,7 @@ jobs:
         run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV

       - name: Push Wheel to PyPI
-        if: ${{ (github.event_name == 'schedule' && matrix.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish) }}
+        if: ${{ (github.event_name == 'schedule' && matrix.build.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.build.cuda-version == matrix.cuda-version-publish) }}
         env:
           PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
         run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
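
For readers less familiar with GitHub Actions matrices, here is a minimal, illustrative sketch of the pattern the diff above applies: instead of a standalone cuda-version axis (which cross-multiplies against every build target, so genai x 11.8.0 would still be generated), the CUDA version is folded into each matrix entry and only supported combinations are ever scheduled. The workflow name, runner, and echo step below are hypothetical; only the matrix shape mirrors the change in this commit.

# sketch.yml -- illustrative only, not part of the FBGEMM repo
name: matrix-sketch
on: workflow_dispatch

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # Each entry pins its own CUDA version; there is no separate
        # cuda-version axis, so "genai" + "11.8.0" is never generated.
        host-machine: [
          { build-target: "default", cuda-version: "11.8.0" },
          { build-target: "default", cuda-version: "12.6.3" },
          { build-target: "genai",   cuda-version: "12.6.3" },
        ]
    env:
      BUILD_CUDA_VERSION: ${{ matrix.host-machine.cuda-version }}
    steps:
      - name: Show selected combination
        run: echo "target=${{ matrix.host-machine.build-target }} cuda=${BUILD_CUDA_VERSION}"

With the old layout, the three cuda-version values cross-multiplied against both build targets; with the per-entry layout, 11.8.0 is simply listed only for the default target, which is how the commit disables GenAI builds on CUDA 11.8 without touching the default builds.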
