vllm-project · ckadner · Mar 24, 2025 · Mar 27, 2025 · Mar 28, 2025 · Mar 28, 2025
@@ -0,0 +1,8 @@
+# Exclude the .git folder so that changes to repository metadata do not invalidate Docker layer caches
+.git
+
+# Exclude files not required in the build image (NOTE(review): `*.md` also drops README.md; confirm packaging does not read it)
+.github
+examples
+tools
+*.md
@@ -1,29 +1,142 @@
 name: test-sypre
 
-on: pull_request
+on:
+  workflow_dispatch:  # allows the workflow to be started manually
+  push:
+    paths-ignore:
+      - "**.md"  # skip runs for changes that only touch Markdown files
+  pull_request:
+    paths-ignore:
+      - "**.md"  # same Markdown-only exclusion as for pushes
+
+defaults:
+  run:
+    shell: bash  # all `run` steps in this workflow are executed with bash
+
+env:
+  TEST_IMAGE_NAME: "vllm-spyre"  # tag of the image built by `build` and run by the test jobs
 
 jobs:
-  test-spyre:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      packages: write  # push the cache image and delete old package versions
+      contents: read  # check out the repository
+    env:
+      CACHE_IMAGE: "ghcr.io/vllm-project/vllm-spyre:test-cache"
+      CACHE_PACKAGE_NAME: "vllm-spyre"
+      CACHE_REGISTRY: "ghcr.io"
+
+    steps:
+      - name: "Checkout"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: "Setup Docker Buildx"
+        uses: docker/setup-buildx-action@v3
+
+      - name: "Log in to cache image container registry"
+        uses: docker/login-action@v3
+        if: github.event_name != 'pull_request'
+        with:
+          registry: ${{ env.CACHE_REGISTRY }}
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: "Set build cache target"
+        run: |
+          # Only a push to `main` (e.g. a merged PR) publishes a new registry cache image
+          # containing all layers (mode=max).
+          # Every other event (PRs, pushes to other branches, manual runs) uses
+          # `type=inline`, i.e. no external cache is written. This keeps concurrent PRs
+          # from overwriting ("ping-ponging") the `main` cache image and avoids exceeding
+          # GitHub package usage and traffic limits with per-branch cache images.
+          # NOTE 2025/03/24: the GHA cache (which would isolate cached layers by PR/branch)
+          # appears to have issues: `cache-to: type=gha,mode=min` cannot be combined with
+          # `cache-from: type=registry,...,mode=max`, and `cache-to: type=gha,mode=max` takes
+          # longer than an uncached build and exhausts the GHA cache size.
+          # Default GITHUB_* variables are read directly so no expression is expanded into the script.
+          if [ "$GITHUB_EVENT_NAME" == "push" ] && [ "$GITHUB_REF" == "refs/heads/main" ]
+          then
+              CACHE_TO="type=registry,ref=${{ env.CACHE_IMAGE }},mode=max"
+          else
+              CACHE_TO="type=inline"
+          fi
+          echo "CACHE_TO=$CACHE_TO" >> "$GITHUB_ENV"
+
+      - name: "Build test image"
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.spyre
+          target: "spyre-tests"
+          tags: ${{ env.TEST_IMAGE_NAME }}
+          cache-from: |
+              type=gha
+              type=registry,ref=${{ env.CACHE_IMAGE }}
+          cache-to: ${{ env.CACHE_TO }}
+          outputs: type=docker,dest=${{ runner.temp }}/test_image.tar  # tarball handed to the test jobs
+
+      - name: "Upload test image"
+        uses: actions/upload-artifact@v4
+        with:
+          name: "test-image"
+          path: ${{ runner.temp }}/test_image.tar
+          retention-days: 1
+
+      - name: "Cleanup old cache images"
+        uses: actions/delete-package-versions@v5
+        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
+        with:
+          package-name: ${{ env.CACHE_PACKAGE_NAME }}
+          package-type: container
+          delete-only-untagged-versions: true
+
+  test-spyre-v0:
+    runs-on: ubuntu-latest
+    needs: build  # consumes the "test-image" artifact uploaded by the build job
+    steps:
+    - name: "Checkout"
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+    - name: "Download test image"
+      uses: actions/download-artifact@v4
+      with:
+        name: "test-image"
+        path: ${{ runner.temp }}  # test_image.tar is loaded from this directory below
+
+    - name: "Load test image"
+      run: |
+        docker load --input ${{ runner.temp }}/test_image.tar
+        docker image ls -a
+
+    - name: Run Spyre tests in docker container (V0 and eager)
+      run: |
+        docker run -i --rm --entrypoint /bin/bash ${{ env.TEST_IMAGE_NAME }} -c '''
+          cd vllm-spyre && \
+          python -m pytest --timeout=300  tests -v -k "V0 and eager"
+        '''
+
+  test-spyre-v1:
     runs-on: ubuntu-latest
+    needs: build  # consumes the "test-image" artifact uploaded by the build job
     steps:
-    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-    - name: Build docker image
-      run: docker build . -t vllm-spyre -f Dockerfile.spyre
-    - name: Run Spyre tests within docker container
+    - name: "Checkout"
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+    - name: "Download test image"
+      uses: actions/download-artifact@v4
+      with:
+        name: "test-image"
+        path: ${{ runner.temp }}  # test_image.tar is loaded from this directory below
+
+    - name: "Load test image"
+      run: |
+        docker load --input ${{ runner.temp }}/test_image.tar
+        docker image ls -a
+
+    - name: Run Spyre tests in docker container (V1 and eager)
       run: |
-        docker run -i --rm --entrypoint /bin/bash vllm-spyre -c '''
-          pip install pytest sentence-transformers pytest-timeout pytest-forked && \
-          python -c "from transformers import pipeline; pipeline(\"text-generation\", model=\"JackFram/llama-160m\")" && \
-          export VARIANT=$(ls /root/.cache/huggingface/hub/models--JackFram--llama-160m/snapshots/) && \
-          mkdir -p /models && \
-          ln -s /root/.cache/huggingface/hub/models--JackFram--llama-160m/snapshots/${VARIANT} /models/llama-194m && \
-          python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer(\"sentence-transformers/all-roberta-large-v1\")" && \
-          export VARIANT=$(ls /root/.cache/huggingface/hub/models--sentence-transformers--all-roberta-large-v1/snapshots/) && \
-          ln -s /root/.cache/huggingface/hub/models--sentence-transformers--all-roberta-large-v1/snapshots/${VARIANT} /models/all-roberta-large-v1 && \
-          export MASTER_PORT=12355 && \
-          export MASTER_ADDR=localhost && \
-          export DISTRIBUTED_STRATEGY_IGNORE_MODULES=WordEmbedding && \
+        docker run -i --rm --entrypoint /bin/bash ${{ env.TEST_IMAGE_NAME }} -c '''
           cd vllm-spyre && \
-          python -m pytest --timeout=300  tests -v -k "V0 and eager" && \
           python -m pytest --forked --timeout=300  tests -v -k "(V1- and eager) or test_sampling_metadata_in_input_batch"
         '''
@@ -1,21 +1,35 @@
-# Global Args #################################################################
+## Global Args #################################################################
 ARG BASE_UBI_IMAGE_TAG=9.4
 ARG PYTHON_VERSION=3.12
 
-# Base Layer ##################################################################
+
+## Base Layer ##################################################################
 FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base
+
 ARG PYTHON_VERSION
 ENV PYTHON_VERSION=${PYTHON_VERSION}
 WORKDIR /workspace
 
-# Install some basic utilities ##################################################################
-RUN microdnf update -y && microdnf install -y \
-    python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel git vim gcc g++ kmod\
+# Install Python (devel, pip, wheel), git, vim, compilers and kmod, then clean the package cache
+RUN microdnf update -y \
+    && microdnf install -y \
+        python${PYTHON_VERSION}-devel \
+        python${PYTHON_VERSION}-pip \
+        python${PYTHON_VERSION}-wheel \
+        git \
+        vim \
+        gcc \
+        g++ \
+        kmod \
     && microdnf clean all
 RUN ln -sf $(which python${PYTHON_VERSION}) /usr/bin/python && \
     ln -sf $(which pip${PYTHON_VERSION}) /usr/bin/pip
 
-# Download and install vllm ###########################################################
+
+## vLLM Base ###################################################################
+FROM base AS vllm-base
+
+# Clone vLLM from GitHub and install it from source
 RUN git clone --depth 1 https://github.com/vllm-project/vllm.git \
     && cd vllm \
     && git fetch --tags \
@@ -26,10 +40,43 @@ RUN git clone --depth 1 https://github.com/vllm-project/vllm.git \
     && pip install -r requirements/build.txt \
     && SETUPTOOLS_SCM_PRETEND_VERSION=0.8.0 VLLM_TARGET_DEVICE=empty pip install --verbose . --no-build-isolation
 
-# Install vllm Spyre plugin ##################################################################
+
+## Spyre Base ##################################################################
+FROM vllm-base AS spyre-base
+
+# Copy the build context into the image and install the vLLM Spyre plugin in editable mode
 RUN mkdir /workspace/vllm-spyre
 COPY . /workspace/vllm-spyre
 RUN cd /workspace/vllm-spyre && pip install -v -e .
 ENV VLLM_PLUGINS=spyre
 
-CMD ["/bin/bash"]
+
+## Spyre Tests #################################################################
+FROM spyre-base AS spyre-tests
+
+# Environment variables needed to run the tests
+ENV MASTER_ADDR=localhost \
+    MASTER_PORT=12355 \
+    DISTRIBUTED_STRATEGY_IGNORE_MODULES=WordEmbedding
+
+# Install test dependencies (no pip cache, to keep the exported test image small)
+RUN pip install --no-cache-dir \
+        sentence-transformers \
+        pytest \
+        pytest-timeout \
+        pytest-forked
+
+# Download the test models into the image and symlink their snapshots under /models
+RUN mkdir -p /models \
+    && python -c "from transformers import pipeline; pipeline('text-generation', model='JackFram/llama-160m')" \
+    && export VARIANT=$(ls /root/.cache/huggingface/hub/models--JackFram--llama-160m/snapshots/) \
+    && ln -s /root/.cache/huggingface/hub/models--JackFram--llama-160m/snapshots/${VARIANT} /models/llama-194m \
+    && python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-roberta-large-v1')" \
+    && export VARIANT=$(ls /root/.cache/huggingface/hub/models--sentence-transformers--all-roberta-large-v1/snapshots/) \
+    && ln -s /root/.cache/huggingface/hub/models--sentence-transformers--all-roberta-large-v1/snapshots/${VARIANT} /models/all-roberta-large-v1
+
+
+## Spyre Release ###############################################################
+FROM spyre-base AS spyre-release
+
+CMD ["/bin/bash"]