136 changes: 113 additions & 23 deletions .github/workflows/test-spyre.yml
@@ -1,29 +1,119 @@
 name: test-sypre
 
-on: pull_request
+on:
+  # Don't use pull_request.paths filter since this workflow is required for
+  # all pull requests on main irrespective of file type or location
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
+    paths:
+      - "tests/**/*.py"
+      - "vllm_spyre/**/*.py"
+      - pyproject.toml
+      - .github/workflows/test-spyre.yml
+  workflow_dispatch:
 
+env:
+  # force output to be colored for non-tty GHA runner shell
+  FORCE_COLOR: "1"
+  # prefer index for torch cpu version and match pip's extra index policy
+  UV_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"
+  UV_INDEX_STRATEGY: "unsafe-best-match"
+  # facilitate testing by building vLLM for CPU when needed
+  VLLM_CPU_DISABLE_AVX512: "true"
+  VLLM_TARGET_DEVICE: "cpu"
+  VLLM_PLUGINS: "spyre"
+  VLLM_SPYRE_TEST_MODEL_DIR: "${{ github.workspace }}/models"
+  HF_HUB_CACHE: "${{ github.workspace }}/.cache/huggingface/hub"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 jobs:
   test-spyre:
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: ["ubuntu-latest"]
+        python_version: ["3.12"]
+        vllm_version:
+          - name: "vLLM:v0.8.0"
+            repo: "git+https://github.com/vllm-project/vllm --tag v0.8.0"
+          - name: "vLLM:main"
+            repo: "git+https://github.com/vllm-project/vllm --branch main"
+          - name: "ODH:main"
+            repo: "git+https://github.com/opendatahub-io/vllm --branch main"
+        test_suite:
+          - name: "V0"
+            tests: "V0 and eager"
+            flags: "--timeout=300"
+          - name: "V1"
+            tests: "(V1- and eager) or test_sampling_metadata_in_input_batch"
+            flags: "--timeout=300 --forked"
+        exclude:
+          - vllm_version: { name: "vLLM:main" }
+            test_suite: { name: "V1" }
+          - vllm_version: { name: "ODH:main" }
+            test_suite: { name: "V1" }
+
+    name: "${{ matrix.test_suite.name }} (${{ matrix.vllm_version.name }})"
+
     steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - name: Build docker image
-        run: docker build . -t vllm-spyre -f Dockerfile.spyre
-      - name: Run Spyre tests within docker container
-        run: |
-          docker run -i --rm --entrypoint /bin/bash vllm-spyre -c '''
-          source vllm-spyre/.venv/bin/activate && \
-          python -c "from transformers import pipeline; pipeline(\"text-generation\", model=\"JackFram/llama-160m\")" && \
-          export VARIANT=$(ls /root/.cache/huggingface/hub/models--JackFram--llama-160m/snapshots/) && \
-          mkdir -p /models && \
-          ln -s /root/.cache/huggingface/hub/models--JackFram--llama-160m/snapshots/${VARIANT} /models/llama-194m && \
-          python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer(\"sentence-transformers/all-roberta-large-v1\")" && \
-          export VARIANT=$(ls /root/.cache/huggingface/hub/models--sentence-transformers--all-roberta-large-v1/snapshots/) && \
-          ln -s /root/.cache/huggingface/hub/models--sentence-transformers--all-roberta-large-v1/snapshots/${VARIANT} /models/all-roberta-large-v1 && \
-          export MASTER_PORT=12355 && \
-          export MASTER_ADDR=localhost && \
-          export DISTRIBUTED_STRATEGY_IGNORE_MODULES=WordEmbedding && \
-          cd vllm-spyre && \
-          python -m pytest --timeout=300 tests -v -k "V0 and eager" && \
-          python -m pytest --forked --timeout=300 tests -v -k "(V1- and eager) or test_sampling_metadata_in_input_batch"
-          '''
+      - name: "Checkout"
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: "Install PyTorch"
+        run: |
+          pip install torch=="2.5.1+cpu" --index-url https://download.pytorch.org/whl/cpu
+      - name: "Install uv"
+        uses: astral-sh/setup-uv@v5
+        with:
+          version: "latest"
+          python-version: ${{ matrix.python_version }}
+          enable-cache: true
+          ignore-nothing-to-cache: true
+          cache-dependency-glob: |
+            pyproject.toml
+      - name: "Install vLLM"
+        env:
+          VLLM_TARGET_DEVICE: empty
+        run: |
+          # Install markupsafe from PyPI, Torch CPU index only has wheels for Python 3.13
+          uv add markupsafe --index force_pypi_index=https://pypi.org/simple
+          uv add ${{ matrix.vllm_version.repo }}
+          uv venv .venv --system-site-packages
+          source .venv/bin/activate
+          uv pip install -v -e .
+          uv sync --frozen --group dev
+      - name: "Download models"
Collaborator:

Bonus points if we could cache these, but definitely not necessary for this PR.

Collaborator Author (@ckadner), Apr 8, 2025:

Minus points actually :-)

  • the download time from GHA cache is about equal to the download time from HF using the Python processes
    • restoring from GHA cache: ~21s
    • downloading from HF: ~19s
  • the two models take up about 1.8 GB of cache (against the 10 GB per-repository limit)

GHA cache makes the most sense for operations that cost a lot of compute time, not when the time is spent on downloads.

Collaborator Author (@ckadner), Apr 8, 2025:

I can speed up the HF download times by a few seconds by running the two Python processes in "parallel":

      - name: "Download models"
        run: |
          mkdir -p "${VLLM_SPYRE_TEST_MODEL_DIR}"
          download_jackfram_llama() {
            python -c "from transformers import pipeline; pipeline('text-generation', model='JackFram/llama-160m')"
            VARIANT=$(ls "${HF_HUB_CACHE}/models--JackFram--llama-160m/snapshots/")
            ln -s "${HF_HUB_CACHE}/models--JackFram--llama-160m/snapshots/${VARIANT}" "${VLLM_SPYRE_TEST_MODEL_DIR}/llama-194m"
          }
          download_roberta_large() {
            python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-roberta-large-v1')"
            VARIANT=$(ls "${HF_HUB_CACHE}/models--sentence-transformers--all-roberta-large-v1/snapshots/")
            ln -s "${HF_HUB_CACHE}/models--sentence-transformers--all-roberta-large-v1/snapshots/${VARIANT}" "${VLLM_SPYRE_TEST_MODEL_DIR}/all-roberta-large-v1"
          }
          download_jackfram_llama &
          download_roberta_large &
          wait
  • in sequence: 22s
  • in parallel: 16s

Collaborator:

Ah, nice!

I was thinking more along the lines of reliability rather than speed here, since the upstream vLLM CI downloads tons of models in parallel from HF and often flakes out when a download fails. But this test suite is still small enough that it's probably fine to keep pulling from HF for now. We can always switch to the GHA cache if it becomes a problem.

Collaborator Author (@ckadner):

I see your point :-)

https://github.com/vllm-project/vllm-spyre/actions/runs/14342866903/job/40206277191?pr=70#step:6:34

huggingface_hub.errors.HfHubHTTPError: 403 Forbidden: None.
Cannot access content at: https://huggingface.co/JackFram/llama-160m/resolve/main/config.json.
Make sure your token has the correct permissions.

I will do the hub cache
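
For reference, a minimal sketch of what caching the hub directory with actions/cache could look like; the step placement and the cache key below are illustrative assumptions, not the exact change that was pushed:

      # Restore/save the Hugging Face hub cache between runs (illustrative sketch).
      # Keep an eye on size: GHA allows roughly 10 GB of cache per repository,
      # so this only works while the test models stay small.
      - name: "Cache HF hub models"
        uses: actions/cache@v4
        with:
          path: ${{ env.HF_HUB_CACHE }}
          key: hf-hub-cache-${{ runner.os }}

With the combined actions/cache action, the cache is saved automatically in a post-job step whenever the primary key was not found during restore.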

Collaborator:

oh lol, that was fast!

And of course, any comments about the limitations would be great, so the next maintainer knows not to try to stick a 7 GB model in here.

Collaborator Author (@ckadner):

@joerunde -- took me a bit to get cache updates to work properly with immutable caches. I pushed another commit that should:

  • only create cache blobs for one of the matrix jobs
  • not create cache blobs for PR branches
  • update cache blobs on push to main when new models get added or old ones are removed
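
A restore/save split along these lines could implement that; this is only an illustrative sketch (step names, cache key, and the matrix condition are assumptions, not the actual commit):

      # Every job restores the model cache. Cache entries are immutable, so the key
      # includes a hash of the workflow file: when the model list changes, the key
      # changes and a fresh blob can be saved under the new key.
      - name: "Restore HF hub cache"
        id: hf-cache
        uses: actions/cache/restore@v4
        with:
          path: ${{ env.HF_HUB_CACHE }}
          key: hf-hub-cache-${{ runner.os }}-${{ hashFiles('.github/workflows/test-spyre.yml') }}
          restore-keys: |
            hf-hub-cache-${{ runner.os }}-

      # ... download any models the restore missed ...

      # Only one matrix job, and only on push to main (not on PR branches), saves a
      # new blob, and only when the exact key was not already present.
      - name: "Save HF hub cache"
        if: >-
          github.event_name == 'push' &&
          steps.hf-cache.outputs.cache-hit != 'true' &&
          matrix.vllm_version.name == 'vLLM:main' &&
          matrix.test_suite.name == 'V0'
        uses: actions/cache/save@v4
        with:
          path: ${{ env.HF_HUB_CACHE }}
          key: ${{ steps.hf-cache.outputs.cache-primary-key }}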

+        run: |
+          mkdir -p "${VLLM_SPYRE_TEST_MODEL_DIR}"
+          python -c "from transformers import pipeline; pipeline(\"text-generation\", model=\"JackFram/llama-160m\")"
+          VARIANT=$(ls "${HF_HUB_CACHE}/models--JackFram--llama-160m/snapshots/")
+          ln -s "${HF_HUB_CACHE}/models--JackFram--llama-160m/snapshots/${VARIANT}" "${VLLM_SPYRE_TEST_MODEL_DIR}/llama-194m"
+          python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer(\"sentence-transformers/all-roberta-large-v1\")"
+          VARIANT=$(ls "${HF_HUB_CACHE}/models--sentence-transformers--all-roberta-large-v1/snapshots/")
+          ln -s "${HF_HUB_CACHE}/models--sentence-transformers--all-roberta-large-v1/snapshots/${VARIANT}" "${VLLM_SPYRE_TEST_MODEL_DIR}/all-roberta-large-v1"
+      - name: "Run tests"
+        env:
+          MASTER_PORT: 12355
+          MASTER_ADDR: localhost
+          DISTRIBUTED_STRATEGY_IGNORE_MODULES: WordEmbedding
+        run: |
+          source .venv/bin/activate
+          uv run pytest ${{ matrix.test_suite.flags }} \
+            tests -v -k "${{ matrix.test_suite.tests }}"
1 change: 1 addition & 0 deletions pyproject.toml
@@ -102,6 +102,7 @@ use_parentheses = true
 skip_gitignore = true
 
 [tool.pytest.ini_options]
+pythonpath = ["."]
 markers = [
     "skip_global_cleanup",
     "core_model: enable this model test in each PR instead of only nightly",