
Commit 0e71d65

ci: integrate vLLM inference tests with GitHub Actions workflows
Add vLLM provider support to the integration test CI workflows alongside the existing Ollama support. Configure provider-specific test execution: vLLM runs only the inference-specific tests (excluding vision tests), while Ollama continues to run the full test suite. This enables CI coverage of both inference providers while keeping the vLLM footprint small; it can be expanded later if it proves not to be too disruptive.

Signed-off-by: Derek Higgins <[email protected]>
1 parent e189289 commit 0e71d65

File tree

3 files changed (+19 lines, -7 lines)


.github/actions/run-and-record-tests/action.yml

Lines changed: 4 additions & 2 deletions
@@ -68,7 +68,8 @@ runs:
           echo "New recordings detected, committing and pushing"
           git add tests/integration/recordings/
 
-          git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
+          git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
+
           git fetch origin ${{ github.ref_name }}
           git rebase origin/${{ github.ref_name }}
           echo "Rebased successfully"
@@ -82,7 +83,8 @@ runs:
       if: ${{ always() }}
       shell: bash
       run: |
-        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
+        sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
 
     - name: Upload logs
       if: ${{ always() }}
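
The redirection change is the key detail here: 2>&1 captures a container's stderr along with its stdout, and || true keeps the step from failing when a container is absent (for example, there is no vllm container on an Ollama-only run). Below is a minimal sketch of the same pattern as a standalone workflow step; the step name and log file names are illustrative, not taken from this action:

- name: Dump container logs
  if: ${{ always() }}
  shell: bash
  run: |
    # Redirect stderr into the log file and tolerate missing containers
    sudo docker logs ollama > ollama.log 2>&1 || true
    sudo docker logs vllm > vllm.log 2>&1 || true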

.github/workflows/integration-tests.yml

Lines changed: 10 additions & 5 deletions
@@ -21,7 +21,6 @@ on:
   schedule:
     # If changing the cron schedule, update the provider in the test-matrix job
     - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
-    - cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC
   workflow_dispatch:
     inputs:
       test-all-client-versions:
@@ -39,7 +38,6 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-
   run-replay-mode-tests:
     runs-on: ubuntu-latest
     name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }}
@@ -48,12 +46,19 @@ jobs:
       fail-fast: false
       matrix:
         client-type: [library, server]
-        # Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
-        setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        suite: [base, vision]
+        include:
+          # ollama setup runs both base and vision suites
+          - setup: ollama
+            suite: base
+          - setup: ollama
+            suite: vision
+          # vllm setup runs only base-vllm-subset suite
+          - setup: vllm
+            suite: base-vllm-subset
+
 
     steps:
       - name: Checkout repository
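
The intent of the include list is to pin each setup to its suites rather than running a full setup-by-suite cross-product: ollama keeps base and vision, while vllm gets only base-vllm-subset. As a standalone illustration of that pairing idea, a matrix built purely from include entries runs exactly one job per entry; the sketch below is a hypothetical minimal workflow, not part of this change:

name: matrix-pairing-sketch
on: workflow_dispatch

jobs:
  demo:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # With no other matrix axes, each include entry becomes exactly one job
        include:
          - setup: ollama
            suite: base
          - setup: ollama
            suite: vision
          - setup: vllm
            suite: base-vllm-subset
    steps:
      - name: Show the setup/suite pairing for this job
        run: echo "setup=${{ matrix.setup }} suite=${{ matrix.suite }}"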

tests/integration/suites.py

Lines changed: 5 additions & 0 deletions
@@ -106,6 +106,11 @@ class Setup(BaseModel):
         roots=base_roots,
         default_setup="ollama",
     ),
+    "base-vllm-subset": Suite(
+        name="base-vllm-subset",
+        roots=["tests/integration/inference"],
+        default_setup="vllm",
+    ),
     "responses": Suite(
         name="responses",
         roots=["tests/integration/responses"],
