mattf
diff --git a/‎.github/actions/setup-vllm/action.yml‎
Lines changed: 27 additions & 0 deletions b/‎.github/actions/setup-vllm/action.yml‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎.github/dependabot.yml‎
Lines changed: 0 additions & 2 deletions b/‎.github/dependabot.yml‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎.github/workflows/README.md‎
Lines changed: 22 additions & 0 deletions b/‎.github/workflows/README.md‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎.github/workflows/changelog.yml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/changelog.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/coverage-badge.yml‎
Lines changed: 5 additions & 0 deletions b/‎.github/workflows/coverage-badge.yml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎.github/workflows/install-script-ci.yml‎
Lines changed: 16 additions & 4 deletions b/‎.github/workflows/install-script-ci.yml‎
Lines changed: 16 additions & 4 deletions
diff --git a/‎.github/workflows/integration-auth-tests.yml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/integration-auth-tests.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/integration-sql-store-tests.yml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/integration-sql-store-tests.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/integration-tests.yml‎
Lines changed: 48 additions & 14 deletions b/‎.github/workflows/integration-tests.yml‎
Lines changed: 48 additions & 14 deletions
diff --git a/‎.github/workflows/integration-vector-io-tests.yml‎
Lines changed: 3 additions & 1 deletion b/‎.github/workflows/integration-vector-io-tests.yml‎
Lines changed: 3 additions & 1 deletion
@@ -0,0 +1,27 @@
+name: Setup VLLM
+description: Start VLLM
+runs:
+  using: "composite"
+  steps:
+    - name: Start VLLM
+      shell: bash
+      run: |
+        # Start the vllm container detached; arguments after the image name go to the container
+        docker run -d \
+          --name vllm \
+          -p 8000:8000 \
+          --privileged=true \
+          quay.io/higginsd/vllm-cpu:65393ee064 \
+          --host 0.0.0.0 \
+          --port 8000 \
+          --enable-auto-tool-choice \
+          --tool-call-parser llama3_json \
+          --model /root/.cache/Llama-3.2-1B-Instruct \
+          --served-model-name meta-llama/Llama-3.2-1B-Instruct
+
+        # Poll the health endpoint every 5 seconds; fail the step if it is not up within 900 seconds
+        echo "Waiting for vllm to be ready..."
+        timeout 900 bash -c 'until curl -f http://localhost:8000/health; do
+          echo "Waiting for vllm..."
+          sleep 5
+        done'
@@ -14,8 +14,6 @@ updates:
     schedule:
       interval: "weekly"
       day: "saturday"
-    # ignore all non-security updates: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#open-pull-requests-limit
-    open-pull-requests-limit: 0
     labels:
       - type/dependencies
       - python
 
@@ -0,0 +1,22 @@
+# Llama Stack CI
+
+Llama Stack uses GitHub Actions for Continuous Integration (CI). The table below lists each CI workflow in the project and its purpose.
+
+| Name | File | Purpose |
+| ---- | ---- | ------- |
+| Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md |
+| Coverage Badge | [coverage-badge.yml](coverage-badge.yml) | Creates PR for updating the code coverage badge |
+| Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
+| Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
+| SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
+| Integration Tests | [integration-tests.yml](integration-tests.yml) | Run the integration test suite with Ollama |
+| Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
+| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
+| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
+| Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
+| Check semantic PR titles | [semantic-pr.yml](semantic-pr.yml) | Ensure that PR titles follow the conventional commit spec |
+| Close stale issues and PRs | [stale_bot.yml](stale_bot.yml) | Run the Stale Bot action |
+| Test External Providers Installed via Module | [test-external-provider-module.yml](test-external-provider-module.yml) | Test External Provider installation via Python module |
+| Test External API and Providers | [test-external.yml](test-external.yml) | Test the External API and Provider mechanisms |
+| Unit Tests | [unit-tests.yml](unit-tests.yml) | Run the unit test suite |
+| Update ReadTheDocs | [update-readthedocs.yml](update-readthedocs.yml) | Update the Llama Stack ReadTheDocs site |
@@ -1,5 +1,7 @@
 name: Update Changelog
 
+run-name: Creates PR for updating the CHANGELOG.md
+
 on:
   release:
     types: [published, unpublished, created, edited, deleted, released]
 
@@ -1,5 +1,7 @@
 name: Coverage Badge
 
+run-name: Creates PR for updating the code coverage badge
+
 on:
   push:
     branches: [ main ]
@@ -15,6 +17,9 @@ on:
 
 jobs:
   unit-tests:
+    permissions:
+      contents: write  # for peter-evans/create-pull-request to create branch
+      pull-requests: write  # for peter-evans/create-pull-request to create a PR
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
 
@@ -1,5 +1,7 @@
 name: Installer CI
 
+run-name: Test the installation script
+
 on:
   pull_request:
     paths:
@@ -17,10 +19,20 @@ jobs:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
       - name: Run ShellCheck on install.sh
         run: shellcheck scripts/install.sh
-  smoke-test:
-    needs: lint
+  smoke-test-on-dev:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
+      - name: Checkout repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Install dependencies
+        uses: ./.github/actions/setup-runner
+
+      - name: Build a single provider
+        run: |
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template starter --image-type container --image-name test
+
       - name: Run installer end-to-end
-        run: ./scripts/install.sh
+        run: |
+          # Assumes the image built by the previous step is the first one `docker images` lists
+          IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
+          # Quote the expansion so the value always reaches install.sh as a single argument
+          ./scripts/install.sh --image "$IMAGE_ID"
@@ -1,5 +1,7 @@
 name: Integration Auth Tests
 
+run-name: Run the integration test suite with Kubernetes authentication
+
 on:
   push:
     branches: [ main ]
 
@@ -1,5 +1,7 @@
 name: SqlStore Integration Tests
 
+run-name: Run the integration test suite with SqlStore
+
 on:
   push:
     branches: [ main ]
 
@@ -1,5 +1,7 @@
 name: Integration Tests
 
+run-name: Run the integration test suite with Ollama
+
 on:
   push:
     branches: [ main ]
@@ -14,13 +16,19 @@ on:
       - '.github/workflows/integration-tests.yml' # This workflow
       - '.github/actions/setup-ollama/action.yml'
   schedule:
-    - cron: '0 0 * * *'  # Daily at 12 AM UTC
+    # If changing the cron schedule, update the provider in the test-matrix job
+    - cron: '0 0 * * *'  # (test latest client) Daily at 12 AM UTC
+    - cron: '1 0 * * 0'  # (test vllm) Weekly on Sunday at 12:01 AM UTC
   workflow_dispatch:
     inputs:
       test-all-client-versions:
         description: 'Test against both the latest and published versions'
         type: boolean
         default: false
+      test-provider:
+        description: 'Test against a specific provider'
+        type: string
+        default: 'ollama'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -53,8 +61,17 @@ jobs:
       matrix:
         test-type: ${{ fromJson(needs.discover-tests.outputs.test-type) }}
         client-type: [library, server]
+        # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
+        provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
         python-version: ["3.12", "3.13"]
-        client-version: ${{ (github.event_name == 'schedule' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        # Test published and latest clients on the daily schedule or on request; the string must match the daily cron entry exactly
+        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        exclude: # TODO: look into why these tests are failing and fix them
+          - provider: vllm
+            test-type: safety
+          - provider: vllm
+            test-type: post_training
+          - provider: vllm
+            test-type: tool_runtime
 
     steps:
       - name: Checkout repository
@@ -67,8 +84,13 @@ jobs:
           client-version: ${{ matrix.client-version }}
 
       - name: Setup ollama
+        if: ${{ matrix.provider == 'ollama' }}
         uses: ./.github/actions/setup-ollama
 
+      - name: Setup vllm
+        if: ${{ matrix.provider == 'vllm' }}
+        uses: ./.github/actions/setup-vllm
+
       - name: Build Llama Stack
         run: |
           uv run llama stack build --template ci-tests --image-type venv
@@ -81,10 +103,6 @@ jobs:
 
       - name: Run Integration Tests
         env:
-          OLLAMA_INFERENCE_MODEL: "llama3.2:3b-instruct-fp16" # for server tests
-          ENABLE_OLLAMA: "ollama" # for server tests
-          OLLAMA_URL: "http://0.0.0.0:11434"
-          SAFETY_MODEL: "llama-guard3:1b"
           LLAMA_STACK_CLIENT_TIMEOUT: "300" # Increased timeout for eval operations
         # Use 'shell' to get pipefail behavior
         # https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
@@ -96,12 +114,27 @@ jobs:
           else
             stack_config="server:ci-tests"
           fi
+
+          # Test-name fragments deselected for every provider; joined into pytest's -k expression below
+          EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
+          # Pick endpoint, models and extra pytest flags for the provider of this matrix entry
+          if [ "${{ matrix.provider }}" == "ollama" ]; then
+            export OLLAMA_URL="http://0.0.0.0:11434"
+            export TEXT_MODEL=ollama/llama3.2:3b-instruct-fp16
+            export SAFETY_MODEL="ollama/llama-guard3:1b"
+            EXTRA_PARAMS="--safety-shield=llama-guard"
+          else
+            # Any provider other than ollama is configured as vllm
+            export VLLM_URL="http://localhost:8000/v1"
+            export TEXT_MODEL=vllm/meta-llama/Llama-3.2-1B-Instruct
+            # No extra pytest flags for this provider
+            EXTRA_PARAMS=
+            # TODO: stop excluding test_inference_store_tool_calls once we can get the tool called consistently
+            EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
+          fi
+
+
           uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
-            -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
-            --text-model="ollama/llama3.2:3b-instruct-fp16" \
-            --embedding-model=all-MiniLM-L6-v2 \
-            --safety-shield=$SAFETY_MODEL \
-            --color=yes \
+            -k "not( ${EXCLUDE_TESTS} )" \
+            --text-model=$TEXT_MODEL \
+            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
+            --color=yes ${EXTRA_PARAMS} \
             --capture=tee-sys | tee pytest-${{ matrix.test-type }}.log
 
       - name: Check Storage and Memory Available After Tests
@@ -110,16 +143,17 @@ jobs:
           free -h
           df -h
 
-      - name: Write ollama logs to file
+      - name: Write inference logs to file
         if: ${{ always() }}
         run: |
-          sudo docker logs ollama > ollama.log
+          sudo docker logs ollama > ollama.log || true
+          sudo docker logs vllm > vllm.log || true
 
       - name: Upload all logs to artifacts
         if: ${{ always() }}
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         with:
-          name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.client-type }}-${{ matrix.test-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
+          name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.provider }}-${{ matrix.client-type }}-${{ matrix.test-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
           path: |
             *.log
           retention-days: 1
@@ -1,5 +1,7 @@
 name: Vector IO Integration Tests
 
+run-name: Run the integration test suite with various VectorIO providers
+
 on:
   push:
     branches: [ main ]
@@ -114,7 +116,7 @@ jobs:
         run: |
           uv run pytest -sv --stack-config="inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
             tests/integration/vector_io \
-            --embedding-model all-MiniLM-L6-v2
+            --embedding-model sentence-transformers/all-MiniLM-L6-v2
 
       - name: Check Storage and Memory Available After Tests
         if: ${{ always() }}