vllm-project · joerunde · Jul 31, 2025 · Jul 28, 2025 · Jul 28, 2025 · Jul 28, 2025
@@ -48,8 +48,21 @@ jobs:
             markers: "cpu and cb"
             flags: "--timeout=300"
           - name: "worker and utils"
-            markers: "not e2e"
+            markers: "cpu and not e2e"
             flags: "--timeout=300"
+          - name: "compatibility"
+            markers: "compat"
+            flags: "--timeout=300"
+        include:
+          - vllm_version:
+              name: "vLLM:lowest"
+              repo: "git+https://github.com/vllm-project/vllm --tag v0.9.2"
+            test_suite:
+              name: "backward compat"
+              markers: "compat or (cpu and basic)"
+              flags: "--timeout=300"
+            os: "ubuntu-latest"
+            python_version: "3.12"
 
     name: "${{ matrix.test_suite.name }} (${{ matrix.vllm_version.name }})"
 
@@ -90,6 +103,7 @@ jobs:
         if: (steps.changed-src-files.outputs.any_changed == 'true' && matrix.vllm_version.repo)
         run: |
           uv add ${{ matrix.vllm_version.repo }}
+          echo "TEST_VLLM_VERSION=${{ matrix.vllm_version.name }}" >> "$GITHUB_ENV"
 
       - name: "Install vLLM with Spyre plugin"
         if: steps.changed-src-files.outputs.any_changed == 'true'

@@ -122,8 +122,10 @@ asyncio_default_fixture_loop_scope = "function"
 markers = [
     "skip_global_cleanup",
     "e2e: Tests using end-to-end engine spin-up",
+    "basic: Basic correctness tests",
     "cb: Continuous batching tests",
     "cpu: Tests using CPU (i.e. eager) backend",
+    "compat: backward compatibility tests",
     "spyre: Tests using Spyre hardware backend",
     "decoder: Tests for decoder models",
     "embedding: Tests for embedding models",

@@ -32,7 +32,7 @@
 def pytest_collection_modifyitems(config, items):
     """ Mark all tests in e2e directory"""
     for item in items:
-        if "tests/e2e" in str(item.nodeid):
+        if "e2e" in str(item.nodeid):
             item.add_marker(pytest.mark.e2e)
 
 

@@ -12,6 +12,8 @@
                         st_embeddings)
 from vllm import LLM
 
+pytestmark = pytest.mark.basic
+
 
 @pytest.mark.parametrize("model", get_spyre_model_list(isEmbeddings=True))
 @pytest.mark.parametrize("warmup_shape",

@@ -2,6 +2,8 @@
 import pytest
 from spyre_util import get_spyre_backend_list, get_spyre_model_list
 
+pytestmark = pytest.mark.basic
+
 
 @pytest.mark.parametrize("model", get_spyre_model_list())
 @pytest.mark.parametrize("tp_size", [

@@ -3,6 +3,7 @@
 
 
 @pytest.mark.utils
+@pytest.mark.cpu
 def test_get_spyre_backend_list(monkeypatch):
     '''
     Ensure we return the backend list correctly

@@ -3,6 +3,7 @@
 
 
 @pytest.mark.utils
+@pytest.mark.cpu
 def test_get_spyre_model_list(monkeypatch):
     '''
     Tests returning the expected models

@@ -0,0 +1,83 @@
+import os
+
+import pytest
+
+pytestmark = pytest.mark.compat
+# vLLM version label read from TEST_VLLM_VERSION ("default" if unset).
+VLLM_VERSION = os.getenv("TEST_VLLM_VERSION", "default")
+
+
+@pytest.mark.cpu
+def test_vllm_bert_support():
+    '''
+    Test if the vllm version under test already has Bert support for V1
+    '''
+
+    from vllm.model_executor.models.bert import BertEmbeddingModel
+
+    bert_supports_v0_only = getattr(BertEmbeddingModel, "supports_v0_only",
+                                    False)
+
+    if VLLM_VERSION == "vLLM:main":
+        assert not bert_supports_v0_only
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert bert_supports_v0_only, (
+            "The lowest supported vLLM version already"
+            "supports Bert in V1. Remove the compatibility workarounds.")
+        # The compat code introduced in the PR below can now be removed:
+        # https://github.com/vllm-project/vllm-spyre/pull/277
+
+
+@pytest.mark.cpu
+def test_model_config_task():
+
+    from vllm.engine.arg_utils import EngineArgs
+
+    vllm_config = EngineArgs().create_engine_config()
+    model_config = vllm_config.model_config
+
+    task = getattr(model_config, "task", None)
+
+    if VLLM_VERSION == "vLLM:main":
+        assert task is None
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert task is not None, (
+            "The lowest supported vLLM version already"
+            "switched to the new definition of runners and task.")
+        # The compat code introduced in the PR below can now be removed:
+        # https://github.com/vllm-project/vllm-spyre/pull/341
+
+
+@pytest.mark.cpu
+def test_has_tasks():
+
+    try:
+        from vllm import tasks  # noqa
+        has_tasks = True
+    except Exception:
+        has_tasks = False
+
+    if VLLM_VERSION == "vLLM:main":
+        assert has_tasks
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert not has_tasks, (
+            "The lowest supported vLLM version already"
+            "switched to the new definition of runners and task.")
+        # The compat code introduced in the PR below can now be removed:
+        # https://github.com/vllm-project/vllm-spyre/pull/338
+
+
+@pytest.mark.cpu
+def test_pooler_from_config():
+
+    from vllm.model_executor.layers.pooler import Pooler
+    has_from_config = hasattr(Pooler, "from_config_with_defaults")
+
+    if VLLM_VERSION == "vLLM:main":
+        assert not has_from_config
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert has_from_config, (
+            "The lowest supported vLLM version already"
+            "switched to the new definition of runners and task.")
+        # The compat code introduced in the PR below can now be removed:
+        # https://github.com/vllm-project/vllm-spyre/pull/338
@@ -211,6 +211,7 @@ def same(t1: Optional[torch.Tensor], t2: Optional[torch.Tensor]) -> bool:
         sampling_metadata.bad_words_token_ids
 
 
+@pytest.mark.cpu
 @pytest.mark.v1
 @pytest.mark.worker
 @pytest.mark.parametrize("batch_size", [1, 2, 32, 64])