vllm-project · sducouedic · Jul 18, 2025 · Jul 17, 2025 · Jul 17, 2025 · Jul 17, 2025
@@ -18,7 +18,10 @@ nav:
     - Developer Guide:
       - Contributing: contributing/README.md
       - Continuous Batching:
-        - Testing: contributing/continuous_batching/tests.md
+        - Tests:
+          - End-to-End: contributing/continuous_batching/tests/e2e_testing.md
+          - Step-by-Step: contributing/continuous_batching/tests/scheduling_inference_steps.md
+          - Other Tests: contributing/continuous_batching/tests/other_tests.md
 
   - Getting Started:
     - Installation: getting_started/installation.md
@@ -34,4 +37,7 @@ nav:
   - Developer Guide:
     - Contributing: contributing/README.md
     - Continuous Batching:
-      - Testing: contributing/continuous_batching/tests.md
+      - Tests:
+        - End-to-End: contributing/continuous_batching/tests/e2e_testing.md
+        - Step-by-Step: contributing/continuous_batching/tests/scheduling_inference_steps.md
+        - Other Tests: contributing/continuous_batching/tests/other_tests.md
diff --git a/docs/contributing/continuous_batching/tests/e2e_testing.md b/docs/contributing/continuous_batching/tests/e2e_testing.md
@@ -0,0 +1,10 @@
+# End output correctness tests
+
+!!! note
+    Unless otherwise specified, all continuous batching tests run with `max_model_len=256`.
+
+::: tests.e2e.test_spyre_basic
+    options:
+        members:
+        - test_output
+        - test_batch_handling
@@ -0,0 +1,6 @@
+# Other tests
+
+!!! note
+    Unless otherwise specified, all continuous batching tests run with `max_model_len=256`.
+
+::: tests.e2e.test_spyre_cb
diff --git a/docs/contributing/continuous_batching/tests/scheduling_inference_steps.md b/docs/contributing/continuous_batching/tests/scheduling_inference_steps.md
@@ -0,0 +1,9 @@
+# Scheduler inference steps tests
+
+!!! note
+    Unless otherwise specified, all continuous batching tests run with `max_model_len=256`.
+
+!!! warning
+    End output correctness is not verified in these tests (TODO: decide whether it should be verified, at least for some of them).
+
+::: tests.e2e.test_spyre_cb_inference_steps
@@ -1,5 +1,10 @@
 """Verification of vLLM output by comparing with HF
 
+If errors occur, these can be analyzed/debugged by setting
+`DISABLE_ASSERTS = True` in spyre_util.py and by rerunning the
+test using `pytest --capture=no tests/e2e/test_spyre_basic.py`.
+After debugging, `DISABLE_ASSERTS` should be reset to `False`.
+
 Run `python -m pytest tests/e2e/test_spyre_basic.py`.
 """
 
@@ -45,11 +50,12 @@ def test_output(
     The same prompts are also input to HF. The generated output
     including text, token ids, and logprobs, is verified to be
     identical for vLLM and HF.
-
-    If errors occur, these can be analyzed/debugged by setting
-    'DISABLE_ASSERTS = True' in spyre_util.py and by rerunning the
-    test using 'pytest --capture=no tests/spyre/test_spyre_basic.py'
-    After debugging, DISABLE_ASSERTS should be reset to 'False'.
+
+    Configuration for CB - parameters are combinatorial:
+        * max_num_seqs: 4
+        * tensor parallelism: 1, 2, 4, 8
+        * number of prompts: 4 (Chicken soup prompts)
+        * max tokens: 20 (same for all the prompts)
     '''
 
     skip_unsupported_tp_size(tp_size, backend)
@@ -156,7 +162,13 @@ def test_batch_handling(model: str, backend: str, cb: int,
                         monkeypatch: pytest.MonkeyPatch):
     """Test that the spyre worker correctly handles
     continuous batches of requests that
-    finish after different numbers of forward passes"""
+    finish after different numbers of forward passes.
+
+    Configuration for CB - parameters are combinatorial:
+        * max_num_seqs: 2
+        * number of prompts: 4 (Chicken soup prompts)
+        * max tokens: [5, 20, 10, 5]
+    """
 
     prompts = get_chicken_soup_prompts(4)