Commit 4a86784

fix conflicts

Signed-off-by: Sophie du Couédic <[email protected]>

Merge of 2 parents: af7f3c7 + 9511982

16 files changed: +70 -169 lines changed

.github/workflows/test.yml

Lines changed: 9 additions & 9 deletions
@@ -39,17 +39,17 @@ jobs:
         - name: "vLLM:main"
           repo: "git+https://github.com/vllm-project/vllm --branch main"
       test_suite:
-        - name: "V0-e2e"
-          markers: "v0 and cpu and e2e"
-          flags: "--timeout=300"
-        - name: "V1-e2e"
-          markers: "v1 and cpu and e2e and not cb"
+        - name: "static batching"
+          markers: "cpu and decoder and not cb"
+          flags: "--timeout=300 --forked"
+        - name: "embedding"
+          markers: "cpu and embedding"
           flags: "--timeout=300 --forked"
-        - name: "V1-cb"
-          markers: "v1 and cpu and cb"
+        - name: "continuous batching"
+          markers: "cpu and cb"
           flags: "--timeout=300 --forked"
-        - name: "V1-worker and utils"
-          markers: "v1 and not e2e or utils"
+        - name: "worker and utils"
+          markers: "not e2e"
           flags: "--timeout=300"
 
     name: "${{ matrix.test_suite.name }} (${{ matrix.vllm_version.name }})"

docs/contributing/README.md

Lines changed: 1 addition & 1 deletion
@@ -92,7 +92,7 @@ uv pip install --group dev
 Now, you can run the tests:
 
 ```sh
-python -m pytest -v -x tests -m "v1 and cpu and e2e"
+python -m pytest -v -x tests -m "cpu and e2e"
 ```
 
 Here is a list of `pytest` markers you can use to filter them:

pyproject.toml

Lines changed: 0 additions & 2 deletions
@@ -121,8 +121,6 @@ pythonpath = ["."]
 markers = [
     "skip_global_cleanup",
     "e2e: Tests using end-to-end engine spin-up",
-    "v0: Tests using vLLM v0 engine",
-    "v1: Tests using vLLM v1 engine",
     "cb: Continuous batching tests",
     "cpu: Tests using CPU (i.e. eager) backend",
     "spyre: Tests using Spyre hardware backend",

tests/conftest.py

Lines changed: 1 addition & 3 deletions
@@ -68,7 +68,6 @@ def remote_openai_server(request):
         model = params['model']
         warmup_shape = params['warmup_shape']
         backend = params['backend']
-        vllm_version = params['vllm_version']
     except KeyError as e:
         raise pytest.UsageError(
             "Error setting up remote_openai_server params") from e
@@ -79,14 +78,13 @@ def remote_openai_server(request):
     warmup_prompt_length = [t[0] for t in warmup_shape]
     warmup_new_tokens = [t[1] for t in warmup_shape]
     warmup_batch_size = [t[2] for t in warmup_shape]
-    v1_flag = "1" if vllm_version == "V1" else "0"
     env_dict = {
         "VLLM_SPYRE_WARMUP_PROMPT_LENS":
         ','.join(map(str, warmup_prompt_length)),
         "VLLM_SPYRE_WARMUP_NEW_TOKENS": ','.join(map(str, warmup_new_tokens)),
         "VLLM_SPYRE_WARMUP_BATCH_SIZES": ','.join(map(str, warmup_batch_size)),
         "VLLM_SPYRE_DYNAMO_BACKEND": backend,
-        "VLLM_USE_V1": v1_flag
+        "VLLM_USE_V1": "1"
     }
 
     # Add extra server args if present in test
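For context, a standalone sketch of how the fixture above flattens the warmup shapes into the comma-separated environment variables. The `warmup_shape` value is an assumed example; with the V0 code path removed, `VLLM_USE_V1` is now always `"1"`:

```python
# Sketch of the env_dict construction above, with an assumed warmup_shape value.
warmup_shape = [(64, 20, 4), (128, 20, 8)]  # (prompt_length, new_tokens, batch_size)

warmup_prompt_length = [t[0] for t in warmup_shape]
warmup_new_tokens = [t[1] for t in warmup_shape]
warmup_batch_size = [t[2] for t in warmup_shape]

env_dict = {
    "VLLM_SPYRE_WARMUP_PROMPT_LENS": ','.join(map(str, warmup_prompt_length)),
    "VLLM_SPYRE_WARMUP_NEW_TOKENS": ','.join(map(str, warmup_new_tokens)),
    "VLLM_SPYRE_WARMUP_BATCH_SIZES": ','.join(map(str, warmup_batch_size)),
    "VLLM_USE_V1": "1",  # always V1 now that the v1_flag toggle is gone
}

print(env_dict["VLLM_SPYRE_WARMUP_PROMPT_LENS"])  # -> 64,128
```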

tests/e2e/test_spyre_basic.py

Lines changed: 2 additions & 14 deletions
@@ -4,7 +4,7 @@
 """
 
 import pytest
-from spyre_util import (VLLM_VERSIONS, compare_results, create_random_request,
+from spyre_util import (compare_results, create_random_request,
                         generate_hf_output, generate_spyre_vllm_output,
                         get_spyre_backend_list, get_spyre_model_list)
 from vllm import EngineArgs, SamplingParams
@@ -33,13 +33,11 @@
     "warmup_shape", [(64, 20, 4), (64, 20, 8), (128, 20, 4),
                      (128, 20, 8)])  # (prompt_length/new_tokens/batch_size)
 @pytest.mark.parametrize("backend", get_spyre_backend_list())
-@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
 def test_output(
     model: str,
     prompts: list[str],
     warmup_shape: tuple[int, int, int],
     backend: str,
-    vllm_version: str,
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
     '''
@@ -72,7 +70,6 @@ def test_output(
         sampling_params=vllm_sampling_params,
         tensor_parallel_size=1,
         backend=backend,
-        vllm_version=vllm_version,
         monkeypatch=monkeypatch)
 
     hf_results = generate_hf_output(model=model,
@@ -96,13 +93,11 @@ def test_output(
 @pytest.mark.parametrize(
     "warmup_shape", [(64, 20, 4)])  # (prompt_length/new_tokens/batch_size)
 @pytest.mark.parametrize("backend", ["sendnn_decoder"])
-@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
 def test_output_sendnn_decoder(
     model: str,
     prompts: list[str],
     warmup_shape: tuple[int, int, int],
     backend: str,
-    vllm_version: str,
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
     '''
@@ -127,7 +122,6 @@ def test_output_sendnn_decoder(
         sampling_params=vllm_sampling_params,
         tensor_parallel_size=1,
         backend=backend,
-        vllm_version=vllm_version,
         monkeypatch=monkeypatch)
 
     hf_results = generate_hf_output(model=model,
@@ -145,11 +139,9 @@ def test_output_sendnn_decoder(
 
 @pytest.mark.parametrize("model", get_spyre_model_list())
 @pytest.mark.parametrize("backend", get_spyre_backend_list())
-@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
 def test_batch_handling(
     model: str,
     backend: str,
-    vllm_version: str,
     monkeypatch: pytest.MonkeyPatch,
 ):
     """Test that the spyre worker correctly handles batches of requests that
@@ -184,7 +176,6 @@ def test_batch_handling(
         sampling_params=vllm_sampling_params,
         tensor_parallel_size=1,
         backend=backend,
-        vllm_version=vllm_version,
         monkeypatch=monkeypatch)
 
     assert vllm_results[0]["text"] == " 3 2 "
@@ -195,10 +186,7 @@ def test_batch_handling(
 
 @pytest.mark.parametrize("model", get_spyre_model_list())
 @pytest.mark.parametrize("backend", get_spyre_backend_list())
-@pytest.mark.parametrize("vllm_version",
-                         [pytest.param("V1", marks=pytest.mark.v1, id="v1")])
-def test_full_batch_scheduling(model: str, backend: str, vllm_version: str,
-                               monkeypatch):
+def test_full_batch_scheduling(model: str, backend: str, monkeypatch):
     """Test that we can schedule a full batch of prompts."""
 
     # We need to ensure here that the max number of tokens in a full batch
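After dropping the `vllm_version` axis, each decoder test is parametrized only over model, backend, prompts, and warmup shape. A minimal sketch of the resulting pattern (the model and backend values below are hypothetical placeholders standing in for `get_spyre_model_list()` and `get_spyre_backend_list()`):

```python
import pytest

# Placeholder values; the real tests use get_spyre_model_list()/get_spyre_backend_list().
@pytest.mark.parametrize("model", ["example/decoder-model"])
@pytest.mark.parametrize("backend", ["eager", "sendnn_decoder"])
@pytest.mark.parametrize(
    "warmup_shape", [(64, 20, 4)])  # (prompt_length/new_tokens/batch_size)
def test_output_sketch(model: str, backend: str,
                       warmup_shape: tuple[int, int, int]) -> None:
    # The vllm_version parameter and vllm_version= keyword are gone; only the
    # remaining axes vary per test case.
    assert model and backend and warmup_shape
```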

tests/e2e/test_spyre_cb.py

Lines changed: 11 additions & 54 deletions
@@ -8,14 +8,9 @@
 from typing import Any
 
 import pytest
-<<<<<<< HEAD
 from spyre_util import (compare_results, create_random_request,
                         generate_hf_output, generate_spyre_vllm_output,
-                        get_spyre_model_list)
-=======
-from spyre_util import (create_random_request, generate_cb_spyre_vllm_output,
                         get_spyre_backend_list, get_spyre_model_list)
->>>>>>> origin/main
 from vllm import EngineArgs, SamplingParams
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.core import EngineCore
@@ -28,16 +23,12 @@
     "appropriately completes the request. Be polite in your response to the "
     "user.\n\n### Instruction:\n{}\n\n### Response:")
 
-<<<<<<< HEAD
 
 @pytest.mark.cb
 @pytest.mark.parametrize("max_num_seqs", [2, 3, 4],
                          ids=lambda val: f"max_num_seqs({val})")
 @pytest.mark.parametrize("model", get_spyre_model_list())
-@pytest.mark.parametrize(
-    "backend", [pytest.param("eager", marks=pytest.mark.cpu, id="eager")])
-# commenting v1 since we don't want this test to run with v1 marker yet
-# @pytest.mark.v1
+@pytest.mark.parametrize("backend", get_spyre_backend_list())
 @pytest.mark.parametrize("prompts", [[
     template.format("Provide a list of instructions "
                     "for preparing chicken soup."),
@@ -47,35 +38,6 @@
         "how do I add multiple new columns in m for power query or power bi?"),
     template.format("Convert char to string in Java."),
 ]])
-=======
-@pytest.mark.cb
-@pytest.mark.v1
-@pytest.mark.parametrize("max_num_seqs", [2, 3, 4],
-                         ids=lambda val: f"max_num_seqs({val})")
-@pytest.mark.parametrize("model", get_spyre_model_list())
-@pytest.mark.parametrize("backend", get_spyre_backend_list())
-@pytest.mark.parametrize(
-    "prompts",
-    [
-        [
-            "7 6 5 4",
-            "10 9 8 7",
-        ],
-        [
-            "7 6 5 4",
-            "10 9 8 7",
-            "8 7 6 5",
-        ],
-        [
-            "7 6 5 4",
-            "10 9 8 7",
-            "8 7 6 5",
-            "9 8 7 6",
-        ],
-    ],
-    ids=lambda val: f"num_prompts({len(val)})",
-)
->>>>>>> origin/main
 def test_cb_handling(
     model: str,
     backend: str,
@@ -107,7 +69,6 @@ def test_cb_handling(
         backend=backend,
         max_num_seqs=max_num_seqs,
         use_cb=True,
-        vllm_version="V1",  # CB runs in V1 only
         monkeypatch=monkeypatch)
 
     hf_results = generate_hf_output(model=model,
@@ -124,7 +85,6 @@ def test_cb_handling(
 
 
 @pytest.mark.cb
-# @pytest.mark.v1
 @pytest.mark.parametrize("max_num_seqs", [2])
 @pytest.mark.parametrize("model", get_spyre_model_list())
 @pytest.mark.parametrize(
@@ -149,18 +109,16 @@ def test_cb_max_tokens(
                                          logprobs=0)
 
     with pytest.raises(ValueError, match="max model context length"):
-        generate_spyre_vllm_output(
-            model=model,
-            prompts=overflow_prompt,
-            max_model_len=max_model_len,
-            block_size=max_model_len,
-            sampling_params=vllm_sampling_params,
-            tensor_parallel_size=1,
-            backend=backend,
-            max_num_seqs=max_num_seqs,
-            use_cb=True,
-            vllm_version="V1",  # CB runs in V1 only
-            monkeypatch=monkeypatch)
+        generate_spyre_vllm_output(model=model,
+                                   prompts=overflow_prompt,
+                                   max_model_len=max_model_len,
+                                   block_size=max_model_len,
+                                   sampling_params=vllm_sampling_params,
+                                   tensor_parallel_size=1,
+                                   backend=backend,
+                                   max_num_seqs=max_num_seqs,
+                                   use_cb=True,
+                                   monkeypatch=monkeypatch)
 
 
 def get_params_test_blocks_borders_aligned_prompts():
@@ -683,7 +641,6 @@ def augment_checked_steps(
 
 
 @pytest.mark.cb
-@pytest.mark.v1
 @pytest.mark.parametrize("model", get_spyre_model_list())
 @pytest.mark.parametrize("backend", get_spyre_backend_list())
 @pytest.mark.parametrize("max_num_seqs", [2])

tests/e2e/test_spyre_embeddings.py

Lines changed: 2 additions & 4 deletions
@@ -20,10 +20,8 @@
                          [(64, 4), (64, 8), (128, 4),
                           (128, 8)])  # (prompt_length/batch_size)
 @pytest.mark.parametrize("backend", get_spyre_backend_list())
-@pytest.mark.parametrize(
-    "vllm_version",
-    [pytest.param("V0", marks=pytest.mark.v0, id="v0")
-     ])  # TODO: Replace with VLLM_VERSIONS when v1 is supported.
+# TODO: Add it when v1 is supported.
+@pytest.mark.parametrize("vllm_version", ["V0"])
 def test_output(
     model: str,
     prompts: list[str],

tests/e2e/test_spyre_max_new_tokens.py

Lines changed: 1 addition & 4 deletions
@@ -4,7 +4,7 @@
 """
 
 import pytest
-from spyre_util import (VLLM_VERSIONS, compare_results, generate_hf_output,
+from spyre_util import (compare_results, generate_hf_output,
                         generate_spyre_vllm_output, get_spyre_backend_list,
                         get_spyre_model_list)
 from vllm import SamplingParams
@@ -27,14 +27,12 @@
 @pytest.mark.parametrize(
     "warmup_shape", [(64, 10, 4)])  # (prompt_length/new_tokens/batch_size)
 @pytest.mark.parametrize("backend", get_spyre_backend_list())
-@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
 def test_output(
     model: str,
     prompts: list[str],
     stop_last: bool,
     warmup_shape: tuple[int, int, int],
     backend: str,
-    vllm_version: str,
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
     '''
@@ -88,7 +86,6 @@ def test_output(
         sampling_params=vllm_sampling_params,
         tensor_parallel_size=1,
         backend=backend,
-        vllm_version=vllm_version,
         monkeypatch=monkeypatch)
 
     hf_results = generate_hf_output(model=model,

tests/e2e/test_spyre_online.py

Lines changed: 3 additions & 11 deletions
@@ -1,17 +1,14 @@
 import openai
 import pytest
-from spyre_util import (VLLM_VERSIONS, get_spyre_backend_list,
-                        get_spyre_model_list)
+from spyre_util import get_spyre_backend_list, get_spyre_model_list
 
 
 @pytest.mark.parametrize("model", get_spyre_model_list())
 @pytest.mark.parametrize("backend", get_spyre_backend_list())
 @pytest.mark.parametrize("warmup_shape", [[
     (64, 20, 4),
 ]])
-@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
-def test_openai_serving(remote_openai_server, model, warmup_shape, backend,
-                        vllm_version):
+def test_openai_serving(remote_openai_server, model, warmup_shape, backend):
     """Test online serving using the `vllm serve` CLI"""
 
     client = remote_openai_server.get_client()
@@ -48,10 +45,6 @@ def test_openai_serving(remote_openai_server, model, warmup_shape, backend,
         completion = client.completions.create(model=model,
                                                prompt="Hello World!",
                                                max_tokens=25)
-        # V1 should raise
-        assert vllm_version == "V0"
-        assert len(completion.choices) == 1
-        assert len(completion.choices[0].text) == 0
     except openai.BadRequestError as e:
         assert "warmup" in str(e)
 
@@ -60,9 +53,8 @@ def test_openai_serving(remote_openai_server, model, warmup_shape, backend,
 @pytest.mark.parametrize("backend", ["sendnn"])
 @pytest.mark.parametrize("quantization", ["gptq"])
 @pytest.mark.parametrize("warmup_shape", [[(64, 20, 4)]])
-@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
 def test_openai_serving_gptq(remote_openai_server, model, backend,
-                             warmup_shape, vllm_version, quantization):
+                             warmup_shape, quantization):
     """Test online serving a GPTQ model with the sendnn backend only"""
 
     client = remote_openai_server.get_client()

tests/e2e/test_spyre_online_multi.py

Lines changed: 2 additions & 4 deletions
@@ -1,6 +1,5 @@
 import pytest
-from spyre_util import (VLLM_VERSIONS, get_spyre_backend_list,
-                        get_spyre_model_list)
+from spyre_util import get_spyre_backend_list, get_spyre_model_list
 
 
 @pytest.mark.multi
@@ -11,9 +10,8 @@
 @pytest.mark.parametrize(
     "backend", [b for b in get_spyre_backend_list() if "eager" not in str(b)])
 @pytest.mark.parametrize("tensor_parallel_size", ["2", "4", "8"])
-@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
 def test_openai_tp_serving(remote_openai_server, model, warmup_shape, backend,
-                           vllm_version, tensor_parallel_size):
+                           tensor_parallel_size):
     """Test online serving with tensor parallelism using the `vllm serve` CLI"""
 
     client = remote_openai_server.get_client()
