Skip to content

Commit bb17bbf

Browse files
committed
Add comment
Signed-off-by: Rafael Vasquez <[email protected]>
1 parent e541cc8 commit bb17bbf

File tree

2 files changed

+2
-25
lines changed

2 files changed

+2
-25
lines changed

tests/e2e/test_spyre_online.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -83,31 +83,6 @@ def test_openai_serving_gptq(remote_openai_server, model, backend,
8383
assert len(completion.choices[0].text) > 0
8484

8585

86-
@pytest.mark.quantized
87-
@pytest.mark.parametrize("model", get_spyre_model_list(quantization="fp8"))
88-
@pytest.mark.parametrize("backend", ["sendnn"])
89-
@pytest.mark.parametrize("warmup_shape", [[(64, 20, 1)]])
90-
def test_openai_serving_fp8(remote_openai_server, model, backend,
91-
warmup_shape):
92-
"""Test online serving a GPTQ model with the sendnn backend only"""
93-
94-
client = remote_openai_server.get_client()
95-
completion = client.completions.create(model=model,
96-
prompt="Hello World!",
97-
max_tokens=5,
98-
temperature=0.0)
99-
assert len(completion.choices) == 1
100-
assert len(completion.choices[0].text) > 0
101-
102-
completion = client.completions.create(model=model,
103-
prompt="Hello World!",
104-
max_tokens=5,
105-
temperature=1.0,
106-
n=2)
107-
assert len(completion.choices) == 2
108-
assert len(completion.choices[0].text) > 0
109-
110-
11186
@pytest.mark.parametrize("model", get_spyre_model_list())
11287
@pytest.mark.parametrize("cb",
11388
[pytest.param(1, marks=pytest.mark.cb, id="cb")])

vllm_spyre/platform.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ class SpyrePlatform(Platform):
4040
# "spyre" device_name no longer worked due to https://github.com/vllm-project/vllm/pull/16464
4141
device_name: str = "cpu"
4242
device_type: str = "cpu"
43+
# compressed-tensors quantization is supported through the fms-model-optimizer AIU addons, see:
44+
# https://github.com/foundation-model-stack/fms-model-optimizer/blob/main/fms_mo/aiu_addons/__init__.py
4345
supported_quantization: list[str] = ["gptq", "fp8", "compressed-tensors"]
4446
_warmup_shapes: Optional[tuple[dict[str, int], ...]] = None
4547
_block_size: int = 64 # hardcoded Spyre constraint for now

0 commit comments

Comments
 (0)