From 6f6ae9bdaaa937919467f80d1e735497222794f1 Mon Sep 17 00:00:00 2001
From: Gaurav-Kumbhat
Date: Thu, 16 Oct 2025 16:27:30 -0500
Subject: [PATCH] :bug: Fix fp8 model name check with quantization check

Signed-off-by: Gaurav-Kumbhat
---
 vllm_spyre/v1/worker/spyre_worker.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm_spyre/v1/worker/spyre_worker.py b/vllm_spyre/v1/worker/spyre_worker.py
index 3002891a..7e72857c 100644
--- a/vllm_spyre/v1/worker/spyre_worker.py
+++ b/vllm_spyre/v1/worker/spyre_worker.py
@@ -444,7 +444,8 @@ def _warmup_spyre_dynamic_size(self, special_token_ids):
             0, len(valid_token_ids_tensor), (3, prompt_len))]
 
         # TODO: we need 2 requests for warmup on FP8+CB
-        is_fp8_plus_cb = 'FP8' in self.model_config.model and \
+        # Check if model is quantized
+        is_fp8_plus_cb = self.model_config.quantization is not None and \
             envs_spyre.VLLM_SPYRE_USE_CB
         req_count = 3 if is_fp8_plus_cb else 2
         requests = [