From 6f6ae9bdaaa937919467f80d1e735497222794f1 Mon Sep 17 00:00:00 2001
From: Gaurav-Kumbhat <Gaurav.Kumbhat@ibm.com>
Date: Thu, 16 Oct 2025 16:27:30 -0500
Subject: [PATCH] :bug: Replace FP8 model-name check with quantization check

Signed-off-by: Gaurav-Kumbhat <Gaurav.Kumbhat@ibm.com>
---
 vllm_spyre/v1/worker/spyre_worker.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm_spyre/v1/worker/spyre_worker.py b/vllm_spyre/v1/worker/spyre_worker.py
index 3002891a..7e72857c 100644
--- a/vllm_spyre/v1/worker/spyre_worker.py
+++ b/vllm_spyre/v1/worker/spyre_worker.py
@@ -444,7 +444,8 @@ def _warmup_spyre_dynamic_size(self, special_token_ids):
             0, len(valid_token_ids_tensor), (3, prompt_len))]
 
         # TODO: we need 2 requests for warmup on FP8+CB
-        is_fp8_plus_cb = 'FP8' in self.model_config.model and \
+        # Any quantized model is treated as FP8 (no model-name matching)
+        is_fp8_plus_cb = self.model_config.quantization is not None and \
             envs_spyre.VLLM_SPYRE_USE_CB
         req_count = 3 if is_fp8_plus_cb else 2
         requests = [