Merged
vllm_spyre/v1/worker/spyre_worker.py (3 changes: 2 additions & 1 deletion)
```diff
@@ -444,7 +444,8 @@ def _warmup_spyre_dynamic_size(self, special_token_ids):
             0, len(valid_token_ids_tensor), (3, prompt_len))]
 
         # TODO: we need 2 requests for warmup on FP8+CB
-        is_fp8_plus_cb = 'FP8' in self.model_config.model and \
+        # Check if model is quantized
+        is_fp8_plus_cb = self.model_config.quantization is not None and \
             envs_spyre.VLLM_SPYRE_USE_CB
         req_count = 3 if is_fp8_plus_cb else 2
         requests = [
```
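The substance of the fix: the old code inferred FP8 from the model name, which misses quantized checkpoints whose path doesn't contain the literal string "FP8", while the new code asks vLLM's `ModelConfig` for the resolved quantization method. A minimal sketch of the difference (the model name and values below are illustrative assumptions, not taken from this PR):

```python
# Hypothetical illustration; the model name and values are assumptions,
# not taken from this PR.

# Old check: keyed off the model *name* containing the literal "FP8".
model_name = "my-org/granite-8b-quantized"  # an FP8 checkpoint without "FP8" in its name
old_check = "FP8" in model_name
print(old_check)  # False -> warmup would pick the wrong request count

# New check: ModelConfig resolves the quantization method from the
# checkpoint itself (model_config.quantization is e.g. "fp8", or None
# for unquantized models), regardless of how the model is named.
quantization = "fp8"  # what model_config.quantization would hold here
new_check = quantization is not None
print(new_check)  # True
```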
Collaborator: What about other quantizations, like 4-bit or 8-bit int?

There is some more code from @joerunde, and some I recently wrote, that does this kind of check; it could be adapted and used here.

Collaborator: We'll have to refactor all of these checks once we support more quantization methods; today we only support FP8. I wouldn't mind a refactor that pulls all of these FP8 checks into one helper instance method on the model class, but we don't need to block this fix on it.

I do think this is a separate problem from figuring out which model we're serving, though, because any FP8 model has to be handled separately here, not just Granite specifically.
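A minimal sketch of the helper suggested above, assuming the model class holds a reference to `model_config` and that vLLM reports the method as the lowercase string "fp8" (the class name and method placement are hypothetical, for illustration only):

```python
# Hypothetical sketch of the suggested refactor; names are illustrative,
# not the repo's actual definitions.
class SpyreCausalLM:

    def __init__(self, model_config):
        self.model_config = model_config

    def is_fp8(self) -> bool:
        # Centralize the FP8 check so callers stop repeating
        # string/None tests against model_config.
        return self.model_config.quantization == "fp8"
```

The warmup path could then read `is_fp8_plus_cb = model.is_fp8() and envs_spyre.VLLM_SPYRE_USE_CB`, keeping the policy in one place as more quantization methods are added.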

Collaborator: Created a follow-up issue: #537
