
Commit a86787d

Print compile cache enablement along with warmup time (#321)

Provide an indication of whether the compile cache is enabled or disabled next to the warmup time.

Signed-off-by: Sophie du Couédic <[email protected]>
1 parent 9d21620 commit a86787d

1 file changed: 8 additions (+), 2 deletions (-)

vllm_spyre/v1/worker/spyre_worker.py
@@ -419,7 +419,10 @@ def _warmup_spyre_dynamic_size(self, special_token_ids):
 
         warmup_end_t = time.time()
         warmup_total_t = warmup_end_t - warmup_start_t
-        logger.info("[WARMUP] Finished in %.3fs", warmup_total_t)
+        compile_cache_str = 'enabled' if int(
+            os.getenv("TORCH_SENDNN_CACHE_ENABLE", "0")) else 'disabled'
+        logger.info("[WARMUP] Finished in %.3fs (compilation cache %s)",
+                    warmup_total_t, compile_cache_str)
 
         maybe_override_signals_handler()
 
@@ -549,9 +552,12 @@ def _warmup_spyre_fixed_size(self, prompt_len, num_decode_tokens,
                                             batch_size=batch_size,
                                             max_tokens=num_decode_tokens,
                                             prompt_len=prompt_len)
+        compile_cache_str = 'enabled' if int(
+            os.getenv("TORCH_SENDNN_CACHE_ENABLE", "0")) else 'disabled'
         logger.info(
             "[WARMUP] Prompt length %d and max output tokens %d "
-            "finished in %.3fs", prompt_len, num_decode_tokens, warmup_total_t)
+            "finished in %.3fs (compilation cache %s)", prompt_len,
+            num_decode_tokens, warmup_total_t, compile_cache_str)
         maybe_override_signals_handler()
 
     def _warmup_model_forward_pass(
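
Both hunks derive the enabled/disabled string the same way: TORCH_SENDNN_CACHE_ENABLE is read from the environment, parsed as an integer (defaulting to "0" when unset), and any non-zero value counts as enabled. Below is a minimal standalone sketch of that pattern; the logger setup and the placeholder warmup duration are illustrative only and are not part of the commit.

import logging
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("warmup_demo")  # hypothetical logger name, for illustration

# Same check as in the diff: any non-zero integer value of
# TORCH_SENDNN_CACHE_ENABLE means the compile cache is enabled.
compile_cache_str = 'enabled' if int(
    os.getenv("TORCH_SENDNN_CACHE_ENABLE", "0")) else 'disabled'

warmup_total_t = 12.345  # placeholder duration in seconds
logger.info("[WARMUP] Finished in %.3fs (compilation cache %s)",
            warmup_total_t, compile_cache_str)
# With TORCH_SENDNN_CACHE_ENABLE=1 set, the logged message reads:
#   [WARMUP] Finished in 12.345s (compilation cache enabled)

Note that, as in the committed code, setting the variable to a non-integer string raises a ValueError rather than falling back to "disabled".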
