
Commit a86787d

Print compile cache enablement along with warmup time (#321)

Provide an indication of whether the compile cache is enabled or disabled next to the warmup time.

Signed-off-by: Sophie du Couédic <[email protected]>
1 parent 9d21620 commit a86787d

1 file changed: 8 additions (+), 2 deletions (-)

vllm_spyre/v1/worker/spyre_worker.py
@@ -419,7 +419,10 @@ def _warmup_spyre_dynamic_size(self, special_token_ids):
 
         warmup_end_t = time.time()
         warmup_total_t = warmup_end_t - warmup_start_t
-        logger.info("[WARMUP] Finished in %.3fs", warmup_total_t)
+        compile_cache_str = 'enabled' if int(
+            os.getenv("TORCH_SENDNN_CACHE_ENABLE", "0")) else 'disabled'
+        logger.info("[WARMUP] Finished in %.3fs (compilation cache %s)",
+                    warmup_total_t, compile_cache_str)
 
         maybe_override_signals_handler()
 
@@ -549,9 +552,12 @@ def _warmup_spyre_fixed_size(self, prompt_len, num_decode_tokens,
                                             batch_size=batch_size,
                                             max_tokens=num_decode_tokens,
                                             prompt_len=prompt_len)
+        compile_cache_str = 'enabled' if int(
+            os.getenv("TORCH_SENDNN_CACHE_ENABLE", "0")) else 'disabled'
         logger.info(
             "[WARMUP] Prompt length %d and max output tokens %d "
-            "finished in %.3fs", prompt_len, num_decode_tokens, warmup_total_t)
+            "finished in %.3fs (compilation cache %s)", prompt_len,
+            num_decode_tokens, warmup_total_t, compile_cache_str)
         maybe_override_signals_handler()
 
     def _warmup_model_forward_pass(
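
Both hunks derive the enabled/disabled string the same way: TORCH_SENDNN_CACHE_ENABLE is read from the environment, parsed as an integer (defaulting to "0" when unset), and any non-zero value counts as enabled. Below is a minimal standalone sketch of that pattern; the logger setup and the placeholder warmup duration are illustrative only and are not part of the commit.

import logging
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("warmup_demo")  # hypothetical logger name, for illustration

# Same check as in the diff: any non-zero integer value of
# TORCH_SENDNN_CACHE_ENABLE means the compile cache is enabled.
compile_cache_str = 'enabled' if int(
    os.getenv("TORCH_SENDNN_CACHE_ENABLE", "0")) else 'disabled'

warmup_total_t = 12.345  # placeholder duration in seconds
logger.info("[WARMUP] Finished in %.3fs (compilation cache %s)",
            warmup_total_t, compile_cache_str)
# With TORCH_SENDNN_CACHE_ENABLE=1 set, the logged message reads:
#   [WARMUP] Finished in 12.345s (compilation cache enabled)

Note that, as in the committed code, setting the variable to a non-integer string raises a ValueError rather than falling back to "disabled".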
