
Commit 5b95b15

Store warmup shapes in config

Signed-off-by: Thomas Parnell <[email protected]>
1 parent: 9322b33

4 files changed (+5, -10 lines)
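In short, this commit moves the warmup shapes from a class attribute on SpyrePlatform (exposed through a get_warmup_shapes() classmethod) to a field that set_warmup_shapes() assigns onto the scheduler config, so every consumer reads the same config object instead of calling back into the platform. A schematic of the access change (the exact path to scheduler_config varies per call site, as the diffs below show):

# before: shapes lived on the SpyrePlatform class behind a classmethod
spyre_warmup_shapes = current_platform.get_warmup_shapes()

# after: check_and_update_config() -> set_warmup_shapes() stores the tuple
# on the scheduler config, and consumers read the attribute directly
spyre_warmup_shapes = scheduler_config.spyre_warmup_shapes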

vllm_spyre/core/scheduler.py (1 addition, 1 deletion)

@@ -669,7 +669,7 @@ def _schedule_prefills(
         seq_groups: List[ScheduledSequenceGroup] = []

         # SPYRE SPECIFIC CODE BLOCK START
-        spyre_warmup_shapes = current_platform.get_warmup_shapes()
+        spyre_warmup_shapes = self.scheduler_config.spyre_warmup_shapes
         applicable_spyre_warmup_shapes = list(spyre_warmup_shapes)
         # SPYRE SPECIFIC CODE BLOCK END

vllm_spyre/platform.py (2 additions, 7 deletions)

@@ -22,7 +22,6 @@ class SpyrePlatform(Platform):
     device_name: str = "spyre"
     device_type: str = "cpu"
     supported_quantization: list[str] = ["gptq"]
-    spyre_warmup_shapes: tuple[dict[str, int], ...]

     @classmethod
     def get_device_name(cls, device_id: int = 0) -> str:

@@ -79,7 +78,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         cls.set_warmup_shapes(scheduler_config)
         max_batch_size = 0
         max_seq_len = 0
-        for shape in cls.get_warmup_shapes():
+        for shape in scheduler_config.spyre_warmup_shapes:
             max_batch_size = max(max_batch_size, shape['batch_size'])
             max_seq_len = max(max_batch_size,
                               shape['prompt_length'] + shape['new_tokens'])

@@ -153,15 +152,11 @@ def set_warmup_shapes(cls, scheduler_config) -> None:
         logger.info("VLLM_SPYRE_WARMUP_NEW_TOKENS = %s", wup_new_tokens)
         logger.info("VLLM_SPYRE_WARMUP_BATCH_SIZES = %s", wup_batch_sizes)

-        cls.spyre_warmup_shapes = tuple(
+        scheduler_config.spyre_warmup_shapes = tuple(
             sorted([{
                 'prompt_length': pl,
                 'new_tokens': nt,
                 'batch_size': bs
             } for pl, nt, bs in zip(wup_prompt_lens, wup_new_tokens,
                                     wup_batch_sizes)],
                    key=operator.itemgetter('batch_size', 'prompt_length')))
-
-    @classmethod
-    def get_warmup_shapes(cls) -> tuple[dict[str, int], ...]:
-        return cls.spyre_warmup_shapes
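For reference, the tuple that set_warmup_shapes() now stores on scheduler_config can be reproduced standalone. A minimal sketch: the build_warmup_shapes wrapper and the example values are illustrative, but the body mirrors the construction above:

import operator

def build_warmup_shapes(wup_prompt_lens, wup_new_tokens, wup_batch_sizes):
    # One dict per warmup configuration, sorted by batch size and then
    # prompt length, matching the tuple stored on scheduler_config.
    return tuple(
        sorted([{
            'prompt_length': pl,
            'new_tokens': nt,
            'batch_size': bs
        } for pl, nt, bs in zip(wup_prompt_lens, wup_new_tokens,
                                wup_batch_sizes)],
               key=operator.itemgetter('batch_size', 'prompt_length')))

# Example (values illustrative):
# build_warmup_shapes([64, 128], [20, 20], [4, 1]) returns
# ({'prompt_length': 128, 'new_tokens': 20, 'batch_size': 1},
#  {'prompt_length': 64, 'new_tokens': 20, 'batch_size': 4})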

vllm_spyre/worker/spyre_model_runner.py (1 addition, 1 deletion)

@@ -126,7 +126,7 @@ def _prepare_prompt(
         input_token_list: List[torch.Tensor] = []

         # find warmup shape to be used for padding and batching
-        spyre_warmup_shapes = current_platform.get_warmup_shapes()
+        spyre_warmup_shapes = self.scheduler_config.spyre_warmup_shapes
         applicable_spyre_warmup_shapes = [
             shape for shape in spyre_warmup_shapes
             if len(seq_group_metadata_list) <= shape['batch_size']
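The comprehension above narrows the warmup shapes to those whose batch size can hold the incoming sequence groups. A hedged sketch of that selection as a standalone helper: only the batch-size filter appears in this hunk, so the prompt-length check and the pick-the-smallest step are assumptions about the surrounding code:

def pick_applicable_shape(warmup_shapes, num_seqs, max_prompt_len):
    # Hypothetical helper: keep shapes large enough for the batch (and,
    # assumed here, for the longest prompt), then take the first match;
    # the tuple is pre-sorted by batch_size, then prompt_length.
    applicable = [
        s for s in warmup_shapes
        if num_seqs <= s['batch_size']
        and max_prompt_len <= s['prompt_length']  # assumed extra check
    ]
    return applicable[0] if applicable else None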

vllm_spyre/worker/spyre_worker.py (1 addition, 1 deletion)

@@ -146,7 +146,7 @@ def load_model(self):
         # for all requested model warmups
         # printing env variables for debugging purposes
         load_model_start_t = time.time()
-        spyre_warmup_shapes = current_platform.get_warmup_shapes()
+        spyre_warmup_shapes = self.vllm_config.scheduler_config.spyre_warmup_shapes
         wup_prompt_lens, wup_new_tokens = zip(*[(s["prompt_length"],
                                                  s["new_tokens"])
                                                 for s in spyre_warmup_shapes])
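The zip(*...) idiom above un-pairs the shape dicts into two parallel tuples for the warmup loop; for example, with an illustrative two-shape tuple:

shapes = ({'prompt_length': 128, 'new_tokens': 20, 'batch_size': 1},
          {'prompt_length': 64, 'new_tokens': 20, 'batch_size': 4})
wup_prompt_lens, wup_new_tokens = zip(*[(s["prompt_length"], s["new_tokens"])
                                        for s in shapes])
# wup_prompt_lens == (128, 64); wup_new_tokens == (20, 20)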
