|
2 | 2 | import json |
3 | 3 | import os |
4 | 4 | import platform |
| 5 | +import signal |
5 | 6 | import time |
6 | 7 | from typing import Optional, Union, cast |
7 | 8 |
|
@@ -406,6 +407,8 @@ def _warmup_spyre_dynamic_size(self, special_token_ids): |
406 | 407 | logger.info("Warmup finished.") |
407 | 408 | logger.info("Warmup took %.3fs", warmup_total_t) |
408 | 409 |
|
| 410 | + maybe_override_signals_handler() |
| 411 | + |
409 | 412 | def _warmup_spyre_fixed_size(self, prompt_len, num_decode_tokens, |
410 | 413 | special_token_ids, batch_size): |
411 | 414 |
|
@@ -524,6 +527,7 @@ def _warmup_spyre_fixed_size(self, prompt_len, num_decode_tokens, |
524 | 527 | logger.info( |
525 | 528 | "Warmup took %.3fs (for prompt length %d and max output tokens %d)", |
526 | 529 | warmup_total_t, prompt_len, num_decode_tokens) |
| 530 | + maybe_override_signals_handler() |
527 | 531 |
|
528 | 532 | def _warmup_model_forward_pass( |
529 | 533 | self, |
@@ -566,3 +570,27 @@ def execute_model( |
566 | 570 | ) -> Optional[ModelRunnerOutput]: |
567 | 571 | output = self.model_runner.execute_model(scheduler_output) |
568 | 572 | return output if self.is_driver_worker else None |
| 573 | + |
| 574 | + |
| 575 | +# Ref: https://github.com/vllm-project/vllm/blob/5fbbfe9a4c13094ad72ed3d6b4ef208a7ddc0fd7/vllm/v1/executor/multiproc_executor.py#L446 # noqa: E501 |
| 576 | +# TODO: review this in the future |
| 577 | +# This setup is a workaround to suppress logs that are dumped when the |
| 578 | +# engine shuts down (V1 only) and vLLM runs with multiprocessing. The |
| 579 | +# undesired behavior happens because g3log from the Spyre runtime overrides |
| 580 | +# vLLM's signal handler when it starts a process for the engine code. |
| 581 | +# Therefore, the engine does not get a chance to shut down gracefully. |
def maybe_override_signals_handler():
    """Install SIGTERM/SIGINT handlers that terminate via ``SystemExit``.

    Does nothing unless ``envs.VLLM_USE_V1``,
    ``envs.VLLM_ENABLE_V1_MULTIPROCESSING`` and
    ``envs_spyre.VLLM_SPYRE_OVERRIDE_SIGNALS_HANDLER`` are all truthy.

    The installed handler raises ``SystemExit`` for the first signal it
    receives; any further signal delivered to the same handler is ignored.
    """
    override_enabled = (envs.VLLM_USE_V1
                        and envs.VLLM_ENABLE_V1_MULTIPROCESSING
                        and envs_spyre.VLLM_SPYRE_OVERRIDE_SIGNALS_HANDLER)
    if not override_enabled:
        return

    # Remembers whether this handler has already triggered an exit, so that
    # repeated signals do not raise SystemExit a second time.
    exit_already_raised = False

    def _exit_on_first_signal(signum, frame):
        nonlocal exit_already_raised
        if exit_already_raised:
            return
        exit_already_raised = True
        raise SystemExit()

    # Both termination signals share the same handler (and the same flag).
    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, _exit_on_first_signal)
0 commit comments