🔧 Update default simple compile backend (#506)

joerunde · web-flow · commit 0ae7872df9e7 · 2025-10-06T14:28:29.000-06:00
# Description

Updates the default simple compile backend to `inductor` to match vLLM.

This could be considered slightly risky because it updates default
behavior, but we do want to run with `inductor` where possible. This was
previously set to `eager` because we had issues linking g++ correctly
and needed a fast workaround.

Impact here should be minimal. If users run into problems in
environments that don't have python headers and a valid compiler
toolchain, they can run with `VLLM_SPYRE_SIMPLE_COMPILE_BACKEND=eager`.

Signed-off-by: Joe Runde <joe@joerun.de>
diff --git a/vllm_spyre/envs.py b/vllm_spyre/envs.py
@@ -23,7 +23,7 @@
     VLLM_SPYRE_WORKER_LOG_REDIRECT_DIR: str = ""
     VLLM_SPYRE_GLOO_TIMEOUT_MINUTES: int = 60
     VLLM_SPYRE_REQUIRE_PRECOMPILED_DECODERS: bool = False
-    VLLM_SPYRE_SIMPLE_COMPILE_BACKEND: str = "eager"
+    VLLM_SPYRE_SIMPLE_COMPILE_BACKEND: str = "inductor"
     VLLM_SPYRE_NUM_CPUS: int = 0
 
 logger = init_logger(__name__)
@@ -172,7 +172,7 @@ def _backend_backwards_compat() -> str:
     # Defaults to inductor, which needs python headers and a compiler;
     # set to eager in environments where those are not available.
     "VLLM_SPYRE_SIMPLE_COMPILE_BACKEND":
-    lambda: os.getenv("VLLM_SPYRE_SIMPLE_COMPILE_BACKEND", "eager"),
+    lambda: os.getenv("VLLM_SPYRE_SIMPLE_COMPILE_BACKEND", "inductor"),
 
     # Configures the number of CPUs used when determining multi-threading
     # configurations