13
13
from vllm .model_executor .layers .quantization .base_config import (
14
14
QuantizationConfig , QuantizeMethodBase )
15
15
from vllm .model_executor .utils import set_weight_attrs
16
+ from vllm .utils import is_torch_equal_or_newer
16
17
17
18
# Module-level logger (vLLM convention: one logger per module, keyed by name).
logger = init_logger(__name__)
18
19
@@ -22,13 +23,18 @@ class TorchAOConfig(QuantizationConfig):
22
23
23
24
def __init__(self, torchao_config) -> None:
    """Store the TorchAO config and tune torch.compile environment knobs.

    TorchAO quantization relies on tensor subclasses. To enable proper
    compile caching this needs standalone compile (available from
    torch 2.8.0); on torch 2.7.x the compile cache must be disabled
    instead.

    Args:
        torchao_config: the torchao quantization configuration object
            to apply (opaque here; consumed by torchao downstream).
    """
    self.torchao_config = torchao_config

    # Evaluate the 2.8 check once; both branches below depend on it.
    at_least_2_8 = is_torch_equal_or_newer("2.8.0")

    if at_least_2_8:
        # Standalone compile makes caching work with tensor subclasses.
        os.environ["VLLM_TEST_STANDALONE_COMPILE"] = "1"
        logger.info(
            "Using TorchAO: Setting VLLM_TEST_STANDALONE_COMPILE=1")

    # TODO: remove after the torch dependency is updated to 2.8
    if is_torch_equal_or_newer("2.7.0") and not at_least_2_8:
        os.environ["VLLM_DISABLE_COMPILE_CACHE"] = "1"
        logger.info("Using TorchAO: Setting VLLM_DISABLE_COMPILE_CACHE=1")
32
38
33
39
def __repr__ (self ) -> str :
34
40
return f"TorchAOConfig({ self .torchao_config } )"
0 commit comments