Replace readme paths with urls (#1097)

kylesayrs · web-flow · commit a76563ab99c0 · 2025-01-29T18:12:29.000-05:00
## Purpose ## * Files with the `.md` extension are not listed in the [MANIFEST.in](https://github.com/vllm-project/llm-compressor/blob/main/MANIFEST.in), meaning that they are not included in the LLM Compressor PyPI package. As a result, references to these files are left dangling for users who have installed from the PyPI package. Rather than including `.md` files in the package and also having to ship all of the large image files associated with them, this PR replaces the references with URLs hosted on GitHub * While the GitHub URL paths may change between versions, this solution works in lieu of a dedicated Read the Docs build for each version * This solution also aligns with the practice of other libraries, which point to hosted URLs rather than file paths * Note that this does not apply to references made from files which are themselves `.md` files, as these files will not be included in the PyPI distribution * `src/llmcompressor/transformers/finetune/README.md` * `src/llmcompressor/pipelines/sequential/README.md` ## Changes ## * Replace readme file paths with URLs * Small change to `DisableQuantization` so that quantization is re-enabled even when an exception (such as a tracing exception) is raised inside the context ## Testing ## * N/A --------- Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
diff --git a/src/llmcompressor/modifiers/quantization/gptq/base.py b/src/llmcompressor/modifiers/quantization/gptq/base.py
@@ -247,7 +247,8 @@ def on_initialize(self, state: State, **kwargs) -> bool:
                 warnings.warn(
                     f"Failed to trace {model_name} with inputs {input_names}. For more "
                     "information on tracing with the sequential pipeline, see "
-                    "`src/llmcompressor/transformers/tracing/GUIDE.md`"
+                    "https://github.com/vllm-project/llm-compressor/blob/main/"
+                    "src/llmcompressor/transformers/tracing/GUIDE.md"
                 )
             if isinstance(exception, unfixable_errors):
                 raise exception
diff --git a/src/llmcompressor/modifiers/smoothquant/utils.py b/src/llmcompressor/modifiers/smoothquant/utils.py
@@ -1,5 +1,4 @@
 import functools
-import pathlib
 from collections import namedtuple
 from typing import Dict, List, Tuple, Union
 
@@ -94,7 +93,10 @@ def wrapper(*args, **kwargs):
         try:
             return func(*args, **kwargs)
         except Exception as original_exception:
-            readme_location = pathlib.Path(__file__).parent / "README.md"
+            readme_location = (
+                "https://github.com/vllm-project/llm-compressor/tree/main/"
+                "src/llmcompressor/modifiers/smoothquant"
+            )
             raise RuntimeError(
                 f"Error resolving mappings for given architecture."
                 f"Please refer to the README at {readme_location} for more information."
diff --git a/src/llmcompressor/utils/helpers.py b/src/llmcompressor/utils/helpers.py
@@ -1091,9 +1091,11 @@ def DisableQuantization(model: torch.nn.Module):
     """
     Disable quantization from QuantizationModifier
     """
-    model.apply(disable_quantization)
-    yield
-    model.apply(enable_quantization)
+    try:
+        model.apply(disable_quantization)
+        yield
+    finally:
+        model.apply(enable_quantization)
 
 
 @contextlib.contextmanager
diff --git a/tests/llmcompressor/modifiers/smoothquant/test_utils.py b/tests/llmcompressor/modifiers/smoothquant/test_utils.py
@@ -12,7 +12,10 @@
 
 @pytest.mark.unit
 def test_handle_mapping_resolution_errors():
-    README_LOCATION = "llmcompressor/modifiers/smoothquant/README.md"
+    README_LOCATION = (
+        "https://github.com/vllm-project/llm-compressor/tree/main/"
+        "src/llmcompressor/modifiers/smoothquant"
+    )
 
     @handle_mapping_resolution_errors
     def func_that_raises_exception():