
Commit 6f02c91

horheynm, dsikka, and rahul-tuli committed
[Test Fix] Fix Consecutive oneshot (#971)
~~Contingent on merge of huggingface/transformers#34719~~ (since merged and released)

Blocked on neuralmagic/compressed-tensors#237

SUMMARY:
* In multiple-optimization tests, automatically decompress the model if it is provided as an optimized model
* Fix recipe stage length
* Revive old code
* When running multiple optimizations (e.g. oneshot then finetune, or oneshot then oneshot), the recipe needs to be added to the session using `initialize_recipe`; see the sketch below and the example at https://github.com/vllm-project/llm-compressor/pull/971/files#diff-c9ae8b3ad24d13abeea5b649a5fd6d0b0925f5c9cc40220cbfbe21ae81242f8dR63-R65

TEST PLAN:
Ran the test using transformers main.
Must pass tests/llmcompressor/transformers/obcq/test_consecutive_runs.py

---------

Co-authored-by: Dipika Sikka <[email protected]>
Co-authored-by: Rahul Tuli <[email protected]>
Signed-off-by: Rahul Tuli <[email protected]>
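A minimal sketch of the consecutive-run flow this change enables. The model stub, dataset name, and recipe files below are placeholders, not part of this commit; `oneshot`, `initialize_recipe`, `infer_recipe_from_model_path`, and `CompressedTensorsConfig` are the APIs this diff touches.

    from transformers import AutoModelForCausalLM
    from transformers.utils.quantization_config import CompressedTensorsConfig

    from llmcompressor.core import active_session
    from llmcompressor.pytorch.model_load.helpers import initialize_recipe
    from llmcompressor.transformers import oneshot
    from llmcompressor.transformers.utils.helpers import infer_recipe_from_model_path

    # First pass: sparsify and save (model stub, dataset, and recipes are placeholders).
    oneshot(
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        dataset="open_platypus",
        recipe="first_recipe.yaml",
        output_dir="./first_output",
        clear_sparse_session=False,
    )

    # Reload the optimized checkpoint decompressed so its weights can be modified.
    model = AutoModelForCausalLM.from_pretrained(
        "./first_output",
        quantization_config=CompressedTensorsConfig(run_compressed=False),
    )

    # Re-register the first run's recipe with a fresh session before the next pass.
    active_session().reset()
    recipe = infer_recipe_from_model_path(model_path="./first_output")
    if recipe:
        initialize_recipe(model=model, recipe_path=recipe)

    # Second pass: run another oneshot on the saved model.
    oneshot(
        model="./first_output",
        dataset="open_platypus",
        recipe="second_recipe.yaml",
        output_dir="./second_output",
    )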
1 parent 84899e6 commit 6f02c91

File tree

3 files changed: +146 −11 lines changed


src/llmcompressor/transformers/finetune/text_generation.py

+12 −1

@@ -30,6 +30,7 @@
     PreTrainedModel,
     set_seed,
 )
+from transformers.utils.quantization_config import CompressedTensorsConfig

 from llmcompressor.core import pre_initialize_structure, reset_session
 from llmcompressor.pytorch.model_load.helpers import (
@@ -52,7 +53,10 @@
 from llmcompressor.transformers.sparsification.sparse_model import (
     get_shared_processor_src,
 )
-from llmcompressor.transformers.utils.helpers import detect_last_checkpoint
+from llmcompressor.transformers.utils.helpers import (
+    detect_last_checkpoint,
+    is_model_ct_quantized_from_path,
+)
 from llmcompressor.typing import Processor
 from llmcompressor.utils.fsdp.helpers import is_fsdp_model

@@ -224,6 +228,13 @@ def initialize_model_from_path(
         "trust_remote_code": model_args.trust_remote_code_model,
     }
     # this calls from_pretrained under the hood so should be FSDP safe
+
+    # optimized models must be decompressed to carry out oneshot/train/etc
+    if is_model_ct_quantized_from_path(model_path):
+        model_kwargs["quantization_config"] = CompressedTensorsConfig(
+            run_compressed=False
+        )
+
     model = AutoModelForCausalLM.from_pretrained(
         model_path,
         **model_kwargs,
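For reference, a standalone sketch of the guard added above, assuming a hypothetical checkpoint path; `is_model_ct_quantized_from_path` is the helper introduced in this commit.

    from transformers import AutoModelForCausalLM
    from transformers.utils.quantization_config import CompressedTensorsConfig

    from llmcompressor.transformers.utils.helpers import is_model_ct_quantized_from_path

    model_path = "./my_checkpoint"  # hypothetical local path or HF stub

    model_kwargs = {}
    # Only request decompression when the checkpoint is compressed-tensors
    # quantized; dense checkpoints load unchanged.
    if is_model_ct_quantized_from_path(model_path):
        model_kwargs["quantization_config"] = CompressedTensorsConfig(run_compressed=False)

    model = AutoModelForCausalLM.from_pretrained(model_path, **model_kwargs)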

src/llmcompressor/transformers/utils/helpers.py

+104 −1

@@ -4,9 +4,13 @@
 """

 import os
-from typing import TYPE_CHECKING, Optional
+from pathlib import Path
+from typing import TYPE_CHECKING, Optional, Union

+import requests
+from huggingface_hub import HUGGINGFACE_CO_URL_HOME, hf_hub_download
 from loguru import logger
+from transformers import AutoConfig
 from transformers.trainer_utils import get_last_checkpoint

 if TYPE_CHECKING:
@@ -15,6 +19,7 @@
 __all__ = [
     "RECIPE_FILE_NAME",
     "detect_last_checkpoint",
+    "is_model_ct_quantized_from_path",
 ]

 RECIPE_FILE_NAME = "recipe.yaml"
@@ -54,3 +59,101 @@ def detect_last_checkpoint(
     )

     return last_checkpoint
+
+
+def is_model_ct_quantized_from_path(path: str) -> bool:
+    """
+    Determine if the model at the given path is compressed-tensors
+    quantized, based on its config.
+
+    :param path: path to the model or HF stub
+    :return: True if the config contains a compressed-tensors
+        quantization_config
+    """
+    config = AutoConfig.from_pretrained(path)
+    if config is not None:
+        if (
+            hasattr(config, "quantization_config")
+            and config.quantization_config["quant_method"] == "compressed-tensors"
+        ):
+            return True
+    return False
+
+
+def infer_recipe_from_model_path(model_path: Union[str, Path]) -> Optional[str]:
+    """
+    Infer the recipe from the model_path.
+
+    :param model_path: The path to the model to load. It can be one of the
+        following:
+        - a path to the model directory
+        - a path to the model file
+        - a Hugging Face model ID
+    :return: The path to the recipe file if found, None otherwise.
+    """
+    model_path = model_path.as_posix() if isinstance(model_path, Path) else model_path
+
+    if os.path.isdir(model_path) or os.path.isfile(model_path):
+        # Model path is a local path to the model directory or file
+        model_path = (
+            os.path.dirname(model_path) if os.path.isfile(model_path) else model_path
+        )
+        recipe = os.path.join(model_path, RECIPE_FILE_NAME)
+
+        if os.path.isfile(recipe):
+            logger.info(f"Found recipe in the model_path: {recipe}")
+            return recipe
+        logger.debug(f"No recipe found in the model_path: {model_path}")
+        return None
+
+    # Otherwise, treat the model path as a Hugging Face model ID
+    recipe = recipe_from_huggingface_model_id(hf_stub=model_path)
+
+    if recipe is None:
+        logger.info("Failed to infer the recipe from the model_path")
+
+    return recipe
+
+
+def recipe_from_huggingface_model_id(
+    hf_stub: str, recipe_file_name: str = RECIPE_FILE_NAME
+) -> Optional[str]:
+    """
+    Attempts to download the recipe file from the Hugging Face model ID.
+
+    :param hf_stub: Assumed to be the Hugging Face model ID.
+    :param recipe_file_name: The name of the recipe file to download.
+        Defaults to RECIPE_FILE_NAME.
+    :return: The path to the recipe file if found, None otherwise.
+    """
+    model_id_url = os.path.join(HUGGINGFACE_CO_URL_HOME, hf_stub)
+    request = requests.head(model_id_url)
+
+    if request.status_code != 200:
+        logger.debug(
+            "hf_stub is not a valid Hugging Face model ID. "
+            "Skipping recipe resolution."
+        )
+        return None
+
+    try:
+        logger.info(
+            f"Attempting to download recipe {recipe_file_name} "
+            f"for {hf_stub} from {HUGGINGFACE_CO_URL_HOME}"
+        )
+        recipe = hf_hub_download(repo_id=hf_stub, filename=recipe_file_name)
+        logger.info(f"Found recipe: {recipe_file_name} for model ID: {hf_stub}.")
+    except Exception as e:
+        logger.error(
+            f"Unable to find recipe {recipe_file_name} "
+            f"for model ID: {hf_stub}: {e}. "
+            "Skipping recipe resolution."
+        )
+        recipe = None
+
+    return recipe
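A short usage sketch of the new helpers, assuming hypothetical paths and model IDs:

    from llmcompressor.transformers.utils.helpers import (
        infer_recipe_from_model_path,
        is_model_ct_quantized_from_path,
    )

    # Local directory: returns <dir>/recipe.yaml if that file exists, else None.
    local_recipe = infer_recipe_from_model_path("./first_output")  # hypothetical path

    # Hugging Face model ID: HEAD-checks the hub URL, then tries hf_hub_download
    # for recipe.yaml; returns the downloaded path or None.
    hub_recipe = infer_recipe_from_model_path("org/model-id")  # hypothetical stub

    # Config-based check used by initialize_model_from_path above.
    print(is_model_ct_quantized_from_path("org/model-id"))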

tests/llmcompressor/transformers/obcq/test_consecutive_runs.py

+30 −9

@@ -5,7 +5,10 @@
 import pytest
 import yaml
 from parameterized import parameterized_class
+from transformers import AutoModelForCausalLM
+from transformers.utils.quantization_config import CompressedTensorsConfig

+from llmcompressor.transformers.utils.helpers import infer_recipe_from_model_path
 from tests.testing_utils import parse_params, requires_gpu

 CONFIGS_DIRECTORY = "tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs"
@@ -15,13 +18,15 @@


 class TestConsecutiveRuns(unittest.TestCase):
+    quantization_config = CompressedTensorsConfig(run_compressed=False)
+
     def _test_consecutive_runs(
         self, tolerance: float, num_calibration_samples: int = 16
     ):
         import math

         from llmcompressor.core import active_session
-        from llmcompressor.pytorch.model_load.helpers import get_session_model
+        from llmcompressor.pytorch.model_load.helpers import initialize_recipe
         from llmcompressor.pytorch.utils.helpers import tensor_sparsity
         from llmcompressor.transformers import oneshot
         from llmcompressor.utils.pytorch import qat_active
@@ -36,19 +41,29 @@ def _test_consecutive_runs(
             oneshot_device=self.device,
             clear_sparse_session=False,
         )
-        first_tiny_model = get_session_model()
+
+        first_model = AutoModelForCausalLM.from_pretrained(
+            self.output_first,
+            device_map="auto",
+            quantization_config=self.quantization_config,
+        )
+
         layer_0_sparse = tensor_sparsity(
-            first_tiny_model.model.layers[0].self_attn.k_proj.weight
+            first_model.model.layers[0].self_attn.k_proj.weight
         )
         assert math.isclose(layer_0_sparse.item(), 0.5, rel_tol=tolerance)
-        assert qat_active(first_tiny_model)
+        assert qat_active(first_model)

         session = active_session()
         session_recipe = session.lifecycle.recipe_container.compiled_recipe
         stages = [stage.group for stage in session_recipe.stages]
         self.assertEqual(len(stages), 1)
         session.reset()

+        recipe = infer_recipe_from_model_path(model_path=self.output_first)
+        if recipe:
+            initialize_recipe(model=first_model, recipe_path=recipe)
+
         # reload saved model and up sparsity to 0.7
         oneshot(
             model=self.output_first,
@@ -57,15 +72,19 @@ def _test_consecutive_runs(
             recipe=self.second_recipe,
             output_dir=self.output_second,
             oneshot_device=self.device,
-            clear_sparse_session=False,
         )

-        second_tiny_model = get_session_model()
+        second_model = AutoModelForCausalLM.from_pretrained(
+            self.output_second,
+            device_map="auto",
+            quantization_config=self.quantization_config,
+        )
+
         layer_0_sparse = tensor_sparsity(
-            second_tiny_model.model.layers[0].self_attn.k_proj.weight
+            second_model.model.layers[0].self_attn.k_proj.weight
         )
         assert math.isclose(layer_0_sparse.item(), 0.7, rel_tol=tolerance)
-        assert qat_active(second_tiny_model)
+        assert qat_active(second_model)

         session = active_session()
         session_recipe = session.lifecycle.recipe_container.compiled_recipe
@@ -119,7 +138,9 @@ def setUp(self):
         from transformers import AutoModelForCausalLM

         self.model = AutoModelForCausalLM.from_pretrained(
-            self.model, device_map=self.device
+            self.model,
+            device_map=self.device,
+            quantization_config=self.quantization_config,
         )

         self.output = "./oneshot_output"
