update

vllm-project · Mar 10, 2025 · 3084f39 · 3084f39
1 parent 744a311
commit 3084f39
Showing 1 changed file with 9 additions and 9 deletions.
diff --git a/examples/weight_transform.py b/examples/weight_transform.py
@@ -15,7 +15,6 @@
 
 from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import QuantizationModifier
-
 # U(W)V.T
 
 ignore = ["re:.*.mlp.down_proj$", "lm_head"]
@@ -64,32 +63,32 @@
 # It will then apply the second set of args
 # any overlap will be applied in order
 v_scheme = TransformationScheme(
-    transform_type="random-hadamard",
+    transform_type="hadamard",
     groups=[v_linear_args],
     transform_creation_args={"size": 2048},
 )
 
 v_scheme_down_proj = TransformationScheme(
-    transform_type="random-hadamard",
+    transform_type="hadamard",
     groups=[v_down_proj],
     transform_creation_args={"size": 8192},
 )
 
 # We could combine multiple args into the same scheme, but that would make it more difficult to consolidate the order of transforms
 u_scheme_q_o_down_proj = TransformationScheme(
-    transform_type="random-hadamard",
+    transform_type="hadamard",
     groups=[u_q_o_down_proj],
     transform_creation_args={"size": 2048},
 )
 
 u_scheme_gate_up_proj = TransformationScheme(
-    transform_type="random-hadamard",
+    transform_type="hadamard",
     groups=[u_gate_up_proj],
     transform_creation_args={"size": 8192},
 )
 
 u_scheme_k_v_proj = TransformationScheme(
-    transform_type="random-hadamard",
+    transform_type="hadamard",
     groups=[u_k_v_proj],
     transform_creation_args={"size": 512},
 )
@@ -116,13 +115,14 @@
                 symmetric=True,
                 strategy=QuantizationStrategy.GROUP,
                 group_size=128,
+                observer="mse"
             ),
         )
     },
     transforms_config=config,
 )
 
-MODEL_ID = "meta-llama/Llama-3.2-1B"
+MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"
 
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID, device_map="auto", torch_dtype="auto"
@@ -139,9 +139,9 @@
 print("==========================================\n\n")
 
 # Save to disk compressed.
-SAVE_DIR = MODEL_ID.split("/")[1] + "-W4A16-Transforms"
+SAVE_DIR = MODEL_ID.split("/")[1] + "-W4A16-uncompressed-hadamard-random"
 
-model.save_pretrained(SAVE_DIR)
+model.save_pretrained(SAVE_DIR, save_compressed=False)
 tokenizer.save_pretrained(SAVE_DIR)
 
 """