
Commit 4cc5ace

better shapes
Signed-off-by: Kyle Sayers <[email protected]>
1 parent bfa1cd1 commit 4cc5ace

File tree

2 files changed: +64, −7 lines

src/compressed_tensors/quantization/lifecycle/initialize.py
src/compressed_tensors/utils/helpers.py


src/compressed_tensors/quantization/lifecycle/initialize.py

Lines changed: 13 additions & 6 deletions
@@ -41,10 +41,11 @@
     disable_hf_hook,
     get_execution_device,
     get_head_dim,
+    get_num_attn_heads,
+    get_num_kv_heads,
     register_offload_parameter,
 )
 from torch.nn import Module, Parameter
-from transformers import PretrainedConfig


 __all__ = [
@@ -292,17 +293,23 @@ def initialize_attn_qparams(

     _validate_attention_scheme(scheme)

-    config: PretrainedConfig = getattr(kv_cache, "config")
+    # extract shapes from config
+    config = kv_cache.config
+    num_attn_heads = get_num_attn_heads(config)
+    num_kv_heads = get_num_kv_heads(config)
     head_dim = get_head_dim(config)
-    observed_shape = (head_dim,)  # (batch_size, num_attention_heads, slen, head_dim)
+
+    # (batch_size, num_heads, slen, head_dim)
+    q_observed_shape = (num_attn_heads, None, head_dim)
+    kv_observed_shape = (num_kv_heads, None, head_dim)
     observed_dtype = next(module.parameters()).dtype

     if impl is not None:
         initialize_qparams(
             module,
             "q",
             scheme.input_activations,
-            observed_shape=observed_shape,
+            observed_shape=q_observed_shape,
             observed_dtype=observed_dtype,
             force_zero_point=force_zero_point,
         )
@@ -312,15 +319,15 @@ def initialize_attn_qparams(
             module,
             "k",
             scheme.input_activations,
-            observed_shape=observed_shape,
+            observed_shape=kv_observed_shape,
             observed_dtype=observed_dtype,
             force_zero_point=force_zero_point,
         )
         initialize_qparams(
             module,
             "v",
             scheme.input_activations,
-            observed_shape=observed_shape,
+            observed_shape=kv_observed_shape,
             observed_dtype=observed_dtype,
             force_zero_point=force_zero_point,
         )
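
To make the shape change concrete, here is a small illustrative sketch. The config values below are hypothetical (a Llama-style grouped-query-attention model) and are not taken from this commit; they only show how the new per-projection observed shapes relate to the attention tensors the observers see.

# Illustrative only: hypothetical GQA-style config values, not from this commit.
hidden_size = 4096
num_attention_heads = 32   # query heads
num_key_value_heads = 8    # grouped-query attention uses fewer key/value heads
head_dim = hidden_size // num_attention_heads  # 128

# Attention states have shape (batch_size, num_heads, seq_len, head_dim), so the
# query observer sees 32 heads while the key/value observers see only 8.
q_observed_shape = (num_attention_heads, None, head_dim)    # (32, None, 128)
kv_observed_shape = (num_key_value_heads, None, head_dim)   # (8, None, 128)

# The previous single shape, (head_dim,), could not express this difference.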

src/compressed_tensors/utils/helpers.py

Lines changed: 51 additions & 1 deletion
@@ -45,6 +45,8 @@
     "unpack_bitmasks",
     "patch_attr",
     "ParameterizedDefaultDict",
+    "get_num_attn_heads",
+    "get_num_kv_heads",
     "get_head_dim",
 ]

@@ -399,12 +401,60 @@ def get(self, *args, factory_kwargs: Mapping = MappingProxyType({})) -> Any:
         return self[args]


+def get_num_attn_heads(config: PretrainedConfig) -> int:
+    """
+    Get the number of attention heads used by a model
+
+    :param config: model config
+    :return: num_attention_heads of model
+    """
+    if hasattr(config, "num_attention_heads"):
+        return config.num_attention_heads
+
+    elif hasattr(config, "hidden_size") and hasattr(config, "head_dim"):
+        return config.hidden_size // config.head_dim
+
+    else:
+        raise ValueError(
+            "Cannot determine num_attention_heads from config. Config must define "
+            "either `num_attention_heads` or both `hidden_size` and `head_dim`. "
+            f"{config}"
+        )
+
+
+def get_num_kv_heads(config: PretrainedConfig) -> int:
+    """
+    Get the number of key-value attention heads used by a model
+
+    :param config: model config
+    :return: num_key_value_heads of model
+    """
+    if hasattr(config, "num_key_value_heads"):
+        return config.num_key_value_heads
+
+    else:
+        raise ValueError(
+            "Cannot determine num_key_value_heads from config. Config must define "
+            f"`num_key_value_heads`. {config}"
+        )
+
+
 def get_head_dim(config: PretrainedConfig) -> int:
+    """
+    Get the number of dimensions used by the attention heads of a model
+
+    :param config: model config
+    :return: head_dim of model
+    """
     if hasattr(config, "head_dim"):
         return config.head_dim

     elif hasattr(config, "hidden_size") and hasattr(config, "num_attention_heads"):
         return config.hidden_size // config.num_attention_heads

     else:
-        raise ValueError()
+        raise ValueError(
+            "Cannot determine head_dim from config. Config must define "
+            "either `head_dim` or both `hidden_size` and `num_attention_heads`. "
+            f"{config}"
+        )
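
A rough usage sketch for the new helpers follows. The config object below is a stand-in with illustrative Llama-like values; since the helpers only rely on attribute access, a SimpleNamespace is enough here, though in practice a transformers PretrainedConfig would be passed.

from types import SimpleNamespace

from compressed_tensors.utils.helpers import (
    get_head_dim,
    get_num_attn_heads,
    get_num_kv_heads,
)

# Stand-in for a transformers PretrainedConfig; attribute values are illustrative.
config = SimpleNamespace(
    hidden_size=4096,
    num_attention_heads=32,
    num_key_value_heads=8,
)

assert get_num_attn_heads(config) == 32
assert get_num_kv_heads(config) == 8
# head_dim is absent, so get_head_dim falls back to hidden_size // num_attention_heads
assert get_head_dim(config) == 128

# A config missing the required attributes now raises a ValueError that names
# them, rather than the bare ValueError() previously raised by get_head_dim.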
