
Commit 8e07445

up
1 parent 4af534b commit 8e07445

4 files changed: +239 -48 lines changed


src/diffusers/loaders/single_file_model.py

Lines changed: 32 additions & 15 deletions
@@ -42,6 +42,7 @@
     convert_ltx_vae_checkpoint_to_diffusers,
     convert_lumina2_to_diffusers,
     convert_mochi_transformer_checkpoint_to_diffusers,
+    convert_nunchaku_flux_to_diffusers,
     convert_sana_transformer_to_diffusers,
     convert_sd3_transformer_checkpoint_to_diffusers,
     convert_stable_cascade_unet_single_file_to_diffusers,
@@ -341,18 +342,6 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
             user_agent=user_agent,
         )
         if quantization_config is not None:
-            # For `nunchaku` checkpoints, we might want to determine the `modules_to_not_convert`.
-            original_modules_to_not_convert = quantization_config.modules_to_not_convert or []
-            determined_modules_to_not_convert = _maybe_determine_modules_to_not_convert(
-                quantization_config, checkpoint
-            )
-            if determined_modules_to_not_convert:
-                original_modules_to_not_convert.extend(determined_modules_to_not_convert)
-                original_modules_to_not_convert = list(set(original_modules_to_not_convert))
-                logger.info(
-                    f"`modules_to_not_convert` in the quantization_config was updated from {quantization_config.modules_to_not_convert} to {original_modules_to_not_convert}."
-                )
-                quantization_config.modules_to_not_convert = original_modules_to_not_convert
             hf_quantizer = DiffusersAutoQuantizer.from_config(quantization_config)
             hf_quantizer.validate_environment()
             torch_dtype = hf_quantizer.update_torch_dtype(torch_dtype)
@@ -433,9 +422,14 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
             model = cls.from_config(diffusers_model_config)

         checkpoint_mapping_kwargs = _get_mapping_function_kwargs(checkpoint_mapping_fn, **kwargs)
-        if not (
-            quantization_config is not None and quantization_config.quant_method == "nunchaku"
-        ) and _should_convert_state_dict_to_diffusers(model.state_dict(), checkpoint):
+        model_state_dict = model.state_dict()
+        # TODO: Only flux nunchaku checkpoint for now. Unify with how checkpoint mappers are done.
+        # For `nunchaku` checkpoints, we might want to determine the `modules_to_not_convert`.
+        if quantization_config is not None and quantization_config.quant_method == "nunchaku":
+            diffusers_format_checkpoint = convert_nunchaku_flux_to_diffusers(
+                checkpoint, model_state_dict=model_state_dict
+            )
+        elif _should_convert_state_dict_to_diffusers(model_state_dict, checkpoint):
             diffusers_format_checkpoint = checkpoint_mapping_fn(
                 config=diffusers_model_config, checkpoint=checkpoint, **checkpoint_mapping_kwargs
             )
@@ -446,6 +440,23 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
             raise SingleFileComponentError(
                 f"Failed to load {mapping_class_name}. Weights for this component appear to be missing in the checkpoint."
             )
+
+        # This step is better off here than above because `diffusers_format_checkpoint` holds the keys we expect.
+        if quantization_config is not None:
+            original_modules_to_not_convert = quantization_config.modules_to_not_convert or []
+            determined_modules_to_not_convert = _maybe_determine_modules_to_not_convert(
+                quantization_config, checkpoint
+            )
+            if determined_modules_to_not_convert:
+                determined_modules_to_not_convert.extend(original_modules_to_not_convert)
+                determined_modules_to_not_convert = list(set(determined_modules_to_not_convert))
+                logger.info(
+                    f"`modules_to_not_convert` in the quantization_config was updated from {quantization_config.modules_to_not_convert} to {determined_modules_to_not_convert}."
+                )
+            quantization_config.modules_to_not_convert = original_modules_to_not_convert
+            # Update the `quant_config`.
+            hf_quantizer.quantization_config = quantization_config
+
         # Check if `_keep_in_fp32_modules` is not None
         use_keep_in_fp32_modules = (cls._keep_in_fp32_modules is not None) and (
             (torch_dtype == torch.float16) or hasattr(hf_quantizer, "use_keep_in_fp32_modules")
@@ -473,6 +484,12 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
             unexpected_keys = [
                 param_name for param_name in diffusers_format_checkpoint if param_name not in empty_state_dict
             ]
+            for k in unexpected_keys:
+                if "single_transformer_blocks.0" in k:
+                    print(f"Unexpected {k=}")
+            for k in empty_state_dict:
+                if "single_transformer_blocks.0" in k:
+                    print(f"model {k=}")
             device_map = {"": param_device}
             load_model_dict_into_meta(
                 model,
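
The hunks above move the `modules_to_not_convert` bookkeeping to after checkpoint conversion (per the in-diff comment, so that `diffusers_format_checkpoint` holds the expected keys) and route nunchaku checkpoints through `convert_nunchaku_flux_to_diffusers` instead of the generic `checkpoint_mapping_fn`. As a rough, self-contained sketch of the merge the relocated block performs (the function name and logger wiring below are ours, not part of the commit):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def merge_modules_to_not_convert(original, determined):
    # Mirror the relocated block: user-provided entries are kept, checkpoint-derived
    # entries are folded in, and duplicates are dropped via a set round-trip
    # (so the resulting order is unspecified).
    original = original or []
    if not determined:
        return original
    merged = list(set(determined + original))
    logger.info("`modules_to_not_convert` updated from %s to %s", original, merged)
    return merged


# e.g. the user excludes `proj_out`, and checkpoint inspection additionally flags the embedders
print(merge_modules_to_not_convert(["proj_out"], ["x_embedder", "context_embedder"]))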

src/diffusers/loaders/single_file_utils.py

Lines changed: 95 additions & 0 deletions
@@ -2189,6 +2189,101 @@ def convert_animatediff_checkpoint_to_diffusers(checkpoint, **kwargs):
     return converted_state_dict


+# Adapted from https://github.com/nunchaku-tech/nunchaku/blob/3ec299f439f9986a69ded320798cab4e258c871d/nunchaku/models/transformers/transformer_flux_v2.py#L395
+def convert_nunchaku_flux_to_diffusers(checkpoint, **kwargs):
+    from .single_file_utils_nunchaku import _unpack_qkv_state_dict
+
+    _SMOOTH_ORIG_RE = re.compile(r"\.smooth_orig(\.|$)")
+    _SMOOTH_RE = re.compile(r"\.smooth(\.|$)")
+
+    new_state_dict = {}
+    model_state_dict = kwargs["model_state_dict"]
+
+    ckpt_keys = list(checkpoint.keys())
+    for k in ckpt_keys:
+        if "qweight" in k:
+            # only the shape information of this tensor is needed
+            v = checkpoint[k]
+            # if the tensor has qweight, but does not have low-rank branch, we need to add some artificial tensors
+            for t in ["lora_up", "lora_down"]:
+                new_k = k.replace(".qweight", f".{t}")
+                if new_k not in ckpt_keys:
+                    oc, ic = v.shape
+                    ic = ic * 2  # v is packed into INT8, so we need to double the size
+                    checkpoint[k.replace(".qweight", f".{t}")] = torch.zeros(
+                        (0, ic) if t == "lora_down" else (oc, 0), device=v.device, dtype=torch.bfloat16
+                    )
+
+    for k, v in checkpoint.items():
+        new_k = k  # start with original, then apply independent replacements
+
+        if k.startswith("single_transformer_blocks."):
+            # attention / qkv / norms
+            new_k = new_k.replace(".qkv_proj.", ".attn.to_qkv.")
+            new_k = new_k.replace(".out_proj.", ".attn.to_out.")
+            new_k = new_k.replace(".norm_k.", ".attn.norm_k.")
+            new_k = new_k.replace(".norm_q.", ".attn.norm_q.")
+
+            # mlp heads
+            new_k = new_k.replace(".mlp_fc1.", ".proj_mlp.")
+            new_k = new_k.replace(".mlp_fc2.", ".proj_out.")
+
+            # smooth params (use regex to avoid substring collisions)
+            new_k = _SMOOTH_ORIG_RE.sub(r".smooth_factor_orig\1", new_k)
+            new_k = _SMOOTH_RE.sub(r".smooth_factor\1", new_k)
+
+            # lora -> proj
+            new_k = new_k.replace(".lora_down", ".proj_down")
+            new_k = new_k.replace(".lora_up", ".proj_up")
+
+        elif k.startswith("transformer_blocks."):
+            # feed-forward (context & base)
+            new_k = new_k.replace(".mlp_context_fc1.", ".ff_context.net.0.proj.")
+            new_k = new_k.replace(".mlp_context_fc2.", ".ff_context.net.2.")
+            new_k = new_k.replace(".mlp_fc1.", ".ff.net.0.proj.")
+            new_k = new_k.replace(".mlp_fc2.", ".ff.net.2.")
+
+            # attention projections
+            new_k = new_k.replace(".qkv_proj_context.", ".attn.add_qkv_proj.")
+            new_k = new_k.replace(".qkv_proj.", ".attn.to_qkv.")
+            new_k = new_k.replace(".out_proj.", ".attn.to_out.0.")
+            new_k = new_k.replace(".out_proj_context.", ".attn.to_add_out.")
+
+            # norms
+            new_k = new_k.replace(".norm_k.", ".attn.norm_k.")
+            new_k = new_k.replace(".norm_q.", ".attn.norm_q.")
+            new_k = new_k.replace(".norm_added_k.", ".attn.norm_added_k.")
+            new_k = new_k.replace(".norm_added_q.", ".attn.norm_added_q.")
+
+            # smooth params
+            new_k = _SMOOTH_ORIG_RE.sub(r".smooth_factor_orig\1", new_k)
+            new_k = _SMOOTH_RE.sub(r".smooth_factor\1", new_k)
+
+            # lora -> proj
+            new_k = new_k.replace(".lora_down", ".proj_down")
+            new_k = new_k.replace(".lora_up", ".proj_up")
+
+        new_state_dict[new_k] = v
+
+    new_state_dict = _unpack_qkv_state_dict(new_state_dict)
+
+    # some remnant keys need to be patched
+    new_sd_keys = list(new_state_dict.keys())
+    for k in new_sd_keys:
+        if "qweight" in k:
+            no_qweight_k = ".".join(k.split(".qweight")[:-1])
+            for unexpected_k in ["wzeros"]:
+                unexpected_k = no_qweight_k + f".{unexpected_k}"
+                if unexpected_k in new_sd_keys:
+                    _ = new_state_dict.pop(unexpected_k)
+    for k in model_state_dict:
+        if k not in new_state_dict:
+            # CPU device for now
+            new_state_dict[k] = torch.ones_like(model_state_dict[k], device="cpu")
+
+    return new_state_dict
+
+
 def convert_flux_transformer_checkpoint_to_diffusers(checkpoint, **kwargs):
     converted_state_dict = {}
     keys = list(checkpoint.keys())
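
The renaming above is mostly plain `str.replace`, but the `smooth`/`smooth_orig` suffixes are handled with anchored regexes: `\.smooth(\.|$)` only matches `.smooth` when it ends the key or is followed by another dot, so a key that was just rewritten to `.smooth_factor_orig` cannot be matched a second time. A small standalone sketch (the example keys are made up for illustration):

import re

_SMOOTH_ORIG_RE = re.compile(r"\.smooth_orig(\.|$)")
_SMOOTH_RE = re.compile(r"\.smooth(\.|$)")

# Hypothetical nunchaku-style keys; only the suffix handling matters here.
for key in ["transformer_blocks.0.qkv_proj.smooth", "transformer_blocks.0.qkv_proj.smooth_orig"]:
    new_key = _SMOOTH_ORIG_RE.sub(r".smooth_factor_orig\1", key)
    new_key = _SMOOTH_RE.sub(r".smooth_factor\1", new_key)
    print(key, "->", new_key)

# A naive chain of key.replace(".smooth_orig", ...) followed by key.replace(".smooth", ...)
# would rewrite the freshly produced ".smooth_factor_orig" a second time, yielding
# ".smooth_factor_factor_orig"; the (\.|$) anchor is what prevents that.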

src/diffusers/loaders/single_file_utils_nunchaku.py

Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
+import re
+
+import torch
+
+
+_QKV_ANCHORS_NUNCHAKU = ("to_qkv", "add_qkv_proj")
+_ALLOWED_SUFFIXES_NUNCHAKU = {
+    "bias",
+    "lora_down",
+    "lora_up",
+    "qweight",
+    "smooth_factor",
+    "smooth_factor_orig",
+    "wscales",
+}
+
+_QKV_NUNCHAKU_REGEX = re.compile(
+    rf"^(?P<prefix>.*)\.(?:{'|'.join(map(re.escape, _QKV_ANCHORS_NUNCHAKU))})\.(?P<suffix>.+)$"
+)
+
+
+def _pick_split_dim(t: torch.Tensor, suffix: str) -> int:
+    """
+    Choose which dimension to split by 3. Heuristics:
+    - 1D -> dim 0
+    - 2D -> prefer dim=1 for 'qweight' (common layout [*, 3*out_features]),
+      otherwise prefer dim=0 (common layout [3*out_features, *]).
+    - If preferred dim isn't divisible by 3, try the other; else error.
+    """
+    shape = list(t.shape)
+    if len(shape) == 0:
+        raise ValueError("Cannot split a scalar into Q/K/V.")
+
+    if len(shape) == 1:
+        dim = 0
+        if shape[dim] % 3 == 0:
+            return dim
+        raise ValueError(f"1D tensor of length {shape[0]} not divisible by 3.")
+
+    # len(shape) >= 2
+    preferred = 1 if suffix == "qweight" else 0
+    other = 0 if preferred == 1 else 1
+
+    if shape[preferred] % 3 == 0:
+        return preferred
+    if shape[other] % 3 == 0:
+        return other
+
+    # Fall back: any dim divisible by 3
+    for d, s in enumerate(shape):
+        if s % 3 == 0:
+            return d
+
+    raise ValueError(f"None of the dims {shape} are divisible by 3 for suffix '{suffix}'.")
+
+
+def _split_qkv(t: torch.Tensor, dim: int):
+    return torch.tensor_split(t, 3, dim=dim)
+
+
+def _unpack_qkv_state_dict(
+    state_dict: dict, anchors=_QKV_ANCHORS_NUNCHAKU, allowed_suffixes=_ALLOWED_SUFFIXES_NUNCHAKU
+):
+    """
+    Convert fused QKV entries (e.g., '...to_qkv.bias', '...qkv_proj.wscales') into separate Q/K/V entries:
+    '...to_q.bias', '...to_k.bias', '...to_v.bias', '...to_q.wscales', '...to_k.wscales', '...to_v.wscales'.
+    Returns a NEW dict; original is not modified.
+
+    Only keys with suffix in `allowed_suffixes` are processed. Keys with non-divisible-by-3 tensors raise a ValueError.
+    """
+    anchors = tuple(anchors)
+    allowed_suffixes = set(allowed_suffixes)
+
+    new_sd: dict = {}
+    for k, v in state_dict.items():
+        m = _QKV_NUNCHAKU_REGEX.match(k)
+        if m:
+            suffix = m.group("suffix")
+            if suffix not in allowed_suffixes:
+                # keep as-is if it's not one of the targeted suffixes
+                new_sd[k] = v
+                continue
+
+            prefix = m.group("prefix")  # everything before .to_qkv/.qkv_proj
+            # Decide split axis
+            split_dim = _pick_split_dim(v, suffix)
+            q, k_, vv = _split_qkv(v, dim=split_dim)
+
+            # Build new keys
+            base_q = f"{prefix}.to_q.{suffix}"
+            base_k = f"{prefix}.to_k.{suffix}"
+            base_v = f"{prefix}.to_v.{suffix}"
+
+            # Write into result dict
+            new_sd[base_q] = q
+            new_sd[base_k] = k_
+            new_sd[base_v] = vv
+        else:
+            # not a fused qkv key
+            new_sd[k] = v
+
+    return new_sd
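
A quick way to sanity-check the helper above is to feed it a toy fused state dict. The snippet below assumes the new file lands at `src/diffusers/loaders/single_file_utils_nunchaku.py` (deduced from the relative import in `single_file_utils.py`); the tensor shapes and the extra `other_stat` suffix are invented for illustration:

import torch

from diffusers.loaders.single_file_utils_nunchaku import _unpack_qkv_state_dict

fused = {
    # 1D bias: split along dim 0 into three (4,) tensors
    "transformer_blocks.0.attn.to_qkv.bias": torch.arange(12.0),
    # 2D qweight: the heuristic prefers dim 1 (layout [*, 3 * out_features])
    "transformer_blocks.0.attn.to_qkv.qweight": torch.zeros(8, 12, dtype=torch.int8),
    # suffix outside the allow-list: carried over unchanged
    "transformer_blocks.0.attn.to_qkv.other_stat": torch.ones(3),
    # key without a qkv anchor: also carried over unchanged
    "transformer_blocks.0.ff.net.0.proj.bias": torch.zeros(6),
}

for name, tensor in _unpack_qkv_state_dict(fused).items():
    print(name, tuple(tensor.shape))
# to_q/to_k/to_v entries show up with shapes (4,) for the bias and (8, 4) for the
# qweight; the other two keys are passed through untouched.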

src/diffusers/quantizers/nunchaku/nunchaku_quantizer.py

Lines changed: 10 additions & 33 deletions
@@ -2,13 +2,7 @@

 from diffusers.utils.import_utils import is_nunchaku_version

-from ...utils import (
-    get_module_from_name,
-    is_accelerate_available,
-    is_nunchaku_available,
-    is_torch_available,
-    logging,
-)
+from ...utils import get_module_from_name, is_accelerate_available, is_nunchaku_available, is_torch_available, logging
 from ...utils.torch_utils import is_fp8_available
 from ..base import DiffusersQuantizer

@@ -20,15 +14,20 @@
 if is_torch_available():
     import torch

-if is_accelerate_available():
-    pass
-
 if is_nunchaku_available():
     from .utils import replace_with_nunchaku_linear

 logger = logging.get_logger(__name__)


+KEY_MAP = {
+    "lora_down": "proj_down",
+    "lora_up": "proj_up",
+    "smooth_orig": "smooth_factor_orig",
+    "smooth": "smooth_factor",
+}
+
+
 class NunchakuQuantizer(DiffusersQuantizer):
     r"""
     Diffusers Quantizer for Nunchaku (https://github.com/nunchaku-tech/nunchaku)
@@ -98,33 +97,11 @@ def create_quantized_param(
         from nunchaku.models.linear import SVDQW4A4Linear

         module, tensor_name = get_module_from_name(model, param_name)
-        state_dict = args[0]
         if tensor_name not in module._parameters and tensor_name not in module._buffers:
             raise ValueError(f"{module} does not have a parameter or a buffer named {tensor_name}.")

         if isinstance(module, SVDQW4A4Linear):
-            if param_value.ndim == 1:
-                module._parameters[tensor_name] = torch.nn.Parameter(param_value, requires_grad=False).to(
-                    target_device
-                )
-            elif tensor_name == "qweight":
-                module._parameters[tensor_name] = torch.nn.Parameter(param_value, requires_grad=False).to(
-                    target_device
-                )
-                # if the tensor has qweight, but does not have low-rank branch, we need to add some artificial tensors
-                for t in ["lora_up", "lora_down"]:
-                    # need to check at the state dict level for this
-                    new_tensor_name = param_name.replace(".qweight", f".{t}")
-                    if new_tensor_name not in state_dict:
-                        oc, ic = param_value.shape
-                        ic = ic * 2  # v is packed into INT8, so we need to double the size
-                        module._parameters[t] = torch.zeros(
-                            (0, ic) if t == "lora_down" else (oc, 0), device=param_value.device, dtype=torch.bfloat16
-                        )
-            else:
-                module._parameters[tensor_name] = torch.nn.Parameter(param_value, requires_grad=False).to(
-                    target_device
-                )
+            module._parameters[tensor_name] = torch.nn.Parameter(param_value, requires_grad=False).to(target_device)

     def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
         max_memory = {key: val * 0.90 for key, val in max_memory.items()}
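
With the artificial `lora_up`/`lora_down` padding now injected during checkpoint conversion (see `convert_nunchaku_flux_to_diffusers` above), `create_quantized_param` reduces to materializing the parameter on the target device. The new `KEY_MAP` is only declared in this hunk and its consumer is not shown, so the helper below is our guess at the intended use, mapping nunchaku parameter suffixes to their diffusers names:

KEY_MAP = {
    "lora_down": "proj_down",
    "lora_up": "proj_up",
    "smooth_orig": "smooth_factor_orig",
    "smooth": "smooth_factor",
}


def rename_suffix(param_name: str) -> str:
    # Try longer suffixes first so ".smooth_orig" is not shadowed by ".smooth".
    for src, dst in sorted(KEY_MAP.items(), key=lambda kv: len(kv[0]), reverse=True):
        if param_name.endswith(f".{src}"):
            return param_name[: -len(src)] + dst
    return param_name


print(rename_suffix("single_transformer_blocks.0.attn.to_qkv.smooth_orig"))
# -> single_transformer_blocks.0.attn.to_qkv.smooth_factor_orig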

0 commit comments
