diff --git a/invokeai/app/invocations/flux_lora_loader.py b/invokeai/app/invocations/flux_lora_loader.py index d9e655a5077..f4181032d72 100644 --- a/invokeai/app/invocations/flux_lora_loader.py +++ b/invokeai/app/invocations/flux_lora_loader.py @@ -8,7 +8,7 @@ invocation_output, ) from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType -from invokeai.app.invocations.model import CLIPField, LoRAField, ModelIdentifierField, TransformerField +from invokeai.app.invocations.model import CLIPField, LoRAField, ModelIdentifierField, T5EncoderField, TransformerField from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.model_manager.config import BaseModelType @@ -21,6 +21,9 @@ class FluxLoRALoaderOutput(BaseInvocationOutput): default=None, description=FieldDescriptions.transformer, title="FLUX Transformer" ) clip: Optional[CLIPField] = OutputField(default=None, description=FieldDescriptions.clip, title="CLIP") + t5_encoder: Optional[T5EncoderField] = OutputField( + default=None, description=FieldDescriptions.t5_encoder, title="T5 Encoder" + ) @invocation( @@ -28,7 +31,7 @@ class FluxLoRALoaderOutput(BaseInvocationOutput): title="FLUX LoRA", tags=["lora", "model", "flux"], category="model", - version="1.1.0", + version="1.2.0", classification=Classification.Prototype, ) class FluxLoRALoaderInvocation(BaseInvocation): @@ -50,6 +53,12 @@ class FluxLoRALoaderInvocation(BaseInvocation): description=FieldDescriptions.clip, input=Input.Connection, ) + t5_encoder: T5EncoderField | None = InputField( + default=None, + title="T5 Encoder", + description=FieldDescriptions.t5_encoder, + input=Input.Connection, + ) def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput: lora_key = self.lora.key @@ -62,6 +71,8 @@ def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput: raise ValueError(f'LoRA "{lora_key}" already applied to transformer.') if self.clip and any(lora.lora.key == lora_key for lora in self.clip.loras): raise ValueError(f'LoRA "{lora_key}" already applied to CLIP encoder.') + if self.t5_encoder and any(lora.lora.key == lora_key for lora in self.t5_encoder.loras): + raise ValueError(f'LoRA "{lora_key}" already applied to T5 encoder.') output = FluxLoRALoaderOutput() @@ -82,6 +93,14 @@ def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput: weight=self.weight, ) ) + if self.t5_encoder is not None: + output.t5_encoder = self.t5_encoder.model_copy(deep=True) + output.t5_encoder.loras.append( + LoRAField( + lora=self.lora, + weight=self.weight, + ) + ) return output @@ -91,7 +110,7 @@ def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput: title="FLUX LoRA Collection Loader", tags=["lora", "model", "flux"], category="model", - version="1.1.0", + version="1.2.0", classification=Classification.Prototype, ) class FLUXLoRACollectionLoader(BaseInvocation): @@ -113,6 +132,12 @@ class FLUXLoRACollectionLoader(BaseInvocation): description=FieldDescriptions.clip, input=Input.Connection, ) + t5_encoder: T5EncoderField | None = InputField( + default=None, + title="T5 Encoder", + description=FieldDescriptions.t5_encoder, + input=Input.Connection, + ) def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput: output = FluxLoRALoaderOutput() @@ -140,4 +165,9 @@ def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput: output.clip = self.clip.model_copy(deep=True) output.clip.loras.append(lora) + if self.t5_encoder is not None: + if output.t5_encoder is 
None: + output.t5_encoder = self.t5_encoder.model_copy(deep=True) + output.t5_encoder.loras.append(lora) + return output diff --git a/invokeai/app/invocations/flux_model_loader.py b/invokeai/app/invocations/flux_model_loader.py index 884b01a9805..ae1f4bc2b92 100644 --- a/invokeai/app/invocations/flux_model_loader.py +++ b/invokeai/app/invocations/flux_model_loader.py @@ -40,7 +40,7 @@ class FluxModelLoaderOutput(BaseInvocationOutput): title="Flux Main Model", tags=["model", "flux"], category="model", - version="1.0.4", + version="1.0.5", classification=Classification.Prototype, ) class FluxModelLoaderInvocation(BaseInvocation): @@ -87,7 +87,7 @@ def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: return FluxModelLoaderOutput( transformer=TransformerField(transformer=transformer, loras=[]), clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0), - t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder), + t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder, loras=[]), vae=VAEField(vae=vae), max_seq_len=max_seq_lengths[transformer_config.config_path], ) diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 74c293d0c09..0e84829de5d 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -19,7 +19,7 @@ from invokeai.backend.flux.modules.conditioner import HFEncoder from invokeai.backend.model_manager.config import ModelFormat from invokeai.backend.patches.layer_patcher import LayerPatcher -from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_CLIP_PREFIX +from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_CLIP_PREFIX, FLUX_LORA_T5_PREFIX from invokeai.backend.patches.model_patch_raw import ModelPatchRaw from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo @@ -71,13 +71,45 @@ def invoke(self, context: InvocationContext) -> FluxConditioningOutput: def _t5_encode(self, context: InvocationContext) -> torch.Tensor: prompt = [self.prompt] + t5_encoder_info = context.models.load(self.t5_encoder.text_encoder) + t5_encoder_config = t5_encoder_info.config + assert t5_encoder_config is not None + with ( - context.models.load(self.t5_encoder.text_encoder) as t5_text_encoder, + t5_encoder_info.model_on_device() as (cached_weights, t5_text_encoder), context.models.load(self.t5_encoder.tokenizer) as t5_tokenizer, + ExitStack() as exit_stack, ): assert isinstance(t5_text_encoder, T5EncoderModel) assert isinstance(t5_tokenizer, (T5Tokenizer, T5TokenizerFast)) + # Determine if the model is quantized. + # If the model is quantized, then we need to apply the LoRA weights as sidecar layers. This results in + # slower inference than direct patching, but is agnostic to the quantization format. + if t5_encoder_config.format in [ModelFormat.T5Encoder, ModelFormat.Diffusers]: + model_is_quantized = False + elif t5_encoder_config.format in [ + ModelFormat.BnbQuantizedLlmInt8b, + ModelFormat.BnbQuantizednf4b, + ModelFormat.GGUFQuantized, + ]: + model_is_quantized = True + else: + raise ValueError(f"Unsupported model format: {t5_encoder_config.format}") + + # Apply LoRA models to the T5 encoder. + # Note: We apply the LoRA after the encoder has been moved to its target device for faster patching. 
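A minimal sketch (not InvokeAI's `LayerPatcher`, and with made-up tensor names) of the sidecar approach that the comments above choose for quantized T5 encoders: the base linear runs untouched, so its weights can stay quantized, and the LoRA residual is computed separately and added to the output.

```python
import torch

def sidecar_lora_linear(
    x: torch.Tensor,
    base_linear: torch.nn.Module,  # may hold quantized weights; they are never modified
    lora_down: torch.Tensor,       # shape: (rank, in_features)
    lora_up: torch.Tensor,         # shape: (out_features, rank)
    patch_weight: float,
    lora_scale: float,
) -> torch.Tensor:
    # The residual is computed alongside the frozen base layer, which is why sidecar
    # patching is agnostic to the base layer's quantization format (at the cost of
    # slower inference than merging the LoRA directly into unquantized weights).
    residual = torch.nn.functional.linear(torch.nn.functional.linear(x, lora_down), lora_up)
    return base_linear(x) + residual * (patch_weight * lora_scale)

layer = torch.nn.Linear(768, 3072)
x = torch.randn(1, 768)
y = sidecar_lora_linear(x, layer, torch.randn(4, 768), torch.randn(3072, 4), patch_weight=0.7, lora_scale=1.0)
assert y.shape == (1, 3072)
```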
+ exit_stack.enter_context( + LayerPatcher.apply_smart_model_patches( + model=t5_text_encoder, + patches=self._t5_lora_iterator(context), + prefix=FLUX_LORA_T5_PREFIX, + dtype=t5_text_encoder.dtype, + cached_weights=cached_weights, + force_sidecar_patching=model_is_quantized, + ) + ) + t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, self.t5_max_seq_len) context.util.signal_progress("Running T5 encoder") @@ -132,3 +164,10 @@ def _clip_lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[Mode assert isinstance(lora_info.model, ModelPatchRaw) yield (lora_info.model, lora.weight) del lora_info + + def _t5_lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[ModelPatchRaw, float]]: + for lora in self.t5_encoder.loras: + lora_info = context.models.load(lora.lora) + assert isinstance(lora_info.model, ModelPatchRaw) + yield (lora_info.model, lora.weight) + del lora_info diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index 846f64cfb2a..cdb4b5ccca6 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -68,6 +68,7 @@ class CLIPField(BaseModel): class T5EncoderField(BaseModel): tokenizer: ModelIdentifierField = Field(description="Info to load tokenizer submodel") text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel") + loras: List[LoRAField] = Field(description="LoRAs to apply on model loading") class VAEField(BaseModel): diff --git a/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_linear.py b/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_linear.py index 7d5784563e3..c440526b9b9 100644 --- a/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_linear.py +++ b/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_linear.py @@ -7,7 +7,6 @@ CustomModuleMixin, ) from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch -from invokeai.backend.patches.layers.concatenated_lora_layer import ConcatenatedLoRALayer from invokeai.backend.patches.layers.flux_control_lora_layer import FluxControlLoRALayer from invokeai.backend.patches.layers.lora_layer import LoRALayer @@ -22,25 +21,6 @@ def linear_lora_forward(input: torch.Tensor, lora_layer: LoRALayer, lora_weight: return x -def concatenated_lora_forward( - input: torch.Tensor, concatenated_lora_layer: ConcatenatedLoRALayer, lora_weight: float -) -> torch.Tensor: - """An optimized implementation of the residual calculation for a sidecar ConcatenatedLoRALayer.""" - x_chunks: list[torch.Tensor] = [] - for lora_layer in concatenated_lora_layer.lora_layers: - x_chunk = torch.nn.functional.linear(input, lora_layer.down) - if lora_layer.mid is not None: - x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.mid) - x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.up, bias=lora_layer.bias) - x_chunk *= lora_weight * lora_layer.scale() - x_chunks.append(x_chunk) - - # TODO(ryand): Generalize to support concat_axis != 0. 
- assert concatenated_lora_layer.concat_axis == 0 - x = torch.cat(x_chunks, dim=-1) - return x - - def autocast_linear_forward_sidecar_patches( orig_module: torch.nn.Linear, input: torch.Tensor, patches_and_weights: list[tuple[BaseLayerPatch, float]] ) -> torch.Tensor: @@ -66,8 +46,6 @@ def autocast_linear_forward_sidecar_patches( output += linear_lora_forward(orig_input, patch, patch_weight) elif isinstance(patch, LoRALayer): output += linear_lora_forward(input, patch, patch_weight) - elif isinstance(patch, ConcatenatedLoRALayer): - output += concatenated_lora_forward(input, patch, patch_weight) else: unprocessed_patches_and_weights.append((patch, patch_weight)) diff --git a/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_module_mixin.py b/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_module_mixin.py index a7312517a48..0563f3cb366 100644 --- a/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_module_mixin.py +++ b/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_module_mixin.py @@ -3,6 +3,8 @@ import torch from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch +from invokeai.backend.patches.layers.param_shape_utils import get_param_shape +from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor class CustomModuleMixin: @@ -42,6 +44,20 @@ def _aggregate_patch_parameters( device: torch.device | None = None, ): """Helper function that aggregates the parameters from all patches into a single dict.""" + # HACK(ryand): If the original parameters are in a quantized format whose weights can't be accessed, we replace + # them with dummy tensors on the 'meta' device. This allows patch layers to access the shapes of the original + # parameters. But, of course, any sub-layers that need to access the actual values of the parameters will fail. + for param_name in orig_params.keys(): + param = orig_params[param_name] + if type(param) is torch.nn.Parameter and type(param.data) is torch.Tensor: + pass + elif type(param) is GGMLTensor: + # Move to device and dequantize here. Doing it in the patch layer can result in redundant casts / + # dequantizations. 
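For context on the 'meta'-device fallback described in the HACK comment above, a small self-contained illustration (shapes are arbitrary examples, and this does not use InvokeAI's `get_param_shape` helper):

```python
import torch

# A 'meta' tensor records shape and dtype without allocating storage, so it can stand in
# for a parameter whose quantized data cannot be read while still exposing .shape.
placeholder = torch.empty((3072, 768), device="meta")
print(placeholder.shape, placeholder.dtype)  # torch.Size([3072, 768]) torch.float32

# Reading actual values (e.g. placeholder.tolist()) raises, since there is no data to
# read -- the "sub-layers that need the actual values will fail" limitation noted above.
```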
+ orig_params[param_name] = param.to(device=device).get_dequantized_tensor() + else: + orig_params[param_name] = torch.empty(get_param_shape(param), device="meta") + params: dict[str, torch.Tensor] = {} for patch, patch_weight in patches_and_weights: diff --git a/invokeai/backend/model_manager/load/model_loaders/lora.py b/invokeai/backend/model_manager/load/model_loaders/lora.py index 7e89a882210..116dee49139 100644 --- a/invokeai/backend/model_manager/load/model_loaders/lora.py +++ b/invokeai/backend/model_manager/load/model_loaders/lora.py @@ -31,6 +31,10 @@ is_state_dict_likely_in_flux_kohya_format, lora_model_from_flux_kohya_state_dict, ) +from invokeai.backend.patches.lora_conversions.flux_onetrainer_lora_conversion_utils import ( + is_state_dict_likely_in_flux_onetrainer_format, + lora_model_from_flux_onetrainer_state_dict, +) from invokeai.backend.patches.lora_conversions.sd_lora_conversion_utils import lora_model_from_sd_state_dict from invokeai.backend.patches.lora_conversions.sdxl_lora_conversion_utils import convert_sdxl_keys_to_diffusers_format @@ -84,8 +88,12 @@ def _load_model( elif config.format == ModelFormat.LyCORIS: if is_state_dict_likely_in_flux_kohya_format(state_dict=state_dict): model = lora_model_from_flux_kohya_state_dict(state_dict=state_dict) + elif is_state_dict_likely_in_flux_onetrainer_format(state_dict=state_dict): + model = lora_model_from_flux_onetrainer_state_dict(state_dict=state_dict) elif is_state_dict_likely_flux_control(state_dict=state_dict): model = lora_model_from_flux_control_state_dict(state_dict=state_dict) + else: + raise ValueError(f"LoRA model is in unsupported FLUX format: {config.format}") else: raise ValueError(f"LoRA model is in unsupported FLUX format: {config.format}") elif self._model_base in [BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2]: diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index c6f0873e2cb..82378d08e01 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -46,6 +46,9 @@ from invokeai.backend.patches.lora_conversions.flux_kohya_lora_conversion_utils import ( is_state_dict_likely_in_flux_kohya_format, ) +from invokeai.backend.patches.lora_conversions.flux_onetrainer_lora_conversion_utils import ( + is_state_dict_likely_in_flux_onetrainer_format, +) from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor from invokeai.backend.quantization.gguf.loaders import gguf_sd_loader from invokeai.backend.spandrel_image_to_image_model import SpandrelImageToImageModel @@ -283,7 +286,7 @@ def get_model_type_from_checkpoint(cls, model_path: Path, checkpoint: Optional[C return ModelType.Main elif key.startswith(("encoder.conv_in", "decoder.conv_in")): return ModelType.VAE - elif key.startswith(("lora_te_", "lora_unet_")): + elif key.startswith(("lora_te_", "lora_unet_", "lora_te1_", "lora_te2_", "lora_transformer_")): return ModelType.LoRA # "lora_A.weight" and "lora_B.weight" are associated with models in PEFT format. We don't support all PEFT # LoRA models, but as of the time of writing, we support Diffusers FLUX PEFT LoRA models. 
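The widened prefix check above can be exercised in isolation; a toy sketch (the key name and shape are hypothetical, and the real probe also matches many other patterns):

```python
import torch

# The same startswith() tuple used in get_model_type_from_checkpoint above.
LORA_KEY_PREFIXES = ("lora_te_", "lora_unet_", "lora_te1_", "lora_te2_", "lora_transformer_")

def looks_like_lora_checkpoint(state_dict: dict[str, torch.Tensor]) -> bool:
    # The real probe walks every key and also checks VAE / main-model patterns;
    # this reproduces only the LoRA prefix test.
    return any(key.startswith(LORA_KEY_PREFIXES) for key in state_dict)

sd = {"lora_te2_encoder_block_0_layer_0_SelfAttention_k.lora_down.weight": torch.zeros(4, 4096)}
assert looks_like_lora_checkpoint(sd)
```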
@@ -632,6 +635,7 @@ def get_format(self) -> ModelFormat: def get_base_type(self) -> BaseModelType: if ( is_state_dict_likely_in_flux_kohya_format(self.checkpoint) + or is_state_dict_likely_in_flux_onetrainer_format(self.checkpoint) or is_state_dict_likely_in_flux_diffusers_format(self.checkpoint) or is_state_dict_likely_flux_control(self.checkpoint) ): diff --git a/invokeai/backend/patches/layers/concatenated_lora_layer.py b/invokeai/backend/patches/layers/concatenated_lora_layer.py deleted file mode 100644 index a699a47433d..00000000000 --- a/invokeai/backend/patches/layers/concatenated_lora_layer.py +++ /dev/null @@ -1,55 +0,0 @@ -from typing import Optional, Sequence - -import torch - -from invokeai.backend.patches.layers.lora_layer import LoRALayer -from invokeai.backend.patches.layers.lora_layer_base import LoRALayerBase - - -class ConcatenatedLoRALayer(LoRALayerBase): - """A LoRA layer that is composed of multiple LoRA layers concatenated along a specified axis. - - This class was created to handle a special case with FLUX LoRA models. In the BFL FLUX model format, the attention - Q, K, V matrices are concatenated along the first dimension. In the diffusers LoRA format, the Q, K, V matrices are - stored as separate tensors. This class enables diffusers LoRA layers to be used in BFL FLUX models. - """ - - def __init__(self, lora_layers: Sequence[LoRALayer], concat_axis: int = 0): - super().__init__(alpha=None, bias=None) - - self.lora_layers = lora_layers - self.concat_axis = concat_axis - - def _rank(self) -> int | None: - return None - - def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor: - # TODO(ryand): Currently, we pass orig_weight=None to the sub-layers. If we want to support sub-layers that - # require this value, we will need to implement chunking of the original weight tensor here. - # Note that we must apply the sub-layer scales here. - layer_weights = [lora_layer.get_weight(None) * lora_layer.scale() for lora_layer in self.lora_layers] # pyright: ignore[reportArgumentType] - return torch.cat(layer_weights, dim=self.concat_axis) - - def get_bias(self, orig_bias: torch.Tensor | None) -> Optional[torch.Tensor]: - # TODO(ryand): Currently, we pass orig_bias=None to the sub-layers. If we want to support sub-layers that - # require this value, we will need to implement chunking of the original bias tensor here. - # Note that we must apply the sub-layer scales here. 
- layer_biases: list[torch.Tensor] = [] - for lora_layer in self.lora_layers: - layer_bias = lora_layer.get_bias(None) - if layer_bias is not None: - layer_biases.append(layer_bias * lora_layer.scale()) - - if len(layer_biases) == 0: - return None - - assert len(layer_biases) == len(self.lora_layers) - return torch.cat(layer_biases, dim=self.concat_axis) - - def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None): - super().to(device=device, dtype=dtype) - for lora_layer in self.lora_layers: - lora_layer.to(device=device, dtype=dtype) - - def calc_size(self) -> int: - return super().calc_size() + sum(lora_layer.calc_size() for lora_layer in self.lora_layers) diff --git a/invokeai/backend/patches/layers/dora_layer.py b/invokeai/backend/patches/layers/dora_layer.py new file mode 100644 index 00000000000..3e52ce95783 --- /dev/null +++ b/invokeai/backend/patches/layers/dora_layer.py @@ -0,0 +1,115 @@ +from typing import Dict, Optional + +import torch + +from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.cast_to_device import cast_to_device +from invokeai.backend.patches.layers.lora_layer_base import LoRALayerBase +from invokeai.backend.util.calc_tensor_size import calc_tensors_size + + +class DoRALayer(LoRALayerBase): + """A DoRA layer. As defined in https://arxiv.org/pdf/2402.09353.""" + + def __init__( + self, + up: torch.Tensor, + down: torch.Tensor, + dora_scale: torch.Tensor, + alpha: float | None, + bias: Optional[torch.Tensor], + ): + super().__init__(alpha, bias) + self.up = up + self.down = down + self.dora_scale = dora_scale + + @classmethod + def from_state_dict_values(cls, values: Dict[str, torch.Tensor]): + alpha = cls._parse_alpha(values.get("alpha", None)) + bias = cls._parse_bias( + values.get("bias_indices", None), values.get("bias_values", None), values.get("bias_size", None) + ) + + layer = cls( + up=values["lora_up.weight"], + down=values["lora_down.weight"], + dora_scale=values["dora_scale"], + alpha=alpha, + bias=bias, + ) + + cls.warn_on_unhandled_keys( + values=values, + handled_keys={ + # Default keys. + "alpha", + "bias_indices", + "bias_values", + "bias_size", + # Layer-specific keys. + "lora_up.weight", + "lora_down.weight", + "dora_scale", + }, + ) + + return layer + + def _rank(self) -> int: + return self.down.shape[0] + + def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor: + orig_weight = cast_to_device(orig_weight, self.up.device) + + # Note: Variable names (e.g. delta_v) are based on the paper. + delta_v = self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1) + delta_v = delta_v.reshape(orig_weight.shape) + + delta_v = delta_v * self.scale() + + # At this point, out_weight is the unnormalized direction matrix. + out_weight = orig_weight + delta_v + + # TODO(ryand): Simplify this logic. 
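For readers of the chained expression that follows: for a 2-D linear weight it is the column-wise L2 norm of `out_weight` (one value per input feature, shape `[1, in_features]`, matching `dora_scale`), so the layer implements the DoRA update `m * (W + delta_v) / ||W + delta_v||_col` and returns it minus the original weight. A minimal 2-D-only equivalent, offered as an observation rather than part of the patch:

```python
import torch

def column_norm_2d(out_weight: torch.Tensor) -> torch.Tensor:
    # L2 norm over the output dimension, one value per input feature.
    return torch.linalg.vector_norm(out_weight, dim=0, keepdim=True)  # shape: (1, in_features)

w = torch.randn(3072, 768)
assert torch.allclose(
    column_norm_2d(w),
    w.transpose(0, 1).reshape(w.shape[1], -1).norm(dim=1, keepdim=True).reshape(w.shape[1], 1).transpose(0, 1),
)
```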
+ direction_norm = ( + out_weight.transpose(0, 1) + .reshape(out_weight.shape[1], -1) + .norm(dim=1, keepdim=True) + .reshape(out_weight.shape[1], *[1] * (out_weight.dim() - 1)) + .transpose(0, 1) + ) + + out_weight *= self.dora_scale / direction_norm + + return out_weight - orig_weight + + def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None): + super().to(device=device, dtype=dtype) + self.up = self.up.to(device=device, dtype=dtype) + self.down = self.down.to(device=device, dtype=dtype) + self.dora_scale = self.dora_scale.to(device=device, dtype=dtype) + + def calc_size(self) -> int: + return super().calc_size() + calc_tensors_size([self.up, self.down, self.dora_scale]) + + def get_parameters(self, orig_parameters: dict[str, torch.Tensor], weight: float) -> dict[str, torch.Tensor]: + if any(p.device.type == "meta" for p in orig_parameters.values()): + # If any of the original parameters are on the 'meta' device, we assume this is because the base model is in + # a quantization format that doesn't allow easy dequantization. + raise RuntimeError( + "The base model quantization format (likely bitsandbytes) is not compatible with DoRA patches." + ) + + scale = self.scale() + params = {"weight": self.get_weight(orig_parameters["weight"]) * weight} + bias = self.get_bias(orig_parameters.get("bias", None)) + if bias is not None: + params["bias"] = bias * (weight * scale) + + # Reshape all params to match the original module's shape. + for param_name, param_weight in params.items(): + orig_param = orig_parameters[param_name] + if param_weight.shape != orig_param.shape: + params[param_name] = param_weight.reshape(orig_param.shape) + + return params diff --git a/invokeai/backend/patches/layers/merged_layer_patch.py b/invokeai/backend/patches/layers/merged_layer_patch.py new file mode 100644 index 00000000000..ec2039e746c --- /dev/null +++ b/invokeai/backend/patches/layers/merged_layer_patch.py @@ -0,0 +1,65 @@ +from dataclasses import dataclass +from typing import Sequence + +import torch + +from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch +from invokeai.backend.patches.layers.param_shape_utils import get_param_shape + + +@dataclass +class Range: + start: int + end: int + + +class MergedLayerPatch(BaseLayerPatch): + """A patch layer that is composed of multiple sub-layers merged together. + + This class was created to handle a special case with FLUX LoRA models. In the BFL FLUX model format, the attention + Q, K, V matrices are concatenated along the first dimension. In the diffusers LoRA format, the Q, K, V matrices are + stored as separate tensors. This class enables diffusers LoRA layers to be used in BFL FLUX models. + """ + + def __init__( + self, + lora_layers: Sequence[BaseLayerPatch], + ranges: Sequence[Range], + ): + super().__init__() + + self.lora_layers = lora_layers + # self.ranges[i] is the range for the i'th lora layer along the 0'th weight dimension. + self.ranges = ranges + assert len(self.ranges) == len(self.lora_layers) + + def get_parameters(self, orig_parameters: dict[str, torch.Tensor], weight: float) -> dict[str, torch.Tensor]: + out_parameters: dict[str, torch.Tensor] = {} + + for lora_layer, range in zip(self.lora_layers, self.ranges, strict=True): + sliced_parameters: dict[str, torch.Tensor] = { + n: p[range.start : range.end] for n, p in orig_parameters.items() + } + + # Note that `weight` is applied in the sub-layers, no need to apply it in this function. 
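The accumulation performed in the loop below can be pictured with a toy example (shapes and values are made up): each sub-layer's delta lands in its own row range of a zero tensor shaped like the fused parameter, which is how separate Q/K/V LoRA layers are merged into a single concatenated QKV weight.

```python
import torch

fused_shape = (6, 4)                       # rows 0-1: Q, rows 2-3: K, rows 4-5: V
ranges = [(0, 2), (2, 4), (4, 6)]
sub_deltas = [torch.full((2, 4), float(i + 1)) for i in range(3)]

out = torch.zeros(fused_shape)
for (start, end), delta in zip(ranges, sub_deltas):
    out[start:end] += delta                # each sub-layer only touches its own row range

assert torch.equal(out[0:2], torch.full((2, 4), 1.0))
assert torch.equal(out[4:6], torch.full((2, 4), 3.0))
```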
+ layer_out_parameters = lora_layer.get_parameters(sliced_parameters, weight) + + for out_param_name, out_param in layer_out_parameters.items(): + if out_param_name not in out_parameters: + # If not already in the output dict, initialize an output tensor with the same shape as the full + # original parameter. + out_parameters[out_param_name] = torch.zeros( + get_param_shape(orig_parameters[out_param_name]), + dtype=out_param.dtype, + device=out_param.device, + ) + out_parameters[out_param_name][range.start : range.end] += out_param + + return out_parameters + + def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None): + for lora_layer in self.lora_layers: + lora_layer.to(device=device, dtype=dtype) + + def calc_size(self) -> int: + return sum(lora_layer.calc_size() for lora_layer in self.lora_layers) diff --git a/invokeai/backend/patches/layers/utils.py b/invokeai/backend/patches/layers/utils.py index 85a0c18dd5a..8141a56644a 100644 --- a/invokeai/backend/patches/layers/utils.py +++ b/invokeai/backend/patches/layers/utils.py @@ -3,6 +3,7 @@ import torch from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch +from invokeai.backend.patches.layers.dora_layer import DoRALayer from invokeai.backend.patches.layers.full_layer import FullLayer from invokeai.backend.patches.layers.ia3_layer import IA3Layer from invokeai.backend.patches.layers.loha_layer import LoHALayer @@ -14,8 +15,9 @@ def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseLayerPatch: # Detect layers according to LyCORIS detection logic(`weight_list_det`) # https://github.com/KohakuBlueleaf/LyCORIS/tree/8ad8000efb79e2b879054da8c9356e6143591bad/lycoris/modules - - if "lora_up.weight" in state_dict: + if "dora_scale" in state_dict: + return DoRALayer.from_state_dict_values(state_dict) + elif "lora_up.weight" in state_dict: # LoRA a.k.a LoCon return LoRALayer.from_state_dict_values(state_dict) elif "hada_w1_a" in state_dict: diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py index b4fa4814688..6a36db7b592 100644 --- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py +++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py @@ -3,8 +3,8 @@ import torch from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch -from invokeai.backend.patches.layers.concatenated_lora_layer import ConcatenatedLoRALayer -from invokeai.backend.patches.layers.lora_layer import LoRALayer +from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range +from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX from invokeai.backend.patches.model_patch_raw import ModelPatchRaw @@ -33,13 +33,21 @@ def is_state_dict_likely_in_flux_diffusers_format(state_dict: Dict[str, torch.Te def lora_model_from_flux_diffusers_state_dict( state_dict: Dict[str, torch.Tensor], alpha: float | None ) -> ModelPatchRaw: - """Loads a state dict in the Diffusers FLUX LoRA format into a LoRAModelRaw object. + # Group keys by layer. 
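As an aside, the Kohya and OneTrainer loaders later in this diff open-code the analogous grouping step by splitting each flat key on its first '.' (their layer names contain no periods); a toy version with hypothetical keys:

```python
import torch

flat = {
    "layer_a.lora_down.weight": torch.zeros(4, 8),
    "layer_a.lora_up.weight": torch.zeros(8, 4),
    "layer_b.alpha": torch.tensor(4.0),
}
grouped: dict[str, dict[str, torch.Tensor]] = {}
for key, value in flat.items():
    layer_name, param_name = key.split(".", 1)
    grouped.setdefault(layer_name, {})[param_name] = value

assert set(grouped) == {"layer_a", "layer_b"}
assert "lora_down.weight" in grouped["layer_a"]
```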
+ grouped_state_dict: dict[str, dict[str, torch.Tensor]] = _group_by_layer(state_dict) + layers = lora_layers_from_flux_diffusers_grouped_state_dict(grouped_state_dict, alpha) + return ModelPatchRaw(layers=layers) + + +def lora_layers_from_flux_diffusers_grouped_state_dict( + grouped_state_dict: Dict[str, Dict[str, torch.Tensor]], alpha: float | None +) -> dict[str, BaseLayerPatch]: + """Converts a grouped state dict with Diffusers FLUX LoRA keys to LoRA layers with BFL keys (i.e. the module key + format used by Invoke). This function is based on: https://github.com/huggingface/diffusers/blob/55ac421f7bb12fd00ccbef727be4dc2f3f920abb/scripts/convert_flux_to_diffusers.py """ - # Group keys by layer. - grouped_state_dict: dict[str, dict[str, torch.Tensor]] = _group_by_layer(state_dict) # Remove the "transformer." prefix from all keys. grouped_state_dict = {k.replace("transformer.", ""): v for k, v in grouped_state_dict.items()} @@ -53,17 +61,26 @@ def lora_model_from_flux_diffusers_state_dict( layers: dict[str, BaseLayerPatch] = {} - def add_lora_layer_if_present(src_key: str, dst_key: str) -> None: - if src_key in grouped_state_dict: - src_layer_dict = grouped_state_dict.pop(src_key) - value = { + def get_lora_layer_values(src_layer_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: + if "lora_A.weight" in src_layer_dict: + # The LoRA keys are in PEFT format. + values = { "lora_down.weight": src_layer_dict.pop("lora_A.weight"), "lora_up.weight": src_layer_dict.pop("lora_B.weight"), } if alpha is not None: - value["alpha"] = torch.tensor(alpha) - layers[dst_key] = LoRALayer.from_state_dict_values(values=value) + values["alpha"] = torch.tensor(alpha) assert len(src_layer_dict) == 0 + return values + else: + # Assume that the LoRA keys are in Kohya format. 
+ return src_layer_dict + + def add_lora_layer_if_present(src_key: str, dst_key: str) -> None: + if src_key in grouped_state_dict: + src_layer_dict = grouped_state_dict.pop(src_key) + values = get_lora_layer_values(src_layer_dict) + layers[dst_key] = any_lora_layer_from_state_dict(values) def add_qkv_lora_layer_if_present( src_keys: list[str], @@ -79,29 +96,24 @@ def add_qkv_lora_layer_if_present( if not any(keys_present): return - sub_layers: list[LoRALayer] = [] + dim_0_offset = 0 + sub_layers: list[BaseLayerPatch] = [] + sub_layer_ranges: list[Range] = [] for src_key, src_weight_shape in zip(src_keys, src_weight_shapes, strict=True): src_layer_dict = grouped_state_dict.pop(src_key, None) if src_layer_dict is not None: - values = { - "lora_down.weight": src_layer_dict.pop("lora_A.weight"), - "lora_up.weight": src_layer_dict.pop("lora_B.weight"), - } - if alpha is not None: - values["alpha"] = torch.tensor(alpha) - assert values["lora_down.weight"].shape[1] == src_weight_shape[1] - assert values["lora_up.weight"].shape[0] == src_weight_shape[0] - sub_layers.append(LoRALayer.from_state_dict_values(values=values)) - assert len(src_layer_dict) == 0 + values = get_lora_layer_values(src_layer_dict) + # assert values["lora_down.weight"].shape[1] == src_weight_shape[1] + # assert values["lora_up.weight"].shape[0] == src_weight_shape[0] + sub_layers.append(any_lora_layer_from_state_dict(values)) + sub_layer_ranges.append(Range(dim_0_offset, dim_0_offset + src_weight_shape[0])) else: if not allow_missing_keys: raise ValueError(f"Missing LoRA layer: '{src_key}'.") - values = { - "lora_up.weight": torch.zeros((src_weight_shape[0], 1)), - "lora_down.weight": torch.zeros((1, src_weight_shape[1])), - } - sub_layers.append(LoRALayer.from_state_dict_values(values=values)) - layers[dst_qkv_key] = ConcatenatedLoRALayer(lora_layers=sub_layers) + + dim_0_offset += src_weight_shape[0] + + layers[dst_qkv_key] = MergedLayerPatch(sub_layers, sub_layer_ranges) # time_text_embed.timestep_embedder -> time_in. add_lora_layer_if_present("time_text_embed.timestep_embedder.linear_1", "time_in.in_layer") @@ -217,7 +229,7 @@ def add_qkv_lora_layer_if_present( layers_with_prefix = {f"{FLUX_LORA_TRANSFORMER_PREFIX}{k}": v for k, v in layers.items()} - return ModelPatchRaw(layers=layers_with_prefix) + return layers_with_prefix def _group_by_layer(state_dict: Dict[str, torch.Tensor]) -> dict[str, dict[str, torch.Tensor]]: diff --git a/invokeai/backend/patches/lora_conversions/flux_kohya_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_kohya_lora_conversion_utils.py index 6ff0d2fa3cc..41e41dbb517 100644 --- a/invokeai/backend/patches/lora_conversions/flux_kohya_lora_conversion_utils.py +++ b/invokeai/backend/patches/lora_conversions/flux_kohya_lora_conversion_utils.py @@ -7,6 +7,7 @@ from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict from invokeai.backend.patches.lora_conversions.flux_lora_constants import ( FLUX_LORA_CLIP_PREFIX, + FLUX_LORA_T5_PREFIX, FLUX_LORA_TRANSFORMER_PREFIX, ) from invokeai.backend.patches.model_patch_raw import ModelPatchRaw @@ -26,6 +27,14 @@ # lora_te1_text_model_encoder_layers_0_mlp_fc1.lora_up.weight FLUX_KOHYA_CLIP_KEY_REGEX = r"lora_te1_text_model_encoder_layers_(\d+)_(mlp|self_attn)_(\w+)\.?.*" +# A regex pattern that matches all of the T5 keys in the Kohya FLUX LoRA format. 
+# Example keys: +# lora_te2_encoder_block_0_layer_0_SelfAttention_k.alpha +# lora_te2_encoder_block_0_layer_0_SelfAttention_k.dora_scale +# lora_te2_encoder_block_0_layer_0_SelfAttention_k.lora_down.weight +# lora_te2_encoder_block_0_layer_0_SelfAttention_k.lora_up.weight +FLUX_KOHYA_T5_KEY_REGEX = r"lora_te2_encoder_block_(\d+)_layer_(\d+)_(DenseReluDense|SelfAttention)_(\w+)_?(\w+)?\.?.*" + def is_state_dict_likely_in_flux_kohya_format(state_dict: Dict[str, Any]) -> bool: """Checks if the provided state dict is likely in the Kohya FLUX LoRA format. @@ -34,7 +43,9 @@ def is_state_dict_likely_in_flux_kohya_format(state_dict: Dict[str, Any]) -> boo perfect-precision detector would require checking all keys against a whitelist and verifying tensor shapes.) """ return all( - re.match(FLUX_KOHYA_TRANSFORMER_KEY_REGEX, k) or re.match(FLUX_KOHYA_CLIP_KEY_REGEX, k) + re.match(FLUX_KOHYA_TRANSFORMER_KEY_REGEX, k) + or re.match(FLUX_KOHYA_CLIP_KEY_REGEX, k) + or re.match(FLUX_KOHYA_T5_KEY_REGEX, k) for k in state_dict.keys() ) @@ -48,27 +59,34 @@ def lora_model_from_flux_kohya_state_dict(state_dict: Dict[str, torch.Tensor]) - grouped_state_dict[layer_name] = {} grouped_state_dict[layer_name][param_name] = value - # Split the grouped state dict into transformer and CLIP state dicts. + # Split the grouped state dict into transformer, CLIP, and T5 state dicts. transformer_grouped_sd: dict[str, dict[str, torch.Tensor]] = {} clip_grouped_sd: dict[str, dict[str, torch.Tensor]] = {} + t5_grouped_sd: dict[str, dict[str, torch.Tensor]] = {} for layer_name, layer_state_dict in grouped_state_dict.items(): if layer_name.startswith("lora_unet"): transformer_grouped_sd[layer_name] = layer_state_dict elif layer_name.startswith("lora_te1"): clip_grouped_sd[layer_name] = layer_state_dict + elif layer_name.startswith("lora_te2"): + t5_grouped_sd[layer_name] = layer_state_dict else: raise ValueError(f"Layer '{layer_name}' does not match the expected pattern for FLUX LoRA weights.") # Convert the state dicts to the InvokeAI format. transformer_grouped_sd = _convert_flux_transformer_kohya_state_dict_to_invoke_format(transformer_grouped_sd) clip_grouped_sd = _convert_flux_clip_kohya_state_dict_to_invoke_format(clip_grouped_sd) + t5_grouped_sd = _convert_flux_t5_kohya_state_dict_to_invoke_format(t5_grouped_sd) # Create LoRA layers. layers: dict[str, BaseLayerPatch] = {} - for layer_key, layer_state_dict in transformer_grouped_sd.items(): - layers[FLUX_LORA_TRANSFORMER_PREFIX + layer_key] = any_lora_layer_from_state_dict(layer_state_dict) - for layer_key, layer_state_dict in clip_grouped_sd.items(): - layers[FLUX_LORA_CLIP_PREFIX + layer_key] = any_lora_layer_from_state_dict(layer_state_dict) + for model_prefix, grouped_sd in [ + (FLUX_LORA_TRANSFORMER_PREFIX, transformer_grouped_sd), + (FLUX_LORA_CLIP_PREFIX, clip_grouped_sd), + (FLUX_LORA_T5_PREFIX, t5_grouped_sd), + ]: + for layer_key, layer_state_dict in grouped_sd.items(): + layers[model_prefix + layer_key] = any_lora_layer_from_state_dict(layer_state_dict) # Create and return the LoRAModelRaw. return ModelPatchRaw(layers=layers) @@ -123,3 +141,31 @@ def replace_func(match: re.Match[str]) -> str: raise ValueError(f"Key '{k}' does not match the expected pattern for FLUX LoRA weights.") return converted_dict + + +def _convert_flux_t5_kohya_state_dict_to_invoke_format(state_dict: Dict[str, T]) -> Dict[str, T]: + """Converts a T5 LoRA state dict from the Kohya FLUX LoRA format to LoRA weight format used internally by + InvokeAI. 
+ + Example key conversions: + + "lora_te2_encoder_block_0_layer_0_SelfAttention_k" -> "encoder.block.0.layer.0.SelfAttention.k" + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wi_0" -> "encoder.block.0.layer.1.DenseReluDense.wi.0" + """ + + def replace_func(match: re.Match[str]) -> str: + s = f"encoder.block.{match.group(1)}.layer.{match.group(2)}.{match.group(3)}.{match.group(4)}" + if match.group(5): + s += f".{match.group(5)}" + return s + + converted_dict: dict[str, T] = {} + for k, v in state_dict.items(): + match = re.match(FLUX_KOHYA_T5_KEY_REGEX, k) + if match: + new_key = re.sub(FLUX_KOHYA_T5_KEY_REGEX, replace_func, k) + converted_dict[new_key] = v + else: + raise ValueError(f"Key '{k}' does not match the expected pattern for FLUX LoRA weights.") + + return converted_dict diff --git a/invokeai/backend/patches/lora_conversions/flux_lora_constants.py b/invokeai/backend/patches/lora_conversions/flux_lora_constants.py index 4f854d14421..28575144627 100644 --- a/invokeai/backend/patches/lora_conversions/flux_lora_constants.py +++ b/invokeai/backend/patches/lora_conversions/flux_lora_constants.py @@ -1,3 +1,4 @@ # Prefixes used to distinguish between transformer and CLIP text encoder keys in the FLUX InvokeAI LoRA format. FLUX_LORA_TRANSFORMER_PREFIX = "lora_transformer-" FLUX_LORA_CLIP_PREFIX = "lora_clip-" +FLUX_LORA_T5_PREFIX = "lora_t5-" diff --git a/invokeai/backend/patches/lora_conversions/flux_onetrainer_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_onetrainer_lora_conversion_utils.py new file mode 100644 index 00000000000..0413f0ef49f --- /dev/null +++ b/invokeai/backend/patches/lora_conversions/flux_onetrainer_lora_conversion_utils.py @@ -0,0 +1,163 @@ +import re +from typing import Any, Dict + +import torch + +from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch +from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict +from invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils import ( + lora_layers_from_flux_diffusers_grouped_state_dict, +) +from invokeai.backend.patches.lora_conversions.flux_kohya_lora_conversion_utils import ( + FLUX_KOHYA_CLIP_KEY_REGEX, + FLUX_KOHYA_T5_KEY_REGEX, + _convert_flux_clip_kohya_state_dict_to_invoke_format, + _convert_flux_t5_kohya_state_dict_to_invoke_format, +) +from invokeai.backend.patches.lora_conversions.flux_lora_constants import ( + FLUX_LORA_CLIP_PREFIX, + FLUX_LORA_T5_PREFIX, +) +from invokeai.backend.patches.lora_conversions.kohya_key_utils import ( + INDEX_PLACEHOLDER, + ParsingTree, + insert_periods_into_kohya_key, +) +from invokeai.backend.patches.model_patch_raw import ModelPatchRaw + +# A regex pattern that matches all of the transformer keys in the OneTrainer FLUX LoRA format. +# The OneTrainer format uses a mix of the Kohya and Diffusers formats: +# - The base model keys are in Diffusers format. +# - Periods are replaced with underscores, to match Kohya. +# - The LoRA key suffixes (e.g. .alpha, .lora_down.weight, .lora_up.weight) match Kohya. 
+# Example keys: +# - "lora_transformer_single_transformer_blocks_0_attn_to_k.alpha" +# - "lora_transformer_single_transformer_blocks_0_attn_to_k.dora_scale" +# - "lora_transformer_single_transformer_blocks_0_attn_to_k.lora_down.weight" +# - "lora_transformer_single_transformer_blocks_0_attn_to_k.lora_up.weight" +FLUX_ONETRAINER_TRANSFORMER_KEY_REGEX = ( + r"lora_transformer_(single_transformer_blocks|transformer_blocks)_(\d+)_(\w+)\.(.*)" +) + + +def is_state_dict_likely_in_flux_onetrainer_format(state_dict: Dict[str, Any]) -> bool: + """Checks if the provided state dict is likely in the OneTrainer FLUX LoRA format. + + This is intended to be a high-precision detector, but it is not guaranteed to have perfect precision. (A + perfect-precision detector would require checking all keys against a whitelist and verifying tensor shapes.) + + Note that OneTrainer matches the Kohya format for the CLIP and T5 models. + """ + return all( + re.match(FLUX_ONETRAINER_TRANSFORMER_KEY_REGEX, k) + or re.match(FLUX_KOHYA_CLIP_KEY_REGEX, k) + or re.match(FLUX_KOHYA_T5_KEY_REGEX, k) + for k in state_dict.keys() + ) + + +def lora_model_from_flux_onetrainer_state_dict(state_dict: Dict[str, torch.Tensor]) -> ModelPatchRaw: # type: ignore + # Group keys by layer. + grouped_state_dict: dict[str, dict[str, torch.Tensor]] = {} + for key, value in state_dict.items(): + layer_name, param_name = key.split(".", 1) + if layer_name not in grouped_state_dict: + grouped_state_dict[layer_name] = {} + grouped_state_dict[layer_name][param_name] = value + + # Split the grouped state dict into transformer, CLIP, and T5 state dicts. + transformer_grouped_sd: dict[str, dict[str, torch.Tensor]] = {} + clip_grouped_sd: dict[str, dict[str, torch.Tensor]] = {} + t5_grouped_sd: dict[str, dict[str, torch.Tensor]] = {} + for layer_name, layer_state_dict in grouped_state_dict.items(): + if layer_name.startswith("lora_transformer"): + transformer_grouped_sd[layer_name] = layer_state_dict + elif layer_name.startswith("lora_te1"): + clip_grouped_sd[layer_name] = layer_state_dict + elif layer_name.startswith("lora_te2"): + t5_grouped_sd[layer_name] = layer_state_dict + else: + raise ValueError(f"Layer '{layer_name}' does not match the expected pattern for FLUX LoRA weights.") + + # Convert the state dicts to the InvokeAI format. + clip_grouped_sd = _convert_flux_clip_kohya_state_dict_to_invoke_format(clip_grouped_sd) + t5_grouped_sd = _convert_flux_t5_kohya_state_dict_to_invoke_format(t5_grouped_sd) + + # Create LoRA layers. + layers: dict[str, BaseLayerPatch] = {} + for model_prefix, grouped_sd in [ + # (FLUX_LORA_TRANSFORMER_PREFIX, transformer_grouped_sd), + (FLUX_LORA_CLIP_PREFIX, clip_grouped_sd), + (FLUX_LORA_T5_PREFIX, t5_grouped_sd), + ]: + for layer_key, layer_state_dict in grouped_sd.items(): + layers[model_prefix + layer_key] = any_lora_layer_from_state_dict(layer_state_dict) + + # Handle the transformer. + transformer_layers = _convert_flux_transformer_onetrainer_state_dict_to_invoke_format(transformer_grouped_sd) + layers.update(transformer_layers) + + # Create and return the LoRAModelRaw. + return ModelPatchRaw(layers=layers) + + +# This parsing tree was generated by calling `generate_kohya_parsing_tree_from_keys()` on the keys in +# flux_lora_diffusers_format.py. 
+flux_transformer_kohya_parsing_tree: ParsingTree = { + "transformer": { + "single_transformer_blocks": { + INDEX_PLACEHOLDER: { + "attn": {"to_k": {}, "to_q": {}, "to_v": {}}, + "norm": {"linear": {}}, + "proj_mlp": {}, + "proj_out": {}, + } + }, + "transformer_blocks": { + INDEX_PLACEHOLDER: { + "attn": { + "add_k_proj": {}, + "add_q_proj": {}, + "add_v_proj": {}, + "to_add_out": {}, + "to_k": {}, + "to_out": {INDEX_PLACEHOLDER: {}}, + "to_q": {}, + "to_v": {}, + }, + "ff": {"net": {INDEX_PLACEHOLDER: {"proj": {}}}}, + "ff_context": {"net": {INDEX_PLACEHOLDER: {"proj": {}}}}, + "norm1": {"linear": {}}, + "norm1_context": {"linear": {}}, + } + }, + } +} + + +def _convert_flux_transformer_onetrainer_state_dict_to_invoke_format( + state_dict: Dict[str, Dict[str, torch.Tensor]], +) -> dict[str, BaseLayerPatch]: + """Converts a FLUX transformer LoRA state dict from the OneTrainer FLUX LoRA format to the LoRA weight format used + internally by InvokeAI. + """ + + # Step 1: Convert the Kohya-style keys with underscores to classic keys with periods. + # Example: + # "lora_transformer_single_transformer_blocks_0_attn_to_k.lora_down.weight" -> "transformer.single_transformer_blocks.0.attn.to_k.lora_down.weight" + lora_prefix = "lora_" + lora_prefix_length = len(lora_prefix) + kohya_state_dict: dict[str, Dict[str, torch.Tensor]] = {} + for key in state_dict.keys(): + # Remove the "lora_" prefix. + assert key.startswith(lora_prefix) + new_key = key[lora_prefix_length:] + + # Add periods to the Kohya-style module keys. + new_key = insert_periods_into_kohya_key(new_key, flux_transformer_kohya_parsing_tree) + + # Replace the old key with the new key. + kohya_state_dict[new_key] = state_dict[key] + + # Step 2: Convert diffusers module names to the BFL module names. + return lora_layers_from_flux_diffusers_grouped_state_dict(kohya_state_dict, alpha=None) diff --git a/invokeai/backend/patches/lora_conversions/kohya_key_utils.py b/invokeai/backend/patches/lora_conversions/kohya_key_utils.py new file mode 100644 index 00000000000..42e4c9854fa --- /dev/null +++ b/invokeai/backend/patches/lora_conversions/kohya_key_utils.py @@ -0,0 +1,102 @@ +from typing import Iterable + +INDEX_PLACEHOLDER = "index_placeholder" + + +# Type alias for a 'ParsingTree', which is a recursive dict with string keys. +ParsingTree = dict[str, "ParsingTree"] + + +def insert_periods_into_kohya_key(key: str, parsing_tree: ParsingTree) -> str: + """Insert periods into a Kohya key based on a parsing tree. + + Kohya format keys are produced by replacing periods with underscores in the original key. + + Example: + ``` + key = "module_a_module_b_0_attn_to_k" + parsing_tree = { + "module_a": { + "module_b": { + INDEX_PLACEHOLDER: { + "attn": {}, + }, + }, + }, + } + result = insert_periods_into_kohya_key(key, parsing_tree) + > "module_a.module_b.0.attn.to_k" + ``` + """ + # Split key into parts by underscore. + parts = key.split("_") + + # Build up result by walking through parsing tree and parts. + result_parts: list[str] = [] + current_part = "" + current_tree = parsing_tree + + for part in parts: + if len(current_part) > 0: + current_part = current_part + "_" + current_part += part + + if current_part in current_tree: + # Match found. + current_tree = current_tree[current_part] + result_parts.append(current_part) + current_part = "" + elif current_part.isnumeric() and INDEX_PLACEHOLDER in current_tree: + # Match found with index placeholder. 
+ current_tree = current_tree[INDEX_PLACEHOLDER] + result_parts.append(current_part) + current_part = "" + + if len(current_part) > 0: + raise ValueError(f"Key {key} does not match parsing tree {parsing_tree}.") + + return ".".join(result_parts) + + +def generate_kohya_parsing_tree_from_keys(keys: Iterable[str]) -> ParsingTree: + """Generate a parsing tree from a list of keys. + + Example: + ``` + keys = [ + "module_a.module_b.0.attn.to_k", + "module_a.module_b.1.attn.to_k", + "module_a.module_c.proj", + ] + + tree = generate_kohya_parsing_tree_from_keys(keys) + > { + > "module_a": { + > "module_b": { + > INDEX_PLACEHOLDER: { + > "attn": { + > "to_k": {}, + > "to_q": {}, + > }, + > } + > }, + > "module_c": { + > "proj": {}, + > } + > } + > } + ``` + """ + tree: ParsingTree = {} + for key in keys: + subtree: ParsingTree = tree + for module_name in key.split("."): + key = module_name + if module_name.isnumeric(): + key = INDEX_PLACEHOLDER + + if key not in subtree: + subtree[key] = {} + + subtree = subtree[key] + return tree diff --git a/invokeai/backend/quantization/gguf/ggml_tensor.py b/invokeai/backend/quantization/gguf/ggml_tensor.py index 62be2bdb637..d48948dcfa9 100644 --- a/invokeai/backend/quantization/gguf/ggml_tensor.py +++ b/invokeai/backend/quantization/gguf/ggml_tensor.py @@ -54,7 +54,9 @@ def apply_to_quantized_tensor(func, args, kwargs): torch.ops.aten.addmm.default: dequantize_and_run, # pyright: ignore torch.ops.aten.mul.Tensor: dequantize_and_run, # pyright: ignore torch.ops.aten.add.Tensor: dequantize_and_run, # pyright: ignore + torch.ops.aten.sub.Tensor: dequantize_and_run, # pyright: ignore torch.ops.aten.allclose.default: dequantize_and_run, # pyright: ignore + torch.ops.aten.slice.Tensor: dequantize_and_run, # pyright: ignore } if torch.backends.mps.is_available(): diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addFLUXLoRAs.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addFLUXLoRAs.ts index a57e655c739..a24adc07b7e 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addFLUXLoRAs.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addFLUXLoRAs.ts @@ -35,11 +35,13 @@ export const addFLUXLoRAs = ( // Use model loader as transformer input g.addEdge(modelLoader, 'transformer', loraCollectionLoader, 'transformer'); g.addEdge(modelLoader, 'clip', loraCollectionLoader, 'clip'); + g.addEdge(modelLoader, 't5_encoder', loraCollectionLoader, 't5_encoder'); // Reroute model connections through the LoRA collection loader g.deleteEdgesTo(denoise, ['transformer']); - g.deleteEdgesTo(fluxTextEncoder, ['clip']); + g.deleteEdgesTo(fluxTextEncoder, ['clip', 't5_encoder']); g.addEdge(loraCollectionLoader, 'transformer', denoise, 'transformer'); g.addEdge(loraCollectionLoader, 'clip', fluxTextEncoder, 'clip'); + g.addEdge(loraCollectionLoader, 't5_encoder', fluxTextEncoder, 't5_encoder'); for (const lora of enabledLoRAs) { const { weight } = lora; diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 9796483a4b7..9ff86645554 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -6194,6 +6194,12 @@ export type components = { * @default null */ clip?: components["schemas"]["CLIPField"] | null; + /** + * T5 Encoder + * @description T5 tokenizer and text encoder + * @default null + */ + t5_encoder?: components["schemas"]["T5EncoderField"] | null; /** * type * @default 
flux_lora_collection_loader @@ -7336,6 +7342,12 @@ export type components = { * @default null */ clip?: components["schemas"]["CLIPField"] | null; + /** + * T5 Encoder + * @description T5 tokenizer and text encoder + * @default null + */ + t5_encoder?: components["schemas"]["T5EncoderField"] | null; /** * type * @default flux_lora_loader @@ -7361,6 +7373,12 @@ export type components = { * @default null */ clip: components["schemas"]["CLIPField"] | null; + /** + * T5 Encoder + * @description T5 tokenizer and text encoder + * @default null + */ + t5_encoder: components["schemas"]["T5EncoderField"] | null; /** * type * @default flux_lora_loader_output @@ -18345,6 +18363,11 @@ export type components = { tokenizer: components["schemas"]["ModelIdentifierField"]; /** @description Info to load text_encoder submodel */ text_encoder: components["schemas"]["ModelIdentifierField"]; + /** + * Loras + * @description LoRAs to apply on model loading + */ + loras: components["schemas"]["LoRAField"][]; }; /** TBLR */ TBLR: { diff --git a/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py b/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py index 789a83f9f01..c1e77c333bb 100644 --- a/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py +++ b/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py @@ -13,10 +13,10 @@ ) from invokeai.backend.patches.layer_patcher import LayerPatcher from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch -from invokeai.backend.patches.layers.concatenated_lora_layer import ConcatenatedLoRALayer from invokeai.backend.patches.layers.flux_control_lora_layer import FluxControlLoRALayer from invokeai.backend.patches.layers.lokr_layer import LoKRLayer from invokeai.backend.patches.layers.lora_layer import LoRALayer +from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range from invokeai.backend.util.original_weights_storage import OriginalWeightsStorage from tests.backend.model_manager.load.model_cache.torch_module_autocast.custom_modules.test_custom_invoke_linear_8_bit_lt import ( build_linear_8bit_lt_layer, @@ -328,17 +328,21 @@ def patch_under_test(request: pytest.FixtureRequest) -> PatchUnderTest: elif layer_type == "concatenated_lora": sub_layer_out_features = [16, 16, 32] - # Create a ConcatenatedLoRA layer. + # Create a MergedLayerPatch. sub_layers: list[LoRALayer] = [] + sub_layer_ranges: list[Range] = [] + dim_0_offset = 0 for out_features in sub_layer_out_features: down = torch.randn(rank, in_features) up = torch.randn(out_features, rank) bias = torch.randn(out_features) sub_layers.append(LoRALayer(up=up, mid=None, down=down, alpha=1.0, bias=bias)) - concatenated_lora_layer = ConcatenatedLoRALayer(sub_layers, concat_axis=0) + sub_layer_ranges.append(Range(dim_0_offset, dim_0_offset + out_features)) + dim_0_offset += out_features + merged_layer_patch = MergedLayerPatch(sub_layers, sub_layer_ranges) input = torch.randn(1, in_features) - return ([(concatenated_lora_layer, 0.7)], input) + return ([(merged_layer_patch, 0.7)], input) elif layer_type == "flux_control_lora": # Create a FluxControlLoRALayer. 
patched_in_features = 40 diff --git a/tests/backend/patches/lora_conversions/lora_state_dicts/flux_dora_onetrainer_format.py b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_dora_onetrainer_format.py new file mode 100644 index 00000000000..10157636de8 --- /dev/null +++ b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_dora_onetrainer_format.py @@ -0,0 +1,2029 @@ +# A sample state dict in the OneTrainer FLUX DoRA format. +# This state dict is based on the ball_flux.safetensors file from here: +# https://github.com/invoke-ai/InvokeAI/issues/6912 +state_dict_keys = { + "lora_te1_text_model_encoder_layers_0_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_0_mlp_fc1.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_0_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_0_mlp_fc1.lora_up.weight": [3072, 4], + "lora_te1_text_model_encoder_layers_0_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_0_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_0_mlp_fc2.lora_down.weight": [4, 3072], + "lora_te1_text_model_encoder_layers_0_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_0_self_attn_k_proj.alpha": [], + "lora_te1_text_model_encoder_layers_0_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_0_self_attn_k_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_0_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_0_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_0_self_attn_out_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_0_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_0_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_0_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_0_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_0_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_0_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_0_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_0_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_0_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_0_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_10_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_10_mlp_fc1.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_10_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_10_mlp_fc1.lora_up.weight": [3072, 4], + "lora_te1_text_model_encoder_layers_10_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_10_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_10_mlp_fc2.lora_down.weight": [4, 3072], + "lora_te1_text_model_encoder_layers_10_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_10_self_attn_k_proj.alpha": [], + "lora_te1_text_model_encoder_layers_10_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_10_self_attn_k_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_10_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_10_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_10_self_attn_out_proj.dora_scale": [1, 768], + 
"lora_te1_text_model_encoder_layers_10_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_10_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_10_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_10_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_10_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_10_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_10_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_10_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_10_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_10_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_11_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_11_mlp_fc1.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_11_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_11_mlp_fc1.lora_up.weight": [3072, 4], + "lora_te1_text_model_encoder_layers_11_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_11_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_11_mlp_fc2.lora_down.weight": [4, 3072], + "lora_te1_text_model_encoder_layers_11_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_11_self_attn_k_proj.alpha": [], + "lora_te1_text_model_encoder_layers_11_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_11_self_attn_k_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_11_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_11_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_11_self_attn_out_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_11_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_11_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_11_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_11_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_11_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_11_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_11_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_11_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_11_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_11_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_1_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_1_mlp_fc1.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_1_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_1_mlp_fc1.lora_up.weight": [3072, 4], + "lora_te1_text_model_encoder_layers_1_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_1_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_1_mlp_fc2.lora_down.weight": [4, 3072], + "lora_te1_text_model_encoder_layers_1_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_1_self_attn_k_proj.alpha": [], + "lora_te1_text_model_encoder_layers_1_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_1_self_attn_k_proj.lora_down.weight": [4, 768], + 
"lora_te1_text_model_encoder_layers_1_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_1_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_1_self_attn_out_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_1_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_1_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_1_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_1_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_1_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_1_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_1_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_1_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_1_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_1_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_2_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_2_mlp_fc1.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_2_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_2_mlp_fc1.lora_up.weight": [3072, 4], + "lora_te1_text_model_encoder_layers_2_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_2_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_2_mlp_fc2.lora_down.weight": [4, 3072], + "lora_te1_text_model_encoder_layers_2_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_2_self_attn_k_proj.alpha": [], + "lora_te1_text_model_encoder_layers_2_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_2_self_attn_k_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_2_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_2_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_2_self_attn_out_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_2_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_2_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_2_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_2_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_2_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_2_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_2_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_2_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_2_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_2_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_3_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_3_mlp_fc1.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_3_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_3_mlp_fc1.lora_up.weight": [3072, 4], + "lora_te1_text_model_encoder_layers_3_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_3_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_3_mlp_fc2.lora_down.weight": [4, 3072], + "lora_te1_text_model_encoder_layers_3_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_3_self_attn_k_proj.alpha": [], + 
"lora_te1_text_model_encoder_layers_3_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_3_self_attn_k_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_3_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_3_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_3_self_attn_out_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_3_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_3_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_3_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_3_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_3_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_3_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_3_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_3_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_3_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_3_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_4_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_4_mlp_fc1.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_4_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_4_mlp_fc1.lora_up.weight": [3072, 4], + "lora_te1_text_model_encoder_layers_4_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_4_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_4_mlp_fc2.lora_down.weight": [4, 3072], + "lora_te1_text_model_encoder_layers_4_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_4_self_attn_k_proj.alpha": [], + "lora_te1_text_model_encoder_layers_4_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_4_self_attn_k_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_4_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_4_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_4_self_attn_out_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_4_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_4_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_4_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_4_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_4_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_4_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_4_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_4_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_4_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_4_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_5_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_5_mlp_fc1.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_5_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_5_mlp_fc1.lora_up.weight": [3072, 4], + "lora_te1_text_model_encoder_layers_5_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_5_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_5_mlp_fc2.lora_down.weight": [4, 
3072], + "lora_te1_text_model_encoder_layers_5_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_5_self_attn_k_proj.alpha": [], + "lora_te1_text_model_encoder_layers_5_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_5_self_attn_k_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_5_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_5_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_5_self_attn_out_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_5_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_5_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_5_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_5_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_5_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_5_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_5_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_5_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_5_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_5_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_6_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_6_mlp_fc1.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_6_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_6_mlp_fc1.lora_up.weight": [3072, 4], + "lora_te1_text_model_encoder_layers_6_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_6_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_6_mlp_fc2.lora_down.weight": [4, 3072], + "lora_te1_text_model_encoder_layers_6_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_6_self_attn_k_proj.alpha": [], + "lora_te1_text_model_encoder_layers_6_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_6_self_attn_k_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_6_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_6_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_6_self_attn_out_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_6_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_6_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_6_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_6_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_6_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_6_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_6_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_6_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_6_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_6_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_7_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_7_mlp_fc1.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_7_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_7_mlp_fc1.lora_up.weight": [3072, 4], + 
"lora_te1_text_model_encoder_layers_7_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_7_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_7_mlp_fc2.lora_down.weight": [4, 3072], + "lora_te1_text_model_encoder_layers_7_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_7_self_attn_k_proj.alpha": [], + "lora_te1_text_model_encoder_layers_7_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_7_self_attn_k_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_7_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_7_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_7_self_attn_out_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_7_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_7_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_7_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_7_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_7_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_7_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_7_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_7_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_7_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_7_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_8_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_8_mlp_fc1.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_8_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_8_mlp_fc1.lora_up.weight": [3072, 4], + "lora_te1_text_model_encoder_layers_8_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_8_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_8_mlp_fc2.lora_down.weight": [4, 3072], + "lora_te1_text_model_encoder_layers_8_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_8_self_attn_k_proj.alpha": [], + "lora_te1_text_model_encoder_layers_8_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_8_self_attn_k_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_8_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_8_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_8_self_attn_out_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_8_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_8_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_8_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_8_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_8_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_8_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_8_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_8_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_8_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_8_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_9_mlp_fc1.alpha": [], + "lora_te1_text_model_encoder_layers_9_mlp_fc1.dora_scale": [1, 768], + 
"lora_te1_text_model_encoder_layers_9_mlp_fc1.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_9_mlp_fc1.lora_up.weight": [3072, 4], + "lora_te1_text_model_encoder_layers_9_mlp_fc2.alpha": [], + "lora_te1_text_model_encoder_layers_9_mlp_fc2.dora_scale": [1, 3072], + "lora_te1_text_model_encoder_layers_9_mlp_fc2.lora_down.weight": [4, 3072], + "lora_te1_text_model_encoder_layers_9_mlp_fc2.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_9_self_attn_k_proj.alpha": [], + "lora_te1_text_model_encoder_layers_9_self_attn_k_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_9_self_attn_k_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_9_self_attn_k_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_9_self_attn_out_proj.alpha": [], + "lora_te1_text_model_encoder_layers_9_self_attn_out_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_9_self_attn_out_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_9_self_attn_out_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_9_self_attn_q_proj.alpha": [], + "lora_te1_text_model_encoder_layers_9_self_attn_q_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_9_self_attn_q_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_9_self_attn_q_proj.lora_up.weight": [768, 4], + "lora_te1_text_model_encoder_layers_9_self_attn_v_proj.alpha": [], + "lora_te1_text_model_encoder_layers_9_self_attn_v_proj.dora_scale": [1, 768], + "lora_te1_text_model_encoder_layers_9_self_attn_v_proj.lora_down.weight": [4, 768], + "lora_te1_text_model_encoder_layers_9_self_attn_v_proj.lora_up.weight": [768, 4], + "lora_te2_encoder_block_0_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_0_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_0_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_0_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_0_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_0_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_0_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_0_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_0_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_0_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_0_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_0_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_0_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_0_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_0_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_0_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wi_1.lora_up.weight": 
[10240, 4], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_0_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_10_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_10_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_10_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_10_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_10_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_10_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_10_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_10_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_10_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_10_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_10_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_10_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_10_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_10_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_10_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_10_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_10_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_11_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_11_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_11_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_11_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_11_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_11_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_11_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_11_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_11_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_11_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_11_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_11_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_11_layer_0_SelfAttention_v.alpha": [], + 
"lora_te2_encoder_block_11_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_11_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_11_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_11_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_12_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_12_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_12_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_12_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_12_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_12_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_12_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_12_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_12_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_12_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_12_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_12_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_12_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_12_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_12_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_12_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_12_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_13_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_13_layer_0_SelfAttention_k.dora_scale": [1, 4096], + 
"lora_te2_encoder_block_13_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_13_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_13_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_13_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_13_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_13_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_13_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_13_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_13_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_13_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_13_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_13_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_13_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_13_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_13_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_14_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_14_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_14_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_14_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_14_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_14_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_14_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_14_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_14_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_14_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_14_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_14_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_14_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_14_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_14_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_14_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_14_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_14_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_14_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + 
"lora_te2_encoder_block_14_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_14_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_14_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_14_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_14_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_14_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_14_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_14_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_14_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_15_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_15_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_15_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_15_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_15_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_15_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_15_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_15_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_15_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_15_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_15_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_15_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_15_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_15_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_15_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_15_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_15_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_16_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_16_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_16_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_16_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_16_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_16_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_16_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_16_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + 
"lora_te2_encoder_block_16_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_16_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_16_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_16_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_16_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_16_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_16_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_16_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_16_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_17_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_17_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_17_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_17_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_17_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_17_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_17_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_17_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_17_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_17_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_17_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_17_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_17_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_17_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_17_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_17_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_17_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_17_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_17_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_17_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_17_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_17_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_17_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_17_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_17_layer_1_DenseReluDense_wo.alpha": [], + 
"lora_te2_encoder_block_17_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_17_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_17_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_18_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_18_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_18_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_18_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_18_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_18_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_18_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_18_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_18_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_18_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_18_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_18_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_18_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_18_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_18_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_18_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_18_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_19_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_19_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_19_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_19_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_19_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_19_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_19_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_19_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_19_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_19_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_19_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_19_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_19_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_19_layer_0_SelfAttention_v.dora_scale": [1, 4096], + 
"lora_te2_encoder_block_19_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_19_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_19_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_1_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_1_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_1_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_1_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_1_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_1_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_1_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_1_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_1_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_1_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_1_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_1_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_1_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_1_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_1_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_1_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_1_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_20_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_20_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_20_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + 
"lora_te2_encoder_block_20_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_20_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_20_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_20_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_20_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_20_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_20_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_20_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_20_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_20_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_20_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_20_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_20_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_20_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_21_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_21_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_21_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_21_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_21_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_21_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_21_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_21_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_21_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_21_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_21_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_21_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_21_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_21_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_21_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_21_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_21_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_21_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_21_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_21_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + 
"lora_te2_encoder_block_21_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_21_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_21_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_21_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_21_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_21_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_21_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_21_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_22_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_22_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_22_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_22_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_22_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_22_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_22_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_22_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_22_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_22_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_22_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_22_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_22_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_22_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_22_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_22_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_22_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_23_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_23_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_23_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_23_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_23_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_23_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_23_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_23_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_23_layer_0_SelfAttention_q.alpha": [], + 
"lora_te2_encoder_block_23_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_23_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_23_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_23_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_23_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_23_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_23_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_23_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_2_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_2_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_2_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_2_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_2_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_2_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_2_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_2_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_2_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_2_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_2_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_2_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_2_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_2_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_2_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_2_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_2_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_2_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_2_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_2_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_2_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_2_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_2_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_2_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_2_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_2_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + 
"lora_te2_encoder_block_2_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_2_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_3_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_3_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_3_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_3_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_3_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_3_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_3_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_3_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_3_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_3_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_3_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_3_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_3_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_3_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_3_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_3_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_3_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_4_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_4_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_4_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_4_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_4_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_4_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_4_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_4_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_4_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_4_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_4_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_4_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_4_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_4_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_4_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_4_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + 
"lora_te2_encoder_block_4_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_4_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_4_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_4_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_4_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_4_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_4_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_4_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_4_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_4_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_4_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_4_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_5_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_5_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_5_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_5_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_5_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_5_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_5_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_5_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_5_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_5_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_5_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_5_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_5_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_5_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_5_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_5_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_5_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_6_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_6_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_6_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_6_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_6_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_6_layer_0_SelfAttention_o.dora_scale": [1, 4096], + 
"lora_te2_encoder_block_6_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_6_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_6_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_6_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_6_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_6_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_6_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_6_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_6_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_6_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_6_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_7_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_7_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_7_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_7_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_7_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_7_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_7_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_7_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_7_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_7_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_7_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_7_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_7_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_7_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_7_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_7_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wi_1.lora_up.weight": 
[10240, 4], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_7_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_8_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_8_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_8_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_8_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_8_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_8_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_8_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_8_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_8_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_8_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_8_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_8_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_8_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_8_layer_0_SelfAttention_v.dora_scale": [1, 4096], + "lora_te2_encoder_block_8_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_8_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_8_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_9_layer_0_SelfAttention_k.alpha": [], + "lora_te2_encoder_block_9_layer_0_SelfAttention_k.dora_scale": [1, 4096], + "lora_te2_encoder_block_9_layer_0_SelfAttention_k.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_9_layer_0_SelfAttention_k.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_9_layer_0_SelfAttention_o.alpha": [], + "lora_te2_encoder_block_9_layer_0_SelfAttention_o.dora_scale": [1, 4096], + "lora_te2_encoder_block_9_layer_0_SelfAttention_o.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_9_layer_0_SelfAttention_o.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_9_layer_0_SelfAttention_q.alpha": [], + "lora_te2_encoder_block_9_layer_0_SelfAttention_q.dora_scale": [1, 4096], + "lora_te2_encoder_block_9_layer_0_SelfAttention_q.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_9_layer_0_SelfAttention_q.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_9_layer_0_SelfAttention_v.alpha": [], + "lora_te2_encoder_block_9_layer_0_SelfAttention_v.dora_scale": [1, 4096], + 
"lora_te2_encoder_block_9_layer_0_SelfAttention_v.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_9_layer_0_SelfAttention_v.lora_up.weight": [4096, 4], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wi_0.alpha": [], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wi_0.dora_scale": [1, 4096], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wi_0.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wi_0.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wi_1.alpha": [], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wi_1.dora_scale": [1, 4096], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wi_1.lora_down.weight": [4, 4096], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wi_1.lora_up.weight": [10240, 4], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wo.alpha": [], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wo.dora_scale": [1, 10240], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wo.lora_down.weight": [4, 10240], + "lora_te2_encoder_block_9_layer_1_DenseReluDense_wo.lora_up.weight": [4096, 4], + "lora_transformer_single_transformer_blocks_0_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_0_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_0_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_0_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_0_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_0_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_0_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_0_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_0_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_0_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_0_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_0_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_10_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_10_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_10_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_10_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_10_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_10_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_10_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_10_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_10_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_10_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_10_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_10_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_11_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_11_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_11_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_11_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_11_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_11_attn_to_q.dora_scale": [1, 
3072], + "lora_transformer_single_transformer_blocks_11_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_11_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_11_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_11_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_11_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_11_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_12_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_12_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_12_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_12_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_12_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_12_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_12_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_12_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_12_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_12_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_12_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_12_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_13_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_13_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_13_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_13_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_13_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_13_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_13_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_13_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_13_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_13_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_13_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_13_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_14_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_14_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_14_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_14_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_14_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_14_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_14_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_14_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_14_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_14_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_14_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_14_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_15_attn_to_k.alpha": [], 
+ "lora_transformer_single_transformer_blocks_15_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_15_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_15_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_15_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_15_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_15_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_15_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_15_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_15_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_15_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_15_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_16_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_16_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_16_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_16_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_16_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_16_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_16_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_16_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_16_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_16_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_16_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_16_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_17_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_17_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_17_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_17_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_17_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_17_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_17_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_17_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_17_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_17_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_17_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_17_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_18_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_18_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_18_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_18_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_18_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_18_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_18_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_18_attn_to_q.lora_up.weight": [3072, 
4], + "lora_transformer_single_transformer_blocks_18_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_18_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_18_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_18_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_19_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_19_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_19_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_19_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_19_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_19_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_19_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_19_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_19_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_19_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_19_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_19_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_1_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_1_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_1_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_1_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_1_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_1_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_1_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_1_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_1_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_1_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_1_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_1_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_20_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_20_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_20_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_20_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_20_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_20_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_20_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_20_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_20_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_20_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_20_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_20_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_21_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_21_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_21_attn_to_k.lora_down.weight": [4, 3072], + 
"lora_transformer_single_transformer_blocks_21_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_21_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_21_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_21_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_21_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_21_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_21_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_21_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_21_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_22_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_22_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_22_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_22_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_22_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_22_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_22_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_22_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_22_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_22_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_22_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_22_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_23_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_23_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_23_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_23_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_23_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_23_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_23_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_23_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_23_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_23_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_23_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_23_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_24_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_24_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_24_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_24_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_24_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_24_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_24_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_24_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_24_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_24_attn_to_v.dora_scale": [1, 3072], + 
"lora_transformer_single_transformer_blocks_24_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_24_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_25_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_25_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_25_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_25_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_25_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_25_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_25_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_25_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_25_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_25_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_25_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_25_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_26_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_26_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_26_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_26_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_26_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_26_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_26_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_26_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_26_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_26_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_26_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_26_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_27_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_27_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_27_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_27_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_27_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_27_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_27_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_27_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_27_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_27_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_27_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_27_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_28_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_28_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_28_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_28_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_28_attn_to_q.alpha": [], + 
"lora_transformer_single_transformer_blocks_28_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_28_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_28_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_28_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_28_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_28_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_28_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_29_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_29_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_29_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_29_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_29_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_29_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_29_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_29_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_29_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_29_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_29_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_29_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_2_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_2_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_2_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_2_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_2_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_2_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_2_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_2_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_2_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_2_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_2_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_2_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_30_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_30_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_30_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_30_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_30_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_30_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_30_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_30_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_30_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_30_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_30_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_30_attn_to_v.lora_up.weight": [3072, 4], + 
"lora_transformer_single_transformer_blocks_31_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_31_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_31_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_31_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_31_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_31_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_31_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_31_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_31_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_31_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_31_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_31_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_32_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_32_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_32_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_32_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_32_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_32_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_32_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_32_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_32_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_32_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_32_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_32_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_33_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_33_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_33_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_33_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_33_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_33_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_33_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_33_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_33_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_33_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_33_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_33_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_34_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_34_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_34_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_34_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_34_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_34_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_34_attn_to_q.lora_down.weight": [4, 3072], + 
"lora_transformer_single_transformer_blocks_34_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_34_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_34_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_34_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_34_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_35_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_35_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_35_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_35_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_35_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_35_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_35_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_35_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_35_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_35_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_35_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_35_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_36_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_36_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_36_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_36_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_36_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_36_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_36_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_36_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_36_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_36_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_36_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_36_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_37_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_37_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_37_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_37_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_37_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_37_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_37_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_37_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_37_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_37_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_37_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_37_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_3_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_3_attn_to_k.dora_scale": [1, 3072], + 
"lora_transformer_single_transformer_blocks_3_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_3_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_3_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_3_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_3_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_3_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_3_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_3_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_3_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_3_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_4_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_4_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_4_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_4_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_4_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_4_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_4_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_4_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_4_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_4_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_4_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_4_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_5_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_5_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_5_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_5_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_5_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_5_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_5_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_5_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_5_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_5_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_5_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_5_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_6_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_6_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_6_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_6_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_6_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_6_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_6_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_6_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_6_attn_to_v.alpha": [], + 
"lora_transformer_single_transformer_blocks_6_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_6_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_6_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_7_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_7_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_7_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_7_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_7_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_7_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_7_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_7_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_7_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_7_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_7_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_7_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_8_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_8_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_8_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_8_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_8_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_8_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_8_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_8_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_8_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_8_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_8_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_8_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_9_attn_to_k.alpha": [], + "lora_transformer_single_transformer_blocks_9_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_9_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_9_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_9_attn_to_q.alpha": [], + "lora_transformer_single_transformer_blocks_9_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_9_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_9_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_single_transformer_blocks_9_attn_to_v.alpha": [], + "lora_transformer_single_transformer_blocks_9_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_single_transformer_blocks_9_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_single_transformer_blocks_9_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_0_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_0_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_0_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_0_attn_add_k_proj.lora_up.weight": [3072, 4], + 
"lora_transformer_transformer_blocks_0_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_0_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_0_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_0_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_0_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_0_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_0_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_0_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_0_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_0_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_0_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_0_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_0_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_0_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_0_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_0_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_0_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_0_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_0_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_0_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_0_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_0_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_0_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_0_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_0_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_0_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_0_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_0_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_10_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_10_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_10_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_10_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_10_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_10_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_10_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_10_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_10_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_10_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_10_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_10_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_10_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_10_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_10_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_10_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_10_attn_to_k.alpha": [], + 
"lora_transformer_transformer_blocks_10_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_10_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_10_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_10_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_10_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_10_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_10_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_10_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_10_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_10_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_10_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_10_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_10_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_10_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_10_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_11_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_11_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_11_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_11_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_11_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_11_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_11_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_11_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_11_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_11_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_11_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_11_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_11_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_11_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_11_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_11_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_11_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_11_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_11_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_11_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_11_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_11_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_11_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_11_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_11_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_11_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_11_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_11_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_11_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_11_attn_to_v.dora_scale": [1, 3072], + 
"lora_transformer_transformer_blocks_11_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_11_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_12_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_12_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_12_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_12_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_12_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_12_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_12_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_12_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_12_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_12_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_12_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_12_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_12_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_12_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_12_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_12_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_12_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_12_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_12_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_12_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_12_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_12_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_12_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_12_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_12_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_12_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_12_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_12_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_12_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_12_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_12_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_12_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_13_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_13_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_13_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_13_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_13_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_13_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_13_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_13_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_13_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_13_attn_add_v_proj.dora_scale": [1, 3072], + 
"lora_transformer_transformer_blocks_13_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_13_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_13_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_13_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_13_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_13_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_13_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_13_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_13_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_13_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_13_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_13_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_13_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_13_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_13_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_13_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_13_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_13_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_13_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_13_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_13_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_13_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_14_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_14_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_14_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_14_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_14_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_14_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_14_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_14_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_14_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_14_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_14_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_14_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_14_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_14_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_14_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_14_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_14_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_14_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_14_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_14_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_14_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_14_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_14_attn_to_out_0.lora_down.weight": [4, 
3072], + "lora_transformer_transformer_blocks_14_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_14_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_14_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_14_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_14_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_14_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_14_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_14_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_14_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_15_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_15_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_15_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_15_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_15_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_15_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_15_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_15_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_15_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_15_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_15_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_15_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_15_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_15_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_15_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_15_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_15_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_15_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_15_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_15_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_15_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_15_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_15_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_15_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_15_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_15_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_15_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_15_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_15_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_15_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_15_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_15_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_16_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_16_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_16_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_16_attn_add_k_proj.lora_up.weight": [3072, 4], + 
"lora_transformer_transformer_blocks_16_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_16_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_16_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_16_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_16_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_16_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_16_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_16_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_16_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_16_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_16_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_16_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_16_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_16_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_16_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_16_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_16_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_16_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_16_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_16_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_16_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_16_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_16_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_16_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_16_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_16_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_16_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_16_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_17_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_17_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_17_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_17_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_17_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_17_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_17_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_17_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_17_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_17_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_17_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_17_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_17_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_17_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_17_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_17_attn_to_add_out.lora_up.weight": [3072, 4], + 
"lora_transformer_transformer_blocks_17_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_17_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_17_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_17_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_17_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_17_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_17_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_17_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_17_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_17_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_17_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_17_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_17_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_17_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_17_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_17_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_18_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_18_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_18_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_18_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_18_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_18_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_18_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_18_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_18_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_18_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_18_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_18_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_18_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_18_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_18_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_18_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_18_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_18_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_18_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_18_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_18_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_18_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_18_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_18_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_18_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_18_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_18_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_18_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_18_attn_to_v.alpha": [], + 
"lora_transformer_transformer_blocks_18_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_18_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_18_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_1_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_1_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_1_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_1_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_1_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_1_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_1_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_1_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_1_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_1_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_1_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_1_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_1_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_1_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_1_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_1_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_1_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_1_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_1_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_1_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_1_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_1_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_1_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_1_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_1_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_1_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_1_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_1_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_1_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_1_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_1_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_1_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_2_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_2_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_2_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_2_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_2_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_2_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_2_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_2_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_2_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_2_attn_add_v_proj.dora_scale": [1, 3072], + 
"lora_transformer_transformer_blocks_2_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_2_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_2_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_2_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_2_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_2_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_2_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_2_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_2_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_2_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_2_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_2_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_2_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_2_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_2_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_2_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_2_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_2_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_2_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_2_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_2_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_2_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_3_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_3_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_3_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_3_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_3_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_3_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_3_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_3_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_3_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_3_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_3_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_3_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_3_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_3_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_3_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_3_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_3_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_3_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_3_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_3_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_3_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_3_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_3_attn_to_out_0.lora_down.weight": [4, 3072], + 
"lora_transformer_transformer_blocks_3_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_3_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_3_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_3_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_3_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_3_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_3_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_3_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_3_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_4_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_4_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_4_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_4_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_4_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_4_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_4_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_4_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_4_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_4_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_4_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_4_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_4_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_4_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_4_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_4_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_4_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_4_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_4_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_4_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_4_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_4_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_4_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_4_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_4_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_4_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_4_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_4_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_4_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_4_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_4_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_4_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_5_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_5_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_5_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_5_attn_add_k_proj.lora_up.weight": [3072, 4], + 
"lora_transformer_transformer_blocks_5_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_5_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_5_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_5_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_5_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_5_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_5_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_5_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_5_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_5_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_5_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_5_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_5_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_5_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_5_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_5_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_5_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_5_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_5_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_5_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_5_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_5_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_5_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_5_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_5_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_5_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_5_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_5_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_6_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_6_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_6_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_6_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_6_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_6_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_6_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_6_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_6_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_6_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_6_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_6_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_6_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_6_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_6_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_6_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_6_attn_to_k.alpha": [], + 
"lora_transformer_transformer_blocks_6_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_6_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_6_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_6_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_6_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_6_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_6_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_6_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_6_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_6_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_6_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_6_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_6_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_6_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_6_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_7_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_7_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_7_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_7_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_7_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_7_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_7_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_7_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_7_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_7_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_7_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_7_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_7_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_7_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_7_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_7_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_7_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_7_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_7_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_7_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_7_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_7_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_7_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_7_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_7_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_7_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_7_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_7_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_7_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_7_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_7_attn_to_v.lora_down.weight": [4, 
3072], + "lora_transformer_transformer_blocks_7_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_8_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_8_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_8_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_8_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_8_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_8_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_8_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_8_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_8_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_8_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_8_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_8_attn_add_v_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_8_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_8_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_8_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_8_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_8_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_8_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_8_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_8_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_8_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_8_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_8_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_8_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_8_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_8_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_8_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_8_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_8_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_8_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_8_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_8_attn_to_v.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_9_attn_add_k_proj.alpha": [], + "lora_transformer_transformer_blocks_9_attn_add_k_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_9_attn_add_k_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_9_attn_add_k_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_9_attn_add_q_proj.alpha": [], + "lora_transformer_transformer_blocks_9_attn_add_q_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_9_attn_add_q_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_9_attn_add_q_proj.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_9_attn_add_v_proj.alpha": [], + "lora_transformer_transformer_blocks_9_attn_add_v_proj.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_9_attn_add_v_proj.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_9_attn_add_v_proj.lora_up.weight": [3072, 4], + 
"lora_transformer_transformer_blocks_9_attn_to_add_out.alpha": [], + "lora_transformer_transformer_blocks_9_attn_to_add_out.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_9_attn_to_add_out.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_9_attn_to_add_out.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_9_attn_to_k.alpha": [], + "lora_transformer_transformer_blocks_9_attn_to_k.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_9_attn_to_k.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_9_attn_to_k.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_9_attn_to_out_0.alpha": [], + "lora_transformer_transformer_blocks_9_attn_to_out_0.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_9_attn_to_out_0.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_9_attn_to_out_0.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_9_attn_to_q.alpha": [], + "lora_transformer_transformer_blocks_9_attn_to_q.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_9_attn_to_q.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_9_attn_to_q.lora_up.weight": [3072, 4], + "lora_transformer_transformer_blocks_9_attn_to_v.alpha": [], + "lora_transformer_transformer_blocks_9_attn_to_v.dora_scale": [1, 3072], + "lora_transformer_transformer_blocks_9_attn_to_v.lora_down.weight": [4, 3072], + "lora_transformer_transformer_blocks_9_attn_to_v.lora_up.weight": [3072, 4], +} diff --git a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py index b80577a3493..2bdb883faff 100644 --- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py +++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py @@ -6,6 +6,9 @@ lora_model_from_flux_diffusers_state_dict, ) from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX +from tests.backend.patches.lora_conversions.lora_state_dicts.flux_dora_onetrainer_format import ( + state_dict_keys as flux_onetrainer_state_dict_keys, +) from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_format import ( state_dict_keys as flux_diffusers_state_dict_keys, ) @@ -27,12 +30,13 @@ def test_is_state_dict_likely_in_flux_diffusers_format_true(sd_keys: dict[str, l assert is_state_dict_likely_in_flux_diffusers_format(state_dict) -def test_is_state_dict_likely_in_flux_diffusers_format_false(): +@pytest.mark.parametrize("sd_keys", [flux_kohya_state_dict_keys, flux_onetrainer_state_dict_keys]) +def test_is_state_dict_likely_in_flux_diffusers_format_false(sd_keys: dict[str, list[int]]): """Test that is_state_dict_likely_in_flux_diffusers_format() returns False for a state dict that is in the Kohya FLUX LoRA format. """ # Construct a state dict that is not in the Kohya FLUX LoRA format. 
diff --git a/tests/backend/patches/lora_conversions/test_flux_kohya_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_kohya_lora_conversion_utils.py
index 4c58c11586e..52b8ecc9c9c 100644
--- a/tests/backend/patches/lora_conversions/test_flux_kohya_lora_conversion_utils.py
+++ b/tests/backend/patches/lora_conversions/test_flux_kohya_lora_conversion_utils.py
@@ -13,6 +13,9 @@
     FLUX_LORA_CLIP_PREFIX,
     FLUX_LORA_TRANSFORMER_PREFIX,
 )
+from tests.backend.patches.lora_conversions.lora_state_dicts.flux_dora_onetrainer_format import (
+    state_dict_keys as flux_onetrainer_state_dict_keys,
+)
 from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_format import (
     state_dict_keys as flux_diffusers_state_dict_keys,
 )
@@ -34,11 +37,12 @@ def test_is_state_dict_likely_in_flux_kohya_format_true(sd_keys: dict[str, list[
     assert is_state_dict_likely_in_flux_kohya_format(state_dict)


-def test_is_state_dict_likely_in_flux_kohya_format_false():
+@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_onetrainer_state_dict_keys])
+def test_is_state_dict_likely_in_flux_kohya_format_false(sd_keys: dict[str, list[int]]):
     """Test that is_state_dict_likely_in_flux_kohya_format() returns False for a state dict that is in the Diffusers
     FLUX LoRA format.
     """
-    state_dict = keys_to_mock_state_dict(flux_diffusers_state_dict_keys)
+    state_dict = keys_to_mock_state_dict(sd_keys)

     assert not is_state_dict_likely_in_flux_kohya_format(state_dict)

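is_state_dict_likely_in_flux_onetrainer_format() itself is outside this diff. As a rough illustration of the kind of key-pattern heuristic these is_state_dict_likely_in_* helpers rely on, the sketch below flags a state dict as OneTrainer-style when its keys use Kohya-like underscore prefixes and at least one .dora_scale tensor is present. The prefixes and rules shown are assumptions; InvokeAI's actual checks may differ.

```python
import re
from typing import Any

# Hypothetical heuristic, not InvokeAI's implementation.
_ONETRAINER_KEY_RE = re.compile(
    r"^(lora_transformer|lora_te1|lora_te2)_\w+\.(lora_up\.weight|lora_down\.weight|alpha|dora_scale)$"
)


def looks_like_flux_onetrainer_state_dict(state_dict: dict[str, Any]) -> bool:
    # Require a DoRA scale so that plain Kohya FLUX LoRAs are not misclassified.
    has_dora_scale = any(k.endswith(".dora_scale") for k in state_dict)
    return has_dora_scale and all(_ONETRAINER_KEY_RE.match(k) for k in state_dict)
```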
diff --git a/tests/backend/patches/lora_conversions/test_flux_onetrainer_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_onetrainer_lora_conversion_utils.py
new file mode 100644
index 00000000000..cf8a27d5ada
--- /dev/null
+++ b/tests/backend/patches/lora_conversions/test_flux_onetrainer_lora_conversion_utils.py
@@ -0,0 +1,77 @@
+import pytest
+
+from invokeai.backend.patches.lora_conversions.flux_lora_constants import (
+    FLUX_LORA_CLIP_PREFIX,
+    FLUX_LORA_T5_PREFIX,
+    FLUX_LORA_TRANSFORMER_PREFIX,
+)
+from invokeai.backend.patches.lora_conversions.flux_onetrainer_lora_conversion_utils import (
+    is_state_dict_likely_in_flux_onetrainer_format,
+    lora_model_from_flux_onetrainer_state_dict,
+)
+from tests.backend.patches.lora_conversions.lora_state_dicts.flux_dora_onetrainer_format import (
+    state_dict_keys as flux_onetrainer_state_dict_keys,
+)
+from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_format import (
+    state_dict_keys as flux_diffusers_state_dict_keys,
+)
+from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_kohya_format import (
+    state_dict_keys as flux_kohya_state_dict_keys,
+)
+from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_kohya_with_te1_format import (
+    state_dict_keys as flux_kohya_te1_state_dict_keys,
+)
+from tests.backend.patches.lora_conversions.lora_state_dicts.utils import keys_to_mock_state_dict
+
+
+def test_is_state_dict_likely_in_flux_onetrainer_format_true():
+    """Test that is_state_dict_likely_in_flux_onetrainer_format() can identify a state dict in the OneTrainer
+    FLUX LoRA format.
+    """
+    # Construct a state dict that is in the OneTrainer FLUX LoRA format.
+    state_dict = keys_to_mock_state_dict(flux_onetrainer_state_dict_keys)
+
+    assert is_state_dict_likely_in_flux_onetrainer_format(state_dict)
+
+
+@pytest.mark.parametrize(
+    "sd_keys",
+    [
+        flux_kohya_state_dict_keys,
+        flux_kohya_te1_state_dict_keys,
+        flux_diffusers_state_dict_keys,
+    ],
+)
+def test_is_state_dict_likely_in_flux_onetrainer_format_false(sd_keys: dict[str, list[int]]):
+    """Test that is_state_dict_likely_in_flux_onetrainer_format() returns False for a state dict that is in the Diffusers
+    FLUX LoRA format.
+    """
+    state_dict = keys_to_mock_state_dict(sd_keys)
+    assert not is_state_dict_likely_in_flux_onetrainer_format(state_dict)
+
+
+def test_lora_model_from_flux_onetrainer_state_dict():
+    state_dict = keys_to_mock_state_dict(flux_onetrainer_state_dict_keys)
+
+    lora_model = lora_model_from_flux_onetrainer_state_dict(state_dict)
+
+    # Check that the model has the correct number of LoRA layers.
+    expected_lora_layers: set[str] = set()
+    for k in flux_onetrainer_state_dict_keys:
+        k = k.replace(".lora_up.weight", "")
+        k = k.replace(".lora_down.weight", "")
+        k = k.replace(".alpha", "")
+        k = k.replace(".dora_scale", "")
+        expected_lora_layers.add(k)
+    # Drop the K/V/proj_mlp weights because these are all concatenated into a single layer in the BFL format (we keep
+    # the Q weights so that we count these layers once).
+    concatenated_weights = ["to_k", "to_v", "proj_mlp", "add_k_proj", "add_v_proj"]
+    expected_lora_layers = {k for k in expected_lora_layers if not any(w in k for w in concatenated_weights)}
+
+    assert len(lora_model.layers) == len(expected_lora_layers)
+
+    # Check that all of the layers have the expected prefix.
+    assert all(
+        k.startswith((FLUX_LORA_TRANSFORMER_PREFIX, FLUX_LORA_CLIP_PREFIX, FLUX_LORA_T5_PREFIX))
+        for k in lora_model.layers.keys()
+    )
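For context on the .dora_scale tensors exercised by the new OneTrainer fixture and tests: in the DoRA formulation the low-rank delta is folded into the base weight, the result is renormalised column-wise, and a learned magnitude vector rescales it. A rough sketch of that update, following the published DoRA math rather than InvokeAI's patching code (the alpha/rank scaling convention is assumed to match ordinary LoRA):

```python
import torch


def apply_dora_update(
    w: torch.Tensor,  # base weight, [out_features, in_features]
    lora_up: torch.Tensor,  # [out_features, rank]
    lora_down: torch.Tensor,  # [rank, in_features]
    alpha: torch.Tensor,  # scalar
    dora_scale: torch.Tensor,  # learned magnitude, e.g. [1, 3072] in the fixture above
) -> torch.Tensor:
    rank = lora_down.shape[0]
    # Fold the scaled low-rank delta into the base weight.
    directed = w + (alpha / rank) * (lora_up @ lora_down)
    # Normalise each column of the combined weight, then rescale by the learned magnitude.
    column_norm = directed.norm(dim=0, keepdim=True)
    return dora_scale * (directed / column_norm)
```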
diff --git a/tests/backend/patches/lora_conversions/test_kohya_key_utils.py b/tests/backend/patches/lora_conversions/test_kohya_key_utils.py
new file mode 100644
index 00000000000..8e3c9446955
--- /dev/null
+++ b/tests/backend/patches/lora_conversions/test_kohya_key_utils.py
@@ -0,0 +1,96 @@
+import pytest
+
+from invokeai.backend.patches.lora_conversions.kohya_key_utils import (
+    INDEX_PLACEHOLDER,
+    ParsingTree,
+    generate_kohya_parsing_tree_from_keys,
+    insert_periods_into_kohya_key,
+)
+
+
+def test_insert_periods_into_kohya_key():
+    """Test that insert_periods_into_kohya_key() correctly inserts periods into a Kohya key."""
+    key = "module_a_module_b_0_attn_to_k"
+    parsing_tree: ParsingTree = {
+        "module_a": {
+            "module_b": {
+                INDEX_PLACEHOLDER: {
+                    "attn": {
+                        "to_k": {},
+                    },
+                },
+            },
+        },
+    }
+    result = insert_periods_into_kohya_key(key, parsing_tree)
+    assert result == "module_a.module_b.0.attn.to_k"
+
+
+def test_insert_periods_into_kohya_key_invalid_key():
+    """Test that insert_periods_into_kohya_key() raises ValueError for a key that is invalid."""
+    key = "invalid_key_format"
+    parsing_tree: ParsingTree = {
+        "module_a": {
+            "module_b": {
+                INDEX_PLACEHOLDER: {
+                    "attn": {
+                        "to_k": {},
+                    },
+                },
+            },
+        },
+    }
+    with pytest.raises(ValueError):
+        insert_periods_into_kohya_key(key, parsing_tree)
+
+
+def test_insert_periods_into_kohya_key_too_long():
+    """Test that insert_periods_into_kohya_key() raises ValueError for a key that has a valid prefix, but is too long."""
+    key = "module_a.module_b.0.attn.to_k.invalid_suffix"
+    parsing_tree: ParsingTree = {
+        "module_a": {
+            "module_b": {
+                INDEX_PLACEHOLDER: {
+                    "attn": {
+                        "to_k": {},
+                    },
+                },
+            },
+        },
+    }
+    with pytest.raises(ValueError):
+        insert_periods_into_kohya_key(key, parsing_tree)
+
+
+def test_generate_kohya_parsing_tree_from_keys():
+    """Test that generate_kohya_parsing_tree_from_keys() correctly generates a parsing tree."""
+    keys = [
+        "module_a.module_b.0.attn.to_k",
+        "module_a.module_b.1.attn.to_k",
+        "module_a.module_c.proj",
+    ]
+
+    expected_tree: ParsingTree = {
+        "module_a": {
+            "module_b": {
+                INDEX_PLACEHOLDER: {
+                    "attn": {
+                        "to_k": {},
+                    },
+                }
+            },
+            "module_c": {
+                "proj": {},
+            },
+        }
+    }
+
+    tree = generate_kohya_parsing_tree_from_keys(keys)
+    assert tree == expected_tree
+
+
+def test_generate_kohya_parsing_tree_from_empty_keys():
+    """Test that generate_kohya_parsing_tree_from_keys() handles empty input."""
+    keys: list[str] = []
+    tree = generate_kohya_parsing_tree_from_keys(keys)
+    assert tree == {}
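Finally, a small usage sketch of the two kohya_key_utils helpers exercised above. The imports and behaviour follow the new tests; the FLUX-style module names are illustrative only:

```python
from invokeai.backend.patches.lora_conversions.kohya_key_utils import (
    generate_kohya_parsing_tree_from_keys,
    insert_periods_into_kohya_key,
)

# Build a parsing tree from the dotted module paths of the target model, then use it to
# recover the dots that Kohya-style keys flatten into underscores.
module_paths = [
    "double_blocks.0.img_attn.qkv",  # illustrative module names
    "double_blocks.0.img_attn.proj",
]
tree = generate_kohya_parsing_tree_from_keys(module_paths)

print(insert_periods_into_kohya_key("double_blocks_0_img_attn_qkv", tree))
# Expected, per the tests above: "double_blocks.0.img_attn.qkv"
```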