Open Source Internship - 金逸 - sequence_classification / VeRA model fine-tuning #1989

Open
wants to merge 15 commits into base: master

5,631 changes: 5,631 additions & 0 deletions applications/LayoutLM/layoutlm.ipynb

Large diffs are not rendered by default.

3,084 changes: 3,084 additions & 0 deletions examples/sequence_classification/VeRA.ipynb

Large diffs are not rendered by default.
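
The VeRA notebook itself is not rendered here. For orientation only, a minimal sketch of the kind of setup such a notebook covers is shown below; it assumes the mindnlp peft API mirrors Hugging Face PEFT, and the model name, target modules, and hyperparameters are illustrative assumptions rather than values taken from the notebook.

# Hypothetical sketch only: VeRA fine-tuning for sequence classification with mindnlp.
# Model name, target_modules and hyperparameters are assumptions, not taken from the notebook.
from mindnlp.transformers import AutoModelForSequenceClassification, AutoTokenizer
from mindnlp.peft import VeraConfig, get_peft_model

base_model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

# VeRA freezes a single pair of shared random projections and trains only the small
# per-layer scaling vectors, so the number of trainable parameters stays tiny.
peft_config = VeraConfig(
    r=256,                              # rank of the shared random projections
    target_modules=["query", "value"],  # attention projections to adapt
    vera_dropout=0.1,
)
model = get_peft_model(base_model, peft_config)
model.print_trainable_parameters()      # only the VeRA scaling vectors (plus the classifier head) train
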

2 changes: 2 additions & 0 deletions mindnlp/peft/__init__.py
@@ -53,6 +53,8 @@
PolyModel,
LNTuningConfig,
LNTuningModel,
VeraConfig,
VeraModel
)

from .utils import (
4 changes: 4 additions & 0 deletions mindnlp/peft/mapping.py
@@ -46,6 +46,8 @@
LNTuningModel,
PrefixTuningConfig,
PromptEncoderConfig,
VeraConfig,
VeraModel
)

MODEL_TYPE_TO_PEFT_MODEL_MAPPING = {
@@ -70,6 +72,7 @@
"LOHA": LoHaConfig,
"POLY": PolyConfig,
"LN_TUNING": LNTuningConfig,
"VERA": VeraConfig
}

PEFT_TYPE_TO_TUNER_MAPPING = {
@@ -80,6 +83,7 @@
"LOHA": LoHaModel,
"POLY": PolyModel,
"LN_TUNING": LNTuningModel,
"VERA": VeraConfig
}


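
For context, these string-keyed registries are what allow a config to be resolved to its tuner class at runtime. A simplified sketch of that dispatch is given below; the helper name is hypothetical, and the actual routing in mapping.py (get_peft_model) also consults MODEL_TYPE_TO_PEFT_MODEL_MAPPING for task-specific wrappers.

# Simplified, hypothetical sketch of how the registries above are consumed.
def build_tuner(model, peft_config, adapter_name="default"):
    # PeftType is a string-valued enum, so .value yields keys such as "VERA".
    tuner_cls = PEFT_TYPE_TO_TUNER_MAPPING[peft_config.peft_type.value]  # e.g. VeraModel
    return tuner_cls(model, peft_config, adapter_name)
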
2 changes: 2 additions & 0 deletions mindnlp/peft/peft_model.py
@@ -43,6 +43,7 @@
LoHaModel,
PolyModel,
LNTuningModel,
VeraModel
)
from .utils import (
# SAFETENSORS_WEIGHTS_NAME,
@@ -71,6 +72,7 @@
PeftType.LOHA: LoHaModel,
PeftType.POLY: PolyModel,
PeftType.LN_TUNING: LNTuningModel,
PeftType.VERA: VeraModel,
}

class PeftModel(nn.Module):
1 change: 1 addition & 0 deletions mindnlp/peft/tuners/__init__.py
@@ -26,3 +26,4 @@
from .multitask_prompt_tuning import MultitaskPromptEmbedding, MultitaskPromptTuningConfig, MultitaskPromptTuningInit
from .poly import PolyConfig, PolyModel
from .ln_tuning import LNTuningConfig, LNTuningModel
from .vera import VeraConfig, VeraModel
150 changes: 150 additions & 0 deletions mindnlp/peft/tuners/_buffer_dict.py
@@ -0,0 +1,150 @@
"""buffer dict"""
import collections
from collections import OrderedDict
import mindspore.context
from mindnlp.core import nn
class BufferDict(nn.Module):
r"""
Holds buffers in a dictionary.

BufferDict can be indexed like a regular Python dictionary, but the buffers it contains are properly registered and
are visible to all Module methods. `BufferDict` is an **ordered** dictionary that respects

* the order of insertion, and
* in `BufferDict.update`, the order of the merged `OrderedDict`
or another `BufferDict` (the argument to
:meth:`~BufferDict.update`).

Note that :meth:`~BufferDict.update` with other unordered mapping
types (e.g., Python's plain `dict`) does not preserve the order of the
merged mapping.

Args:
buffers (iterable, optional):
a mapping (dictionary) of (string : :class:`~mindspore.Tensor`) or an iterable of key-value pairs
of type (string, :class:`~mindspore.Tensor`)

Example::

class MyModule(nn.Module):
def __init__(self):
super().__init__()
self.buffers = BufferDict({
'left': Tensor(shape=(5, 10), dtype=mindspore.float32),
'right': Tensor(shape=(5, 10), dtype=mindspore.float32)
})

def forward(self, x, choice):
x = self.buffers[choice].matmul(x)
return x
"""

def __init__(self, buffers=None, persistent: bool = False):
r"""
Args:
buffers (`dict`):
A mapping (dictionary) from string to :class:`~mindspore.Tensor`, or an iterable of key-value pairs
of type (string, :class:`~mindspore.Tensor`).
"""
super(BufferDict, self).__init__()
if buffers is not None:
self.update(buffers)

self.persistent = persistent

def __getitem__(self, key):
return self._buffers[key]

def __setitem__(self, key, buffer):
self._buffers[key] = buffer

def __delitem__(self, key):
del self._buffers[key]

def __len__(self):
return len(self._buffers)

def __iter__(self):
return iter(self._buffers.keys())

def __contains__(self, key):
return key in self._buffers

def clear(self):
"""Remove all items from the BufferDict."""
self._buffers.clear()

def pop(self, key):
r"""Remove key from the BufferDict and return its buffer.

Args:
key (`str`):
Key to pop from the BufferDict
"""
v = self[key]
del self[key]
return v

def keys(self):
r"""Return an iterable of the BufferDict keys."""
return self._buffers.keys()

def items(self):
r"""Return an iterable of the BufferDict key/value pairs."""
return self._buffers.items()

def values(self):
r"""Return an iterable of the BufferDict values."""
return self._buffers.values()

def update(self, buffers):
r"""
Update the `BufferDict` with the key-value pairs from a
mapping or an iterable, overwriting existing keys.

Note:
If `buffers` is an `OrderedDict`, a `BufferDict`,
or an iterable of key-value pairs, the order of new elements in it is
preserved.

Args:
buffers (iterable):
a mapping (dictionary) from string to :class:`~mindspore.Tensor`,
or an iterable of key-value pairs of type (string, :class:`~mindspore.Tensor`)
"""
if not isinstance(buffers, collections.abc.Iterable):
raise TypeError(
"BuffersDict.update should be called with an "
"iterable of key/value pairs, but got " + type(buffers).__name__
)

if isinstance(buffers, collections.abc.Mapping):
if isinstance(buffers, (OrderedDict, BufferDict)):
for key, buffer in buffers.items():
self[key] = buffer
else:
for key, buffer in sorted(buffers.items()):
self[key] = buffer
else:
for j, p in enumerate(buffers):
if not isinstance(p, collections.abc.Iterable):
raise TypeError(
"BufferDict update sequence element " + str(j) + " should be Iterable; is" + type(p).__name__
)
if not len(p) == 2:
raise ValueError(
"BufferDict update sequence element " + str(j) + " has length " + str(len(p)) + "; 2 is required"
)
self[p[0]] = p[1]

def extra_repr(self):
child_lines = []
for k, p in self._buffers.items():
size_str = "x".join(str(size) for size in p.shape)
parastr = f"Buffer containing: [{type(p)} of size {size_str}]"
child_lines.append(" (" + k + "): " + parastr)
tmpstr = "\n".join(child_lines)
return tmpstr

def __call__(self, input):
raise RuntimeError("BufferDict should not be called.")
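
As a standalone illustration (not part of the diff), the class above behaves like an ordered dictionary of registered tensors; a quick usage sketch, with arbitrary example shapes:

# Standalone usage sketch for BufferDict; key names and tensor shapes are illustrative only.
import mindspore
from mindspore import ops

bd = BufferDict({"vera_A": ops.randn(256, 768), "vera_B": ops.randn(768, 256)})
bd.update({"bias_term": ops.zeros((768,), mindspore.float32)})

for name in bd:                      # iterate over the registered buffer names
    print(name, bd[name].shape)
print("vera_A" in bd, len(bd))       # membership test and length work like a dict
popped = bd.pop("bias_term")         # removes the entry and returns the tensor
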
22 changes: 13 additions & 9 deletions mindnlp/peft/tuners/lycoris_utils.py
@@ -1,3 +1,6 @@
"""
LycorisConfig and LycorisLayer class for LyCORIS like adapters.
"""
# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,9 +14,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
LycorisConfig and LycorisLayer class for LyCORIS like adapters.
"""
from __future__ import annotations
from abc import abstractmethod
from dataclasses import dataclass, field
@@ -85,7 +85,8 @@ def __init__(self, base_layer: nn.Module) -> None:

@property
@abstractmethod
def _available_adapters(self) -> set[str]: ...
def _available_adapters(self) -> set[str]:
...

def _init_empty_weights(self, cls, *args, **kwargs) -> None:
# A helper method that allows to initialize the layer of the given class without spending time to initialize the
@@ -98,7 +99,8 @@ def _init_empty_weights(self, cls, *args, **kwargs) -> None:
cls.__init__(self, *args, device="meta", **kwargs)

@abstractmethod
def create_adapter_parameters(self, adapter_name: str, r: int, **kwargs): ...
def create_adapter_parameters(self, adapter_name: str, r: int, **kwargs):
...

# TODO: refactor LoRA to use the same approach
@abstractmethod
@@ -107,7 +109,8 @@ def _get_delta_activations(
) -> mindspore.Tensor:
"""Activations added on top of the base layer output (i.e. after the base layer forward pass)"""
@abstractmethod
def get_delta_weight(self, adapter_name: str) -> mindspore.Tensor: ...
def get_delta_weight(self, adapter_name: str) -> mindspore.Tensor:
...

def merge(
self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
@@ -147,7 +150,8 @@ def merge(
self.merged_adapters.append(active_adapter)

@abstractmethod
def reset_adapter_parameters(self, adapter_name: str): ...
def reset_adapter_parameters(self, adapter_name: str):
...

def set_scale(self, adapter, scale):
if adapter not in self._available_adapters:
@@ -192,7 +196,8 @@ def unscale_layer(self, scale=None) -> None:
self.scaling[active_adapter] /= scale

@abstractmethod
def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): ...
def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs):
...


class LycorisTuner(BaseTuner):
@@ -291,7 +296,6 @@ def _replace_module(self, parent, child_name, new_module, child):
setattr(parent, child_name, new_module)
# It's not necessary to set requires_grad here, as that is handled by
# _mark_only_adapters_as_trainable

if not hasattr(new_module, "base_layer"):
new_module.weight = child.weight
if hasattr(child, "bias"):
16 changes: 14 additions & 2 deletions mindnlp/peft/tuners/tuners_utils.py
@@ -156,7 +156,6 @@ def __init__(self, model, peft_config: Union[PeftConfig, dict[str, PeftConfig]],
# self.add_adapter(adapter_name, self.peft_config[adapter_name])

self.model = model

# For advanced developers, if you want to attach multiple adapters to your
# model, just add a `peft_config` dict attribute to your model.
if not hasattr(self, "peft_config"):
@@ -175,13 +174,26 @@
# transformers models have a .config attribute, whose presence is assumed later on
# if not hasattr(self, "config"):
# self.config = {"model_type": "custom"}

self._pre_injection_hook(self.model, self.peft_config[adapter_name], adapter_name)
self.active_adapter: str | list[str] = adapter_name
self.inject_adapter(self.model, adapter_name)

# Copy the peft_config in the injected model.
self.model.peft_config = self.peft_config

def _pre_injection_hook(self, model: nn.Module, config: PeftConfig, adapter_name: str) -> None:
r"""
A hook to be called before the adapter is injected into the model. This method can be overridden by child
classes to perform any pre-injection operations.

Args:
model (`nn.Module`):
The model to be adapted.
config (`PeftConfig`):
The adapter config.
adapter_name (`str`):
The adapter name.
"""
pass

@property
def active_adapters(self) -> list[str]:
r"""
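
The `_pre_injection_hook` added above is intentionally a no-op in the base class; a subclass can override it to prepare state that all injected layers share (VeRA, for instance, needs its frozen random projections available before any target module is replaced). The standalone toy example below illustrates only the call pattern, not the actual mindnlp classes:

# Toy, self-contained illustration of the hook pattern (not mindnlp code).
class ToyTuner:
    def __init__(self, model, config, adapter_name):
        self._pre_injection_hook(model, config, adapter_name)  # runs before injection
        self.inject_adapter(model, adapter_name)

    def _pre_injection_hook(self, model, config, adapter_name):
        pass  # base class: do nothing

    def inject_adapter(self, model, adapter_name):
        print(f"injecting adapter '{adapter_name}'")

class VeraLikeTuner(ToyTuner):
    def _pre_injection_hook(self, model, config, adapter_name):
        # Build state shared by every layer before any module is swapped,
        # e.g. VeRA-style frozen random projections kept in a BufferDict.
        self.shared_projections = {"vera_A": None, "vera_B": None}

VeraLikeTuner(model=None, config=None, adapter_name="default")
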
23 changes: 23 additions & 0 deletions mindnlp/peft/tuners/vera/__init__.py
@@ -0,0 +1,23 @@
"""vera init"""
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#from peft.import_utils import is_bnb_4bit_available, is_bnb_available

from .config import VeraConfig
from .layer import Linear, VeraLayer
from .model import VeraModel


__all__ = ["Linear", "VeraConfig", "VeraLayer", "VeraModel"]