Initial commit 850d4d7 (0 parents): 13 changed files with 2,760 additions and 0 deletions.
@@ -0,0 +1,10 @@
# ChatGLM2-6B-int4

- Add new functions in lora_utils.py
- Add a new method to the ChatGLMForConditionalGeneration class in modeling_chatglm.py
- Load the model for fine-tuning in a different manner in train_lora.py (see the sketch below)

## Ref

- https://github.com/DracoUnion/chatglm2-6b-int4-lora
- https://github.com/shuxueslpi/chatGLM-6B-QLoRA
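As a rough illustration of the third item above, a minimal sketch of how train_lora.py presumably wires these pieces together. The checkpoint directory name, the top-level import of lora_utils (in this commit it uses a relative import of quantization, so it lives inside the checkpoint package), and the LoRA hyperparameters are all assumptions, not part of the diff:

```python
from transformers import AutoModel, AutoTokenizer

from lora_utils import attach_lora  # import path assumed; see note above

model_dir = "chatglm2-6b-int4"  # assumed local checkpoint dir containing the modified files

tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True)

# attach_lora (see lora_utils.py later in this diff) swaps every QuantizedLinear for a
# LoraQuantizedLinear and freezes everything except the lora_a / lora_b matrices.
# It reads model.lora_attached, which the modified modeling_chatglm.py is assumed to initialize.
model = attach_lora(model, lora_r=32, lora_alpha=32, lora_dropout_rate=0.1)

print(sum(p.numel() for p in model.parameters() if p.requires_grad), "trainable parameters")
```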
@@ -0,0 +1,19 @@
{
  "output_dir": "saved_files/chatGLM_6B_int4_LoRA",
  "per_device_train_batch_size": 2,
  "gradient_accumulation_steps": 4,
  "per_device_eval_batch_size": 2,
  "learning_rate": 5e-4,
  "num_train_epochs": 10.0,
  "lr_scheduler_type": "linear",
  "warmup_ratio": 0.1,
  "logging_steps": 100,
  "save_strategy": "epoch",
  "load_best_model_at_end": false,
  "evaluation_strategy": "epoch",
  "optim": "adamw_torch",
  "fp16": false,
  "remove_unused_columns": false,
  "ddp_find_unused_parameters": false,
  "seed": 42
}
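The keys above correspond one-to-one to fields of transformers.TrainingArguments, so the file can presumably be consumed with HfArgumentParser. A minimal sketch (the JSON file name is an assumption):

```python
from transformers import HfArgumentParser, TrainingArguments

# Parse the JSON shown above into a TrainingArguments instance.
# "train_args.json" is an assumed file name for this config.
parser = HfArgumentParser(TrainingArguments)
(training_args,) = parser.parse_json_file(json_file="train_args.json")

print(training_args.learning_rate)     # 0.0005
print(training_args.num_train_epochs)  # 10.0
```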
@@ -0,0 +1,33 @@
The ChatGLM2-6B License

1. Definitions

“Licensor” means the ChatGLM2-6B Model Team that distributes its Software.

“Software” means the ChatGLM2-6B model parameters made available under this license.

2. License Grant

Subject to the terms and conditions of this License, the Licensor hereby grants to you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty-free copyright license to use the Software solely for your non-commercial research purposes.

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

3. Restriction

You will not use, copy, modify, merge, publish, distribute, reproduce, or create derivative works of the Software, in whole or in part, for any commercial, military, or illegal purposes.

You will not use the Software for any act that may undermine China's national security and national unity, harm the public interest of society, or infringe upon the rights and interests of human beings.

4. Disclaimer

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

5. Limitation of Liability

EXCEPT TO THE EXTENT PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER BASED IN TORT, NEGLIGENCE, CONTRACT, LIABILITY, OR OTHERWISE WILL ANY LICENSOR BE LIABLE TO YOU FOR ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES, OR ANY OTHER COMMERCIAL LOSSES, EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

6. Dispute Resolution

This license shall be governed and construed in accordance with the laws of People’s Republic of China. Any dispute arising from or in connection with this License shall be submitted to Haidian District People's Court in Beijing.

Note that the license is subject to update to a more comprehensive version. For any questions related to the license and copyright, please contact us at [email protected].
@@ -0,0 +1,41 @@
{
  "_name_or_path": "THUDM/chatglm2-6b",
  "model_type": "chatglm",
  "architectures": [
    "ChatGLMModel"
  ],
  "auto_map": {
    "AutoConfig": "configuration_chatglm.ChatGLMConfig",
    "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration"
  },
  "add_bias_linear": false,
  "add_qkv_bias": true,
  "apply_query_key_layer_scaling": true,
  "apply_residual_connection_post_layernorm": false,
  "attention_dropout": 0.0,
  "attention_softmax_in_fp32": true,
  "bias_dropout_fusion": true,
  "ffn_hidden_size": 13696,
  "fp32_residual_connection": false,
  "hidden_dropout": 0.0,
  "hidden_size": 4096,
  "kv_channels": 128,
  "layernorm_epsilon": 1e-05,
  "multi_query_attention": true,
  "multi_query_group_num": 2,
  "num_attention_heads": 32,
  "num_layers": 28,
  "original_rope": true,
  "padded_vocab_size": 65024,
  "post_layer_norm": true,
  "quantization_bit": 4,
  "rmsnorm": true,
  "seq_length": 32768,
  "use_cache": true,
  "torch_dtype": "float16",
  "transformers_version": "4.27.1",
  "tie_word_embeddings": false,
  "eos_token_id": 2,
  "pad_token_id": 0
}
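A brief, hedged sketch of inspecting this config: with trust_remote_code=True, AutoConfig follows the auto_map entry above and instantiates the local configuration_chatglm.ChatGLMConfig (the checkpoint directory name is an assumption):

```python
from transformers import AutoConfig

# Directory containing this config.json and configuration_chatglm.py (name assumed).
config = AutoConfig.from_pretrained("chatglm2-6b-int4", trust_remote_code=True)

print(type(config).__name__)         # ChatGLMConfig
print(config.quantization_bit)       # 4  -> the checkpoint ships INT4-quantized weights
print(config.multi_query_group_num)  # 2  -> multi-query attention with 2 KV groups
print(config.seq_length)             # 32768, vs. the class default of 2048
```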
@@ -0,0 +1,59 @@
from transformers import PretrainedConfig


class ChatGLMConfig(PretrainedConfig):
    model_type = "chatglm"

    def __init__(
        self,
        num_layers=28,
        padded_vocab_size=65024,
        hidden_size=4096,
        ffn_hidden_size=13696,
        kv_channels=128,
        num_attention_heads=32,
        seq_length=2048,
        hidden_dropout=0.0,
        attention_dropout=0.0,
        layernorm_epsilon=1e-5,
        rmsnorm=True,
        apply_residual_connection_post_layernorm=False,
        post_layer_norm=True,
        add_bias_linear=False,
        add_qkv_bias=False,
        bias_dropout_fusion=True,
        multi_query_attention=False,
        multi_query_group_num=1,
        apply_query_key_layer_scaling=True,
        attention_softmax_in_fp32=True,
        fp32_residual_connection=False,
        quantization_bit=0,
        pre_seq_len=None,
        prefix_projection=False,
        **kwargs
    ):
        self.num_layers = num_layers
        self.vocab_size = padded_vocab_size
        self.padded_vocab_size = padded_vocab_size
        self.hidden_size = hidden_size
        self.ffn_hidden_size = ffn_hidden_size
        self.kv_channels = kv_channels
        self.num_attention_heads = num_attention_heads
        self.seq_length = seq_length
        self.hidden_dropout = hidden_dropout
        self.attention_dropout = attention_dropout
        self.layernorm_epsilon = layernorm_epsilon
        self.rmsnorm = rmsnorm
        self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
        self.post_layer_norm = post_layer_norm
        self.add_bias_linear = add_bias_linear
        self.add_qkv_bias = add_qkv_bias
        self.bias_dropout_fusion = bias_dropout_fusion
        self.multi_query_attention = multi_query_attention
        self.multi_query_group_num = multi_query_group_num
        self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        self.attention_softmax_in_fp32 = attention_softmax_in_fp32
        self.fp32_residual_connection = fp32_residual_connection
        self.quantization_bit = quantization_bit
        self.pre_seq_len = pre_seq_len
        self.prefix_projection = prefix_projection
        super().__init__(**kwargs)
@@ -0,0 +1,141 @@
'''
Edited according to https://zhuanlan.zhihu.com/p/662569090
for CPU PEFT fine-tuning.
'''
import torch
from .quantization import *


class LoraQuantizedLinear(torch.nn.Module):

    def __init__(self, q_linear, lora_r=32, lora_alpha=32, lora_dropout_rate=0.0):
        super().__init__()

        # Keep the original quantized parameters and the LoRA configuration.
        self.lora_r = lora_r
        self.lora_alpha = lora_alpha
        self.lora_dropout_rate = lora_dropout_rate
        self.weight_bit_width = q_linear.weight_bit_width
        self.weight = q_linear.weight
        self.weight_scale = q_linear.weight_scale
        self.bias = q_linear.bias

        # Freeze the original parameters.
        self.weight.requires_grad = False
        self.weight_scale.requires_grad = False
        if self.bias is not None:
            self.bias.requires_grad = False

        # Create the LoRA parameters in FP16.
        out_dim, in_dim = self.weight.shape
        # For the INT4 model, in_dim is half of the original size (two 4-bit values per byte).
        if self.weight_bit_width == 4:
            in_dim *= 2
        # lora_a: Kaiming-normal initialization.
        self.lora_a = torch.nn.Parameter(torch.empty(
            [self.lora_r, in_dim],
            device=self.weight.device,
            dtype=torch.float16,
        ))
        torch.nn.init.kaiming_normal_(self.lora_a)
        # lora_b: zero initialization.
        self.lora_b = torch.nn.Parameter(torch.zeros(
            [out_dim, self.lora_r],
            device=self.weight.device,
            dtype=torch.float16,
        ))
        self.lora_dropout = torch.nn.Dropout(self.lora_dropout_rate)
        self.lora_scale = self.lora_alpha / self.lora_r

    def forward(self, input):
        # Reuse QuantizedLinear.forward as an unbound call: this module exposes the
        # weight / weight_scale / bias / weight_bit_width attributes it expects.
        ori_output = QuantizedLinear.forward(self, input)
        lora_output = (
            self.lora_dropout(input.half()) @
            self.lora_a.transpose(0, 1) @
            self.lora_b.transpose(0, 1) *
            self.lora_scale
        )
        return ori_output + lora_output.to(ori_output.dtype)

    def merge(self):
        # H = XW + b + XAB * s  =>  H = X(W + AB * s) + b
        # Dequantize the int weights to FP16.
        weight = extract_weight_to_half(self.weight, self.weight_scale, self.weight_bit_width)
        # Fold the LoRA parameters into the weights.
        weight += self.lora_b @ self.lora_a * self.lora_scale
        # Quantize back to int.
        weight, weight_scale = half_weight_to_int(weight, self.weight_bit_width)
        self.weight = torch.nn.Parameter(weight, requires_grad=False)
        self.weight_scale = torch.nn.Parameter(weight_scale, requires_grad=False)
        # Re-initialize the two LoRA matrices.
        torch.nn.init.kaiming_normal_(self.lora_a)
        torch.nn.init.zeros_(self.lora_b)


# Replace every QuantizedLinear with a LoraQuantizedLinear and freeze the base parameters.
def attach_lora(model, lora_r=32, lora_alpha=32, lora_dropout_rate=0.0):
    if model.lora_attached:
        return model
    lora_conf = dict(lora_r=lora_r, lora_alpha=lora_alpha, lora_dropout_rate=lora_dropout_rate)
    for mod in model.modules():
        for name in dir(mod):
            submod = getattr(mod, name, None)
            if not isinstance(submod, QuantizedLinear):
                continue
            new_submod = LoraQuantizedLinear(submod, **lora_conf)
            setattr(mod, name, new_submod)

    for name, param in model.named_parameters():
        if 'lora_' not in name:
            param.requires_grad = False
    model.lora_attached = True
    return model


# Export only the LoRA parameters.
def lora_state_dict(model):
    return {
        k: v
        for k, v in model.state_dict().items()
        if 'lora_' in k
    }


# Export everything except the LoRA parameters.
def base_state_dict(model):
    return {
        k: v
        for k, v in model.state_dict().items()
        if 'lora_' not in k
    }


# Merge the parameters of every LoraQuantizedLinear into its quantized base weights.
def merge_lora(model):
    for mod in model.modules():
        if isinstance(mod, LoraQuantizedLinear):
            mod.merge()
    return model


# Walk every module and its direct children; replace each LoraQuantizedLinear
# back with a plain QuantizedLinear.
def detach_lora(model):
    if not model.lora_attached:
        return model

    for mod in model.modules():
        for name in dir(mod):
            submod = getattr(mod, name, None)
            if not isinstance(submod, LoraQuantizedLinear):
                continue
            new_submod = QuantizedLinear.from_params(
                submod.weight_bit_width,
                submod.weight,
                submod.weight_scale,
                submod.bias,
            )
            setattr(mod, name, new_submod)

    model.lora_attached = False
    return model
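A hedged sketch of how these helpers are meant to be used around training. The adapter file name and the top-level import path are assumptions (in the diff, lora_utils imports quantization relatively, so it lives inside the checkpoint package), and both functions below are hypothetical wrappers, not part of this commit:

```python
import torch

from lora_utils import attach_lora, detach_lora, lora_state_dict, merge_lora


def save_and_export(model, adapter_path="lora_adapter.bin"):
    """Hypothetical post-training flow for a model wrapped with attach_lora()."""
    # Persist only the adapter weights (lora_a / lora_b), not the int4 base weights.
    torch.save(lora_state_dict(model), adapter_path)

    # Fold the adapters into the quantized weights
    # (W <- quantize(dequantize(W) + B @ A * scale)), then restore plain
    # QuantizedLinear modules so the exported model has no LoRA layers left.
    model = merge_lora(model)
    model = detach_lora(model)
    return model


def load_adapter(base_model, adapter_path="lora_adapter.bin"):
    """Hypothetical reload flow: attach LoRA, then load only the adapter tensors."""
    model = attach_lora(base_model)
    # strict=False because the saved checkpoint contains only the lora_* keys.
    model.load_state_dict(torch.load(adapter_path), strict=False)
    return model
```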