Initial commit
daihuaiii committed Apr 15, 2024
0 parents commit 850d4d7
Showing 13 changed files with 2,760 additions and 0 deletions.
10 changes: 10 additions & 0 deletions README.md
@@ -0,0 +1,10 @@
# ChatGLM2-6B-int4

- Add new functions in lora_utils.py
- Add a new method to the ChatGLMForConditionalGeneration class in modeling_chatglm.py
- Load the model for fine-tuning in a different manner in train_lora.py (see the hedged usage sketch after the references below)

## Ref

- https://github.com/DracoUnion/chatglm2-6b-int4-lora
- https://github.com/shuxueslpi/chatGLM-6B-QLoRA
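
A hedged sketch of how these pieces are likely meant to fit together. train_lora.py and modeling_chatglm.py are not shown in this commit view, so the method name `attach_lora` on the model and the local path `glm2-6b-int4` are assumptions, not confirmed API:

```python
# Hypothetical sketch only: train_lora.py and modeling_chatglm.py are not shown in
# this diff, so the method name `attach_lora` and the local path are assumptions.
from transformers import AutoModel, AutoTokenizer

model_dir = "glm2-6b-int4"  # local copy of the int4 checkpoint (assumed path)
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True)

# Presumably the new method wraps lora_utils.attach_lora: it swaps every
# QuantizedLinear for a LoraQuantizedLinear and freezes all non-LoRA weights.
model = model.attach_lora(lora_r=32, lora_alpha=32, lora_dropout_rate=0.1)

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"trainable LoRA parameters: {trainable}")
```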
19 changes: 19 additions & 0 deletions chatGLM_6B_LoRA.json
@@ -0,0 +1,19 @@
{
"output_dir": "saved_files/chatGLM_6B_int4_LoRA",
"per_device_train_batch_size": 2,
"gradient_accumulation_steps": 4,
"per_device_eval_batch_size": 2,
"learning_rate": 5e-4,
"num_train_epochs": 10.0,
"lr_scheduler_type": "linear",
"warmup_ratio": 0.1,
"logging_steps": 100,
"save_strategy": "epoch",
"load_best_model_at_end": false,
"evaluation_strategy": "epoch",
"optim": "adamw_torch",
"fp16": false,
"remove_unused_columns": false,
"ddp_find_unused_parameters": false,
"seed": 42
}
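
These keys mirror fields of transformers.TrainingArguments, so one plausible way a training script could consume this file is via HfArgumentParser. A minimal sketch, assuming a transformers version around the 4.27 pinned in config.json, where evaluation_strategy is still the accepted field name:

```python
# Sketch, not part of this commit: load chatGLM_6B_LoRA.json into TrainingArguments.
from transformers import HfArgumentParser, TrainingArguments

parser = HfArgumentParser(TrainingArguments)
# parse_json_file returns one dataclass instance per type handed to the parser.
(training_args,) = parser.parse_json_file(json_file="chatGLM_6B_LoRA.json")

print(training_args.output_dir)                   # saved_files/chatGLM_6B_int4_LoRA
print(training_args.per_device_train_batch_size)  # 2
print(training_args.learning_rate)                # 0.0005
```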
33 changes: 33 additions & 0 deletions glm2-6b-int4/MODEL_LICENSE
@@ -0,0 +1,33 @@
The ChatGLM2-6B License

1. Definitions

“Licensor” means the ChatGLM2-6B Model Team that distributes its Software.

“Software” means the ChatGLM2-6B model parameters made available under this license.

2. License Grant

Subject to the terms and conditions of this License, the Licensor hereby grants to you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty-free copyright license to use the Software solely for your non-commercial research purposes.

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

3. Restriction

You will not use, copy, modify, merge, publish, distribute, reproduce, or create derivative works of the Software, in whole or in part, for any commercial, military, or illegal purposes.

You will not use the Software for any act that may undermine China's national security and national unity, harm the public interest of society, or infringe upon the rights and interests of human beings.

4. Disclaimer

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

5. Limitation of Liability

EXCEPT TO THE EXTENT PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER BASED IN TORT, NEGLIGENCE, CONTRACT, LIABILITY, OR OTHERWISE WILL ANY LICENSOR BE LIABLE TO YOU FOR ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES, OR ANY OTHER COMMERCIAL LOSSES, EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

6. Dispute Resolution

This license shall be governed and construed in accordance with the laws of the People's Republic of China. Any dispute arising from or in connection with this License shall be submitted to Haidian District People's Court in Beijing.

Note that the license is subject to update to a more comprehensive version. For any questions related to the license and copyright, please contact us at [email protected].
41 changes: 41 additions & 0 deletions glm2-6b-int4/config.json
@@ -0,0 +1,41 @@
{
"_name_or_path": "THUDM/chatglm2-6b",
"model_type": "chatglm",
"architectures": [
"ChatGLMModel"
],
"auto_map": {
"AutoConfig": "configuration_chatglm.ChatGLMConfig",
"AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
"AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration"
},
"add_bias_linear": false,
"add_qkv_bias": true,
"apply_query_key_layer_scaling": true,
"apply_residual_connection_post_layernorm": false,
"attention_dropout": 0.0,
"attention_softmax_in_fp32": true,
"bias_dropout_fusion": true,
"ffn_hidden_size": 13696,
"fp32_residual_connection": false,
"hidden_dropout": 0.0,
"hidden_size": 4096,
"kv_channels": 128,
"layernorm_epsilon": 1e-05,
"multi_query_attention": true,
"multi_query_group_num": 2,
"num_attention_heads": 32,
"num_layers": 28,
"original_rope": true,
"padded_vocab_size": 65024,
"post_layer_norm": true,
"quantization_bit": 4,
"rmsnorm": true,
"seq_length": 32768,
"use_cache": true,
"torch_dtype": "float16",
"transformers_version": "4.27.1",
"tie_word_embeddings": false,
"eos_token_id": 2,
"pad_token_id": 0
}
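
The auto_map block above is what lets the generic Auto classes resolve to the custom code shipped with the checkpoint. A small hedged check of the quantization-related fields (the local path is an assumption):

```python
# Sketch: inspect the checkpoint config; trust_remote_code=True is required so that
# auto_map can resolve AutoConfig to configuration_chatglm.ChatGLMConfig.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("glm2-6b-int4", trust_remote_code=True)  # assumed local path
print(config.quantization_bit)       # 4  -> weights stored as int4, dequantized on the fly
print(config.multi_query_group_num)  # 2  -> multi-query attention with 2 KV groups
print(config.seq_length)             # 32768
```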
59 changes: 59 additions & 0 deletions glm2-6b-int4/configuration_chatglm.py
@@ -0,0 +1,59 @@
from transformers import PretrainedConfig


class ChatGLMConfig(PretrainedConfig):
model_type = "chatglm"
def __init__(
self,
num_layers=28,
padded_vocab_size=65024,
hidden_size=4096,
ffn_hidden_size=13696,
kv_channels=128,
num_attention_heads=32,
seq_length=2048,
hidden_dropout=0.0,
attention_dropout=0.0,
layernorm_epsilon=1e-5,
rmsnorm=True,
apply_residual_connection_post_layernorm=False,
post_layer_norm=True,
add_bias_linear=False,
add_qkv_bias=False,
bias_dropout_fusion=True,
multi_query_attention=False,
multi_query_group_num=1,
apply_query_key_layer_scaling=True,
attention_softmax_in_fp32=True,
fp32_residual_connection=False,
quantization_bit=0,
pre_seq_len=None,
prefix_projection=False,
**kwargs
):
self.num_layers = num_layers
self.vocab_size = padded_vocab_size
self.padded_vocab_size = padded_vocab_size
self.hidden_size = hidden_size
self.ffn_hidden_size = ffn_hidden_size
self.kv_channels = kv_channels
self.num_attention_heads = num_attention_heads
self.seq_length = seq_length
self.hidden_dropout = hidden_dropout
self.attention_dropout = attention_dropout
self.layernorm_epsilon = layernorm_epsilon
self.rmsnorm = rmsnorm
self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
self.post_layer_norm = post_layer_norm
self.add_bias_linear = add_bias_linear
self.add_qkv_bias = add_qkv_bias
self.bias_dropout_fusion = bias_dropout_fusion
self.multi_query_attention = multi_query_attention
self.multi_query_group_num = multi_query_group_num
self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
self.attention_softmax_in_fp32 = attention_softmax_in_fp32
self.fp32_residual_connection = fp32_residual_connection
self.quantization_bit = quantization_bit
self.pre_seq_len = pre_seq_len
self.prefix_projection = prefix_projection
super().__init__(**kwargs)
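
For comparison with config.json above: the defaults here (seq_length=2048, quantization_bit=0, add_qkv_bias=False) are only fallbacks, overridden by whatever the checkpoint's config.json serializes. A small sketch, assuming the working directory is glm2-6b-int4 so the module can be imported directly:

```python
# Sketch: build the config by hand with the int4 checkpoint's overrides.
from configuration_chatglm import ChatGLMConfig  # assumes cwd is glm2-6b-int4

config = ChatGLMConfig(
    quantization_bit=4,         # default 0
    add_qkv_bias=True,          # default False
    seq_length=32768,           # default 2048
    multi_query_attention=True,
    multi_query_group_num=2,
)
print(config.to_json_string())  # matches the corresponding keys in config.json
```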
141 changes: 141 additions & 0 deletions glm2-6b-int4/lora_utils.py
@@ -0,0 +1,141 @@
'''
Edited according to https://zhuanlan.zhihu.com/p/662569090
for PEFT-style LoRA fine-tuning on CPU.
'''
import torch
from .quantization import *




class LoraQuantizedLinear(torch.nn.Module):

def __init__(self, q_linear, lora_r=32, lora_alpha=32, lora_dropout_rate=0.0):
super().__init__()

# Save the original quantized parameters and the LoRA configuration
self.lora_r = lora_r
self.lora_alpha = lora_alpha
self.lora_dropout_rate = lora_dropout_rate
self.weight_bit_width = q_linear.weight_bit_width
self.weight = q_linear.weight
self.weight_scale = q_linear.weight_scale
self.bias = q_linear.bias

# Freeze the original parameters
self.weight.requires_grad = False
self.weight_scale.requires_grad = False
if self.bias is not None: self.bias.requires_grad = False

# Create the LoRA parameters in FP16
out_dim, in_dim = self.weight.shape
# With INT4 weights, the stored in_dim is half the real size, so double it
if self.weight_bit_width == 4: in_dim *= 2
# lora_a: Kaiming-normal initialization
self.lora_a = torch.nn.Parameter(torch.empty(
[self.lora_r, in_dim],
device=self.weight.device,
dtype=torch.float16,
))
torch.nn.init.kaiming_normal_(self.lora_a)
# lora_b: zero initialization
self.lora_b = torch.nn.Parameter(torch.zeros(
[out_dim, self.lora_r],
device=self.weight.device,
dtype=torch.float16,
))
self.lora_dropout = torch.nn.Dropout(self.lora_dropout_rate)
self.lora_scale = self.lora_alpha / self.lora_r

def forward(self, input):
ori_output = QuantizedLinear.forward(self, input)
lora_output = (
self.lora_dropout(input.half()) @
self.lora_a.transpose(0, 1) @
self.lora_b.transpose(0, 1) *
self.lora_scale
)
return ori_output + lora_output.to(ori_output.dtype)

def merge(self):
# H = XW + b + XAB * s => H = X(W + AB * s) + b
# Dequantize the original int weights to fp16
weight = extract_weight_to_half(self.weight, self.weight_scale, self.weight_bit_width)
# Fold the LoRA update into the weight
weight += self.lora_b @ self.lora_a * self.lora_scale
# Re-quantize back to int
weight, weight_scale = half_weight_to_int(weight, self.weight_bit_width)
self.weight = torch.nn.Parameter(weight, requires_grad=False)
self.weight_scale = torch.nn.Parameter(weight_scale, requires_grad=False)
# Re-initialize the two LoRA matrices
torch.nn.init.kaiming_normal_(self.lora_a)
torch.nn.init.zeros_(self.lora_b)



# Replace every QuantizedLinear in the model with a LoraQuantizedLinear
def attach_lora(model, lora_r=32, lora_alpha=32, lora_dropout_rate=0.0):
# getattr guards the first call, before the lora_attached flag has been set
if getattr(model, 'lora_attached', False): return model
lora_conf = dict(lora_r=lora_r, lora_alpha=lora_alpha, lora_dropout_rate=lora_dropout_rate)
for mod in model.modules():
for name in dir(mod):
submod = getattr(mod, name, None)
if not isinstance(submod, QuantizedLinear):
continue
new_submod = LoraQuantizedLinear(submod, **lora_conf)
setattr(mod, name, new_submod)

for name, param in model.named_parameters():
if 'lora_' not in name:
param.requires_grad = False
model.lora_attached = True
return model


# Export only the LoRA parameters.
def lora_state_dict(model):
return {
k:v
for k, v in model.state_dict().items()
if 'lora_' in k
}


def base_state_dict(model):
return {
k:v
for k, v in model.state_dict().items()
if 'lora_' not in k
}


# Merge the LoRA weights of every LoraQuantizedLinear into its base quantized weights.
def merge_lora(model):
for mod in model.modules():
if isinstance(mod, LoraQuantizedLinear):
mod.merge()
return model


# Walk every module in the model, check its direct attributes, and swap any LoraQuantizedLinear back to a plain QuantizedLinear.
def detach_lora(model):
if not getattr(model, 'lora_attached', False): return model

for mod in model.modules():
for name in dir(mod):
submod = getattr(mod, name, None)
if not isinstance(submod, LoraQuantizedLinear):
continue
new_submod = QuantizedLinear.from_params(
submod.weight_bit_width,
submod.weight,
submod.weight_scale,
submod.bias,
)
setattr(mod, name, new_submod)

model.lora_attached = False
return model
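
A hedged sketch of the intended life cycle of these helpers (not part of the commit; the training step and the already-loaded int4 model are placeholders):

```python
import torch

def lora_finetune_cycle(model, run_training):
    """Illustrative workflow for the helpers above; assumes `model` is a
    ChatGLM2-6B-int4 instance whose linear layers are QuantizedLinear modules."""
    # 1. Wrap every QuantizedLinear and freeze the base weights.
    model = attach_lora(model, lora_r=32, lora_alpha=32, lora_dropout_rate=0.1)

    # 2. Train; only lora_a / lora_b require grad, so only they are updated.
    run_training(model)  # placeholder for a Trainer or a manual loop

    # 3. Persist just the adapter, which is tiny compared to the full checkpoint.
    torch.save(lora_state_dict(model), "lora_adapter.bin")

    # 4. Optionally fold the adapter into the int4 weights and strip the wrappers.
    model = merge_lora(model)
    model = detach_lora(model)
    return model
```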

