Showing 3 changed files with 424 additions and 2 deletions.
# -*- coding:utf-8 -*-
# @project: ChatGLM-Finetuning
# @filename: convert_to_hf
# @author: 刘聪NLP
# @zhihu: https://www.zhihu.com/people/LiuCongNLP
# @contact: [email protected]
# @time: 2023/6/5 11:06
"""
File description:
    Merge pipeline-parallel training checkpoint shards (layer_* files) back into a
    single Hugging Face style pytorch_model.bin for ChatGLM-6B.
"""
import argparse
import os
from os.path import join
from pathlib import Path
from shutil import copy

import torch


def convert_model_to_hf(ori_model_dir, pipeline_model_dir, save_model_dir):
    model_static_dict = {}
    for path in Path(pipeline_model_dir).iterdir():
        print("Processed file: {}".format(path))
        # Only the per-layer shards (layer_XX-*) contain model weights.
        if not path.name.startswith('layer'):
            continue
        small_static_dict = torch.load(path, map_location="cpu")
        layer_i = int(path.name.split('-')[0].replace('layer_', ''))
        if layer_i == 0:
            # Stage 0 holds the input word embeddings.
            model_static_dict["transformer.word_embeddings.weight"] = small_static_dict["word_embeddings.weight"]
        elif layer_i == 30:
            # The last stage reuses the embedding weights as the LM head.
            model_static_dict["lm_head.weight"] = small_static_dict["word_embeddings.weight"]
        elif layer_i == 29:
            # Stage 29 holds the final layer norm; keep its parameter names as-is.
            for k, v in small_static_dict.items():
                model_static_dict["transformer." + k] = v
        else:
            # Stages 1..28 map to transformer.layers.0..27 in the Hugging Face layout.
            for k, v in small_static_dict.items():
                model_static_dict["transformer." + k.replace("layer.", "layers.{}.".format(layer_i - 1))] = v

    # Create the output directory if it does not exist yet, then write the merged weights.
    os.makedirs(save_model_dir, exist_ok=True)
    torch.save(model_static_dict, join(save_model_dir, "pytorch_model.bin"))
    # Copy the config and tokenizer files from the original ChatGLM-6B checkpoint.
    copy(join(ori_model_dir, "config.json"), join(save_model_dir, "config.json"))
    copy(join(ori_model_dir, "tokenizer_config.json"), join(save_model_dir, "tokenizer_config.json"))
    copy(join(ori_model_dir, "ice_text.model"), join(save_model_dir, "ice_text.model"))


def set_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--ori_model_dir', default='ChatGLM-6B/', type=str,
                        help='directory of the original ChatGLM-6B checkpoint')
    parser.add_argument('--pipeline_model_dir', default='output-glm-pp/global_step300/', type=str,
                        help='directory containing the pipeline-parallel checkpoint shards')
    parser.add_argument('--save_model_dir', default='output-glm-pp/gs300/', type=str,
                        help='directory to save the merged Hugging Face checkpoint')
    return parser.parse_args()


if __name__ == '__main__':
    args = set_args()
    convert_model_to_hf(args.ori_model_dir, args.pipeline_model_dir, args.save_model_dir)
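
For reference, a minimal usage sketch of the converter is shown below. The paths simply mirror the argparse defaults above, and the assumption that the pipeline shards follow a layer_XX-*.pt naming scheme is taken from the parsing logic in the script, not verified here.

# Hypothetical usage sketch; the paths mirror the argparse defaults above.
from convert_to_hf import convert_model_to_hf

convert_model_to_hf(
    ori_model_dir="ChatGLM-6B/",                         # original checkpoint with config/tokenizer files
    pipeline_model_dir="output-glm-pp/global_step300/",  # pipeline-parallel shards (layer_XX-* files)
    save_model_dir="output-glm-pp/gs300/",               # destination for the merged pytorch_model.bin
)

# The merged checkpoint should then load with the usual transformers API, e.g.
# AutoModel.from_pretrained("output-glm-pp/gs300/", trust_remote_code=True)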