-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain_react_StrategyQA.sh
More file actions
72 lines (68 loc) · 2.58 KB
/
train_react_StrategyQA.sh
File metadata and controls
72 lines (68 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Fail fast: abort on command errors, unset variables, and pipeline failures,
# so a typo'd setting cannot silently produce an empty path fragment below.
set -euo pipefail

export NCCL_P2P_LEVEL=NVL         # restrict NCCL peer-to-peer traffic to NVLink-connected GPUs
export ACCELERATE_USE_FSDP=False  # this run uses the DeepSpeed config below, not FSDP

# NOTE: compared with the initial setting, use_reentrant must be set to True
# (passed to --use_reentrant for gradient checkpointing in the launch command).
# Optional Hub-upload flags, kept for reference:
# --push_to_hub \
# --hub_private_repo True \
# --hub_strategy "every_save" \
# --dataset_name "smangrul/ultrachat-10k-chatml" \

# --- model / dataset selection -------------------------------------------
model_name_or_path=Qwen/Qwen2.5-14B-Instruct
# model_name_or_path=meta-llama/Llama-3.1-8B-Instruct
dataset=StrategyQA

# --- optimization hyper-parameters ---------------------------------------
num_train_epochs=10
learning_rate=1e-4

# --- data-pipeline switches (recorded in save_path for provenance) -------
further_filter=False
filtered=False
data_creator=ds
augment_model=None
random_select=True

# --- loading / training switches -----------------------------------------
load_in_4bit=False
load_in_8bit=False
train_response_only=False
use_unsloth=False
special_token=False
data_format=react
lora=all-linear # all-linear/gate_proj,up_proj,down_proj

# --- derived paths (quoted: expansions must never word-split) ------------
TRAIN_DATA_PATH="dataset/${dataset}/${data_format}/strategyqa-deepseek-v3-241226-train.json"
TEST_DATA_PATH="dataset/${dataset}/${data_format}/strategyqa-deepseek-v3-241226-test.json"
# Encode the full hyper-parameter combination in the checkpoint directory name
# so different runs never silently overwrite each other's checkpoints.
save_path="ckpts/${data_format}/${model_name_or_path}/${dataset}/lr=${learning_rate}-creator=${data_creator}-augment=${augment_model}-random_select=${random_select}-further_filter=${further_filter}-filtered=${filtered}-load_in_4bit=${load_in_4bit}-response_only=${train_response_only}-N_epochs=${num_train_epochs}-unsloth=${use_unsloth}-special_token=${special_token}-lora=${lora}"
# Previous single-GPU invocation, kept for reference:
# CUDA_VISIBLE_DEVICES=4 python train_cot_StrategyQA.py \
#
# Launch LoRA SFT on 2 GPUs with DeepSpeed via accelerate.
# All variable expansions are quoted (SC2086) so values containing spaces or
# glob characters — save_path in particular is a long generated string —
# can never word-split the command line.
CUDA_VISIBLE_DEVICES=0,1 accelerate launch --config_file "deepspeed_configs/deepspeed_config_StrategyQA_react.yaml" train_react.py \
    --seed 100 \
    --model_name_or_path "$model_name_or_path" \
    --dataset_name "$TRAIN_DATA_PATH,$TEST_DATA_PATH" \
    --chat_template_format "chatml" \
    --add_special_tokens "$special_token" \
    --append_concat_token False \
    --splits "train,test" \
    --max_seq_len 2048 \
    --num_train_epochs "$num_train_epochs" \
    --logging_steps 1 \
    --log_level "info" \
    --logging_strategy "steps" \
    --eval_strategy "epoch" \
    --save_strategy "epoch" \
    --save_total_limit "$num_train_epochs" \
    --bf16 True \
    --packing True \
    --learning_rate "$learning_rate" \
    --lr_scheduler_type "cosine" \
    --weight_decay 1e-4 \
    --warmup_ratio 0.17 \
    --max_grad_norm 1.0 \
    --output_dir "$save_path" \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --gradient_checkpointing True \
    --use_reentrant True \
    --dataset_text_field "content" \
    --use_flash_attn True \
    --use_peft_lora True \
    --lora_r 16 \
    --lora_alpha 16 \
    --lora_dropout 0.01 \
    --lora_target_modules "$lora" \
    --use_4bit_quantization "$load_in_4bit" \
    --use_8bit_quantization "$load_in_8bit" \
    --use_unsloth "$use_unsloth" \
    --data_format "$data_format"