#!/bin/bash
# Copyright 2022 Statistics and Machine Learning Research Group at HKUST. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Assumption: Must be run under the project directory
function run_hyperparam_search() {
  local pretrain_dataset_name=$1
  local task=$2
  local model_set_path=$3
  local num_gpu=$4

  for model_path in ${model_set_path}; do
    # For every pretrained model, we use finetune search to find the best
    # finetune results under standard BERT finetune settings.
    local model_name=$(basename ${model_path})

    # Trick of 24h-bert: save the models finetuned on MNLI and reuse them to
    # finetune MRPC, RTE and STSB. Regarding how to select the intermediate
    # finetuning hyperparameters, no completely matching details were found
    # in the 24h-bert code/paper:
    #   https://arxiv.org/pdf/2104.07705.pdf
    #   https://github.com/IntelLabs/academic-budget-bert
    #
    # or in the STILT code/paper referenced there:
    #   https://arxiv.org/pdf/1811.01088.pdf
    #   https://github.com/zphang/bert_on_stilts
    #
    # so I use the setting closest to STILT:
    #
    #                       STILT paper             24h-bert finetune search
    #   warmup_proportion   0.1                     >0.06
    #   train_epochs        >3                      {3, 5}
    #   weight_decay        >0.1                    0.1
    #   batch_size          16 or 24 or 32          >32
    #   init_lr             2e-5                    {>5e-5, 8e-5}
    #   lr_schedule         strange warmup_linear   >warmup + linear
    #   optimizer           handcraft AdamW         >AdamW
    #
if [ "${task_name}" = "mnli" ]; then
save_finetune_checkpoint_at_end=True
else
save_finetune_checkpoint_at_end=False
fi
if [ "${task_name}" = "rte" -o "${task_name}" = "mrpc" -o "${task_name}" = "stsb" ]; then
mnli_setting="epoch-3_batch-size-32_init-lr-5e-5"
model_path="output/finetune/${pretrain_dataset_name}/mnli/${model_name}/${mnli_setting}"
# Need this file exists to run, otherwise waits
prerequisite_file="tmp/finetune/${pretrain_dataset_name}/mnli/${model_name}/${mnli_setting}.completed.mark"
else
prerequisite_file="" # Default: no need for prerequisite file
fi
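
    # Illustration (hypothetical checkpoint name "step-23000"): for RTE, MRPC and
    # STSB the search starts from the saved MNLI finetuned model rather than the
    # pretrained checkpoint, i.e. roughly
    #   model_path        -> output/finetune/${pretrain_dataset_name}/mnli/step-23000/epoch-3_batch-size-32_init-lr-5e-5
    #   prerequisite_file -> tmp/finetune/${pretrain_dataset_name}/mnli/step-23000/epoch-3_batch-size-32_init-lr-5e-5.completed.mark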

    # Search! (with the default linear decay lr scheduler)
    if [ "${task}" = "mnli" -o "${task}" = "qqp" -o "${task}" = "qnli" ]; then
      for num_epoch in 3 5; do
        for batch_size in 32; do
          for init_lr in 5e-5 8e-5; do
            prefix="epoch-${num_epoch}_batch-size-${batch_size}_init-lr-${init_lr}"
            ./run_glue.sh \
              ${pretrain_dataset_name} \
              ${task} \
              ${prefix} \
              ${num_epoch} \
              ${batch_size} \
              ${init_lr} \
              ${model_path} \
              ${model_name} \
              ${save_finetune_checkpoint_at_end} \
              "${prerequisite_file}" \
              ${num_gpu}
          done
        done
      done
    else
      # Standard finetune setting in the 24h BERT paper (Table 7, last page)
      for num_epoch in 3 5 10; do
        for batch_size in 16 32; do
          for init_lr in 1e-5 3e-5 5e-5 8e-5; do
            prefix="epoch-${num_epoch}_batch-size-${batch_size}_init-lr-${init_lr}"
            ./run_glue.sh \
              ${pretrain_dataset_name} \
              ${task} \
              ${prefix} \
              ${num_epoch} \
              ${batch_size} \
              ${init_lr} \
              ${model_path} \
              ${model_name} \
              ${save_finetune_checkpoint_at_end} \
              "${prerequisite_file}" \
              ${num_gpu}
          done
        done
      done
    fi
  done
}
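
# Search-space size per (pretrained model, task): the large GLUE tasks (mnli,
# qqp, qnli) get 2 epoch values x 1 batch size x 2 learning rates = 4
# configurations, while the remaining tasks get 3 x 2 x 4 = 24 configurations.
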
#===============================================================================
# Reference: https://www.tensorflow.org/datasets/catalog/glue
# Reference: https://github.com/huggingface/transformers/tree/master/examples/pytorch/text-classification
#===============================================================================
function main() {
  local model_path
  local num_epoch
  local batch_size
  local task_name
  local num_sample
  local pretrain_dataset_name="$1"
  local num_gpu=$2
  local prefix=$3 # A string that specifies a unique pretrain setting
  local model_set_path="saved_models/pretrain/${pretrain_dataset_name}/${prefix}"

  # By default, hyperparameter settings that have already been searched are
  # skipped. With --remove-old-record (assumed here to be passed as an optional
  # 4th argument), those records are removed and the grid search is redone from
  # scratch. Think carefully before using this option!
  if [ "$4" = "--remove-old-record" ]; then
    rm -f tmp/finetune/${pretrain_dataset_name}/*/*/*.mark
  fi
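
  # Hypothetical example: ./finetune_search.sh <dataset> <num_gpu> <prefix> --remove-old-record
  # would delete every completion mark under tmp/finetune/<dataset>/ before searching.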

  # Number of samples in each task:
  #   wnli:     635
  #   rte:     2490
  #   mrpc:    3668
  #   stsb:    5749
  #   cola:    8551
  #   sst2:   67349
  #   qnli:  104743
  #   qqp:   363846
  #   mnli:  392702
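  # Note: tasks are iterated in descending order of training-set size; mnli comes
  # first, presumably so its finetuned checkpoints (the prerequisite for rte, mrpc
  # and stsb) are produced before those tasks run.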
  for task_name in mnli qqp qnli sst2 cola stsb mrpc rte wnli; do
  # for task_name in wnli; do
    run_hyperparam_search \
      ${pretrain_dataset_name} \
      ${task_name} \
      ${model_set_path} \
      ${num_gpu}
  done
}
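
# Usage sketch (the dataset name and pretrain prefix below are hypothetical examples):
#   ./finetune_search.sh wikibooks 8 lr-1e-3_bsz-4096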
main "$@"