-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathexample_chunk_refining.sh
78 lines (67 loc) · 2.06 KB
/
example_chunk_refining.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/bin/bash
# SLURM batch script: run distributed chunk-refining inference across
# 8 nodes x 8 GPUs (one python worker per GPU, fanned out via srun).
# Requires: setup_personal_env.sh / setup_common_env.sh in the submit dir,
# and TINYLM_CONDA_DIR / TINYLM_WORK_DIR exported by those env scripts.
#SBATCH --job-name=prox_chunk_refining_xs
#SBATCH --output=<expected_output_file>
#SBATCH --partition=<your_partition>
#SBATCH --error=<expected_error_file>
#SBATCH --time=50:00:00
#SBATCH --nodes=8
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task=32

# Fail fast on unhandled errors and pipeline failures.
# (-u is deliberately omitted: conda activation scripts are known to
# reference unset variables and would abort under nounset.)
set -eo pipefail

# setup env
chmod +x setup_personal_env.sh setup_common_env.sh
source setup_personal_env.sh
source setup_common_env.sh

# activate conda env
# :? aborts with a message if the env script failed to export the variable.
source "${TINYLM_CONDA_DIR:?TINYLM_CONDA_DIR not set — check env scripts}"
conda activate refining

# enter working dir (explicit check: launching workers from the wrong cwd
# would silently produce import/config failures on every rank)
cd "${TINYLM_WORK_DIR:?TINYLM_WORK_DIR not set — check env scripts}" || exit 1

export NNODE=8                        # must match #SBATCH --nodes
export NGPU=8                         # GPUs per node; must match --gres=gpu:8
export TOTAL_SPLIT=$((NNODE*NGPU))    # total data shards across the whole job

# Per-rank logs are redirected into ./logging below — create it up front,
# otherwise every worker's redirection fails and the job dies uselessly.
mkdir -p ./logging

# Build the per-node command executed by srun on every node.
# Unescaped $vars (TOTAL_SPLIT, NGPU) expand NOW, at submit time;
# escaped \$vars (SLURM_NODEID, loop index i) expand on each node at run time.
cmd="
for i in \$(seq 0 \$((NGPU-1))); do
    TOTAL_SPLIT=$TOTAL_SPLIT \\
    NODE_GPUS=$NGPU \\
    NODE_RANK=\$SLURM_NODEID \\
    CUDA_VISIBLE_DEVICES=\$i \\
    python -m data_gen.tasks.apply_chunk_refining \\
    --data_format parquet \\
    --limit -1 \\
    --model_path gair-prox/web-chunk-refining-lm \\
    --config_path data_gen/configs/apply_chunk_refining.yaml \\
    > ./logging/apply_chunk_refining_\${SLURM_NODEID}_\${i}.log 2>&1 &
done
wait
"
echo "Executing command:"
echo "$cmd"
srun bash -c "$cmd"
# # ****************************************************
# # scripts for single node: (debug)
# # ****************************************************
# # setup env
# chmod +x setup_personal_env.sh
# chmod +x setup_common_env.sh
# source setup_personal_env.sh
# source setup_common_env.sh
# # activate conda env
# source $TINYLM_CONDA_DIR
# conda activate llama_factory
# # enter working dir
# cd $TINYLM_WORK_DIR
# export NNODE=1
# export NGPU=1
# # total split (int) = nnode * ngpu, written as a shell arithmetic expression
# export TOTAL_SPLIT=$((NNODE*NGPU))
# export SLURM_NODEID=0
# for i in $(seq 0 $((NGPU-1))); do
# TOTAL_SPLIT=$TOTAL_SPLIT NODE_GPUS=$NGPU NODE_RANK=$SLURM_NODEID CUDA_VISIBLE_DEVICES=$i \
# python -m data_gen.tasks.apply_chunk_refining \
# --data_format parquet \
# --limit 1000 \
# --model_path gair-prox/chunk_refining_web_lm \
# --config_path data_gen/configs/apply_chunk_refining.yaml \
# > ./logging/apply_chunk_refining_${SLURM_NODEID}_${i}.log &
# done