GRiD/eval_think_GPQA.sh at main · cure-lab/GRiD · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Launch eval_think.py against one fine-tuned checkpoint on the GPQA dataset.
#
# Usage: run from the directory that contains eval_think.py and the
# dataset/ and eval_results/ trees; all data/log paths below are relative.
#
# To evaluate a different model or checkpoint, edit the configuration
# variables in this file; the script takes no command-line arguments.

# Abort on the first failing command and on any reference to an unset
# variable, so a typo in a variable name cannot silently produce a bad path.
set -eu

# --- Environment -----------------------------------------------------------
export NCCL_P2P_LEVEL=NVL          # NCCL peer-to-peer level (NVL = NVLink)
export ACCELERATE_USE_FSDP=False   # presumably read by HF Accelerate -- TODO confirm
export CUDA_VISIBLE_DEVICES=3      # expose only GPU index 3 to the process

# --- Configuration ---------------------------------------------------------
BEAM_SIZE=1
data_format=think # cot/direct/think
base_path=/local_data/xywen22/project/train_with_sftrainer/ckpts
DATASET=GPQA
# Model_Series=Qwen
Model_Series=meta-llama
# Model_name=Qwen2.5-14B-Instruct
Model_name=Llama-3.1-8B-Instruct
# Alternative (older) checkpoint naming scheme, kept for reference:
# checkpoint_NAME_prefix="lr=5e-05-dependency-packing=True"
# checkpoint_NAME_suffix="augmentModel=None-randomselected-gpt4o-futher_filtered-addtoken=False-load_in_4bit=True-lora_dropout=0.01-train_response_only=False-N_epochs=20"
# checkpoint_NAME=$checkpoint_NAME_prefix-$checkpoint_NAME_suffix
checkpoint_NAME='lr=1e-4-creator=ds-augment=None-random_select=False-further_filter=False-filtered=False-load_in_4bit=False-response_only=False-N_epochs=10-unsloth=False-special_token=False-lora=all-linear'
checkpoint_id="checkpoint-150"
LOG_FILE_NAME="${checkpoint_NAME}_${checkpoint_id}"

# --- Derived paths ---------------------------------------------------------
# Checkpoint directory: <base>/<format>/<series>/<model>/<dataset>/<run>/<step>
ckpt_path="${base_path}/${data_format}/${Model_Series}/${Model_name}/${DATASET}/${checkpoint_NAME}/${checkpoint_id}"
# NOTE(review): the data file name is hard-coded with a "-think" suffix and
# does not follow $data_format; adjust it by hand when switching formats.
data_path="dataset/${DATASET}/${data_format}/gpqa_diamond-deepseek-r1-250120-think.json"
log_path="eval_results/baseline_${data_format}/${DATASET}/${Model_Series}/${Model_name}-${LOG_FILE_NAME}.json"

# --- Run -------------------------------------------------------------------
# All expansions are quoted so each value reaches Python as exactly one
# argument. The last argument deliberately has no trailing backslash.
python eval_think.py \
    --beam "$BEAM_SIZE" \
    --ckpt "$ckpt_path" \
    --data "$data_path" \
    --log_path "$log_path" \
    --load_in_4bit false
# To also capture console output, append to the command above:
#     2>&1 | tee "eval_results/${LOG_FILE_NAME}.txt"