# GRiD/eval_think-reason_StrategyQA.sh (cure-lab/GRiD, branch: main)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Launch eval_think-reason.py on the StrategyQA test split for one fine-tuned
# checkpoint. The script, dataset and eval_results paths are relative, so run
# this from the directory that contains eval_think-reason.py.

# Runtime environment for the evaluation process.
# NOTE(review): NCCL_P2P_LEVEL / ACCELERATE_USE_FSDP are consumed by NCCL and
# HF Accelerate respectively, not by this script -- confirm they are still
# needed for a single-GPU run.
export NCCL_P2P_LEVEL=NVL
export ACCELERATE_USE_FSDP=False
# Expose only GPU index 2 to the Python process.
export CUDA_VISIBLE_DEVICES=2

BEAM_SIZE=1
data_format=think-reason # cot/direct/think
base_path=/local_data/xywen22/project/train_with_sftrainer/ckpts
DATASET=StrategyQA

# Model selection: keep exactly one Model_Series / Model_name pair active.
# Model_Series=Qwen
# Model_Series=meta-llama
Model_Series=deepseek-ai
# Model_name=Qwen2.5-14B-Instruct
# Model_name=Llama-3.1-8B-Instruct
Model_name=DeepSeek-R1-Distill-Qwen-14B

# Checkpoint selection. The commented prefix/suffix form is an alternative way
# of composing the training-run directory name.
# checkpoint_NAME_prefix="lr=5e-05-dependency-packing=True"
# checkpoint_NAME_suffix="augmentModel=None-randomselected-gpt4o-futher_filtered-addtoken=False-load_in_4bit=True-lora_dropout=0.01-train_response_only=False-N_epochs=20"
# checkpoint_NAME=$checkpoint_NAME_prefix-$checkpoint_NAME_suffix
checkpoint_NAME='lr=1e-4-creator=ds-augment=None-random_select=True-further_filter=False-filtered=False-load_in_4bit=False-response_only=False-N_epochs=15-unsloth=False-special_token=False-lora=all-linear'
checkpoint_id="checkpoint-645"
LOG_FILE_NAME="${checkpoint_NAME}_${checkpoint_id}"

# Assemble the three paths once, quoted, so the command below stays readable
# and no component is subject to word-splitting or globbing.
ckpt_path="${base_path}/${data_format}/${Model_Series}/${Model_name}/${DATASET}/${checkpoint_NAME}/${checkpoint_id}"
data_path="dataset/${DATASET}/${data_format}/strategyqa-ds-combine-think-random-test.json"
log_path="eval_results/${data_format}/${DATASET}/${Model_Series}/${Model_name}-${LOG_FILE_NAME}.json"

# Make sure the results directory exists before the evaluator tries to write
# into it (no-op when it is already there).
mkdir -p -- "${log_path%/*}"

# NOTE(review): "false" is passed as a plain string; verify that
# eval_think-reason.py parses it as a boolean false rather than a truthy
# non-empty string.
python eval_think-reason.py \
    --beam "$BEAM_SIZE" \
    --ckpt "$ckpt_path" \
    --data "$data_path" \
    --log_path "$log_path" \
    --load_in_4bit false
# To also capture console output, append this to the command above
# (add a trailing backslash after "false"):
#     2>&1 | tee "eval_results/${LOG_FILE_NAME}.txt"