-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathrun.sh
More file actions
executable file
·92 lines (84 loc) · 2.42 KB
/
run.sh
File metadata and controls
executable file
·92 lines (84 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/bin/bash
# OctoBench evaluation pipeline
#
# Runs benchmark_runner.py, then the trajectory converter, then (unless
# --skip-eval is given) evaluate.py.
#
# Usage: ./run.sh [--model MODEL] [--dataset DATASET] [--skip-eval]

# Strict mode:
#   -e           abort on the first failing command
#   -u           treat a reference to an unset variable as an error
#   -o pipefail  a pipeline fails if any of its stages fails
set -euo pipefail

# Default settings. MODEL, DATASET and SKIP_EVAL may be overridden by
# command-line options.
MODEL="claude-sonnet-4-5-20250929"
DATASET="MiniMaxAI/OctoCodingBench"
SKIP_EVAL=false

# Result locations, all derived from RESULTS_DIR.
RESULTS_DIR="./results"
TRAJECTORIES_DIR="${RESULTS_DIR}/trajectories"
MERGED_FILE="${RESULTS_DIR}/merged_trajectories.jsonl"
SCORES_FILE="${RESULTS_DIR}/scores.json"
# Parse command-line arguments
#######################################
# Parse the script's options into the global settings.
# Globals:   MODEL, DATASET, SKIP_EVAL (written)
# Arguments: the script's command-line arguments
# Outputs:   help text on stdout for -h/--help; error messages on stderr
# Returns:   0 on success. Exits 0 after printing help; exits 1 on an
#            unknown option or on an option that is missing its value.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model|--dataset)
        # Without this check `shift 2` returns non-zero when the value is
        # missing and `set -e` ends the script without any message.
        # An explicitly empty value ("") is still accepted.
        if [[ $# -lt 2 ]]; then
          echo "错误: $1 需要一个参数值" >&2
          exit 1
        fi
        if [[ "$1" == "--model" ]]; then
          MODEL="$2"
        else
          DATASET="$2"
        fi
        shift 2
        ;;
      --skip-eval)
        SKIP_EVAL=true
        shift
        ;;
      --help|-h)
        echo "用法: ./run.sh [选项]"
        echo ""
        echo "选项:"
        echo " --model MODEL 指定评估模型 (如 claude-sonnet-4-5-20250929)"
        echo " --dataset DATASET 数据集路径或 HuggingFace ID"
        echo " --skip-eval 跳过评估步骤,只运行任务和轨迹处理"
        echo " -h, --help 显示帮助信息"
        exit 0
        ;;
      *)
        # Diagnostics go to stderr so they do not mix with normal output.
        echo "未知参数: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Startup banner: show the effective dataset, model and results directory.
printf '%s\n' \
  "==============================================" \
  "OctoBench 评估流水线" \
  "==============================================" \
  "数据集: ${DATASET}" \
  "模型: ${MODEL}" \
  "结果目录: ${RESULTS_DIR}" \
  "=============================================="
# Step 1: run the benchmark
# The runner's argument list is assembled once; --model is passed only when
# MODEL is non-empty.
printf '\n%s\n' "[Step 1/3] 运行 Benchmark..."
runner_args=(--dataset "$DATASET")
if [[ -n "$MODEL" ]]; then
  runner_args+=(--model "$MODEL")
fi
python benchmark_runner.py "${runner_args[@]}"
# Step 2: trajectory processing
# Run the converter with TRAJECTORIES_DIR as its input path and MERGED_FILE
# as its output path.
printf '\n%s\n' "[Step 2/3] 处理轨迹..."
converter_args=(
  --input_path "$TRAJECTORIES_DIR"
  --output_path "$MERGED_FILE"
)
python convert/convert_cc_traj_to_msg.py "${converter_args[@]}"
# Step 3: evaluation (optional)
# Evaluation runs only when SKIP_EVAL is exactly "false"; any other value
# takes the skip branch.
if [[ "$SKIP_EVAL" != false ]]; then
  printf '\n%s\n' "[Step 3/3] 跳过评估"
else
  printf '\n%s\n' "[Step 3/3] 评估轨迹..."
  eval_args=(
    --trajectories "$MERGED_FILE"
    --data "$DATASET"
    --output "$SCORES_FILE"
  )
  python evaluate.py "${eval_args[@]}"
fi
# Final summary: always report the merged trajectory file; report the scores
# file only when SKIP_EVAL is "false".
printf '%s\n' \
  "" \
  "==============================================" \
  "完成!" \
  "轨迹文件: ${MERGED_FILE}"
case "$SKIP_EVAL" in
  false)
    printf '%s\n' "评估结果: ${SCORES_FILE}"
    ;;
esac
printf '%s\n' "=============================================="