-
Notifications
You must be signed in to change notification settings - Fork 55
/
Copy pathrun_specs_full_coarse_600_budget.conf
156 lines (123 loc) · 12.6 KB
/
run_specs_full_coarse_600_budget.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
entries: [
# auto_debugging
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=auto_debugging,subtask=,max_eval_instances=18",priority: 1}
# code_line_description
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=code_line_description,subtask=,max_eval_instances=19",priority: 1}
# conceptual_combinations
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=conceptual_combinations,subtask=contradictions,max_eval_instances=3",priority: 1}
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=conceptual_combinations,subtask=emergent_properties,max_eval_instances=3",priority: 1}
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=conceptual_combinations,subtask=fanciful_fictional_combinations,max_eval_instances=4",priority: 1}
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=conceptual_combinations,subtask=homonyms,max_eval_instances=4",priority: 1}
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=conceptual_combinations,subtask=invented_words,max_eval_instances=4",priority: 1}
# emoji_movie
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=emoji_movie,subtask=,max_eval_instances=19",priority: 1}
# formal_fallacies_syllogisms_negation
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=formal_fallacies_syllogisms_negation,subtask=,max_eval_instances=19",priority: 1}
# known_unknowns
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=known_unknowns,subtask=,max_eval_instances=19",priority: 1}
# linguistics_puzzles
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=linguistics_puzzles,subtask=,max_eval_instances=18",priority: 1}
# logic_grid_puzzle
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=logic_grid_puzzle,subtask=,max_eval_instances=18",priority: 1}
# logical_deduction
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=logical_deduction,subtask=three_objects,max_eval_instances=6",priority: 1}
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=logical_deduction,subtask=five_objects,max_eval_instances=6",priority: 1}
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=logical_deduction,subtask=seven_objects,max_eval_instances=6",priority: 1}
# novel_concepts
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=novel_concepts,subtask=,max_eval_instances=18",priority: 1}
# operators
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=operators,subtask=,max_eval_instances=18",priority: 1}
# play_dialog_same_or_different
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=play_dialog_same_or_different,subtask=,max_eval_instances=18",priority: 1}
# repeat_copy_logic
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=repeat_copy_logic,subtask=,max_eval_instances=18",priority: 1}
# strange_stories
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=strange_stories,subtask=boolean,max_eval_instances=9",priority: 1}
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=strange_stories,subtask=multiple_choice,max_eval_instances=9",priority: 1}
# strategyqa
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=strategyqa,subtask=,max_eval_instances=18",priority: 1}
# symbol_interpretation
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=symbol_interpretation,subtask=adversarial,max_eval_instances=3",priority: 1}
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=symbol_interpretation,subtask=emoji_agnostic,max_eval_instances=3",priority: 1}
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=symbol_interpretation,subtask=name_agnostic,max_eval_instances=4",priority: 1}
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=symbol_interpretation,subtask=plain,max_eval_instances=4",priority: 1}
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=symbol_interpretation,subtask=tricky,max_eval_instances=4",priority: 1}
# vitaminc_fact_verification
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=vitaminc_fact_verification,subtask=,max_eval_instances=18",priority: 1}
# winowhy
{description: "big_bench:model=neurips/local,max_train_instances=big_bench_few_shot_setting,task=winowhy,subtask=,max_eval_instances=19",priority: 1}
# medicine_biology
{description: "mmlu:model=neurips/local,subject=anatomy,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=college_medicine,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=college_biology,data_augmentation=canonical,max_eval_instances=5",priority: 1}
{description: "mmlu:model=neurips/local,subject=high_school_biology,data_augmentation=canonical,max_eval_instances=5",priority: 1}
# computer_science
{description: "mmlu:model=neurips/local,subject=college_computer_science,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=high_school_computer_science,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=computer_security,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=electrical_engineering,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=machine_learning,data_augmentation=canonical,max_eval_instances=4",priority: 1}
# math
{description: "mmlu:model=neurips/local,subject=high_school_mathematics,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=college_mathematics,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=abstract_algebra,data_augmentation=canonical,max_eval_instances=5",priority: 1}
{description: "mmlu:model=neurips/local,subject=high_school_statistics,data_augmentation=canonical,max_eval_instances=5",priority: 1}
# physics_chemistry
{description: "mmlu:model=neurips/local,subject=college_chemistry,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=high_school_chemistry,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=high_school_physics,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=college_physics,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=astronomy,data_augmentation=canonical,max_eval_instances=4",priority: 1}
# formal_reasoning
{description: "mmlu:model=neurips/local,subject=formal_logic,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=logical_fallacies,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=philosophy,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=moral_disputes,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=moral_scenarios,data_augmentation=canonical,max_eval_instances=4",priority: 1}
# law
{description: "mmlu:model=neurips/local,subject=professional_law,data_augmentation=canonical,max_eval_instances=6",priority: 1}
{description: "mmlu:model=neurips/local,subject=international_law,data_augmentation=canonical,max_eval_instances=6",priority: 1}
{description: "mmlu:model=neurips/local,subject=jurisprudence,data_augmentation=canonical,max_eval_instances=6",priority: 1}
# history
{description: "mmlu:model=neurips/local,subject=high_school_european_history,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=high_school_us_history,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=high_school_world_history,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=prehistory,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=world_religions,data_augmentation=canonical,max_eval_instances=4",priority: 1}
# business
{description: "mmlu:model=neurips/local,subject=business_ethics,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=global_facts,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=management,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=marketing,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=miscellaneous,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=professional_accounting,data_augmentation=canonical,max_eval_instances=3",priority: 1}
# health
{description: "mmlu:model=neurips/local,subject=nutrition,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=human_aging,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=clinical_knowledge,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=medical_genetics,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=professional_medicine,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=virology,data_augmentation=canonical,max_eval_instances=3",priority: 1}
# social_studies
{description: "mmlu:model=neurips/local,subject=high_school_government_and_politics,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=high_school_geography,data_augmentation=canonical,max_eval_instances=3",priority: 1}
{description: "mmlu:model=neurips/local,subject=us_foreign_policy,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=public_relations,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=security_studies,data_augmentation=canonical,max_eval_instances=4",priority: 1}
# human_behavior
{description: "mmlu:model=neurips/local,subject=high_school_psychology,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=human_sexuality,data_augmentation=canonical,max_eval_instances=4",priority: 1}
{description: "mmlu:model=neurips/local,subject=professional_psychology,data_augmentation=canonical,max_eval_instances=5",priority: 1}
{description: "mmlu:model=neurips/local,subject=sociology,data_augmentation=canonical,max_eval_instances=5",priority: 1}
# economics
{description: "mmlu:model=neurips/local,subject=high_school_microeconomics,data_augmentation=canonical,max_eval_instances=6",priority: 1}
{description: "mmlu:model=neurips/local,subject=econometrics,data_augmentation=canonical,max_eval_instances=6",priority: 1}
{description: "mmlu:model=neurips/local,subject=high_school_macroeconomics,data_augmentation=canonical,max_eval_instances=6",priority: 1}
# truthful_qa
{description: "truthful_qa:task=mc_single,model=neurips/local,max_eval_instances=9",priority: 1}
# summarization_cnndm
{description: "summarization_cnndm:model=neurips/local,max_eval_instances=9",priority: 1}
# gsm
{description: "gsm:model=neurips/local,max_eval_instances=19",priority: 1}
# bbq
{description: "bbq:subject=all,model=neurips/local,max_eval_instances=18",priority: 1}
]