-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathscore_foundation.py
89 lines (78 loc) · 3.12 KB
/
score_foundation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import json
input_file = 'Foundation_result_modelx.jsonl'

# Letters a multiple-choice prediction / ground truth may resolve to.
_CHOICE_LETTERS = ('A', 'B', 'C', 'D')


def _extract_choice(predict):
    """Return the choice letter read from a cleaned response, or None.

    The first character is used when it is one of A/B/C/D; otherwise the
    second-to-last character is tried.
    NOTE(review): any response whose first character is A-D (e.g. a word
    starting with "A") is taken as that choice; kept as-is so scores stay
    comparable with earlier runs.
    """
    if not predict:
        return None
    if predict[0] in _CHOICE_LETTERS:
        return predict[0]
    # This situation may occur when the answer given by gpt is "The answer is A."
    if len(predict) > 1 and predict[-2] in _CHOICE_LETTERS:
        return predict[-2]
    return None


def _ground_truth_letter(record):
    """Return the letter whose choice text equals record['answer_gt'], or None.

    'choice_a' and 'choice_b' are required keys; 'choice_c' and 'choice_d'
    are optional and looked up with .get().
    """
    answer = record['answer_gt']
    if answer == record['choice_a']:
        return 'A'
    if answer == record['choice_b']:
        return 'B'
    if answer == record.get('choice_c'):
        return 'C'
    if answer == record.get('choice_d'):
        return 'D'
    return None


def score_records(lines):
    """Score an iterable of JSONL lines.

    Args:
        lines: iterable of strings, each a JSON object with the keys
            'task_name', 'dataset_name', 'response', 'answer_gt',
            'choice_a', 'choice_b' and optionally 'choice_c'/'choice_d'.
            Blank lines are skipped.

    Returns:
        (stats, fail_num) where stats maps '<task_name>_<dataset_name>' to a
        two-item list [total, correct] in first-seen order, and fail_num is
        the number of records whose response was missing or unparseable.

    Raises:
        SystemExit: with code 1 when 'answer_gt' matches none of the choices.
    """
    stats = {}
    fail_num = 0
    for data in lines:
        if not data.strip():
            continue
        record = json.loads(data)
        task_name = record['task_name']
        dataset_name = record['dataset_name']
        if task_name is None:
            print("1.task_name is None")
            continue
        task_id = f'{task_name}_{dataset_name}'
        # Register the task with zeroed counters right away so a task whose
        # responses all fail still has an entry when the summary is built.
        counts = stats.setdefault(task_id, [0, 0])

        response = record['response']
        # A JSON null response is treated like the literal string 'None'.
        predict = '' if response is None else response.strip().replace('\n', '')
        if not predict or predict == 'None':
            print('2.Response is None.')
            fail_num += 1
            continue

        gpt_predict = _extract_choice(predict)
        if gpt_predict is None:
            print(f'response is {predict}')
            fail_num += 1
            continue

        gt = _ground_truth_letter(record)
        if gt is None:
            print('???? gt_answer is: ', end='')
            print(record['answer_gt'])
            raise SystemExit(1)

        counts[0] += 1
        if gt == gpt_predict:
            counts[1] += 1
    return stats, fail_num


def summarize(stats, fail_num):
    """Return the report lines for the given per-task stats and failure count.

    A task with no scored records is reported with acc=0.0 instead of
    dividing by zero.
    """
    report = []
    total_sum = 0
    for task_id, (total_num, correct_num) in stats.items():
        acc = correct_num / total_num if total_num else 0.0
        total_sum += total_num
        report.append(f'{task_id}: Sum={total_num}, correct={correct_num}, acc={acc}')
    report.append(f'total_sum: {total_sum}')
    report.append(f'fail_num: {fail_num}')
    return report


def main():
    """Score `input_file` and print the per-task accuracy report."""
    with open(input_file, 'r', encoding='utf-8') as fp:
        stats, fail_num = score_records(fp)
    for report_line in summarize(stats, fail_num):
        print(report_line)


if __name__ == '__main__':
    main()