eval.py
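"""BLEU evaluation for generated image captions.

Reads tab-separated COCO reference captions and model-generated captions,
computes sentence-level BLEU-1..4 per caption, then corpus-level BLEU-1..4.
"""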
import nltk
import numpy as np
import pandas as pd
from nltk.translate.bleu_score import SmoothingFunction

# NLTK smoothing function (can be passed to sentence_bleu/corpus_bleu via smoothing_function=smoothie)
smoothie = SmoothingFunction().method4
path_to_reference = 'Dataset/COCOcaptions.txt' # df -> image_id:str caption:str len(5000)
path_to_model = 'model/Decoder/Generated_Captions.txt'
# Model output: one "<image_id>\t<caption>" line per generated caption.
with open(path_to_model) as f:
    model_data = f.readlines()
model_filenames = [caps.split('\t')[0] for caps in model_data]
model_captions = [caps.rstrip('\n').split('\t')[1] for caps in model_data]

# Reference captions: "<image_id>#<caption_number>\t<caption>" lines.
with open(path_to_reference, 'r') as f:
    ref_data = f.readlines()
reference_filenames = [caps.split('\t')[0].split('#')[0] for caps in ref_data]
reference_captions = [caps.rstrip('\n').split('\t')[1] for caps in ref_data]
# Group the tokenized reference captions by image so each row holds all references for one image.
# Note: groupby() sorts by image id, so the reference file is assumed to be ordered the same way
# as the model output (and as df.image.unique()).
df = pd.DataFrame()
df['image'] = reference_filenames
df['caption'] = reference_captions
# df.caption = df.caption.str.decode('utf').str.split()
df.caption = df.caption.str.split()
df = pd.DataFrame(data={'image': list(df.image.unique()),
                        'caption': list(df.groupby('image')['caption'].apply(list))})[:len(model_captions)]
bleu1_scores = []
bleu2_scores = []
bleu3_scores = []
bleu4_scores = []
meteor_scores = []
index1 = None  # df index of a caption that failed sentence-level scoring
index2 = None  # position of that caption in model_captions
# Sentence-level BLEU-1..4 for each generated caption against its reference set.
for i, row in df.iterrows():
    model = model_captions[i].split()
    reference = row.caption
    try:
        score1 = nltk.translate.bleu_score.sentence_bleu(reference, model, weights=[1.0])
        score2 = nltk.translate.bleu_score.sentence_bleu(reference, model, weights=[0.5, 0.5])
        score3 = nltk.translate.bleu_score.sentence_bleu(reference, model,
                                                         weights=[1.0 / 3, 1.0 / 3, 1.0 / 3])
        score4 = nltk.translate.bleu_score.sentence_bleu(reference, model)
        bleu1_scores.append(score1)
        bleu2_scores.append(score2)
        bleu3_scores.append(score3)
        bleu4_scores.append(score4)
        if i % 10000 == 0 and i != 0:
            print(f"{i / df.shape[0] * 100:.1f}% done")
    except Exception:
        # Remember the offending row so it can be dropped before corpus-level scoring.
        index1 = df.index[i]
        index2 = i
        print("Invalid Caption Generated for: ", model_filenames[i])
# print("\nMean Sentence-Level BLEU-1 score: ", np.mean(bleu1_scores))
# print("Mean Sentence-Level BLEU-2 score: ", np.mean(bleu2_scores))
# print("Mean Sentence-Level BLEU-3 score: ", np.mean(bleu3_scores))
# print("Mean Sentence-Level BLEU-4 score: ", np.mean(bleu4_scores))
# print("Meteor Sentence-Level score: ", np.mean(meteor_scores))
# score_meteor = nltk.translate.meteor_score.meteor_score(reference_captions, model_captions)
# print("Corpus-Level METEOR score: ", score_meteor)
# n-gram weights for corpus-level BLEU-1..4 (unigrams through 4-grams)
weights1 = [1, 0, 0, 0]
weights2 = [0.5, 0.5, 0, 0]
weights3 = [1.0 / 3, 1.0 / 3, 1.0 / 3, 0]
weights4 = [0.25, 0.25, 0.25, 0.25]
# Drop the caption that failed sentence-level scoring (if any) before corpus-level BLEU.
if index1 is not None and index2 is not None:
    df = df.drop([index1])
    df = df.reset_index(drop=True)
    del model_captions[index2]
references = df.caption
model_captions = [caption.split() for caption in model_captions]
score1 = nltk.translate.bleu_score.corpus_bleu(references, model_captions, weights1)
print("\n\nCorpus-Level BLEU-1 score: ", score1)
score2 = nltk.translate.bleu_score.corpus_bleu(references, model_captions, weights2)
print("Corpus-Level BLEU-2 score: ", score2)
score3 = nltk.translate.bleu_score.corpus_bleu(references, model_captions, weights3)
print("Corpus-Level BLEU-3 score: ", score3)
score4 = nltk.translate.bleu_score.corpus_bleu(references, model_captions, weights4)
print("Corpus-Level BLEU-4 score: ", score4)