-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathanswer_ranking.py
executable file
·116 lines (105 loc) · 3.62 KB
/
answer_ranking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import ner
import util
import nltk
import pre_process as pp
# The answer ranking combining the three rules
def answer_ranking(sentences, answer_sent_ids, query):
    """Pick the best answer entity for ``query`` from the candidate sentences.

    The first candidate sentence that has at least one entity whose name is
    not an English stopword is selected.  Every entity name in that sentence
    is then scored once by adding up:

    * ``rule_one(name, query)``,
    * the per-entity value returned by ``closer_to_open``, and
    * 1 when the entity's type equals ``ner.question_type(query)``.

    Args:
        sentences: Collection of sentences, indexed by sentence id.
        answer_sent_ids: Iterable of candidates; element ``[0]`` of each
            candidate is used as the sentence id.  Presumably ordered best
            candidate first -- confirm against the caller.
        query: The question, passed through unchanged to the rule helpers.

    Returns:
        The name of the highest-scoring entity (on ties, the one that comes
        last in iteration order), or the string "not sure" when there are no
        candidates or no candidate sentence has a usable entity.
    """
    candidates = list(answer_sent_ids)
    if not candidates:
        # Nothing to rank: answer before touching the corpus or NER data.
        return "not sure"

    stopwords = set(nltk.corpus.stopwords.words('english'))

    # Find the first candidate with at least one non-stopword entity name.
    selected = False
    sen_id = 0
    entities = []
    for candidate in candidates:
        found = ner.get_entities(candidate[0])
        if found and any(
            name.lower() not in stopwords
            for entity in found
            for name in entity
        ):
            sen_id = candidate[0]
            entities = found
            selected = True
            break

    if not selected:
        # Without this guard the entities of the last rejected candidate
        # would be scored against sentence 0, which can raise KeyError when
        # looking up their proximity scores.
        return "not sure"

    sentence = sentences[sen_id]
    ques_type = ner.question_type(query)
    # NOTE(review): the result of this call is never used.  It is kept only
    # in case ner.get_trans_entities has side effects -- confirm and remove.
    ner.get_trans_entities(ner.tagged_sents[sen_id])
    proximity = closer_to_open(sentence, sen_id, query)

    # A score of 0 marks an entity that has not been scored yet, so a name
    # that appears in several entity records is only scored the first time.
    entity_score = {name: 0 for entity in entities for name in entity}
    for entity in entities:
        for name, entity_type in entity.items():
            if entity_score[name] != 0:
                continue
            entity_score[name] += rule_one(name, query)
            entity_score[name] += proximity[name]
            if entity_type == ques_type:
                entity_score[name] += 1

    if not entity_score:
        return "not sure"

    best_score = max(entity_score.values())
    answer = ''
    for name, score in entity_score.items():
        # No early exit: the last entity holding the top score wins a tie.
        if score == best_score:
            answer = name
    return answer
# Rule 1
# Answers whose content words all appear in the question should be ranked lowest
def rule_one(entity, query):
    """Return 1 when the entity is not already covered by the query, else 0.

    The entity is lower-cased and passed through ``pp.lemmatize``.  The
    result is compared as a whole: it scores 0 if it is in ``pp.stopwords``
    or in ``query``, and 1 otherwise.

    Args:
        entity: Entity name as a string.
        query: The question; only ``in`` membership is used on it, so the
            match semantics depend on its type (substring test for a string,
            element test for a list) -- confirm against the caller.

    Returns:
        int: 1 or 0.
    """
    normalized = pp.lemmatize(entity.lower())
    if normalized in pp.stopwords or normalized in query:
        return 0
    return 1
# Rule 2
# Answers which match the question type should be ranked higher than those that don't
def match_question_type(entities, question):
    """Return 1 if any entity's type equals the question's type, else 0.

    Args:
        entities: Iterable of mappings from entity name to entity type.
        question: The question, classified with ``ner.question_type``.

    Returns:
        int: 1 when at least one entity type matches, otherwise 0.
    """
    expected_type = ner.question_type(question)
    # A disabled earlier variant scored a match on type 'OTHER' as 0.5;
    # every match currently scores 1.
    has_match = any(
        expected_type == entity_type
        for entity in entities
        for entity_type in entity.values()
    )
    return 1 if has_match else 0
# Rule 3
# Among entities of the same type,
# the preferred entity should be the one which is closer in the sentence to an open-class word from the question
def find_open_class_words(sentence):
    """Map each open-class token of a tokenised sentence to its position.

    A token is kept when its lower-cased form is not in ``pp.stopwords`` and
    the token itself is not in ``pp.punctuations``.

    Args:
        sentence: Iterable of token strings in sentence order.

    Returns:
        dict: token (original casing) -> zero-based index in ``sentence``.
        When a token occurs more than once, the index of its last occurrence
        is the one stored.
    """
    return {
        token: position
        for position, token in enumerate(sentence)
        if token.lower() not in pp.stopwords
        and token not in pp.punctuations
    }
# Input is the answer sentence, its id and the question; output is a score per entity
def closer_to_open(sentence, answer_sent_id, query):
    """Score the entities of a sentence by their distance to query words.

    For each entity name, the position of its first token in the sentence is
    compared with the position of every open-class sentence word whose lemma
    (``pp.lemmatize``) is in ``query``; the absolute differences are summed.
    The final score is ``util.sigmoid(average - distance)``, so a smaller
    summed distance gives a larger sigmoid argument.

    Args:
        sentence: The answer sentence as a whitespace-separated string.
        answer_sent_id: Sentence id handed to ``ner.get_entities``.
        query: The question; only ``in`` membership is used on it.

    Returns:
        dict: entity name -> score.  Empty when the sentence has no entities.
    """
    entities = ner.get_entities(answer_sent_id)
    if not entities:
        return {}

    positions = find_open_class_words(sentence.split())

    # Sentence positions of open-class words that also occur in the query.
    # This does not depend on the entity, so it is computed once up front.
    query_positions = [
        index
        for word, index in positions.items()
        if pp.lemmatize(word) in query
    ]

    distance = {name: 0 for entity in entities for name in entity}
    for entity in entities:
        for name in entity:
            tokens = name.split()
            # NOTE(review): an entity whose first token is not an open-class
            # word of the sentence (or whose name is blank) keeps distance 0,
            # i.e. it is treated like the closest possible entity.
            if not tokens or tokens[0] not in positions:
                continue
            start = positions[tokens[0]]
            distance[name] += sum(
                abs(start - index) for index in query_positions
            )

    # The mean is taken over the number of entity records, not the number of
    # distinct entity names.
    avg_distance = sum(distance.values()) / len(entities)
    return {
        name: util.sigmoid(avg_distance - value)
        for name, value in distance.items()
    }