-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpresident_estimator.py
70 lines (62 loc) · 2.92 KB
/
president_estimator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from persons import Person
import nltk
from nameparser.parser import HumanName
class PresidentEstimator:
    """Tally person names found in article texts and rank them by occurrence.

    Two lists are maintained:

    * ``article_persons`` -- persons collected by ``getHumanNames`` since the
      last call to ``mergeLists``.
    * ``persons_of_interest`` -- the accumulated ranking that ``mergeLists``
      folds ``article_persons`` into.

    Entries are expected to offer ``getSurname()``, ``getOccurence()``,
    ``addOccurence(n)`` and ``increaseOccurence()`` (the ``Person`` interface
    used throughout this class).
    """

    def __init__(self):
        # Ranking accumulated across all merged articles.
        self.persons_of_interest = []
        # Persons gathered since the last merge.
        self.article_persons = []

    def getPersonsList(self):
        """Return the accumulated list of persons of interest."""
        return self.persons_of_interest

    def appendList(self, list, person):
        """Append ``person`` to ``list`` in place.

        Returns ``None``, exactly like ``list.append`` does.
        """
        # NOTE: the parameter name shadows the builtin ``list``; it is kept
        # so that callers passing it by keyword keep working.
        list.append(person)

    def sortListDecreasing(self, list):
        """Return a new list sorted by ``getOccurence()``, highest first.

        The sort is stable: entries with equal occurrence keep their
        original relative order. The input list is not modified.
        """
        return sorted(list, key=lambda entry: entry.getOccurence(), reverse=True)

    def cropList(self, list, number=3):
        """Return a new list holding the first ``number`` items of ``list``."""
        return list[:number]

    def mergeLists(self):
        """Merge ``article_persons`` into ``persons_of_interest``.

        For every article person whose surname is already present, the
        article's occurrence count is added to the first existing entry with
        that surname; otherwise the article person itself is appended.
        Afterwards ``article_persons`` is reset to an empty list and
        ``persons_of_interest`` is re-sorted in decreasing occurrence order.
        """
        # Surname -> first entry with that surname, so each lookup is O(1)
        # instead of rescanning the whole list for every article person.
        by_surname = {}
        for known in self.persons_of_interest:
            by_surname.setdefault(known.getSurname(), known)

        for candidate in self.article_persons:
            surname = candidate.getSurname()
            if surname in by_surname:
                by_surname[surname].addOccurence(candidate.getOccurence())
            else:
                self.persons_of_interest.append(candidate)
                # Later candidates with the same surname fold into this one.
                by_surname[surname] = candidate

        self.article_persons = []
        self.persons_of_interest = self.sortListDecreasing(self.persons_of_interest)

    def getHumanNames(self, text):
        """Collect the persons named in ``text`` into ``article_persons``.

        The text is tokenized, POS-tagged and chunked with NLTK; every chunk
        labelled ``PERSON`` is tallied by its last token (treated as the
        surname). Finally ``article_persons`` is sorted in decreasing
        occurrence order and cropped to the ``cropList`` default length.

        Returns ``None``; the result is stored on ``self.article_persons``.
        """
        tokens = nltk.tokenize.word_tokenize(text)
        tagged = nltk.pos_tag(tokens)
        tree = nltk.ne_chunk(tagged, binary=False)

        for subtree in tree.subtrees(filter=lambda t: t.label() == 'PERSON'):
            # Each leaf is indexed as leaf[0] for the token text.
            name_parts = [leaf[0] for leaf in subtree.leaves()]
            if not name_parts:
                # Nothing to index by; avoids an IndexError on name_parts[-1].
                continue
            self._recordPerson(name_parts)

        ranked = self.sortListDecreasing(self.article_persons)
        self.article_persons = self.cropList(ranked)

    def _recordPerson(self, name_parts):
        """Count one mention of the person whose name tokens are ``name_parts``."""
        surname = name_parts[-1]
        try:
            for entry in self.article_persons:
                if surname == entry.getSurname():
                    entry.increaseOccurence()
                    return
        except TypeError:
            # NOTE(review): kept from the original, which treated a TypeError
            # during the lookup as "not found" and recorded a new person.
            # Confirm against Person whether this can actually happen.
            pass

        if len(name_parts) > 1:
            # Second argument is the first token of the name (presumably the
            # first name -- verify against the Person constructor).
            self.article_persons.append(Person(surname, name_parts[0]))
        else:
            self.article_persons.append(Person(surname))
# The labels GPE and GSP indicate countries or capitals.