-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathentity.py
More file actions
107 lines (93 loc) · 2.59 KB
/
entity.py
File metadata and controls
107 lines (93 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
class Sentence:
    """A text together with its tokens and a list of labels.

    Attributes:
        text: the value passed as ``text``, stored unchanged.
        tokens: the value passed as ``tokens``; ``__str__`` iterates it and
            reads ``.text`` from every element.
        labels: a fresh, initially empty list created per instance.
    """

    def __init__(self, text, tokens):
        # Every instance gets its own list, so labels are never shared
        # between sentences.
        self.labels = []
        self.tokens = tokens
        self.text = text

    def __str__(self):
        """Return the texts of all tokens joined by ``|``."""
        token_texts = [token.text for token in self.tokens]
        return '|'.join(token_texts)
class Token:
    """A single token: an identifier, its text and an optional label.

    Attributes:
        id: the value passed as ``id``, stored unchanged.
        text: the value passed as ``text``; also what ``str(token)`` returns.
        label: ``None`` after construction.
        label_class: ``None`` after construction.
    """

    def __init__(self, id, text):
        self.id, self.text = id, text
        # A new token carries no label and no label class.
        self.label = self.label_class = None

    def __str__(self):
        """Return the token's text."""
        return self.text
class Mention:
    """Placeholder class: it defines no attributes and no behaviour."""

    def __init__(self):
        """Create a mention; takes no arguments and sets no state."""
class Label:
    """A keyword phrase plus term lists that can be attached to a sentence.

    A label is built from a keyword phrase, an assertion and three
    ``|``-separated term strings (type, bone, bone part).  ``assign_to``
    matches these against the tokens of a sentence and marks every matching
    token as belonging to this label.

    Attributes:
        id: the value passed as ``id``, stored unchanged.
        assertion: the assertion, lowercased (``''`` if ``None`` was given).
        keyword: the token that matched the phrase, or ``None``.
        sentence: the sentence this label was attached to, or ``None``.
        typ, bone, bone_part: tokens matched for each term list.
    """

    def __init__(self, id, phrase, assertion, typ, bone, bone_part):
        """Store the raw label data; nothing is matched yet.

        Args:
            id: identifier of the label.
            phrase: keyword phrase; a falsy value makes the label empty.
            assertion: assertion string, stored lowercased.
            typ: ``|``-separated type terms.
            bone: ``|``-separated bone terms.
            bone_part: ``|``-separated bone-part terms.

        ``None`` is accepted for ``assertion`` and for any of the term
        strings and is treated like an empty string.
        """
        self.id = id
        self._phrase = phrase
        self.assertion = (assertion or '').lower()

        # Terms still waiting to be matched; assign_to() removes entries
        # from these lists as it finds them.
        self._typ_list = self._split_terms(typ)
        self._bone_list = self._split_terms(bone)
        self._bone_part_list = self._split_terms(bone_part)

        self.keyword = None
        # Set by assign_to(); initialised here so that bert_string() can
        # tell an unassigned label apart instead of raising AttributeError.
        self.sentence = None
        self.typ = []
        self.bone = []
        self.bone_part = []

    @staticmethod
    def _split_terms(raw):
        """Lowercase *raw* and split it on ``|``, dropping empty pieces."""
        return [term for term in (raw or '').lower().split('|') if term]

    def _claim(self, token, label_class):
        """Mark *token* as belonging to this label with the given class."""
        token.label = self
        token.label_class = label_class

    def is_empty(self):
        """Return True when the label has no keyword phrase."""
        return not self._phrase

    def bert_string(self):
        """Render the assigned sentence as tab-separated ``text<TAB>class`` lines.

        Tokens that do not belong to this label get class ``O``.  The keyword
        token is written as ``**MASK**`` with class ``B_<ASSERTION>``; every
        other token of this label keeps its text and gets
        ``B_<LABEL_CLASS>``.

        Returns:
            The lines joined by newlines, or ``''`` when the label has not
            been attached to a sentence.
        """
        if self.sentence is None:
            return ''

        lines = []
        for token in self.sentence.tokens:
            text = token.text
            if token.label is not self:
                tag = 'O'
            elif token is self.keyword:
                text = '**MASK**'
                tag = 'B_' + self.assertion.upper()
            else:
                tag = 'B_' + token.label_class.upper()
            lines.append('{}\t{}'.format(text, tag))
        return '\n'.join(lines)

    def assign_to(self, sentence):
        """Attach this label to *sentence* by claiming matching tokens.

        The first unlabelled, non-empty token whose lowercased text equals
        the lowercased phrase becomes the keyword.  If there is none, a
        message is printed and nothing else happens.  Otherwise the label is
        appended to ``sentence.labels`` and the remaining unlabelled tokens
        are matched against the type, bone and bone-part terms (in that
        priority order); each term is consumed by at most one token.
        Terms left over afterwards are reported with ``print``.

        An empty label (see ``is_empty``) is ignored silently.
        """
        if self.is_empty():
            return

        phrase = self._phrase.lower()
        for token in sentence.tokens:
            if token.label or not token.text:
                continue
            if token.text.lower() == phrase:
                self._claim(token, 'keyword')
                self.keyword = token
                break

        # NOTE(review): a keyword found by an earlier assign_to() call also
        # passes this check -- confirm whether re-assignment is intended.
        if self.keyword is None:
            print('Keyword not found!', sentence, self._phrase)
            return

        sentence.labels.append(self)
        self.sentence = sentence

        # (label class, terms still pending, tokens matched so far);
        # the order is the matching priority.
        slots = (
            ('type', self._typ_list, self.typ),
            ('bone', self._bone_list, self.bone),
            ('bone_part', self._bone_part_list, self.bone_part),
        )
        for token in sentence.tokens:
            if token.label or not token.text:
                continue
            text = token.text.lower()
            for label_class, pending, matched in slots:
                if text in pending:
                    self._claim(token, label_class)
                    matched.append(token)
                    # Consume the term so it cannot match a second token.
                    pending.remove(text)
                    break

        if self._typ_list:
            print('Type not complete!', sentence, self._typ_list)
        if self._bone_list:
            print('Bone not complete!', sentence, self._bone_list)
        if self._bone_part_list:
            print('Bone part not complete!', sentence, self._bone_part_list)