#!/usr/bin/python
# -*- coding: utf-8 -*-
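"""Argument mining pipeline built on argument_BERT: splits a text into
propositions, classifies each proposition type (premise, claim or conclusion)
and detects inference/conflict relations between pairs of propositions."""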
from argument_BERT.preprocessing import data_loader, preparator
from argument_BERT.preprocessing import model_builder
from argument_BERT.preprocessing import data_builder, data_saver
import numpy as np
import pandas as pd
from bert_embedding import BertEmbedding
from keras.models import load_model
from datetime import datetime
from nltk.tokenize import sent_tokenize


def proposition_identification(text):
    """Find argument propositions in text"""
    sents = sent_tokenize(text)
    props = []
    for i in range(len(sents)):
        next_prop = {'id': 'T' + str(i),
                     'text': sents[i],
                     'relations': []}
        props.append(next_prop)
    return props


def proposition_type(props, full_text, model_path, verbose=1):
    """Identifies proposition types"""
    texts = [prop['text'] for prop in props]
    ADUs, confs = predict_type(texts, full_text, model_path, verbose=verbose)
    ADU_dict = {0: 'premise', 1: 'claim', 2: 'conclusion'}
    for i in range(len(props)):
        props[i]['ADU'] = {'type': ADU_dict[ADUs[i]],
                           'confidence': str(confs[i])}
    return props


def proposition_position(props, text):
    """Add proposition start and end positions"""
    for i in range(len(props)):
        prop_text = props[i]['text']
        s = text.find(prop_text)
        f = s + len(prop_text) - 1
        props[i]['textPosition'] = {'start': str(s),
                                    'end': str(f)}
    return props


def relation_detection(props, text, model_path, verbose=1):
    """Identifies relations between propositions"""
    arg1s = []
    arg1sID = []
    arg2s = []
    arg2sID = []
    # Build every pair (i, j) with i < j as a candidate relation.
    for i in range(len(props)):
        for j in range(i + 1, len(props)):
            arg1s.append(props[i]['text'])
            arg2s.append(props[j]['text'])
            arg1sID.append(i)
            arg2sID.append(j)
    preds, confs = predict_relation(arg1s, arg2s, text, model_path,
                                    verbose=verbose)
    for i in range(len(preds)):
        if preds[i] != 0:  # class 0 means no relation was detected
            arg1ID = arg1sID[i]
            arg2ID = arg2sID[i]
            relID = 'RT' + str(arg1ID) + '-T' + str(arg2ID)
            tyB = preds[i] - 1
            tyStr = 'Default Inference' if tyB == 0 else 'Default Conflict'
            relation = {'id': relID,
                        'typeBinary': str(tyB),
                        'type': tyStr,
                        'partnerID': props[arg2ID]['id'],
                        'confidence': str(confs[i])}
            props[arg1ID]['relations'].append(relation)
    return props


def predict_type(list_of_props, full_text, model_path, verbose=1):
    """Model prediction for proposition types"""
    chances = predictor(model_path, list_of_props,
                        ADU=True, fullText=full_text,
                        verbose=verbose)
    predictions = np.argmax(chances, axis=1)
    confidences = np.max(chances, axis=1)
    return predictions, confidences


def predict_relation(arg1, arg2, full_text, model_path, verbose=1):
    """Model prediction for relation types"""
    chances = predictor(model_path, arg1, arg2,
                        ADU=False, fullText=full_text,
                        verbose=verbose)
    predictions = np.argmax(chances, axis=1)
    confidences = np.max(chances, axis=1)
    return predictions, confidences


def predictor(model_path,
              arg1,
              arg2=None,
              originalArg1=None,
              originalArg2=None,
              ADU=False,
              verbose=1,
              fullText=None):
    """Generates model-readable data from the propositions and runs the prediction"""
    if verbose > 0:
        print('Start loading resources...')
    model = load_model(model_path)
    bert_embedding = BertEmbedding(model='bert_12_768_12',
                                   dataset_name='book_corpus_wiki_en_cased',
                                   max_seq_length=35)
    if verbose > 0:
        print('Generate prediction...')
    fulls = [fullText.replace('\n', ' ')] * len(arg1)
    data = data_builder.generate_data_with_features(arg1, arg2, originalArg1,
                                                    originalArg2, fulls,
                                                    bert_embedding)
    if not ADU:
        features = model_builder.select_FFNN_features(
            data, shared_feature_list=None, original_bert=True)
    else:
        features = model_builder.select_FFNN_features(
            data, shared_feature_list=None, original_bert=True, has_2=False)
    prediction = model.predict(features)
    if verbose > 0:
        print(prediction)
    return prediction


def argumentor(text, adu_model, relation_model, corpus_name="NoName",
               out_filename="out.json", verbose=1):
    """Proposition identification, proposition type classification
    and relation detection pipeline.

    Input:
        text: text of the argumentation
        adu_model: path to the proposition type predictor model
        relation_model: path to the relation detector model
        corpus_name: name of the file's original corpus
        out_filename: file to store the results (.json or .xml)
        verbose: greater than 0 to print progress messages
    """
    props = proposition_identification(text)
    props = proposition_position(props, text)
    props = proposition_type(props, text, adu_model, verbose=verbose)
    props = relation_detection(props, text, relation_model, verbose=verbose)
    data = {'propositions': props,
            'originalText': text,
            'corpus': corpus_name}
    data_saver.save2file(data, out_filename)
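

# Usage sketch (illustrative only): the model paths below are hypothetical
# placeholders, not files shipped with the project; point them at trained
# ADU and relation models saved in Keras format.
if __name__ == '__main__':
    sample_text = ('Cars should be banned from city centres. '
                   'They pollute the air and endanger pedestrians.')
    argumentor(sample_text,
               adu_model='adu_model.h5',            # hypothetical model path
               relation_model='relation_model.h5',  # hypothetical model path
               corpus_name='demo',
               out_filename='out.json',
               verbose=1)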