-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexperiment.py
435 lines (347 loc) · 18.8 KB
/
experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
import wandb
import torch
import copy
from tqdm.auto import tqdm
from torch import nn, optim
from torch.utils.data import DataLoader
import pandas as pd
from models import AMCNN
from baseline import BaselineExperiment
from models import BiGRU, TextCNN, TransformerClassifier
from utils import init_weights, removeObjectiveSents, load_pretrained_vectors
from settings import *
from data_processing import Lang, CustomDataset, TransformerDataset
from nltk.corpus import movie_reviews, subjectivity
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score
from torchtext.vocab import GloVe
class Experiment:
    """Stratified k-fold training/evaluation harness for sentiment models.

    Subclasses must set ``self.model_config`` (hyper-parameter dict) and
    ``self.ModelType`` (the nn.Module class to instantiate) in ``__init__``;
    ``run()`` then loads the NLTK corpus for the task, trains one model per
    fold, logs each fold as a Weights & Biases run, and returns the model
    from the fold with the best test accuracy.
    """

    def __init__(self, task="polarity", sjv_classifier=None, sjv_vectorizer=None, **kwargs):
        """Store the task name and the optional shallow subjectivity filter.

        Args:
            task: "polarity", "subjectivity", or "polarity-filter".
            sjv_classifier: fitted shallow classifier used to drop objective
                sentences; required only for the "polarity-filter" task.
            sjv_vectorizer: vectorizer matching ``sjv_classifier``.
            **kwargs: ignored here; consumed by subclasses.
        """
        self.model_config = None      # hyper-parameter dict, set by subclasses
        self.ModelType = None         # model class to instantiate, set by subclasses
        self.train_loader = None
        self.test_loader = None
        self.optimizer = None
        self.cost_fn = None
        self.task = task
        self.data_raw = None          # list of {"document": ..., "label": 0/1}
        self.data_Y = None            # flat label list parallel to data_raw
        self.lang = None              # vocabulary built from the current training fold
        self.sjv_classifier = sjv_classifier
        self.sjv_vectorizer = sjv_vectorizer

    def prepare_data(self):
        """Load the corpus for ``self.task`` into ``data_raw`` / ``data_Y``.

        Calls ``exit()`` when the task name is unknown or the subjectivity
        classifier/vectorizer needed by "polarity-filter" is missing.
        """
        if self.task == "polarity":
            # Load movie review dataset
            negative_fileids = movie_reviews.fileids('neg')
            positive_fileids = movie_reviews.fileids('pos')
            mr_neg = [{"document": list(movie_reviews.words(fileids=fileid)), "label": 0} for fileid in negative_fileids]
            mr_Y_neg = [0]*len(mr_neg)
            mr_pos = [{"document": list(movie_reviews.words(fileids=fileid)), "label": 1} for fileid in positive_fileids]
            mr_Y_pos = [1]*len(mr_pos)
            # negatives first, then positives; labels mirror that order
            self.data_raw = mr_neg+mr_pos
            self.data_Y = mr_Y_neg + mr_Y_pos
            print("Total samples: ", len(self.data_raw))
        elif self.task == "subjectivity":
            obj_fileid = subjectivity.fileids()[0] # plot.tok.gt9.5000
            subj_fileid = subjectivity.fileids()[1] # quote.tok.gt9.5000
            obj_sents = subjectivity.sents(fileids=obj_fileid)
            subj_sents = subjectivity.sents(fileids=subj_fileid)
            # objective sentences labeled 0, subjective labeled 1
            self.data_raw = [{"document": sent, "label": 0} for sent in obj_sents]
            self.data_Y = [0]*len(obj_sents)
            self.data_raw += [{"document": sent, "label": 1} for sent in subj_sents]
            self.data_Y += [1]*len(subj_sents)
            print("Total samples: ", len(self.data_raw))
        elif (self.task == "polarity-filter"
              and self.sjv_classifier is not None
              and self.sjv_vectorizer is not None
              ):
            # get docs divided in sentences
            negative_fileids = movie_reviews.fileids('neg')
            positive_fileids = movie_reviews.fileids('pos')
            neg_docs_sents = [movie_reviews.sents(fileids=fileid) for fileid in negative_fileids]
            pos_docs_sents = [movie_reviews.sents(fileids=fileid) for fileid in positive_fileids]
            mr_docs_sents = neg_docs_sents + pos_docs_sents
            mr_sents = [" ".join(sent) for doc in mr_docs_sents for sent in doc]
            # shallow subjectivity classifier is used to allow comparisons
            movie_sjv_vectors = self.sjv_vectorizer.transform(mr_sents)
            pred = self.sjv_classifier.predict(movie_sjv_vectors)
            clean_mr = removeObjectiveSents(mr_docs_sents, pred, tokenized=True)
            # NOTE(review): assumes the first 1000 filtered docs are the
            # negative ones (matches the neg+pos concatenation above) — confirm
            # the corpus has exactly 1000 negative documents.
            mr_neg = [{"document": doc, "label": 0} for doc in clean_mr[:1000]]
            mr_Y_neg = [0]*len(mr_neg)
            mr_pos = [{"document": doc, "label": 1} for doc in clean_mr[1000:]]
            mr_Y_pos = [1]*len(mr_pos)
            self.data_raw = mr_neg+mr_pos
            self.data_Y = mr_Y_neg + mr_Y_pos
            print("Total samples: ", len(self.data_raw))
        else:
            print("Cannot prepare data. Wrong parameters.")
            exit()

    def create_folds(self, n_folds=N_FOLDS):
        """Precompute stratified (train_idx, test_idx) pairs for each fold."""
        skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=RANDOM_SEED)
        self.folds_idxs = []
        for idxs in skf.split(self.data_raw, self.data_Y):
            self.folds_idxs.append(idxs)

    def create_dataloaders(self, fold_idx=0):
        """Build train/test DataLoaders for one fold and persist its indexes.

        Side effects: writes the fold's train/test index CSVs under
        ``indexes/`` and rebuilds ``self.lang`` from the training split only
        (so the vocabulary never sees test documents).
        """
        # indexes = [i for i in range(len(self.data_raw))]
        # train, test, _, _, idx_tr, idx_ts = train_test_split(self.data_raw, self.data_Y, indexes, test_size=TRAIN_TEST_SPLIT,
        #                                                      random_state=RANDOM_SEED,
        #                                                      shuffle=True,
        #                                                      stratify=self.data_Y)
        print("Create dataloaders")
        # get train and test indexes for this fold
        idx_tr, idx_ts = self.folds_idxs[fold_idx] # is a tuple
        train, test = [self.data_raw[idx] for idx in idx_tr], [self.data_raw[idx] for idx in idx_ts]
        # save indexes
        df_idx_tr = pd.DataFrame(idx_tr)
        df_idx_ts = pd.DataFrame(idx_ts)
        # filename suffixes distinguish pretrained-embedding / truncation variants
        pe_string = "_pe" if self.model_config.get("pretrained_embeddings") else ""
        truncation_string = self.model_config.get("truncation_strategy", "")
        df_idx_tr.to_csv(f"indexes/{self.model_config['model_name']}_{self.task}{pe_string}{truncation_string}_train_{fold_idx:02d}.csv")
        df_idx_ts.to_csv(f"indexes/{self.model_config['model_name']}_{self.task}{pe_string}{truncation_string}_test_{fold_idx:02d}.csv")
        # vocabulary is built from training documents only
        words = [word for sample in train for word in sample["document"]]
        self.lang = Lang(words)
        max_len = self.model_config.get("sequence_max_len")
        train_dataset = CustomDataset(train, self.lang, max_len=max_len)
        test_dataset = CustomDataset(test, self.lang, max_len=max_len)
        self.train_loader = DataLoader(train_dataset, batch_size=self.model_config["batch_size"], collate_fn=train_dataset.collate_fn, shuffle=True)
        self.test_loader = DataLoader(test_dataset, batch_size=self.model_config["batch_size"], collate_fn=test_dataset.collate_fn)

    def run(self):
        """Train one model per fold and return the best one overall.

        Returns:
            The trained model (deep copy) from the fold whose best test
            accuracy is highest across all folds.
        """
        self.prepare_data()
        self.create_folds(N_FOLDS)
        models = []
        metrics_list = []
        for fold_idx in range(N_FOLDS):
            self.create_dataloaders(fold_idx)
            if self.lang:
                # word-embedding models receive the vocab size explicitly
                vocab_size = len(self.lang.word2id)
                self.model_config["vocab_size"] = vocab_size # save vocab size to load model for inference
                model = self.ModelType(vocab_size, self.model_config)
                if self.model_config["pretrained_embeddings"]:
                    print("Loading pretrained word embeddings")
                    embds = GloVe(name='840B', dim=300)
                    embeddings = torch.tensor(load_pretrained_vectors(self.lang.word2id, embds), dtype=torch.float)
                    model.embedding = nn.Embedding.from_pretrained(embeddings, padding_idx=PAD_TOKEN)
            else:
                # transformer subclasses never set self.lang; config carries everything
                model = self.ModelType(self.model_config)
            model.to(DEVICE)
            pe_string = "_pe" if self.model_config.get("pretrained_embeddings") else ""
            truncation_string = self.model_config.get("truncation_strategy", "")
            # one W&B run per fold, grouped by model name
            run = wandb.init(
                project=WANDB_PROJECT,
                entity=WANDB_ENTITY,
                group=f"{self.model_config['model_name']}",
                name=f"{self.task}_{self.model_config['model_name']}{pe_string}{truncation_string}_fold_{fold_idx:02d}",
                config={
                    "task": self.task,
                    **self.model_config,
                    "loss": "BCELoss",
                    "optimizer": "Adam"
                }
            )
            print(model)
            wandb.watch(model, "gradients", log_freq=5)
            self.optimizer = optim.Adam(model.parameters(), lr=run.config['lr'])
            self.cost_fn = torch.nn.BCEWithLogitsLoss()
            best_model, metrics = self.training_loop(model, self.train_loader, self.test_loader, run)
            models.append(best_model)
            metrics_list.append(metrics)
        # print average and std for metrics
        metrics_df = pd.DataFrame.from_dict(metrics_list)
        metrics_df.loc["mean"] = metrics_df[:N_FOLDS].mean()
        metrics_df.loc["std"] = metrics_df[:N_FOLDS].std()
        print(metrics_df)
        pe_string = "_pe" if self.model_config.get("pretrained_embeddings") else ""
        truncation_string = self.model_config.get("truncation_strategy", "")
        metrics_df.to_csv(f"{STATS_SAVE_PATH}/{self.task}/{self.model_config['model_name']}_{self.task}{pe_string}{truncation_string}.csv")
        # NOTE(review): idxmax runs over the frame including the "mean"/"std"
        # rows; it still resolves to a fold label because a mean cannot exceed
        # the per-fold maximum, but restricting to [:N_FOLDS] would be safer.
        best_model_overall_idx = metrics_df["acc"].idxmax()
        return models[best_model_overall_idx]

    def training_loop(self, model, tr_dl, ts_dl, wandb_run, save=True):
        """Run the epoch loop for one fold, tracking the best test accuracy.

        Args:
            model: the model to train (already on DEVICE).
            tr_dl / ts_dl: train and test DataLoaders.
            wandb_run: active W&B run; its config supplies epochs, clip, etc.
            save: when True, checkpoint and upload weights on each improvement.

        Returns:
            (best_model, best_metrics) — deep copy of the best-accuracy model
            and a dict with its "loss", "acc", and "f1".
        """
        print(f"Runnig: {wandb_run.name}")
        # Check if model is pretrained to avoid initializing weights
        if not wandb_run.config.get("pretrained"):
            print("Model is not pretrained: initializing weigths.")
            model.apply(init_weights)
        optimizer = self.optimizer
        cost_fn = self.cost_fn
        # placeholders only; the "e == 0" guard below guarantees they are
        # overwritten on the first epoch
        best_loss = 0.
        best_acc = 0.
        print("Start training")
        for e in tqdm(range(wandb_run.config['epochs']), desc="Training Loop"):
            train_metrics = self.training_step(model, tr_dl, optimizer, cost_fn, clip=wandb_run.config["clip_gradients"], epoch=e)
            test_metrics = self.test_step(model, ts_dl, cost_fn, epoch=e)
            metrics = {**train_metrics, **test_metrics}
            wandb.log(metrics)
            train_loss = train_metrics['train/train_loss']
            train_acc = train_metrics['train/train_acc']
            test_loss = test_metrics['test/test_loss']
            test_acc = test_metrics['test/test_acc']
            test_f1 = test_metrics['test/test_f1']
            # model selection criterion: best test accuracy
            if best_acc < test_acc or e == 0:
                best_acc = test_acc
                best_loss = test_loss
                best_f1 = test_f1
                best_model = copy.deepcopy(model)
                # Save new best weights
                if save:
                    self.save_weights(e, model, optimizer, test_loss, f"{WEIGHTS_SAVE_PATH}/{wandb_run.name}.pth")
                    artifact = wandb.Artifact(f'{wandb_run.name}', type='model', metadata={**wandb_run.config, **metrics})
                    artifact.add_file(f"{WEIGHTS_SAVE_PATH}/{wandb_run.name}.pth")
                    wandb_run.log_artifact(artifact)
            print('\n Epoch: {:d}'.format(e + 1))
            print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss, train_acc))
            print('\t Test loss {:.5f}, Test accuracy {:.2f}, Test F1 {:.2f}'.format(test_loss, test_acc, test_f1))
            print('-----------------------------------------------------')
        #visualize(best_model, ts_dl, wandb_run)
        print('\t BEST Test loss {:.5f}, Test accuracy {:.2f}, Test F1 {:.2f}'.format(best_loss, best_acc, best_f1))
        wandb.summary["test_best_loss"] = best_loss
        wandb.summary["test_best_accuracy"] = best_acc
        wandb.summary["test_best_f1"] = best_f1
        wandb.finish()
        best_metrics = {"loss": best_loss, "acc": best_acc, "f1": best_f1}
        return best_model, best_metrics

    def training_step(self, model, data_loader, optimizer, cost_function, clip=0, epoch=0):
        """One training epoch; returns {"train/train_loss", "train/train_acc"}.

        ``clip`` > 0 enables gradient-norm clipping at that threshold.
        """
        n_samples = 0
        cumulative_loss = 0.
        cumulative_accuracy = 0.
        model.train()
        for batch_idx, (inputs, targets) in enumerate(tqdm(data_loader, desc="Training Step", leave=False)):
            for k in inputs.keys():
                inputs[k] = inputs[k].to(DEVICE)
            targets = targets.to(DEVICE)
            outputs = model(inputs)
            # this is to deal with models returning logits and attention scores, ignoring the latter
            if type(outputs) is tuple:
                outputs, *_ = outputs
            # BCEWithLogitsLoss expects float targets with a trailing dim
            loss = cost_function(outputs, targets.unsqueeze(-1).float())
            loss.backward()
            if clip != 0:
                nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
            optimizer.zero_grad()
            # add batch size
            n_samples += outputs.shape[0]
            # cumulative loss
            cumulative_loss += loss.item()
            # return predicted labels
            predicted = torch.sigmoid(outputs).round()
            # cumulative accuracy
            cumulative_accuracy += predicted.eq(targets.unsqueeze(-1)).sum().item()
        # avg loss and accuracy
        # NOTE(review): cumulative_loss sums per-batch MEAN losses but is
        # divided by the sample count, so the reported loss is scaled down by
        # roughly the batch size; consistent across runs, but not a true
        # per-sample average.
        loss = cumulative_loss / n_samples
        acc = cumulative_accuracy / n_samples
        metrics = {
            "train/train_loss": loss,
            "train/train_acc": acc
        }
        return metrics

    def test_step(self, model, data_loader, cost_function, epoch=0):
        """Evaluate on ``data_loader``; returns test loss, accuracy and F1."""
        n_samples = 0
        cumulative_loss = 0.
        cumulative_accuracy = 0.
        model.eval()
        y_gt = []
        y_pred = []
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(tqdm(data_loader, desc="Test Step", leave=False)):
                for k in inputs.keys():
                    inputs[k] = inputs[k].to(DEVICE)
                targets = targets.to(DEVICE)
                outputs = model(inputs)
                # this is to deal with models returning logits and attention scores, ignoring the latter
                if type(outputs) is tuple:
                    outputs, *_ = outputs
                loss = cost_function(outputs, targets.unsqueeze(-1).float())
                # add batch size
                n_samples += outputs.shape[0]
                # cumulative loss
                cumulative_loss += loss.item()
                # return predicted labels
                predicted = torch.sigmoid(outputs).round()
                y_pred += predicted.tolist()
                y_gt += targets.unsqueeze(-1).float().tolist()
                # cumulative accuracy
                cumulative_accuracy += predicted.eq(targets.unsqueeze(-1)).sum().item()
        # avg loss and accuracy
        # NOTE(review): same per-sample-vs-per-batch loss scaling as in
        # training_step above.
        loss = cumulative_loss / n_samples
        acc = cumulative_accuracy / n_samples
        f1 = f1_score(y_gt, y_pred)
        metrics = {
            "test/test_loss": loss,
            "test/test_acc": acc,
            "test/test_f1": f1
        }
        return metrics

    def save_weights(self, epoch, model, optimizer, loss, path, scheduler=None):
        """Checkpoint model/optimizer (and optional scheduler) state to ``path``.

        NOTE(review): the ``loss`` argument is accepted but not stored in the
        checkpoint dict.
        """
        save_dict = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler': scheduler.state_dict() if scheduler is not None else None
        }
        torch.save(save_dict, path)

    def load_weights(self, model, optimizer, weights_path, DEVICE, scheduler=None):
        """Restore a checkpoint saved by ``save_weights``.

        Returns:
            (epoch, model, optimizer, scheduler) with states loaded in place.
        """
        checkpoint = torch.load(weights_path, map_location=DEVICE)
        epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if scheduler != None:
            scheduler.load_state_dict(checkpoint['scheduler'])
        return epoch, model, optimizer, scheduler
class TransformerExperiment(Experiment):
    """K-fold experiment fine-tuning a pretrained transformer classifier.

    Requires a ``truncation_strategy`` keyword argument selecting how long
    documents are truncated by TransformerDataset.
    """

    def __init__(self, task="polarity", sjv_classifier=None, sjv_vectorizer=None, **kwargs):
        super().__init__(task, sjv_classifier, sjv_vectorizer)
        # Deep-copy the module-level config before mutating it: writing
        # "truncation_strategy"/"pretrained_model" straight into the shared
        # Transformer_config dict would leak state across experiment instances
        # created in the same process.
        self.model_config = copy.deepcopy(Transformer_config)
        self.model_config["truncation_strategy"] = kwargs["truncation_strategy"]
        self.ModelType = TransformerClassifier
        if task == "polarity" or task == "polarity-filter":
            # polarity-style tasks use a dedicated pretrained checkpoint
            self.model_config["pretrained_model"] = PRETRAINED_MODEL_NAME_POLARITY

    def create_dataloaders(self, fold_idx):
        """Build train/test TransformerDataset loaders for the given fold."""
        idx_tr, idx_ts = self.folds_idxs[fold_idx] # is a tuple
        train, test = [self.data_raw[idx] for idx in idx_tr], [self.data_raw[idx] for idx in idx_ts]
        train_y, test_y = [self.data_Y[idx] for idx in idx_tr], [self.data_Y[idx] for idx in idx_ts]
        train_dataset = TransformerDataset(train, train_y, self.model_config, self.task)
        test_dataset = TransformerDataset(test, test_y, self.model_config, self.task)
        self.train_loader = DataLoader(train_dataset, batch_size=self.model_config["batch_size"], shuffle=True)
        self.test_loader = DataLoader(test_dataset, batch_size=self.model_config["batch_size"])

    def prepare_data(self):
        """Delegate data loading to BaselineExperiment's implementation."""
        BaselineExperiment.prepare_data(self)
class LongSequenceExperiment(Experiment):
    """K-fold experiment for long-sequence transformer variants.

    The ``model`` keyword argument selects which entry of the
    LongSequence_config mapping to use as the hyper-parameter dict.
    """

    def __init__(self, task="polarity", sjv_classifier=None, sjv_vectorizer=None, **kwargs):
        super().__init__(task, sjv_classifier, sjv_vectorizer)
        self.model_config = LongSequence_config[kwargs["model"]]
        self.ModelType = TransformerClassifier

    def create_dataloaders(self, fold_idx):
        """Build train/test TransformerDataset loaders for the given fold."""
        train_idx, test_idx = self.folds_idxs[fold_idx]
        tr_samples = [self.data_raw[i] for i in train_idx]
        ts_samples = [self.data_raw[i] for i in test_idx]
        tr_labels = [self.data_Y[i] for i in train_idx]
        ts_labels = [self.data_Y[i] for i in test_idx]
        batch_size = self.model_config["batch_size"]
        self.train_loader = DataLoader(
            TransformerDataset(tr_samples, tr_labels, self.model_config, self.task),
            batch_size=batch_size,
            shuffle=True,
        )
        self.test_loader = DataLoader(
            TransformerDataset(ts_samples, ts_labels, self.model_config, self.task),
            batch_size=batch_size,
        )

    def prepare_data(self):
        """Delegate data loading to BaselineExperiment's implementation."""
        BaselineExperiment.prepare_data(self)
class BiGRUExperiment(Experiment):
    """K-fold experiment training the BiGRU classifier."""

    def __init__(self, task="polarity", sjv_classifier=None, sjv_vectorizer=None, pretrained_embeddings=False, **kwargs):
        super().__init__(task, sjv_classifier, sjv_vectorizer)
        # Deep-copy the shared config: this experiment writes per-instance
        # values into it ("pretrained_embeddings" here, "vocab_size" during
        # run()), and mutating the module-level BiGRU_config dict would leak
        # state across experiment instances.
        self.model_config = copy.deepcopy(BiGRU_config)
        self.ModelType = BiGRU
        self.model_config["pretrained_embeddings"] = pretrained_embeddings
class BiGRUAttentionExperiment(Experiment):
    """K-fold experiment training the BiGRU model with its attention config."""

    def __init__(self, task="polarity", sjv_classifier=None, sjv_vectorizer=None, pretrained_embeddings=False, **kwargs):
        super().__init__(task, sjv_classifier, sjv_vectorizer)
        # Deep-copy the shared config: this experiment writes per-instance
        # values into it ("pretrained_embeddings" here, "vocab_size" during
        # run()), and mutating the module-level BiGRUAttention_config dict
        # would leak state across experiment instances.
        self.model_config = copy.deepcopy(BiGRUAttention_config)
        self.ModelType = BiGRU
        self.model_config["pretrained_embeddings"] = pretrained_embeddings
class TextCNNExperiment(Experiment):
    """K-fold experiment training the TextCNN classifier."""

    def __init__(self, task="polarity", sjv_classifier=None, sjv_vectorizer=None, pretrained_embeddings=False, **kwargs):
        super().__init__(task, sjv_classifier, sjv_vectorizer)
        # Deep-copy the shared config: this experiment writes per-instance
        # values into it ("pretrained_embeddings" here, "vocab_size" during
        # run()), and mutating the module-level TextCNN_config dict would
        # leak state across experiment instances.
        self.model_config = copy.deepcopy(TextCNN_config)
        self.ModelType = TextCNN
        self.model_config["pretrained_embeddings"] = pretrained_embeddings
class AMCNNExperiment(Experiment):
    """K-fold experiment training the AMCNN classifier."""

    def __init__(self, task="polarity", sjv_classifier=None, sjv_vectorizer=None, pretrained_embeddings=False, **kwargs):
        super().__init__(task, sjv_classifier, sjv_vectorizer)
        # Deep-copy the shared config: this experiment writes per-instance
        # values into it ("pretrained_embeddings" and "sequence_max_len" here,
        # "vocab_size" during run()), and mutating the module-level
        # AMCNN_config dict would leak state across experiment instances —
        # e.g. a later experiment on a different task would silently change
        # an earlier instance's sequence_max_len.
        self.model_config = copy.deepcopy(AMCNN_config)
        self.ModelType = AMCNN
        self.model_config["pretrained_embeddings"] = pretrained_embeddings
        # task-specific maximum sequence length from settings
        self.model_config["sequence_max_len"] = SEQUENCE_MAX_LENGTHS[self.task]