adversarial_examples.py
import json
import argparse
import nltk
import torch
import wandb
from nltk.corpus import movie_reviews, subjectivity, stopwords
from sklearn.metrics import accuracy_score, f1_score
from baseline import BaselineExperiment
from experiment import Experiment
from models import *
from settings import *
nameToModel = {
    "BiGRU": BiGRU,
    "BiGRUAttention": BiGRU,  # note: both GRU variants map to the same class here
    "TextCNN": TextCNN
}
# Subjective sentences generated by ChatGPT using only tokens from the objective-only lexicon
subj_sentences = [
    "I was shocked to discover that the financial webcams we had been using were actually part of a scheme known as 'frodes', and I couldn't believe that Daddy's client would scoff at the idea of being caught up in such a bale of trouble.",
    "I felt betrayed and stunned, but I knew I had to move on and find a new situation-based opportunity, even if it meant leaving behind the familiar Composers' Castle and the territorial Marjorie and Margaret"
]
obj_sentences = [
    "The widely reserved, self-determination and simplicity of the 12-step program have proven to be an effective life-affirming method for those seeking to overcome addiction and achieve reconciliation with themselves and others.",
    "The artist-agent's creative approach to marketing and promotion has helped to boost the success and stylishness of numerous music and entertainment projects."
]
sentences = obj_sentences + subj_sentences
def baseline(task):
    exp_subjectivity = BaselineExperiment(task=task)
    classifier, vectorizer = exp_subjectivity.run()
    vectors = vectorizer.transform(sentences)
    preds = classifier.predict(vectors)
    print(preds)
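    # Minimal scoring sketch (an assumption, not in the original script): compare
    # against the expected labels, assuming the baseline classifier emits the same
    # 0/1 convention (0 = objective, 1 = subjective) as y_gt in the main block below.
    print(accuracy_score([0, 0, 1, 1], preds))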
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model", choices=["Baseline", "BiGRU", "BiGRUAttention", "TextCNN"],
        help="Specify the model type, e.g. 'BiGRU'.")
    parser.add_argument("task", choices=["subjectivity"],
                        help="Specify which task to perform.")
    parser.add_argument("--fold_index", type=int, choices=[0, 1, 2, 3, 4],
                        help="Specify the fold index to load the correct train/test split.")
    parser.add_argument("-pe", "--pretrained_embeddings", action="store_true",
                        help="Use pretrained embeddings.")
    args = parser.parse_args()

    sjv_classifier = None
    sjv_vectorizer = None

    if args.model == "Baseline":
        baseline(args.task)
        exit(0)
    # download the trained checkpoint from the Weights & Biases artifact registry
    api = wandb.Api()
    pe_string = "_pe" if args.pretrained_embeddings else ""
    name = f"{args.task}_{args.model}{pe_string}_fold_{args.fold_index:02d}"
    artifact_name = f'{WANDB_ENTITY}/{WANDB_PROJECT}/{name}:latest'
    print(artifact_name)
    checkpoint_file = f"{name}.pth"
    print(checkpoint_file)
    artifact = api.artifact(artifact_name)
    artifact.download(root=WEIGHTS_SAVE_PATH)
    print(artifact.metadata)

    # rebuild the model from the configuration stored with the artifact
    model_config = artifact.metadata
    if model_config.get("vocab_size"):
        model = nameToModel[args.model](model_config["vocab_size"], model_config)
    else:
        raise Exception("Config does not specify vocab_size.")
    checkpoint = torch.load(f"{WEIGHTS_SAVE_PATH}/{checkpoint_file}", map_location=DEVICE)
    model.load_state_dict(checkpoint['model_state_dict'])
    # rebuild the same vocabulary (lang) the model was trained on
    exp = Experiment(args.task, sjv_classifier, sjv_vectorizer)
    exp.model_config = model_config
    exp.prepare_data()
    exp.create_folds()
    exp.create_dataloaders(args.fold_index)
    # tokenize and lowercase the probe sentences
    tokenized = [nltk.WordPunctTokenizer().tokenize(sent) for sent in sentences]
    tokenized = [[t.lower() for t in sent] for sent in tokenized]
    print(tokenized)

    # map tokens to vocabulary ids; out-of-vocabulary tokens fall back to <unk>
    # (no padding is needed, since sentences are fed to the model one at a time)
    ids = [[exp.lang.word2id.get(t, exp.lang.word2id['<unk>']) for t in sent] for sent in tokenized]
    ids = [torch.tensor(sent) for sent in ids]
    # ground truth: the two objective sentences (0) come first, then the two subjective ones (1)
    y_gt = [0, 0, 1, 1]
    print(y_gt)
    # predict, one sentence per batch
    y_pred = []
    model.eval()
    with torch.no_grad():
        for sent in ids:
            # take the sequence length before adding the batch dimension,
            # otherwise len(sent) would be the batch size (1)
            text_len = torch.tensor(len(sent)).unsqueeze(0).to(DEVICE)
            sent = sent.unsqueeze(0).to(DEVICE)
            out = model({"document": sent, "text_len": text_len})
            if args.model == "BiGRUAttention":
                # the attention model also returns attention weights; keep the logits only
                out = out[0]
            prediction = torch.sigmoid(out).round().int()
            y_pred.append(prediction.item())
    print(y_pred)
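    # Minimal scoring sketch (an assumption, not in the original script): the
    # accuracy_score/f1_score imports above suggest the adversarial probes were
    # meant to be scored against y_gt rather than only printed.
    print(f"accuracy: {accuracy_score(y_gt, y_pred):.2f}")
    print(f"f1: {f1_score(y_gt, y_pred):.2f}")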