-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlnet.py
99 lines (86 loc) · 2.28 KB
/
lnet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM
from keras.layers.embeddings import Embedding
import numpy as np
import os
import random
from time import time
# global constants
MAX_WORD_LENGTH = 15              # every word is truncated/zero-padded to this many characters
LANGS = ["english","french","spanish"]  # one "data/<lang>.txt" word list per entry
ITERATIONS = 10                   # training epochs (passed as nb_epoch, Keras 1.x API)
BATCH_SIZE = 32                   # minibatch size for model.fit
NUM_TOP = 3                       # how many top-ranked languages to print per query
DUMP_FILE_NAME = "model.h5"       # path used to save/load the trained model
LSTM_NUM = 100                    # number of units in the LSTM layer
VERBOSE = True                    # verbosity flag forwarded to model.fit
random.seed(0)                    # fixed seed so the TOT shuffle is reproducible
NUM_LANGS = len(LANGS)            # number of output classes
DICTS = {}                        # lang index -> list of encoded word vectors
TOT = []                          # flat list of (encoded word, lang index) training pairs
def convWord(word, max_length=None):
    """Encode a word as a fixed-length vector of float character codes.

    Each character becomes ``float(ord(ch))``; the result is truncated or
    zero-padded so it is exactly *max_length* entries long.

    Parameters:
        word: the string to encode.
        max_length: target vector length; defaults to MAX_WORD_LENGTH so
            existing callers are unaffected.

    Returns:
        A list of floats of length ``max_length``.
    """
    if max_length is None:
        max_length = MAX_WORD_LENGTH
    # Slice first so over-long words are truncated before encoding.
    codes = [float(ord(ch)) for ch in word[:max_length]]
    # Zero-pad short words (0.0, not int 0, keeps the element type uniform;
    # 0.0 == 0 so this is behaviorally identical for callers).
    codes.extend(0.0 for _ in range(max_length - len(codes)))
    return codes
regen = True
if os.path.isfile("model.h5"):
print "Use existing neural network? [y/n]"
inp = raw_input()
if inp != "n":
regen = False
if regen:
tic = time()
# read in data
for i in range(len(LANGS)):
fin = open("data/" + LANGS[i] + ".txt").read().splitlines()
for j in range(len(fin)):
fin[j] = np.array(convWord(fin[j]))
TOT.append((fin[j],i))
DICTS[i] = fin
random.shuffle(TOT)
X = []
Y = []
for i in TOT:
X.append(i[0])
correct = [0.0 for j in range(NUM_LANGS)]
correct[i[1]] = 1.0
Y.append(np.array(correct))
X = np.array(X)
Y = np.array(Y)
#generate net
model = Sequential()
model.add(Embedding(len(X),MAX_WORD_LENGTH,input_length=MAX_WORD_LENGTH))
model.add(LSTM(LSTM_NUM))
model.add(Dense(MAX_WORD_LENGTH, init='uniform', activation='relu'))
model.add(Dense(NUM_LANGS, init='uniform', activation='sigmoid'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X,Y, nb_epoch=ITERATIONS, batch_size=BATCH_SIZE,verbose=VERBOSE)
scores = model.evaluate(X,Y)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
toc = time()
print "Time Elapsed: " + str(toc-tic) + " seconds."
model.save("model.h5")
else:
model = load_model("model.h5")
while True:
print "Enter word:"
word = raw_input()
out = model.predict(np.array(convWord(word)).reshape((1,MAX_WORD_LENGTH)))
out = out[0]
tmp = []
for i in range(len(out)):
tmp.append((out[i],LANGS[i]))
out = sorted(tmp,reverse=True)
tot = 0
for i in out:
tot += i[0]
for i in range(min(NUM_TOP,NUM_LANGS)):
print out[i][1] + ": " + str(100*(out[i][0]/tot)) + "%"