Commit cf7ded4: "Add files via upload"
Parent: 097d054
11 files changed, +4985 additions, 0 deletions

layerNormedGRU.py

+78 lines added (new file)
import numpy as np
import tensorflow as tf


class layerNormedGRU(tf.contrib.rnn.RNNCell):
    """GRU cell that applies layer normalization to each linear projection."""

    def __init__(
            self, size, activation=tf.tanh, reuse=None,
            normalizer=tf.contrib.layers.layer_norm,
            initializer=tf.contrib.layers.xavier_initializer()):
        super(layerNormedGRU, self).__init__(_reuse=reuse)
        self._size = size
        self._activation = activation
        self._normalizer = normalizer
        self._initializer = initializer

    @property
    def state_size(self):
        return self._size

    @property
    def output_size(self):
        return self._size

    def call(self, input_, state):
        # Update and reset gates share one projection; the -1 bias keeps the
        # gates mostly closed early in training.
        update, reset = tf.split(self._forward(
            'update_reset', [state, input_], 2 * self._size, tf.nn.sigmoid,
            bias_initializer=tf.constant_initializer(-1.)), 2, 1)
        candidate = self._forward(
            'candidate', [reset * state, input_], self._size, self._activation)
        state = (1 - update) * state + update * candidate
        return state, state

    def _forward(self, name, inputs, size, activation, **kwargs):
        with tf.variable_scope(name):
            return _forward(
                inputs, size, activation, normalizer=self._normalizer,
                weight_initializer=self._initializer, **kwargs)


def _forward(
        inputs, size, activation, normalizer=tf.contrib.layers.layer_norm,
        weight_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.zeros_initializer()):
    if not isinstance(inputs, (tuple, list)):
        inputs = (inputs,)
    shapes = []
    outputs = []
    # Project each input separately so that its output can be normalized
    # individually, then average the projections.
    for index, input_ in enumerate(inputs):
        shapes.append(input_.shape[1:-1].as_list())
        input_ = tf.contrib.layers.flatten(input_)
        weight = tf.get_variable(
            'weight_{}'.format(index + 1), (int(input_.shape[1]), size),
            tf.float32, weight_initializer)
        output = tf.matmul(input_, weight)
        if normalizer:
            output = normalizer(output)
        outputs.append(output)
    output = tf.reduce_mean(outputs, 0)
    # Add the bias after normalization so the normalizer does not cancel it.
    bias = tf.get_variable(
        'bias', (size,), tf.float32, bias_initializer)
    output += bias
    # Activation function.
    if activation:
        output = activation(output)
    # Restore shape dimensions that are consistent among inputs.
    min_dim = min(len(shape[1:]) for shape in shapes)
    dim_shapes = [[shape[dim] for shape in shapes] for dim in range(min_dim)]
    matching_dims = ''.join('NY'[len(set(x)) == 1] for x in dim_shapes) + 'N'
    agreement = matching_dims.index('N')
    remaining = sum(np.prod(shape[agreement:]) for shape in shapes)
    if agreement:
        batch_size = output.shape[0].value or -1
        shape = [batch_size] + shapes[:agreement] + [remaining]
        output = tf.reshape(output, shape)
    return output
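
As a quick reference, the cell drops into the standard TF 1.x RNN APIs like any other RNNCell. The snippet below is only an illustrative sketch; the input shapes and variable names are made up and it assumes a TF 1.x build where tf.contrib is available.

import tensorflow as tf
from layerNormedGRU import layerNormedGRU

# Hypothetical input: 8 sequences of 20 steps with 32 features each.
inputs = tf.placeholder(tf.float32, [8, 20, 32])
cell = layerNormedGRU(64, activation=tf.nn.relu)
# outputs has shape [8, 20, 64]; final_state has shape [8, 64].
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)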

model901.py

+87 lines added (new file)
import tensorflow as tf
import numpy as np
from layerNormedGRU import layerNormedGRU


class model:

    def __init__(self, num_class, topk_paths=10):
        # Inputs: spectrogram features [batch, 1000 frames, 161 bins], sparse
        # CTC labels, per-utterance sequence lengths, and a phase flag for
        # batch normalization.
        self.xs = tf.placeholder(tf.float32, [None, 1000, 161])
        self.ys = tf.sparse_placeholder(tf.int32)
        self.learning_rate = tf.placeholder(tf.float32)
        self.seq_len = tf.placeholder(tf.int32, [None])
        self.isTrain = tf.placeholder(tf.bool, name='phase')

        xs_input = tf.expand_dims(self.xs, 3)

        # Two strided convolutions with 'SAME' padding: 1000 frames / 3 -> 334
        # time steps, 161 bins / 2 / 2 -> 41 frequency channels.
        conv1 = self._nn_conv_bn_layer(xs_input, 'conv_1', [11, 41, 1, 32], [3, 2])
        conv2 = self._nn_conv_bn_layer(conv1, 'conv_2', [11, 21, 32, 32], [1, 2])
        conv_out = tf.reshape(conv2, [-1, 334, 41 * 32])
        # Three bidirectional layer-normalized GRU layers of 256 units each.
        biRNN1 = self._biRNN_bn_layer(conv_out, 'biRNN_1', 256)
        biRNN2 = self._biRNN_bn_layer(biRNN1, 'biRNN_2', 256)
        biRNN3 = self._biRNN_bn_layer(biRNN2, 'biRNN_3', 256)

        self.phonemes = tf.layers.dense(biRNN3, num_class)

        # Note: tf.nn.ctc_loss performs the softmax operation for you, so the
        # inputs should be unnormalized logits such as this linear projection.
        self.loss = tf.reduce_mean(tf.nn.ctc_loss(
            labels=self.ys, inputs=self.phonemes, sequence_length=self.seq_len,
            ignore_longer_outputs_than_inputs=True, time_major=False))

        optimizer = tf.train.AdamOptimizer(self.learning_rate, beta1=0.6, beta2=0.8)
        # Run the batch-norm statistics updates before each training step and
        # clip gradients to guard against blow-ups from the CTC loss.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            gvs = optimizer.compute_gradients(self.loss)
            capped_gvs = [(tf.clip_by_value(grad, -400., 400.), var)
                          for grad, var in gvs if grad is not None]
            self.train_op = optimizer.apply_gradients(capped_gvs)

        # The beam-search decoder expects time-major logits [time, batch, class].
        self.prediction, log_prob = tf.nn.ctc_beam_search_decoder(
            tf.transpose(self.phonemes, [1, 0, 2]), self.seq_len,
            top_paths=topk_paths, merge_repeated=False)

        self.loss_summary = tf.summary.scalar("loss", self.loss)
        self.merged = tf.summary.merge_all()

    def _nn_conv_bn_layer(self, inputs, scope, shape, strides):
        # Convolution plus bias, batch normalization, then ReLU6.
        with tf.variable_scope(scope):
            W_conv = tf.get_variable(
                "W", shape=shape,
                initializer=tf.contrib.layers.xavier_initializer())
            h_conv = tf.nn.conv2d(
                inputs, W_conv, strides=[1, strides[0], strides[1], 1],
                padding='SAME', name="conv2d")
            b = tf.get_variable(
                "bias", shape=[shape[3]],
                initializer=tf.contrib.layers.xavier_initializer())
            h_bn = tf.layers.batch_normalization(h_conv + b, training=self.isTrain)
            h_relu = tf.nn.relu6(h_bn, name="relu6")
            return h_relu

    def _biRNN_bn_layer(self, input, scope, hidden_units, cell="LayerNormedGRU"):
        with tf.variable_scope(scope):
            if cell == 'GRU':
                fw_cell = tf.nn.rnn_cell.GRUCell(hidden_units, activation=tf.nn.relu, name='fw_cell')
                bw_cell = tf.nn.rnn_cell.GRUCell(hidden_units, activation=tf.nn.relu, name='bw_cell')
            elif cell == 'LSTM':
                fw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_units, activation=tf.nn.relu, name='fw_cell')
                bw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_units, activation=tf.nn.relu, name='bw_cell')
            elif cell == 'vanilla':
                fw_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_units, activation=tf.nn.relu, name='fw_cell')
                bw_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_units, activation=tf.nn.relu, name='bw_cell')
            elif cell == 'LayerNormedGRU':
                with tf.variable_scope('fw_cell'):
                    fw_cell = layerNormedGRU(hidden_units, activation=tf.nn.relu)
                with tf.variable_scope('bw_cell'):
                    bw_cell = layerNormedGRU(hidden_units, activation=tf.nn.relu)
            else:
                raise ValueError("Invalid cell type: " + str(cell))

            (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell, input, dtype=tf.float32, scope="bi_dynamic_rnn")
            # output_fw_bn = tf.layers.batch_normalization(output_fw, training=self.isTrain, name='output_fw_bn')
            # output_bw_bn = tf.layers.batch_normalization(output_bw, training=self.isTrain, name='output_bw_bn')
            # bilstm_outputs_concat_1 = tf.concat([output_fw_bn, output_bw_bn], 2)
            # Concatenate forward and backward outputs along the feature axis.
            bilstm_outputs_concat_1 = tf.concat([output_fw, output_bw], 2)
            return bilstm_outputs_concat_1

    def train(self, sess, learning_rate, xs, ys):
        _, loss, summary = sess.run(
            [self.train_op, self.loss, self.merged],
            feed_dict={self.isTrain: True, self.learning_rate: learning_rate,
                       self.seq_len: np.ones(xs.shape[0], dtype=np.int32) * 334,
                       self.xs: xs, self.ys: ys})
        return loss, summary

    def get_loss(self, sess, xs, ys):
        loss = sess.run(
            self.loss,
            feed_dict={self.isTrain: False,
                       self.seq_len: np.ones(xs.shape[0], dtype=np.int32) * 334,
                       self.xs: xs, self.ys: ys})
        return loss

    def predict(self, sess, xs):
        prediction = sess.run(
            self.prediction,
            feed_dict={self.isTrain: False,
                       self.seq_len: np.ones(xs.shape[0], dtype=np.int32) * 334,
                       self.xs: xs})
        return prediction
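
Because ys is a sparse placeholder, the dense label sequences have to be packed into the (indices, values, dense_shape) triple that tf.sparse_placeholder accepts before calling train or get_loss. The helper below is not part of this commit; sparse_tuple_from and label_batch are hypothetical names used only for illustration.

import numpy as np

def sparse_tuple_from(sequences):
    # sequences: a list of label-id lists, one per utterance.
    indices, values = [], []
    for batch_index, seq in enumerate(sequences):
        indices.extend([[batch_index, t] for t in range(len(seq))])
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=np.int32)
    dense_shape = np.asarray(
        [len(sequences), max(len(seq) for seq in sequences)], dtype=np.int64)
    return indices, values, dense_shape

# ys_sparse = sparse_tuple_from(label_batch)
# loss, summary = m.train(sess, 1e-4, xs_batch, ys_sparse)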

model902.py

+89 lines added (new file)
import tensorflow as tf
import numpy as np
from layerNormedGRU import layerNormedGRU


class model:
    # Same layout as model901, but conv_2 uses 64 filters, there are five
    # bidirectional layers instead of three, and Adam uses beta1=0.7, beta2=0.9.

    def __init__(self, num_class, topk_paths=10):
        self.xs = tf.placeholder(tf.float32, [None, 1000, 161])
        self.ys = tf.sparse_placeholder(tf.int32)
        self.learning_rate = tf.placeholder(tf.float32)
        self.seq_len = tf.placeholder(tf.int32, [None])
        self.isTrain = tf.placeholder(tf.bool, name='phase')

        xs_input = tf.expand_dims(self.xs, 3)

        # Strided convolutions: 1000 frames / 3 -> 334 steps, 161 bins -> 41 channels.
        conv1 = self._nn_conv_bn_layer(xs_input, 'conv_1', [11, 41, 1, 32], [3, 2])
        conv2 = self._nn_conv_bn_layer(conv1, 'conv_2', [11, 21, 32, 64], [1, 2])
        conv_out = tf.reshape(conv2, [-1, 334, 41 * 64])
        # Five bidirectional layer-normalized GRU layers of 256 units each.
        biRNN1 = self._biRNN_bn_layer(conv_out, 'biRNN_1', 256)
        biRNN2 = self._biRNN_bn_layer(biRNN1, 'biRNN_2', 256)
        biRNN3 = self._biRNN_bn_layer(biRNN2, 'biRNN_3', 256)
        biRNN4 = self._biRNN_bn_layer(biRNN3, 'biRNN_4', 256)
        biRNN5 = self._biRNN_bn_layer(biRNN4, 'biRNN_5', 256)

        self.phonemes = tf.layers.dense(biRNN5, num_class)

        # Note: tf.nn.ctc_loss performs the softmax operation for you, so the
        # inputs should be unnormalized logits such as this linear projection.
        self.loss = tf.reduce_mean(tf.nn.ctc_loss(
            labels=self.ys, inputs=self.phonemes, sequence_length=self.seq_len,
            ignore_longer_outputs_than_inputs=True, time_major=False))

        optimizer = tf.train.AdamOptimizer(self.learning_rate, beta1=0.7, beta2=0.9)
        # Run batch-norm updates before each step and clip gradients.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            gvs = optimizer.compute_gradients(self.loss)
            capped_gvs = [(tf.clip_by_value(grad, -400., 400.), var)
                          for grad, var in gvs if grad is not None]
            self.train_op = optimizer.apply_gradients(capped_gvs)

        # The beam-search decoder expects time-major logits [time, batch, class].
        self.prediction, log_prob = tf.nn.ctc_beam_search_decoder(
            tf.transpose(self.phonemes, [1, 0, 2]), self.seq_len,
            top_paths=topk_paths, merge_repeated=False)

        self.loss_summary = tf.summary.scalar("loss", self.loss)
        self.merged = tf.summary.merge_all()

    def _nn_conv_bn_layer(self, inputs, scope, shape, strides):
        # Convolution plus bias, batch normalization, then ReLU6.
        with tf.variable_scope(scope):
            W_conv = tf.get_variable(
                "W", shape=shape,
                initializer=tf.contrib.layers.xavier_initializer())
            h_conv = tf.nn.conv2d(
                inputs, W_conv, strides=[1, strides[0], strides[1], 1],
                padding='SAME', name="conv2d")
            b = tf.get_variable(
                "bias", shape=[shape[3]],
                initializer=tf.contrib.layers.xavier_initializer())
            h_bn = tf.layers.batch_normalization(h_conv + b, training=self.isTrain)
            h_relu = tf.nn.relu6(h_bn, name="relu6")
            return h_relu

    def _biRNN_bn_layer(self, input, scope, hidden_units, cell="LayerNormedGRU"):
        with tf.variable_scope(scope):
            if cell == 'GRU':
                fw_cell = tf.nn.rnn_cell.GRUCell(hidden_units, activation=tf.nn.relu, name='fw_cell')
                bw_cell = tf.nn.rnn_cell.GRUCell(hidden_units, activation=tf.nn.relu, name='bw_cell')
            elif cell == 'LSTM':
                fw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_units, activation=tf.nn.relu, name='fw_cell')
                bw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_units, activation=tf.nn.relu, name='bw_cell')
            elif cell == 'vanilla':
                fw_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_units, activation=tf.nn.relu, name='fw_cell')
                bw_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_units, activation=tf.nn.relu, name='bw_cell')
            elif cell == 'LayerNormedGRU':
                with tf.variable_scope('fw_cell'):
                    fw_cell = layerNormedGRU(hidden_units, activation=tf.nn.relu)
                with tf.variable_scope('bw_cell'):
                    bw_cell = layerNormedGRU(hidden_units, activation=tf.nn.relu)
            else:
                raise ValueError("Invalid cell type: " + str(cell))

            (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell, input, dtype=tf.float32, scope="bi_dynamic_rnn")
            # output_fw_bn = tf.layers.batch_normalization(output_fw, training=self.isTrain, name='output_fw_bn')
            # output_bw_bn = tf.layers.batch_normalization(output_bw, training=self.isTrain, name='output_bw_bn')
            # bilstm_outputs_concat_1 = tf.concat([output_fw_bn, output_bw_bn], 2)
            # Concatenate forward and backward outputs along the feature axis.
            bilstm_outputs_concat_1 = tf.concat([output_fw, output_bw], 2)
            return bilstm_outputs_concat_1

    def train(self, sess, learning_rate, xs, ys):
        _, loss, summary = sess.run(
            [self.train_op, self.loss, self.merged],
            feed_dict={self.isTrain: True, self.learning_rate: learning_rate,
                       self.seq_len: np.ones(xs.shape[0], dtype=np.int32) * 334,
                       self.xs: xs, self.ys: ys})
        return loss, summary

    def get_loss(self, sess, xs, ys):
        loss = sess.run(
            self.loss,
            feed_dict={self.isTrain: False,
                       self.seq_len: np.ones(xs.shape[0], dtype=np.int32) * 334,
                       self.xs: xs, self.ys: ys})
        return loss

    def predict(self, sess, xs):
        prediction = sess.run(
            self.prediction,
            feed_dict={self.isTrain: False,
                       self.seq_len: np.ones(xs.shape[0], dtype=np.int32) * 334,
                       self.xs: xs})
        return prediction
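
For context, a surrounding training loop might look roughly like the sketch below. Nothing in it is defined in this commit: num_class=29, the 'logs' directory, and get_next_batch are placeholder assumptions standing in for the real label set and data pipeline.

import tensorflow as tf
from model902 import model

m = model(num_class=29)  # assumed label count, e.g. characters plus the CTC blank
writer = tf.summary.FileWriter('logs')  # hypothetical summary directory

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(10000):  # arbitrary number of steps
        xs_batch, ys_sparse = get_next_batch()  # hypothetical data pipeline
        loss, summary = m.train(sess, 1e-4, xs_batch, ys_sparse)
        writer.add_summary(summary, step)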

model903.py

+89 lines added (new file)
import tensorflow as tf
import numpy as np
from layerNormedGRU import layerNormedGRU


class model:
    # Same layout as model902, but each bidirectional layer uses 1024 units.

    def __init__(self, num_class, topk_paths=10):
        self.xs = tf.placeholder(tf.float32, [None, 1000, 161])
        self.ys = tf.sparse_placeholder(tf.int32)
        self.learning_rate = tf.placeholder(tf.float32)
        self.seq_len = tf.placeholder(tf.int32, [None])
        self.isTrain = tf.placeholder(tf.bool, name='phase')

        xs_input = tf.expand_dims(self.xs, 3)

        # Strided convolutions: 1000 frames / 3 -> 334 steps, 161 bins -> 41 channels.
        conv1 = self._nn_conv_bn_layer(xs_input, 'conv_1', [11, 41, 1, 32], [3, 2])
        conv2 = self._nn_conv_bn_layer(conv1, 'conv_2', [11, 21, 32, 64], [1, 2])
        conv_out = tf.reshape(conv2, [-1, 334, 41 * 64])
        # Five bidirectional layer-normalized GRU layers of 1024 units each.
        biRNN1 = self._biRNN_bn_layer(conv_out, 'biRNN_1', 1024)
        biRNN2 = self._biRNN_bn_layer(biRNN1, 'biRNN_2', 1024)
        biRNN3 = self._biRNN_bn_layer(biRNN2, 'biRNN_3', 1024)
        biRNN4 = self._biRNN_bn_layer(biRNN3, 'biRNN_4', 1024)
        biRNN5 = self._biRNN_bn_layer(biRNN4, 'biRNN_5', 1024)

        self.phonemes = tf.layers.dense(biRNN5, num_class)

        # Note: tf.nn.ctc_loss performs the softmax operation for you, so the
        # inputs should be unnormalized logits such as this linear projection.
        self.loss = tf.reduce_mean(tf.nn.ctc_loss(
            labels=self.ys, inputs=self.phonemes, sequence_length=self.seq_len,
            ignore_longer_outputs_than_inputs=True, time_major=False))

        optimizer = tf.train.AdamOptimizer(self.learning_rate, beta1=0.7, beta2=0.9)
        # Run batch-norm updates before each step and clip gradients.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            gvs = optimizer.compute_gradients(self.loss)
            capped_gvs = [(tf.clip_by_value(grad, -400., 400.), var)
                          for grad, var in gvs if grad is not None]
            self.train_op = optimizer.apply_gradients(capped_gvs)

        # The beam-search decoder expects time-major logits [time, batch, class].
        self.prediction, log_prob = tf.nn.ctc_beam_search_decoder(
            tf.transpose(self.phonemes, [1, 0, 2]), self.seq_len,
            top_paths=topk_paths, merge_repeated=False)

        self.loss_summary = tf.summary.scalar("loss", self.loss)
        self.merged = tf.summary.merge_all()

    def _nn_conv_bn_layer(self, inputs, scope, shape, strides):
        # Convolution plus bias, batch normalization, then ReLU6.
        with tf.variable_scope(scope):
            W_conv = tf.get_variable(
                "W", shape=shape,
                initializer=tf.contrib.layers.xavier_initializer())
            h_conv = tf.nn.conv2d(
                inputs, W_conv, strides=[1, strides[0], strides[1], 1],
                padding='SAME', name="conv2d")
            b = tf.get_variable(
                "bias", shape=[shape[3]],
                initializer=tf.contrib.layers.xavier_initializer())
            h_bn = tf.layers.batch_normalization(h_conv + b, training=self.isTrain)
            h_relu = tf.nn.relu6(h_bn, name="relu6")
            return h_relu

    def _biRNN_bn_layer(self, input, scope, hidden_units, cell="LayerNormedGRU"):
        with tf.variable_scope(scope):
            if cell == 'GRU':
                fw_cell = tf.nn.rnn_cell.GRUCell(hidden_units, activation=tf.nn.relu, name='fw_cell')
                bw_cell = tf.nn.rnn_cell.GRUCell(hidden_units, activation=tf.nn.relu, name='bw_cell')
            elif cell == 'LSTM':
                fw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_units, activation=tf.nn.relu, name='fw_cell')
                bw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_units, activation=tf.nn.relu, name='bw_cell')
            elif cell == 'vanilla':
                fw_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_units, activation=tf.nn.relu, name='fw_cell')
                bw_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_units, activation=tf.nn.relu, name='bw_cell')
            elif cell == 'LayerNormedGRU':
                with tf.variable_scope('fw_cell'):
                    fw_cell = layerNormedGRU(hidden_units, activation=tf.nn.relu)
                with tf.variable_scope('bw_cell'):
                    bw_cell = layerNormedGRU(hidden_units, activation=tf.nn.relu)
            else:
                raise ValueError("Invalid cell type: " + str(cell))

            (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell, input, dtype=tf.float32, scope="bi_dynamic_rnn")
            # output_fw_bn = tf.layers.batch_normalization(output_fw, training=self.isTrain, name='output_fw_bn')
            # output_bw_bn = tf.layers.batch_normalization(output_bw, training=self.isTrain, name='output_bw_bn')
            # bilstm_outputs_concat_1 = tf.concat([output_fw_bn, output_bw_bn], 2)
            # Concatenate forward and backward outputs along the feature axis.
            bilstm_outputs_concat_1 = tf.concat([output_fw, output_bw], 2)
            return bilstm_outputs_concat_1

    def train(self, sess, learning_rate, xs, ys):
        _, loss, summary = sess.run(
            [self.train_op, self.loss, self.merged],
            feed_dict={self.isTrain: True, self.learning_rate: learning_rate,
                       self.seq_len: np.ones(xs.shape[0], dtype=np.int32) * 334,
                       self.xs: xs, self.ys: ys})
        return loss, summary

    def get_loss(self, sess, xs, ys):
        loss = sess.run(
            self.loss,
            feed_dict={self.isTrain: False,
                       self.seq_len: np.ones(xs.shape[0], dtype=np.int32) * 334,
                       self.xs: xs, self.ys: ys})
        return loss

    def predict(self, sess, xs):
        prediction = sess.run(
            self.prediction,
            feed_dict={self.isTrain: False,
                       self.seq_len: np.ones(xs.shape[0], dtype=np.int32) * 334,
                       self.xs: xs})
        return prediction
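
predict returns one SparseTensorValue per beam path (topk_paths of them), so the hypotheses usually need to be unpacked into per-utterance label lists. A minimal sketch, reusing the hypothetical m and xs_batch from the training sketch above:

import numpy as np

def sparse_to_lists(sparse_path):
    # sparse_path: one element of the list returned by predict().
    num_utterances = int(sparse_path.dense_shape[0])
    sequences = [[] for _ in range(num_utterances)]
    for (batch_index, _), label in zip(sparse_path.indices, sparse_path.values):
        sequences[int(batch_index)].append(int(label))
    return sequences

# best_paths = m.predict(sess, xs_batch)
# top_hypotheses = sparse_to_lists(best_paths[0])  # best beam for each utterance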
