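"""PTB language model (TensorFlow 1.x).

A two-layer LSTM trained on the Penn Treebank dataset. Builds a training
graph and a variable-sharing evaluation graph, streams batches from the
c8_reader queue producers, and reports perplexity during training.
"""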
import numpy as np
import tensorflow as tf

import c8_reader as reader
DATA_PATH = 'temp/ptb/simple-examples/data'

HIDDEN_SIZE = 200       # size of each LSTM hidden state
NUM_LAYERS = 2          # number of stacked LSTM layers
VOCAB_SIZE = 10000      # PTB vocabulary size
LEARNING_RATE = 1.0     # SGD learning rate
TRAIN_BATCH_SIZE = 20
TRAIN_NUM_STEP = 35     # truncated-backprop unroll length for training
NUM_EPOCH = 2
KEEP_PROB = 0.5         # dropout keep probability (training only)
MAX_GRAD_NORM = 5       # global-norm gradient clipping threshold
EVAL_BATCH_SIZE = 1
EVAL_NUM_STEP = 1       # evaluation runs one step at a time
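

# Language model graph: word embeddings -> stacked LSTM -> softmax projection
# onto the vocabulary, trained with truncated backpropagation through time.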
class PTBModel:
    def __init__(self, is_training, batch_size, num_steps):
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])
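
        # Stack NUM_LAYERS LSTM cells; during training each cell's output
        # passes through dropout.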
        func_lstm_cell = lambda: tf.contrib.rnn.LSTMCell(HIDDEN_SIZE)
        if is_training:
            func_lstm_cell = lambda: tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.LSTMCell(HIDDEN_SIZE), output_keep_prob=KEEP_PROB)
        cell = tf.contrib.rnn.MultiRNNCell([func_lstm_cell() for _ in range(NUM_LAYERS)])
        self.initial_state = cell.zero_state(batch_size, tf.float32)
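
        # Map word ids to embedding vectors; apply input dropout in training.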
        embedding = tf.get_variable('embedding', [VOCAB_SIZE, HIDDEN_SIZE])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        if is_training:
            inputs = tf.nn.dropout(inputs, KEEP_PROB)
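
        # Unroll the LSTM for num_steps steps, reusing the RNN variables
        # after the first time step.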
        outputs = []
        state = self.initial_state
        with tf.variable_scope('RNN'):
            for time_step in range(num_steps):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                cell_output, state = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)
        output = tf.reshape(tf.concat(outputs, 1), [-1, HIDDEN_SIZE])
        # Equivalent formulation using dynamic_rnn instead of the manual loop:
        # output, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32, initial_state=state)
        # output = tf.reshape(output, [-1, HIDDEN_SIZE])
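
        # Project the outputs onto the vocabulary; the cost is the summed
        # per-token cross-entropy, averaged over the batch.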
        weight = tf.get_variable('weight', [HIDDEN_SIZE, VOCAB_SIZE])
        bias = tf.get_variable('bias', [VOCAB_SIZE])
        logits = tf.matmul(output, weight) + bias
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([batch_size * num_steps], dtype=tf.float32)])
        self.cost = tf.reduce_sum(loss) / batch_size
        self.final_state = state

        if not is_training:
            return
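
        # Clip gradients by their global norm before applying plain SGD.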
        trainable_variables = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, trainable_variables), MAX_GRAD_NORM)
        optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
        self.train_op = optimizer.apply_gradients(zip(grads, trainable_variables))
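

# Run one epoch over `data`, threading the LSTM state across batches;
# returns the perplexity, exp(total cost / total time steps).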
def run_epoch(session, model, data, train_op, output_log, epoch_size):
    total_costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    for step in range(epoch_size):
        x, y = session.run(data)
        cost, state, _ = session.run(
            [model.cost, model.final_state, train_op],
            {model.input_data: x, model.targets: y, model.initial_state: state})
        total_costs += cost
        iters += model.num_steps
        if output_log and step % 100 == 0:
            print(f'After {step} steps, perplexity is {np.exp(total_costs / iters):.3f}')
    return np.exp(total_costs / iters)
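

# Build the training and evaluation models (sharing one variable scope),
# start the PTB producer queues, and train for NUM_EPOCH epochs.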
def main():
    train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)
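
    # Number of truncated-backprop iterations needed to cover each split.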
    train_data_len = len(train_data)
    train_batch_len = train_data_len // TRAIN_BATCH_SIZE
    train_epoch_size = (train_batch_len - 1) // TRAIN_NUM_STEP

    valid_data_len = len(valid_data)
    valid_batch_len = valid_data_len // EVAL_BATCH_SIZE
    valid_epoch_size = (valid_batch_len - 1) // EVAL_NUM_STEP

    test_data_len = len(test_data)
    test_batch_len = test_data_len // EVAL_BATCH_SIZE
    test_epoch_size = (test_batch_len - 1) // EVAL_NUM_STEP
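
    # Both models live in the same variable scope, so evaluation reuses the
    # weights learned by the training model.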
    initializer = tf.random_uniform_initializer(-0.05, 0.05)
    with tf.variable_scope('language_model', reuse=None, initializer=initializer):
        train_model = PTBModel(True, TRAIN_BATCH_SIZE, TRAIN_NUM_STEP)
    with tf.variable_scope('language_model', reuse=True, initializer=initializer):
        eval_model = PTBModel(False, EVAL_BATCH_SIZE, EVAL_NUM_STEP)
    with tf.Session() as session:
        tf.global_variables_initializer().run()

        # Queue-based input pipelines for the three data splits.
        train_q = reader.ptb_producer(train_data, train_model.batch_size, train_model.num_steps)
        eval_q = reader.ptb_producer(valid_data, eval_model.batch_size, eval_model.num_steps)
        test_q = reader.ptb_producer(test_data, eval_model.batch_size, eval_model.num_steps)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(session, coord)
        for i in range(NUM_EPOCH):
            print(f'In iteration: {i+1}')
            run_epoch(session, train_model, train_q, train_model.train_op, True, train_epoch_size)
            # Optional: evaluate on the validation and test sets each epoch.
            # valid_perplexity = run_epoch(session, eval_model, eval_q, tf.no_op(), False, valid_epoch_size)
            # print(f'Epoch: {i+1} Validation Perplexity: {valid_perplexity:.3f}')
            # test_perplexity = run_epoch(session, eval_model, test_q, tf.no_op(), False, test_epoch_size)
            # print(f'Epoch: {i+1} Test Perplexity: {test_perplexity:.3f}')

        coord.request_stop()
        coord.join(threads)


if __name__ == '__main__':
    main()