Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 36 additions & 64 deletions single/dssm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import print_function

import pickle
import random
import time
Expand Down Expand Up @@ -32,7 +34,8 @@ def load_train_data(pack_idx):
doc_train_data = pickle.load(open('../data/doc.train.' + str(pack_idx)+ '.pickle', 'rb')).tocsr()
query_train_data = pickle.load(open('../data/query.train.'+ str(pack_idx)+ '.pickle', 'rb')).tocsr()
end = time.time()
print ("\nTrain data %d/9 is loaded in %.2fs" % (pack_idx, end - start))
print("\nTrain data {} is loaded in {:.2f}".format(pack_idx, end - start))


end = time.time()
print("Loading data from HDD to memory: %.2fs" % (end - start))
Expand All @@ -45,33 +48,29 @@ def load_train_data(pack_idx):
L1_N = 400
L2_N = 120

query_in_shape = np.array([BS, TRIGRAM_D], np.int64)
doc_in_shape = np.array([BS, TRIGRAM_D], np.int64)


def variable_summaries(var, name):
    """Attach mean, spread, max, min and histogram summaries to a tensor.

    Args:
        var: Tensor to summarise (called in this file on weight and bias
            variables).
        name: Suffix appended to every summary tag, e.g. ``'mean/' + name``.
    """
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean/' + name, mean)
        with tf.name_scope('stddev'):
            # A standard deviation is the root of the *mean* squared
            # deviation; reduce_sum would grow with the number of elements.
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        # The 'sttdev/' tag spelling is kept as-is so existing TensorBoard
        # runs keep lining up with new ones.
        tf.summary.scalar('sttdev/' + name, stddev)
        tf.summary.scalar('max/' + name, tf.reduce_max(var))
        tf.summary.scalar('min/' + name, tf.reduce_min(var))
        tf.summary.histogram(name, var)


with tf.name_scope('input'):
    # Sparse query and document batches, expected dense shape [BS, TRIGRAM_D].
    # Each placeholder is created exactly once and without a static shape, so
    # it no longer depends on the query_in_shape / doc_in_shape arrays.
    query_batch = tf.sparse_placeholder(tf.float32, name='QueryBatch')
    doc_batch = tf.sparse_placeholder(tf.float32, name='DocBatch')

with tf.name_scope('L1'):
l1_par_range = np.sqrt(6.0 / (TRIGRAM_D + L1_N))
weight1 = tf.Variable(tf.random_uniform([TRIGRAM_D, L1_N], -l1_par_range, l1_par_range))
bias1 = tf.Variable(tf.random_uniform([L1_N], -l1_par_range, l1_par_range))
weight1 = tf.Variable(tf.random_uniform([TRIGRAM_D, L1_N], -l1_par_range, l1_par_range), name="weight1")
bias1 = tf.Variable(tf.random_uniform([L1_N], -l1_par_range, l1_par_range), name="bias1")
variable_summaries(weight1, 'L1_weights')
variable_summaries(bias1, 'L1_biases')

Expand All @@ -86,34 +85,33 @@ def variable_summaries(var, name):
with tf.name_scope('L2'):
    # Uniform initialisation bound sqrt(6 / (fan_in + fan_out)) for this layer.
    l2_par_range = np.sqrt(6.0 / (L1_N + L2_N))

    # One weight matrix and one bias vector, shared by the query and the
    # document towers. Naming them makes them identifiable in the graph.
    weight2 = tf.Variable(tf.random_uniform([L1_N, L2_N], -l2_par_range, l2_par_range), name="weight2")
    bias2 = tf.Variable(tf.random_uniform([L2_N], -l2_par_range, l2_par_range), name="bias2")
    variable_summaries(weight2, 'L2_weights')
    variable_summaries(bias2, 'L2_biases')

    query_l2 = tf.matmul(query_l1_out, weight2) + bias2
    doc_l2 = tf.matmul(doc_l1_out, weight2) + bias2
    query_y = tf.nn.relu(query_l2, name="query_y")
    doc_y = tf.nn.relu(doc_l2, name="doc_y")

with tf.name_scope('FD_rotate'):
    # Rotate the positive documents (FD+) to produce NEG blocks of negative
    # documents (FD-), each appended below the rows already in doc_y.
    temp = tf.tile(doc_y, [1, 1])

    for i in range(NEG):
        # Offset drawn from the i-th of NEG equal-width bands of [0, BS).
        # NOTE(review): for i == 0 the offset can come out as 0, in which case
        # that "negative" block is identical to the positives - confirm this
        # is acceptable.
        rand = int((random.random() + i) * BS / NEG)
        # TF >= 1.0 signature: tf.concat(values, axis).
        doc_y = tf.concat([doc_y,
                           tf.slice(temp, [rand, 0], [BS - rand, -1]),
                           tf.slice(temp, [0, 0], [rand, -1])], 0)

with tf.name_scope('Cosine_Similarity'):
    # Cosine similarity between every query and its NEG + 1 candidate docs.
    # The query rows are tiled NEG + 1 times so they line up with doc_y.
    query_norm = tf.tile(tf.sqrt(tf.reduce_sum(tf.square(query_y), 1, True)), [NEG + 1, 1])
    doc_norm = tf.sqrt(tf.reduce_sum(tf.square(doc_y), 1, True))

    # tf.multiply replaces tf.mul, which was removed in TensorFlow 1.0.
    prod = tf.reduce_sum(tf.multiply(tf.tile(query_y, [NEG + 1, 1]), doc_y), 1, True)
    norm_prod = tf.multiply(query_norm, doc_norm)

    # NOTE(review): a row whose ReLU output is all zeros makes norm_prod 0 and
    # this division NaN - confirm whether an epsilon is wanted here.
    cos_sim_raw = tf.truediv(prod, norm_prod)
    # Fold the stacked column into [BS, NEG + 1]: one row per query, one
    # column per candidate block. The factor 20 scales the similarities
    # (presumably a softmax smoothing factor - TODO confirm).
    cos_sim = tf.transpose(tf.reshape(tf.transpose(cos_sim_raw), [NEG + 1, BS])) * 20
Expand All @@ -123,7 +121,7 @@ def variable_summaries(var, name):
prob = tf.nn.softmax((cos_sim))
hit_prob = tf.slice(prob, [0, 0], [-1, 1])
loss = -tf.reduce_sum(tf.log(hit_prob)) / BS
tf.scalar_summary('loss', loss)
tf.summary.scalar('loss', loss)

with tf.name_scope('Training'):
# Optimizer
Expand All @@ -134,11 +132,11 @@ def variable_summaries(var, name):
# accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# tf.scalar_summary('accuracy', accuracy)

# Collect every summary registered so far. The 'average_loss' summary below is
# created afterwards, so it is not part of `merged` and is run on its own.
merged = tf.summary.merge_all()

with tf.name_scope('Test'):
    # Scalar fed from Python with an already-averaged loss value, so the
    # figure can be written to the summary writers.
    average_loss = tf.placeholder(tf.float32)
    loss_summary = tf.summary.scalar('average_loss', average_loss)


def pull_batch(query_data, doc_data, batch_idx):
Expand All @@ -151,13 +149,13 @@ def pull_batch(query_data, doc_data, batch_idx):


# Repackage both batches as SparseTensorValue objects for the sparse
# placeholders. Reads .row / .col / .data / .shape from each input (looks like
# a COO sparse matrix - confirm against the hidden slicing code).
# np.float64 replaces the np.float alias (same dtype; the alias was removed
# from NumPy in 1.24).
query_in = tf.SparseTensorValue(
    indices=np.transpose([np.array(query_in.row, dtype=np.int64), np.array(query_in.col, dtype=np.int64)]),
    values=np.array(query_in.data, dtype=np.float64),
    dense_shape=np.array(query_in.shape, dtype=np.int64))
doc_in = tf.SparseTensorValue(
    indices=np.transpose([np.array(doc_in.row, dtype=np.int64), np.array(doc_in.col, dtype=np.int64)]),
    values=np.array(doc_in.data, dtype=np.float64),
    dense_shape=np.array(doc_in.shape, dtype=np.int64))

# end = time.time()
# print("Pull_batch time: %f" % (end - start))
Expand All @@ -180,44 +178,23 @@ def feed_dict(Train, batch_idx):
#config = tf.ConfigProto(device_count= {'GPU' : 0})

with tf.Session(config=config) as sess:
sess.run(tf.initialize_all_variables())
train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/train', sess.graph)
test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/test', sess.graph)
sess.run(tf.global_variables_initializer())
train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph)
test_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test', sess.graph)

# Actual execution
start = time.time()
# fp_time = 0
# fbp_time = 0
for step in range(FLAGS.max_steps):
batch_idx = step % FLAGS.epoch_steps
if batch_idx % FLAGS.pack_size == 0:
load_train_data(batch_idx / FLAGS.pack_size + 1)

# # setup toolbar
# sys.stdout.write("[%s]" % (" " * toolbar_width))
# #sys.stdout.flush()
# sys.stdout.write("\b" * (toolbar_width + 1)) # return to start of line, after '['


if batch_idx % (FLAGS.pack_size / 64) == 0:
progress = 100.0 * batch_idx / FLAGS.epoch_steps
sys.stdout.write("\r%.2f%% Epoch" % progress)
sys.stdout.write("\r{:.2f} Epoch (step {})".format(progress, step))
sys.stdout.flush()

# t1 = time.time()
# sess.run(loss, feed_dict = feed_dict(True, batch_idx))
# t2 = time.time()
# fp_time += t2 - t1
# #print(t2-t1)
# t1 = time.time()
sess.run(train_step, feed_dict=feed_dict(True, batch_idx % FLAGS.pack_size))
# t2 = time.time()
# fbp_time += t2 - t1
# #print(t2 - t1)
# if batch_idx % 2000 == 1999:
# print ("MiniBatch: Average FP Time %f, Average FP+BP Time %f" %
# (fp_time / step, fbp_time / step))


if batch_idx == FLAGS.epoch_steps - 1:
end = time.time()
Expand All @@ -230,11 +207,7 @@ def feed_dict(Train, batch_idx):
train_loss = sess.run(loss_summary, feed_dict={average_loss: epoch_loss})
train_writer.add_summary(train_loss, step + 1)

# print ("MiniBatch: Average FP Time %f, Average FP+BP Time %f" %
# (fp_time / step, fbp_time / step))
#
print ("\nEpoch #%-5d | Train Loss: %-4.3f | PureTrainTime: %-3.3fs" %
(step / FLAGS.epoch_steps, epoch_loss, end - start))
print ("\nStep #{:5} Epoch #{:5} | Train Loss: {:.3f} | PureTrainTime: {:.3f}s".format(step, step / FLAGS.epoch_steps, epoch_loss, end - start))

epoch_loss = 0
for i in range(FLAGS.pack_size):
Expand All @@ -247,6 +220,5 @@ def feed_dict(Train, batch_idx):
test_writer.add_summary(test_loss, step + 1)

start = time.time()
# Report the test loss. `start` was reset just above, so start - end is the
# time spent on the loss evaluation, not on training. Floor division keeps the
# epoch number an integer on Python 3 as well as Python 2.
print("\nStep #{:5} Epoch #{:5} | Test Loss: {:.3f} | Calc_LossTime: {:.3f}s".format(
    step, step // FLAGS.epoch_steps, epoch_loss, start - end))