diff --git a/single/dssm.py b/single/dssm.py index f461743..b65272a 100644 --- a/single/dssm.py +++ b/single/dssm.py @@ -1,3 +1,5 @@ +from __future__ import print_function + import pickle import random import time @@ -32,7 +34,8 @@ def load_train_data(pack_idx): doc_train_data = pickle.load(open('../data/doc.train.' + str(pack_idx)+ '.pickle', 'rb')).tocsr() query_train_data = pickle.load(open('../data/query.train.'+ str(pack_idx)+ '.pickle', 'rb')).tocsr() end = time.time() - print ("\nTrain data %d/9 is loaded in %.2fs" % (pack_idx, end - start)) + print("\nTrain data {} is loaded in {:.2f}".format(pack_idx, end - start)) + end = time.time() print("Loading data from HDD to memory: %.2fs" % (end - start)) @@ -45,33 +48,29 @@ def load_train_data(pack_idx): L1_N = 400 L2_N = 120 -query_in_shape = np.array([BS, TRIGRAM_D], np.int64) -doc_in_shape = np.array([BS, TRIGRAM_D], np.int64) - - def variable_summaries(var, name): """Attach a lot of summaries to a Tensor.""" with tf.name_scope('summaries'): mean = tf.reduce_mean(var) - tf.scalar_summary('mean/' + name, mean) + tf.summary.scalar('mean/' + name, mean) with tf.name_scope('stddev'): stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean))) - tf.scalar_summary('sttdev/' + name, stddev) - tf.scalar_summary('max/' + name, tf.reduce_max(var)) - tf.scalar_summary('min/' + name, tf.reduce_min(var)) - tf.histogram_summary(name, var) + tf.summary.scalar('sttdev/' + name, stddev) + tf.summary.scalar('max/' + name, tf.reduce_max(var)) + tf.summary.scalar('min/' + name, tf.reduce_min(var)) + tf.summary.histogram(name, var) with tf.name_scope('input'): # Shape [BS, TRIGRAM_D]. 
- query_batch = tf.sparse_placeholder(tf.float32, shape=query_in_shape, name='QueryBatch') + query_batch = tf.sparse_placeholder(tf.float32, name='QueryBatch') # Shape [BS, TRIGRAM_D] - doc_batch = tf.sparse_placeholder(tf.float32, shape=doc_in_shape, name='DocBatch') + doc_batch = tf.sparse_placeholder(tf.float32, name='DocBatch') with tf.name_scope('L1'): l1_par_range = np.sqrt(6.0 / (TRIGRAM_D + L1_N)) - weight1 = tf.Variable(tf.random_uniform([TRIGRAM_D, L1_N], -l1_par_range, l1_par_range)) - bias1 = tf.Variable(tf.random_uniform([L1_N], -l1_par_range, l1_par_range)) + weight1 = tf.Variable(tf.random_uniform([TRIGRAM_D, L1_N], -l1_par_range, l1_par_range), name="weight1") + bias1 = tf.Variable(tf.random_uniform([L1_N], -l1_par_range, l1_par_range), name="bias1") variable_summaries(weight1, 'L1_weights') variable_summaries(bias1, 'L1_biases') @@ -86,15 +85,15 @@ def variable_summaries(var, name): with tf.name_scope('L2'): l2_par_range = np.sqrt(6.0 / (L1_N + L2_N)) - weight2 = tf.Variable(tf.random_uniform([L1_N, L2_N], -l2_par_range, l2_par_range)) - bias2 = tf.Variable(tf.random_uniform([L2_N], -l2_par_range, l2_par_range)) + weight2 = tf.Variable(tf.random_uniform([L1_N, L2_N], -l2_par_range, l2_par_range), name="weight2") + bias2 = tf.Variable(tf.random_uniform([L2_N], -l2_par_range, l2_par_range), name="bias2") variable_summaries(weight2, 'L2_weights') variable_summaries(bias2, 'L2_biases') query_l2 = tf.matmul(query_l1_out, weight2) + bias2 doc_l2 = tf.matmul(doc_l1_out, weight2) + bias2 - query_y = tf.nn.relu(query_l2) - doc_y = tf.nn.relu(doc_l2) + query_y = tf.nn.relu(query_l2, name="query_y") + doc_y = tf.nn.relu(doc_l2, name="doc_y") with tf.name_scope('FD_rotate'): # Rotate FD+ to produce 50 FD- @@ -102,18 +101,17 @@ def variable_summaries(var, name): for i in range(NEG): rand = int((random.random() + i) * BS / NEG) - doc_y = tf.concat(0, - [doc_y, + doc_y = tf.concat([doc_y, tf.slice(temp, [rand, 0], [BS - rand, -1]), - tf.slice(temp, [0, 0], [rand, 
-1])]) + tf.slice(temp, [0, 0], [rand, -1])], 0) with tf.name_scope('Cosine_Similarity'): # Cosine similarity query_norm = tf.tile(tf.sqrt(tf.reduce_sum(tf.square(query_y), 1, True)), [NEG + 1, 1]) doc_norm = tf.sqrt(tf.reduce_sum(tf.square(doc_y), 1, True)) - prod = tf.reduce_sum(tf.mul(tf.tile(query_y, [NEG + 1, 1]), doc_y), 1, True) - norm_prod = tf.mul(query_norm, doc_norm) + prod = tf.reduce_sum(tf.multiply(tf.tile(query_y, [NEG + 1, 1]), doc_y), 1, True) + norm_prod = tf.multiply(query_norm, doc_norm) cos_sim_raw = tf.truediv(prod, norm_prod) cos_sim = tf.transpose(tf.reshape(tf.transpose(cos_sim_raw), [NEG + 1, BS])) * 20 @@ -123,7 +121,7 @@ def variable_summaries(var, name): prob = tf.nn.softmax((cos_sim)) hit_prob = tf.slice(prob, [0, 0], [-1, 1]) loss = -tf.reduce_sum(tf.log(hit_prob)) / BS - tf.scalar_summary('loss', loss) + tf.summary.scalar('loss', loss) with tf.name_scope('Training'): # Optimizer @@ -134,11 +132,11 @@ def variable_summaries(var, name): # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # tf.scalar_summary('accuracy', accuracy) -merged = tf.merge_all_summaries() +merged = tf.summary.merge_all() with tf.name_scope('Test'): average_loss = tf.placeholder(tf.float32) - loss_summary = tf.scalar_summary('average_loss', average_loss) + loss_summary = tf.summary.scalar('average_loss', average_loss) def pull_batch(query_data, doc_data, batch_idx): @@ -151,13 +149,13 @@ def pull_batch(query_data, doc_data, batch_idx): query_in = tf.SparseTensorValue( - np.transpose([np.array(query_in.row, dtype=np.int64), np.array(query_in.col, dtype=np.int64)]), - np.array(query_in.data, dtype=np.float), - np.array(query_in.shape, dtype=np.int64)) + indices=np.transpose([np.array(query_in.row, dtype=np.int64), np.array(query_in.col, dtype=np.int64)]), + values=np.array(query_in.data, dtype=np.float), + dense_shape=np.array(query_in.shape, dtype=np.int64)) doc_in = tf.SparseTensorValue( - np.transpose([np.array(doc_in.row, dtype=np.int64), 
np.array(doc_in.col, dtype=np.int64)]), - np.array(doc_in.data, dtype=np.float), - np.array(doc_in.shape, dtype=np.int64)) + indices=np.transpose([np.array(doc_in.row, dtype=np.int64), np.array(doc_in.col, dtype=np.int64)]), + values=np.array(doc_in.data, dtype=np.float), + dense_shape=np.array(doc_in.shape, dtype=np.int64)) # end = time.time() # print("Pull_batch time: %f" % (end - start)) @@ -180,44 +178,23 @@ def feed_dict(Train, batch_idx): #config = tf.ConfigProto(device_count= {'GPU' : 0}) with tf.Session(config=config) as sess: - sess.run(tf.initialize_all_variables()) - train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/train', sess.graph) - test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/test', sess.graph) + sess.run(tf.global_variables_initializer()) + train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph) + test_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test', sess.graph) # Actual execution start = time.time() - # fp_time = 0 - # fbp_time = 0 for step in range(FLAGS.max_steps): batch_idx = step % FLAGS.epoch_steps if batch_idx % FLAGS.pack_size == 0: load_train_data(batch_idx / FLAGS.pack_size + 1) - # # setup toolbar - # sys.stdout.write("[%s]" % (" " * toolbar_width)) - # #sys.stdout.flush() - # sys.stdout.write("\b" * (toolbar_width + 1)) # return to start of line, after '[' - - if batch_idx % (FLAGS.pack_size / 64) == 0: progress = 100.0 * batch_idx / FLAGS.epoch_steps - sys.stdout.write("\r%.2f%% Epoch" % progress) + sys.stdout.write("\r{:.2f}% Epoch (step {})".format(progress, step)) sys.stdout.flush() - # t1 = time.time() - # sess.run(loss, feed_dict = feed_dict(True, batch_idx)) - # t2 = time.time() - # fp_time += t2 - t1 - # #print(t2-t1) - # t1 = time.time() sess.run(train_step, feed_dict=feed_dict(True, batch_idx % FLAGS.pack_size)) - # t2 = time.time() - # fbp_time += t2 - t1 - # #print(t2 - t1) - # if batch_idx % 2000 == 1999: - # print ("MiniBatch: Average FP Time %f, Average 
FP+BP Time %f" % - # (fp_time / step, fbp_time / step)) - if batch_idx == FLAGS.epoch_steps - 1: end = time.time() @@ -230,11 +207,7 @@ def feed_dict(Train, batch_idx): train_loss = sess.run(loss_summary, feed_dict={average_loss: epoch_loss}) train_writer.add_summary(train_loss, step + 1) - # print ("MiniBatch: Average FP Time %f, Average FP+BP Time %f" % - # (fp_time / step, fbp_time / step)) - # - print ("\nEpoch #%-5d | Train Loss: %-4.3f | PureTrainTime: %-3.3fs" % - (step / FLAGS.epoch_steps, epoch_loss, end - start)) + print ("\nStep #{:5} Epoch #{:5} | Train Loss: {:.3f} | PureTrainTime: {:.3f}s".format(step, step / FLAGS.epoch_steps, epoch_loss, end - start)) epoch_loss = 0 for i in range(FLAGS.pack_size): @@ -247,6 +220,5 @@ def feed_dict(Train, batch_idx): test_writer.add_summary(test_loss, step + 1) start = time.time() - print ("Epoch #%-5d | Test Loss: %-4.3f | Calc_LossTime: %-3.3fs" % - (step / FLAGS.epoch_steps, epoch_loss, start - end)) + print ("\nStep #{:5} Epoch #{:5} | Test Loss: {:.3f} | Calc_LossTime: {:.3f}s".format(step, step / FLAGS.epoch_steps, epoch_loss, start - end))