
Commit 1e3e7da

Merge pull request #8 from shixing/master

add 'words_ensemble' model

2 parents: 5c7d9a8 + 2baea22

File tree

7 files changed: +804 -729 lines

.gitignore (+8 -1)

@@ -1,2 +1,9 @@
-.history
+.DS_Store
 *~
+[#].[#]
+.[#]*
+*[#]
+
+*pyc
+.history
+
README_XING.md (+11 -1)

@@ -134,11 +134,21 @@
 You can choose one of the following three commands to type into STDIN:
 
 1. `source <source_file>` : process the source-side forward propagation.
-2. `words word1 word2 word3` : feed the target-side RNN with the word sequence `word1 word2 word3`. This is supposed to be the line that the human composed.
+2. `words word1 word2 word3` : feed the target-side RNN with the word sequence `word1 word2 word3`. This is supposed to be the line that the human composed.
 3. `fsaline <fsa_file> encourage_list_files:enc1.txt,enc2.txt encourage_weights:1.0,-1.0 repetition:0.0 alliteration:0.0 wordlen:0.0` : let the RNN continue decoding with the FSA.
 
 Both steps 2 and 3 start from the previous hidden states and cell states of the target-side RNN.
 
+You can also ensemble two models, `best.nn.1` and `best.nn.2`, by:
+
+```
+$EXEC -k 10 best.nn.1 best.nn.2 kbest_fsa.txt --print-score 1 -b 5 --fsa fsa.txt --print-beam 1 --decode-main-data-files source.valid.txt source.valid.txt --interactive-line 1 --interactive 1
+```
+
+Additionally, you can use the `words_ensemble` option to provide two different human inputs to the two models:
+
+4. `words_ensemble word11 word12 word13 ___sep___ word21 word22 word23 ___sep___` : feed the target-side RNN with the word sequence `word11 word12 word13` for `best.nn.1` and `word21 word22 word23` for `best.nn.2`. These are supposed to be the lines the human composed.
+
 # Decoding with Word Alignment
 
 Suppose we are translating from French to English; we could use the word alignment information to speed up decoding. Please find details in 5. [Speeding up Neural Machine Translation Decoding by Shrinking Run-time Vocabulary](http://xingshi.me/data/pdf/ACL2017short.pdf).
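For concreteness, a hypothetical STDIN session for the ensemble run above could look like the following, using the file names and command syntax from this README (input lines only; the decoder's printed beams are omitted):

```
source source.valid.txt
words_ensemble word11 word12 word13 ___sep___ word21 word22 word23 ___sep___
fsaline fsa.txt encourage_list_files:enc1.txt,enc2.txt encourage_weights:1.0,-1.0 repetition:0.0 alliteration:0.0 wordlen:0.0
```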

executable/ZOPH_RNN_XING (+2 -2)

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68ac7320aa41ebb3d33ec8a9b0b25244c5f7d37062d8a60f6424d044a71aec79
-size 126226040
+oid sha256:9331caaa8c6bf6ac7eb07a2d59e0706ad70f610ead632953de49015229cb0b57
+size 269

scripts/fsa/demo.sh (+11 -1)

@@ -41,10 +41,20 @@
 # The command line should contain --fsa <fsa_file> and --decode-main-data-files <source_file>; both fsa_file and source_file should exist and be valid FSA and source files, although you don't really use them in interactive mode.
 
 # [Interactive-line mode] : --interactive 1 --interactive-line 1
-$EXEC -k 10 best.nn kbest_fsa.txt --print-score 1 -b 5 --fsa fsa.txt --print-beam 1 --decode-main-data-files source.valid.txt --interactive-line 1 --interactive-line 1
+$EXEC -k 10 best.nn kbest_fsa.txt --print-score 1 -b 5 --fsa fsa.txt --print-beam 1 --decode-main-data-files source.valid.txt --interactive-line 1 --interactive 1
 
+# 1. `source <source_file>` : process the source-side forward propagation.
+# 2. `words word1 word2 word3` : feed the target-side RNN with the word sequence `word1 word2 word3`. This is supposed to be the line that the human composed.
+# 3. `fsaline <fsa_file> encourage_list_files:enc1.txt,enc2.txt encourage_weights:1.0,-1.0 repetition:0.0 alliteration:0.0 wordlen:0.0` : let the RNN continue decoding with the FSA.
 
 
+# [Interactive-line mode + ensemble] : --interactive 1 --interactive-line 1
+$EXEC -k 10 best.nn best.nn kbest_fsa.txt --print-score 1 -b 5 --fsa fsa.txt --print-beam 1 --decode-main-data-files source.valid.txt source.valid.txt --interactive-line 1 --interactive 1
+
+# 1. `source <source_file>` : process the source-side forward propagation.
+# 2. `words word1 word2 word3` : feed the target-side RNN with the word sequence `word1 word2 word3`. This is supposed to be the line that the human composed.
+# 3. `words_ensemble word11 word12 word13 ___sep___ word21 word22 word23 ___sep___` : feed the target-side RNN with the word sequence `word11 word12 word13` for the first model and `word21 word22 word23` for the second. These are supposed to be the lines the human composed.
+# 4. `fsaline <fsa_file> encourage_list_files:enc1.txt,enc2.txt encourage_weights:1.0,-1.0 repetition:0.0 alliteration:0.0 wordlen:0.0` : let the RNN continue decoding with the FSA.
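Since the demo says these commands are typed into STDIN, the interactive-line mode can presumably also be driven from a script by piping a fixed session in; a sketch under that assumption, reusing the demo's own file names:

```sh
# Hypothetical: feed a canned interactive-line session via a heredoc
# instead of typing the commands by hand.
$EXEC -k 10 best.nn kbest_fsa.txt --print-score 1 -b 5 --fsa fsa.txt \
  --print-beam 1 --decode-main-data-files source.valid.txt \
  --interactive-line 1 --interactive 1 <<'EOF'
source source.valid.txt
words word1 word2 word3
fsaline fsa.txt encourage_list_files:enc1.txt,enc2.txt encourage_weights:1.0,-1.0 repetition:0.0 alliteration:0.0 wordlen:0.0
EOF
```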

src/decoder_model_wrapper.h (+64 -59)

@@ -10,65 +10,70 @@ class neuralMT_model;
 template<typename dType>
 class decoder_model_wrapper {
 public:
+
 	int gpu_num;
 	int *d_ones; //vector of all ones, used for forward prop in beam search, on GPU
 	dType *h_outputdist;
 	dType *d_temp_swap_vals;
 	int *d_input_vocab_indicies_source;
 	int *d_current_indicies;
 
+	int *h_current_indices; // every model should have this vector for model ensemble
+
+
 	neuralMT_model<dType> *model; //This is the model
 
 	file_helper_decoder *fileh; //for file input, so each file can be read in separately
 	file_helper_decoder *fileh_multi_src; //reads in an additional multi-source file
 
 	int source_length; //current length of the source sentence being decoded
 	int beam_size;
 	int source_vocab_size;
 	int target_vocab_size;
 	int num_layers;
 	int LSTM_size;
 	bool attention_model;
 	bool feed_input;
 	bool combine_LSTM;
 	int num_lines_in_file = -1;
 	int longest_sent;
 
 	bool multi_source = false;
 	int source_length_bi; //current length of the bidirectional source sentence being decoded
 	int *d_input_vocab_indicies_source_bi;
 
 	bool char_cnn = false;
 	int *d_char_vocab_indicies_source;
 	int longest_word;
 	std::unordered_map<int,std::vector<int>> word_to_char_map; //for each word index, the character sequence; this is read from a file
 	int *h_new_char_indicies;
 	int *d_new_char_indicies;
 
 	std::string main_weight_file;
 	std::string multi_src_weight_file;
 	std::string main_integerized_file;
 	std::string multi_src_integerized_file;
 
 	Eigen::Matrix<dType,Eigen::Dynamic, Eigen::Dynamic,Eigen::RowMajor> outputdist;
 	std::vector<int> viterbi_alignments_ind; //individual viterbi alignments before voting
 	std::vector<dType> viterbi_alignments_scores; //individual viterbi scores
 
 	// for shrinking the target vocabulary
 	dType *d_D_shrink;
 	dType *d_softmax_original_D; // a pointer to the d_D in the original softmax
 	dType *d_b_shrink;
 	dType *d_softmax_original_b;
 	int new_output_vocab_size = 0;
 	int *h_new_vocab_index;
 	int *d_new_vocab_index;
 	// for policy 1
 	bool show_shrink_debug = false;
 	bool policy_1_done = false;
 	// for policy 2
 	int *h_alignments; // [cap+1, source_vocab_size]
 	int *d_alignments;
 	int cap = 0;
 
 	// for LSH
 	int nnz = 0;
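The diff adds the `h_current_indices` member but does not show where it is consumed. As a minimal sketch, assuming a typical ensemble beam step in which every model receives the same chosen word indices, a per-model host buffer like this would be staged and mirrored to that model's GPU; `sync_indices_to_device` and `beam_choices` are hypothetical names, not the repository's API:

```cpp
#include <cuda_runtime.h>
#include <vector>

// Hypothetical helper: after the beam search picks one word per hypothesis,
// each ensemble member stages those indices in its own host buffer
// (h_current_indices) and copies them to its own device buffer
// (d_current_indicies), which may live on a different GPU (gpu_num).
template <typename dType>
void sync_indices_to_device(decoder_model_wrapper<dType> &m,
                            const std::vector<int> &beam_choices) {
    for (int i = 0; i < m.beam_size; ++i)
        m.h_current_indices[i] = beam_choices[i];  // host-side staging
    cudaSetDevice(m.gpu_num);                      // each model may own a GPU
    cudaMemcpy(m.d_current_indicies, m.h_current_indices,
               m.beam_size * sizeof(int), cudaMemcpyHostToDevice);
}
```

This is why the comment says every model should have its own copy: with one buffer per wrapper, the host-to-device transfers stay local to each model's GPU.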

src/decoder_model_wrapper.hpp (+10 -8)

@@ -69,14 +69,16 @@ decoder_model_wrapper<dType>::decoder_model_wrapper(int gpu_num,int beam_size,
 	}
 
 	//allocate the current indicies
 	CUDA_ERROR_WRAPPER(cudaMalloc((void**)&d_current_indicies,beam_size*sizeof(int)),"GPU memory allocation failed\n");
+	h_current_indices = (int *) malloc(beam_size*sizeof(int));
+
 
 	model = new neuralMT_model<dType>();
 	//initialize the model
 	model->initModel_decoding(LSTM_size,beam_size,source_vocab_size,target_vocab_size,
 		num_layers,main_weight_file,gpu_num,params,attention_model,
 		feed_input,multi_source,combine_LSTM,char_cnn);
 
 	//initialize additional stuff for model
 	model->init_prev_states(num_layers,LSTM_size,beam_size,gpu_num,multi_source);
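For context, `CUDA_ERROR_WRAPPER` is the repository's own macro and its definition is not part of this diff; a minimal sketch of what such a check conventionally looks like, illustrative only:

```cpp
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Illustrative only, not the repository's definition: run a CUDA call,
// and if it fails, print the caller's message plus CUDA's description
// of the error, then abort.
#define CUDA_ERROR_WRAPPER(call, msg)                            \
    do {                                                         \
        cudaError_t err__ = (call);                              \
        if (err__ != cudaSuccess) {                              \
            std::fprintf(stderr, "%s%s\n", (msg),                \
                         cudaGetErrorString(err__));             \
            std::exit(EXIT_FAILURE);                             \
        }                                                        \
    } while (0)
```

Note that the new `h_current_indices` buffer is allocated with plain `malloc` rather than `cudaMalloc`, since it is a host-side staging vector; the matching `d_current_indicies` device buffer is the one guarded by the CUDA wrapper.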
