
Commit 1e3e7da

Merge pull request #8 from shixing/master

add 'words_ensemble' model

2 parents: 5c7d9a8 + 2baea22

File tree

7 files changed: +804 -729 lines

.gitignore (+8 -1)

@@ -1,2 +1,9 @@
-.history
+.DS_Store
 *~
+[#].[#]
+.[#]*
+*[#]
+
+*pyc
+.history
+
README_XING.md (+11 -1)

@@ -134,11 +134,21 @@
 You can choose one of the following three commands to type into STDIN:
 
 1. `source <source_file>` : process the source-side forward propagation.
-2. `words word1 word2 word3` : feed the target-side RNN with the word sequence `word1 word2 word3`. This is supposed to be the line that the human composed.
+2. `words word1 word2 word3` : feed the target-side RNN with the word sequence `word1 word2 word3`. This is supposed to be the line that the human composed.
 3. `fsaline <fsa_file> encourage_list_files:enc1.txt,enc2.txt encourage_weights:1.0,-1.0 repetition:0.0 alliteration:0.0 wordlen:0.0` : let the RNN continue decoding with the FSA.
 
 Both steps 2 and 3 start from the previous hidden states and cell states of the target-side RNN.
 
+You can also ensemble two models, `best.nn.1` and `best.nn.2`, by:
+
+```
+$EXEC -k 10 best.nn.1 best.nn.2 kbest_fsa.txt --print-score 1 -b 5 --fsa fsa.txt --print-beam 1 --decode-main-data-files source.valid.txt source.valid.txt --interactive-line 1 --interactive 1
+```
+
+Additionally, you can use the `words_ensemble` option to provide two different human inputs to the two models:
+
+4. `words_ensemble word11 word12 word13 ___sep___ word21 word22 word23 ___sep___` : feed the target-side RNN with the word sequence `word11 word12 word13` for `best.nn.1` and `word21 word22 word23` for `best.nn.2`. These are supposed to be the lines the human composed.
+
 # Decoding with Word Alignment
 
 Suppose we are translating from French to English; we could use the word alignment information to speed up decoding. Please find details in 5. [Speeding up Neural Machine Translation Decoding by Shrinking Run-time Vocabulary](http://xingshi.me/data/pdf/ACL2017short.pdf).
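For concreteness, a hypothetical STDIN session for the ensemble run above could look like the following, using the file names and command syntax from this README (input lines only; the decoder's printed beams are omitted):

```
source source.valid.txt
words_ensemble word11 word12 word13 ___sep___ word21 word22 word23 ___sep___
fsaline fsa.txt encourage_list_files:enc1.txt,enc2.txt encourage_weights:1.0,-1.0 repetition:0.0 alliteration:0.0 wordlen:0.0
```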

executable/ZOPH_RNN_XING (+2 -2)

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68ac7320aa41ebb3d33ec8a9b0b25244c5f7d37062d8a60f6424d044a71aec79
-size 126226040
+oid sha256:9331caaa8c6bf6ac7eb07a2d59e0706ad70f610ead632953de49015229cb0b57
+size 269

scripts/fsa/demo.sh (+11 -1)

@@ -41,10 +41,20 @@
 # The command line should contain --fsa <fsa_file> and --decode-main-data-files <source_file>; both fsa_file and source_file should exist and be valid FSA and source files, although you don't really use them in interactive mode.
 
 # [Interactive-line mode] : --interactive 1 --interactive-line 1
-$EXEC -k 10 best.nn kbest_fsa.txt --print-score 1 -b 5 --fsa fsa.txt --print-beam 1 --decode-main-data-files source.valid.txt --interactive-line 1 --interactive-line 1
+$EXEC -k 10 best.nn kbest_fsa.txt --print-score 1 -b 5 --fsa fsa.txt --print-beam 1 --decode-main-data-files source.valid.txt --interactive-line 1 --interactive 1
 
+# 1. `source <source_file>` : process the source-side forward propagation.
+# 2. `words word1 word2 word3` : feed the target-side RNN with the word sequence `word1 word2 word3`. This is supposed to be the line that the human composed.
+# 3. `fsaline <fsa_file> encourage_list_files:enc1.txt,enc2.txt encourage_weights:1.0,-1.0 repetition:0.0 alliteration:0.0 wordlen:0.0` : let the RNN continue decoding with the FSA.
 
 
+# [Interactive-line mode + ensemble] : --interactive 1 --interactive-line 1
+$EXEC -k 10 best.nn best.nn kbest_fsa.txt --print-score 1 -b 5 --fsa fsa.txt --print-beam 1 --decode-main-data-files source.valid.txt source.valid.txt --interactive-line 1 --interactive 1
+
+# 1. `source <source_file>` : process the source-side forward propagation.
+# 2. `words word1 word2 word3` : feed the target-side RNN with the word sequence `word1 word2 word3`. This is supposed to be the line that the human composed.
+# 3. `words_ensemble word11 word12 word13 ___sep___ word21 word22 word23 ___sep___` : feed the target-side RNN with the word sequence `word11 word12 word13` for the first model and `word21 word22 word23` for the second. These are supposed to be the lines the human composed.
+# 4. `fsaline <fsa_file> encourage_list_files:enc1.txt,enc2.txt encourage_weights:1.0,-1.0 repetition:0.0 alliteration:0.0 wordlen:0.0` : let the RNN continue decoding with the FSA.
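Since the demo says these commands are typed into STDIN, the interactive-line mode can presumably also be driven from a script by piping a fixed session in; a sketch under that assumption, reusing the demo's own file names:

```sh
# Hypothetical: feed a canned interactive-line session via a heredoc
# instead of typing the commands by hand.
$EXEC -k 10 best.nn kbest_fsa.txt --print-score 1 -b 5 --fsa fsa.txt \
  --print-beam 1 --decode-main-data-files source.valid.txt \
  --interactive-line 1 --interactive 1 <<'EOF'
source source.valid.txt
words word1 word2 word3
fsaline fsa.txt encourage_list_files:enc1.txt,enc2.txt encourage_weights:1.0,-1.0 repetition:0.0 alliteration:0.0 wordlen:0.0
EOF
```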

src/decoder_model_wrapper.h (+64 -59)

@@ -10,65 +10,70 @@ class neuralMT_model;
 template<typename dType>
 class decoder_model_wrapper {
 public:
+
 	int gpu_num;
 	int *d_ones; //vector of all ones, used for forward prop in beam search, on GPU
 	dType *h_outputdist;
 	dType *d_temp_swap_vals;
 	int *d_input_vocab_indicies_source;
 	int *d_current_indicies;
 
+	int *h_current_indices; // every model should have this vector for model ensemble
+
+
 	neuralMT_model<dType> *model; //This is the model
 
 	file_helper_decoder *fileh; //for file input, so each file can be read in separately
 	file_helper_decoder *fileh_multi_src; //reads in an additional multi-source file
 
 	int source_length; //current length of the source sentence being decoded
 	int beam_size;
 	int source_vocab_size;
 	int target_vocab_size;
 	int num_layers;
 	int LSTM_size;
 	bool attention_model;
 	bool feed_input;
 	bool combine_LSTM;
 	int num_lines_in_file = -1;
 	int longest_sent;
 
 	bool multi_source = false;
 	int source_length_bi; //current length of the bidirectional source sentence being decoded
 	int *d_input_vocab_indicies_source_bi;
 
 	bool char_cnn = false;
 	int *d_char_vocab_indicies_source;
 	int longest_word;
 	std::unordered_map<int,std::vector<int>> word_to_char_map; //for each word index, the character sequence; this is read from a file
 	int *h_new_char_indicies;
 	int *d_new_char_indicies;
 
 	std::string main_weight_file;
 	std::string multi_src_weight_file;
 	std::string main_integerized_file;
 	std::string multi_src_integerized_file;
 
 	Eigen::Matrix<dType,Eigen::Dynamic, Eigen::Dynamic,Eigen::RowMajor> outputdist;
 	std::vector<int> viterbi_alignments_ind; //individual viterbi alignments before voting
 	std::vector<dType> viterbi_alignments_scores; //individual viterbi scores
 
 	// for shrinking the target vocabulary
 	dType *d_D_shrink;
 	dType *d_softmax_original_D; // a pointer to the d_D in the original softmax
 	dType *d_b_shrink;
 	dType *d_softmax_original_b;
 	int new_output_vocab_size = 0;
 	int *h_new_vocab_index;
 	int *d_new_vocab_index;
 	// for policy 1
 	bool show_shrink_debug = false;
 	bool policy_1_done = false;
 	// for policy 2
 	int *h_alignments; // [cap+1, source_vocab_size]
 	int *d_alignments;
 	int cap = 0;
 
 	// for LSH
 	int nnz = 0;
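The diff adds the `h_current_indices` member but does not show where it is consumed. As a minimal sketch, assuming a typical ensemble beam step in which every model receives the same chosen word indices, a per-model host buffer like this would be staged and mirrored to that model's GPU; `sync_indices_to_device` and `beam_choices` are hypothetical names, not the repository's API:

```cpp
#include <cuda_runtime.h>
#include <vector>

// Hypothetical helper: after the beam search picks one word per hypothesis,
// each ensemble member stages those indices in its own host buffer
// (h_current_indices) and copies them to its own device buffer
// (d_current_indicies), which may live on a different GPU (gpu_num).
template <typename dType>
void sync_indices_to_device(decoder_model_wrapper<dType> &m,
                            const std::vector<int> &beam_choices) {
    for (int i = 0; i < m.beam_size; ++i)
        m.h_current_indices[i] = beam_choices[i];  // host-side staging
    cudaSetDevice(m.gpu_num);                      // each model may own a GPU
    cudaMemcpy(m.d_current_indicies, m.h_current_indices,
               m.beam_size * sizeof(int), cudaMemcpyHostToDevice);
}
```

This is why the comment says every model should have its own copy: with one buffer per wrapper, the host-to-device transfers stay local to each model's GPU.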

src/decoder_model_wrapper.hpp (+10 -8)

@@ -69,14 +69,16 @@ decoder_model_wrapper<dType>::decoder_model_wrapper(int gpu_num,int beam_size,
 	}
 
 	//allocate the current indicies
 	CUDA_ERROR_WRAPPER(cudaMalloc((void**)&d_current_indicies,beam_size*sizeof(int)),"GPU memory allocation failed\n");
+	h_current_indices = (int *) malloc(beam_size*sizeof(int));
+
 
 	model = new neuralMT_model<dType>();
 	//initialize the model
 	model->initModel_decoding(LSTM_size,beam_size,source_vocab_size,target_vocab_size,
 		num_layers,main_weight_file,gpu_num,params,attention_model,
 		feed_input,multi_source,combine_LSTM,char_cnn);
 
 	//initialize additional stuff for model
 	model->init_prev_states(num_layers,LSTM_size,beam_size,gpu_num,multi_source);
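For context, `CUDA_ERROR_WRAPPER` is the repository's own macro and its definition is not part of this diff; a minimal sketch of what such a check conventionally looks like, illustrative only:

```cpp
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Illustrative only, not the repository's definition: run a CUDA call,
// and if it fails, print the caller's message plus CUDA's description
// of the error, then abort.
#define CUDA_ERROR_WRAPPER(call, msg)                            \
    do {                                                         \
        cudaError_t err__ = (call);                              \
        if (err__ != cudaSuccess) {                              \
            std::fprintf(stderr, "%s%s\n", (msg),                \
                         cudaGetErrorString(err__));             \
            std::exit(EXIT_FAILURE);                             \
        }                                                        \
    } while (0)
```

Note that the new `h_current_indices` buffer is allocated with plain `malloc` rather than `cudaMalloc`, since it is a host-side staging vector; the matching `d_current_indicies` device buffer is the one guarded by the CUDA wrapper.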
