diff --git a/README.md b/README.md index 65e7029..209586f 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ To get started, cd into the directory examples/ and run demo.sh. It downloads th * sim_sif.py and sim_tfidf.py are for the textual similarity tasks in the paper, * supervised_sif_proj.sh is for the supervised tasks in the paper. -Check these files to see the options. +Check these files to see the options. Make sure you create a "log" folder in the "examples" directory before running the above code. ## Source code The code is separated into the following parts: diff --git a/examples/sif_embedding.py b/examples/sif_embedding.py old mode 100644 new mode 100755 index 8bafd71..1b65783 --- a/examples/sif_embedding.py +++ b/examples/sif_embedding.py @@ -15,7 +15,7 @@ word2weight = data_io.getWordWeight(weightfile, weightpara) # word2weight['str'] is the weight for the word 'str' weight4ind = data_io.getWeight(words, word2weight) # weight4ind[i] is the weight for the i-th word # load sentences -x, m, _ = data_io.sentences2idx(sentences, words) # x is the array of word indices, m is the binary mask indicating whether there is a word in that location +x, m = data_io.sentences2idx(sentences, words) # x is the array of word indices, m is the binary mask indicating whether there is a word in that location w = data_io.seq2weight(x, m, weight4ind) # get word weights # set parameters diff --git a/examples/train.sh b/examples/train.sh index 9b8746d..c4ed331 100755 --- a/examples/train.sh +++ b/examples/train.sh @@ -1,2 +1,2 @@ cd ../src -THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 python train.py $@ \ No newline at end of file +THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 python2.7 train.py $@ \ No newline at end of file diff --git a/src/data_io.py b/src/data_io.py old mode 100644 new mode 100755 index afea0c0..ff4ee15 --- a/src/data_io.py +++ b/src/data_io.py @@ -196,6 +196,7 @@ def sentences2idx(sentences, words): :param words: a dictionary, words['str'] is the 
indices of the word 'str' :return: x1, m1. x1[i, :] is the word indices in sentence i, m1[i,:] is the mask for sentence i (0 means no word at the location) """ + seq1 = [] for i in sentences: seq1.append(getSeq(i,words)) x1,m1 = prepare_data(seq1)