l11x0m7
diff --git a/‎README.md
+4-1 b/‎README.md
+4-1
diff --git a/‎bilm-tf/bilm/data.py
100644100755
+9-4 b/‎bilm-tf/bilm/data.py
100644100755
+9-4
@@ -41,7 +41,10 @@ This dir includes the ELMo model. Here we present experiments on the RACE datase
 
 #### Run on RACE
 
-TO DO!
+| epochs | batch size | max input length | Accuracy (%) on dev | Accuracy (%) on test | Accuracy (%) on middle test | Accuracy (%) on high test | device |
+| :--------: |:------:|:------:|:------:|:------:|:------:|:------:|:------:|
+| 3   | 8 | 512/32 | 39.39  | 38.57 | 38.23 | 39.02 | 1 GTX 1080 |
+| 25   | 8 | 512/32 | \  | \ | \ | \ | 1 GTX 1080 |
 
 #### Run on SQuAD
 
 
@@ -194,7 +194,7 @@ class Batcher(object):
     ''' 
     Batch sentences of tokenized text into character id matrices.
     '''
-    def __init__(self, lm_vocab_file: str, max_token_length: int):
+    def __init__(self, lm_vocab_file: str, max_token_length: int, max_sentence_length: int = 0):
         '''
         lm_vocab_file = the language model vocabulary file (one line per
             token)
@@ -204,6 +204,7 @@ def __init__(self, lm_vocab_file: str, max_token_length: int):
             lm_vocab_file, max_token_length
         )
         self._max_token_length = max_token_length
+        self._max_sentence_length = max_sentence_length
 
     def batch_sentences(self, sentences: List[List[str]]):
         '''
@@ -213,16 +214,20 @@ def batch_sentences(self, sentences: List[List[str]]):
         '''
         n_sentences = len(sentences)
         max_length = max(len(sentence) for sentence in sentences) + 2
+        if self._max_sentence_length > 0:
+            max_length = min(self._max_sentence_length, max_length)
 
         X_char_ids = np.zeros(
             (n_sentences, max_length, self._max_token_length),
-            dtype=np.int64
+            dtype=np.int32
         )
 
         for k, sent in enumerate(sentences):
-            length = len(sent) + 2
+            length = len(sent)
+            length = min(length, max_length - 2)
             char_ids_without_mask = self._lm_vocab.encode_chars(
-                sent, split=False)
+                sent[:length], split=False)
+            length += 2
             # add one so that 0 is the mask value
             X_char_ids[k, :length, :] = char_ids_without_mask + 1