Py3 fixes

spyysalo · Oct 9, 2019 · 60411af · 60411af
1 parent 7b276a1
commit 60411af
Show file tree

Hide file tree

Showing 6 changed files with 39 additions and 31 deletions.
diff --git a/analogy.py b/analogy.py
@@ -36,8 +36,8 @@ def main(argv=None):
     try:
         wv = wvlib.load(options.vectors, max_rank=options.max_rank)
         wv = wv.normalize()
-    except Exception, e:
-        print >> sys.stderr, 'Error: %s' % str(e)
+    except Exception as e:
+        print('Error: %s' % str(e), file=sys.stderr)
         return 1
     return query_loop(wv, options, process_query, query_count=3)
 

diff --git a/common.py b/common.py
@@ -44,7 +44,7 @@ def process_args(args, prompt='Enter words'):
 
 def get_line(prompt, exit_word=None):
     try:
-        s = raw_input(prompt)
+        s = input(prompt)
     except KeyboardInterrupt:   # CTRL-C
         raise EOFError
     if s.strip() == exit_word:
@@ -98,31 +98,31 @@ def query_loop(wv, options, process_query, query_count=1):
         if empty_query(query):
             continue
         if options.echo:
-            print query
+            print(query)
         if len(query) < query_count:
-            print >> sys.stderr, 'Enter %d words/phrases' % query_count
+            print('Enter %d words/phrases' % query_count, file=sys.stderr)
             continue
         if len(query) > query_count:
-            print >> sys.stderr, 'Ignoring extra words/phrases'
+            print('Ignoring extra words/phrases', file=sys.stderr)
             query = query[:query_count]
         words, missing = [w for q in query for w in q], []
         for w in uniq(words):
             if w not in wv:
-                print >> sys.stderr, 'Out of dictionary word: %s' % str(w)
+                print('Out of dictionary word: %s' % str(w), file=sys.stderr)
                 missing.append(w)
             elif not options.quiet:
-                print 'Word: %s  Position in vocabulary: %d' % (w, wv.rank(w))
+                print('Word: %s  Position in vocabulary: %d' % (w, wv.rank(w)))
         if not missing:
             process_query(wv, query, options)
 
 def output_nearest(nearest, options, out=sys.stdout):
     # word2vec distance.c output header
     output_header = '\n'+46*' '+'Word       Cosine distance\n'+72*'-'
     if not options.quiet:
-        print >> out, output_header
+        print(output_header, file=out)
         fmt = '%50s\t\t%f'
     else:
         fmt = '%s\t%f'
     for w, s in nearest:
-        print >> out, fmt % (w, s)
-    print >> out
+        print(fmt % (w, s), file=out)
+    print(file=out)
diff --git a/evalclass.py b/evalclass.py
@@ -51,7 +51,7 @@ def read_words(fn, encoding=ENCODING):
         for l in f:
             w = l.rstrip()
             if ' ' in w:
-                print >> sys.stderr, 'Skip multiword: "%s"' % w
+                print('Skip multiword: "%s"' % w, file=sys.stderr)
                 continue
             words.append(w)
     return words
@@ -71,7 +71,8 @@ def read_wordsets(fns):
 def enough_data(wordsets):
     assert not any(s for s in wordsets if len(s) == 0), 'empty set'
     if len(wordsets) < 2:
-        print >> sys.stderr, 'error: at least two non-empty word sets required'
+        print('error: at least two non-empty word sets required',
+              file=sys.stderr)
         argparser().print_help()
         return False
     else:
@@ -108,14 +109,14 @@ def compare_sets(set1, name1, set2, name2, w2v, options=None):
                 correct += score(w1, w2, w, w2v, SECOND)
                 total += 1
     if not total:
-        print >> sys.stderr, '%s - %s: No comparisons succeeded!' % \
-            (name1, name2)
+        print('%s - %s: No comparisons succeeded!' % \
+              (name1, name2), file=sys.stderr)
         return None
     else:
         avg = 1.*correct/total
         if not options or not options.quiet:
-            print 'AVERAGE %s - %s: %.2f%% (%d/%d)' % \
-                (name1, name2, 100*avg, correct, total)
+            print('AVERAGE %s - %s: %.2f%% (%d/%d)' % \
+                  (name1, name2, 100*avg, correct, total))
         return avg
 
 def main(argv=None):
@@ -145,13 +146,13 @@ def main(argv=None):
             if w in w2v:
                 filtered.append(w)
             else:
-                logging.warn('ignoring out-of-vocabulary word "%s"' % w)
+                logging.warning('ignoring out-of-vocabulary word "%s"' % w)
                 oov_count += 1
             word_count += 1
         if filtered:
             filtered_wordsets[k] = filtered
         else:
-            logging.warn('wordset %s empty after OOV filtering, removing' % k)
+            logging.warning('wordset %s empty after OOV filtering, removing' % k)
     wordsets = filtered_wordsets
 
     if not enough_data(wordsets):
@@ -164,14 +165,15 @@ def main(argv=None):
             results.append(result)
 
     if not options.quiet:
-        print >> sys.stderr, 'out of vocabulary %d/%d (%.2f%%)' % \
-            (oov_count, word_count, 100.*oov_count/word_count)
+        print('out of vocabulary %d/%d (%.2f%%)' % \
+              (oov_count, word_count, 100.*oov_count/word_count),
+              file=sys.stderr)
 
     if results:
-        print 'OVERALL AVERAGE (macro):\t%.2f%%\t(%.2f%% OOV)' % \
-            (100*sum(results)/len(results), 100.*oov_count/word_count)
+        print('OVERALL AVERAGE (macro):\t%.2f%%\t(%.2f%% OOV)' % \
+              (100*sum(results)/len(results), 100.*oov_count/word_count))
     else:
-        print >> sys.stderr, 'All comparisons failed!'
+        print('All comparisons failed!', file=sys.stderr)
 
 if __name__ == '__main__':
     sys.exit(main(sys.argv))

diff --git a/evalset.py b/evalset.py
@@ -179,8 +179,8 @@ def report(TPp, TPg, FP, FN, header=None, out=sys.stdout):
     p, r, F = prec_rec_F(TPp, TPg, FP, FN)
     if header is not None:
         out.write(header)
-    print >> out, "precision %.2f%% (%d/%d) recall %.2f%% (%d/%d) F %.2f%%" % \
-        (p, TPp, TPp+FP, r, TPg, TPg+FN, F)
+    print("precision %.2f%% (%d/%d) recall %.2f%% (%d/%d) F %.2f%%" % \
+          (p, TPp, TPp+FP, r, TPg, TPg+FN, F), file=out)
 
 def evaluate_set(queries, targets, accept, setname, nearest, options):
     tTPp, tTPg, tFP, tFN = 0, 0, 0, 0
@@ -336,7 +336,7 @@ def main(argv=None):
         word_sets.append((targets, accept, name))
 
     avg_F = evaluate_sets(infn, word_sets, options)
-    print 'OVERALL AVERAGE:\t%.2f%%\t(F-score)' % avg_F
+    print('OVERALL AVERAGE:\t%.2f%%\t(F-score)' % avg_F)
 
     return 0
 

diff --git a/nearest.py b/nearest.py
@@ -33,8 +33,8 @@ def main(argv=None):
     try:
         wv = wvlib.load(options.vectors, max_rank=options.max_rank)
         wv = wv.normalize()
-    except Exception, e:
-        print >> sys.stderr, 'Error: %s' % str(e)
+    except Exception as e:
+        print('Error: %s' % str(e), file=sys.stderr)
         return 1
     return query_loop(wv, options, process_query)
 

diff --git a/wvlib.py b/wvlib.py
@@ -104,7 +104,7 @@
 
 from functools import partial
 from itertools import tee, islice
-from io import StringIO
+from io import BytesIO, StringIO
 from time import time
 from collections import defaultdict
 import struct
@@ -813,7 +813,13 @@ def load_numpy(cls, f, max_rank=None):
                 # for partial load will no longer work
                 logging.warning('no numpy.numarray, -r disabled for numpy data')
                 # TODO: reshape anyway
-            v = cls(numpy.load(f))
+
+            # hack around numpy failing to load from _FileInFile, see
+            # https://github.com/numpy/numpy/issues/7989#issuecomment-340921579
+            tmp = BytesIO()
+            tmp.write(f.read())
+            tmp.seek(0)
+            v = cls(numpy.load(tmp))
         return v
 
     @classmethod