Skip to content

Commit

Permalink
Py3 fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
spyysalo committed Oct 9, 2019
1 parent 7b276a1 commit 60411af
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 31 deletions.
4 changes: 2 additions & 2 deletions analogy.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def main(argv=None):
try:
wv = wvlib.load(options.vectors, max_rank=options.max_rank)
wv = wv.normalize()
except Exception, e:
print >> sys.stderr, 'Error: %s' % str(e)
except Exception as e:
print('Error: %s' % str(e), file=sys.stderr)
return 1
return query_loop(wv, options, process_query, query_count=3)

Expand Down
18 changes: 9 additions & 9 deletions common.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def process_args(args, prompt='Enter words'):

def get_line(prompt, exit_word=None):
try:
s = raw_input(prompt)
s = input(prompt)
except KeyboardInterrupt: # CTRL-C
raise EOFError
if s.strip() == exit_word:
Expand Down Expand Up @@ -98,31 +98,31 @@ def query_loop(wv, options, process_query, query_count=1):
if empty_query(query):
continue
if options.echo:
print query
print(query)
if len(query) < query_count:
print >> sys.stderr, 'Enter %d words/phrases' % query_count
print('Enter %d words/phrases' % query_count, file=sys.stderr)
continue
if len(query) > query_count:
print >> sys.stderr, 'Ignoring extra words/phrases'
print('Ignoring extra words/phrases', file=sys.stderr)
query = query[:query_count]
words, missing = [w for q in query for w in q], []
for w in uniq(words):
if w not in wv:
print >> sys.stderr, 'Out of dictionary word: %s' % str(w)
print('Out of dictionary word: %s' % str(w), file=sys.stderr)
missing.append(w)
elif not options.quiet:
print 'Word: %s Position in vocabulary: %d' % (w, wv.rank(w))
print('Word: %s Position in vocabulary: %d' % (w, wv.rank(w)))
if not missing:
process_query(wv, query, options)

def output_nearest(nearest, options, out=sys.stdout):
# word2vec distance.c output header
output_header = '\n'+46*' '+'Word Cosine distance\n'+72*'-'
if not options.quiet:
print >> out, output_header
print(output_header, file=out)
fmt = '%50s\t\t%f'
else:
fmt = '%s\t%f'
for w, s in nearest:
print >> out, fmt % (w, s)
print >> out
print(fmt % (w, s), file=out)
print(file=out)
28 changes: 15 additions & 13 deletions evalclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def read_words(fn, encoding=ENCODING):
for l in f:
w = l.rstrip()
if ' ' in w:
print >> sys.stderr, 'Skip multiword: "%s"' % w
print('Skip multiword: "%s"' % w, file=sys.stderr)
continue
words.append(w)
return words
Expand All @@ -71,7 +71,8 @@ def read_wordsets(fns):
def enough_data(wordsets):
assert not any(s for s in wordsets if len(s) == 0), 'empty set'
if len(wordsets) < 2:
print >> sys.stderr, 'error: at least two non-empty word sets required'
print('error: at least two non-empty word sets required',
file=sys.stderr)
argparser().print_help()
return False
else:
Expand Down Expand Up @@ -108,14 +109,14 @@ def compare_sets(set1, name1, set2, name2, w2v, options=None):
correct += score(w1, w2, w, w2v, SECOND)
total += 1
if not total:
print >> sys.stderr, '%s - %s: No comparisons succeeded!' % \
(name1, name2)
print('%s - %s: No comparisons succeeded!' % \
(name1, name2), file=sys.stderr)
return None
else:
avg = 1.*correct/total
if not options or not options.quiet:
print 'AVERAGE %s - %s: %.2f%% (%d/%d)' % \
(name1, name2, 100*avg, correct, total)
print('AVERAGE %s - %s: %.2f%% (%d/%d)' % \
(name1, name2, 100*avg, correct, total))
return avg

def main(argv=None):
Expand Down Expand Up @@ -145,13 +146,13 @@ def main(argv=None):
if w in w2v:
filtered.append(w)
else:
logging.warn('ignoring out-of-vocabulary word "%s"' % w)
logging.warning('ignoring out-of-vocabulary word "%s"' % w)
oov_count += 1
word_count += 1
if filtered:
filtered_wordsets[k] = filtered
else:
logging.warn('wordset %s empty after OOV filtering, removing' % k)
logging.warning('wordset %s empty after OOV filtering, removing' % k)
wordsets = filtered_wordsets

if not enough_data(wordsets):
Expand All @@ -164,14 +165,15 @@ def main(argv=None):
results.append(result)

if not options.quiet:
print >> sys.stderr, 'out of vocabulary %d/%d (%.2f%%)' % \
(oov_count, word_count, 100.*oov_count/word_count)
print('out of vocabulary %d/%d (%.2f%%)' % \
(oov_count, word_count, 100.*oov_count/word_count),
file=sys.stderr)

if results:
print 'OVERALL AVERAGE (macro):\t%.2f%%\t(%.2f%% OOV)' % \
(100*sum(results)/len(results), 100.*oov_count/word_count)
print('OVERALL AVERAGE (macro):\t%.2f%%\t(%.2f%% OOV)' % \
(100*sum(results)/len(results), 100.*oov_count/word_count))
else:
print >> sys.stderr, 'All comparisons failed!'
print('All comparisons failed!', file=sys.stderr)

if __name__ == '__main__':
sys.exit(main(sys.argv))
Expand Down
6 changes: 3 additions & 3 deletions evalset.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ def report(TPp, TPg, FP, FN, header=None, out=sys.stdout):
p, r, F = prec_rec_F(TPp, TPg, FP, FN)
if header is not None:
out.write(header)
print >> out, "precision %.2f%% (%d/%d) recall %.2f%% (%d/%d) F %.2f%%" % \
(p, TPp, TPp+FP, r, TPg, TPg+FN, F)
print("precision %.2f%% (%d/%d) recall %.2f%% (%d/%d) F %.2f%%" % \
(p, TPp, TPp+FP, r, TPg, TPg+FN, F), file=out)

def evaluate_set(queries, targets, accept, setname, nearest, options):
tTPp, tTPg, tFP, tFN = 0, 0, 0, 0
Expand Down Expand Up @@ -336,7 +336,7 @@ def main(argv=None):
word_sets.append((targets, accept, name))

avg_F = evaluate_sets(infn, word_sets, options)
print 'OVERALL AVERAGE:\t%.2f%%\t(F-score)' % avg_F
print('OVERALL AVERAGE:\t%.2f%%\t(F-score)' % avg_F)

return 0

Expand Down
4 changes: 2 additions & 2 deletions nearest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def main(argv=None):
try:
wv = wvlib.load(options.vectors, max_rank=options.max_rank)
wv = wv.normalize()
except Exception, e:
print >> sys.stderr, 'Error: %s' % str(e)
except Exception as e:
print('Error: %s' % str(e), file=sys.stderr)
return 1
return query_loop(wv, options, process_query)

Expand Down
10 changes: 8 additions & 2 deletions wvlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@

from functools import partial
from itertools import tee, islice
from io import StringIO
from io import BytesIO, StringIO
from time import time
from collections import defaultdict
import struct
Expand Down Expand Up @@ -813,7 +813,13 @@ def load_numpy(cls, f, max_rank=None):
# for partial load will no longer work
logging.warning('no numpy.numarray, -r disabled for numpy data')
# TODO: reshape anyway
v = cls(numpy.load(f))

# hack around numpy failing to load from _FileInFile, see
# https://github.com/numpy/numpy/issues/7989#issuecomment-340921579
tmp = BytesIO()
tmp.write(f.read())
tmp.seek(0)
v = cls(numpy.load(tmp))
return v

@classmethod
Expand Down

0 comments on commit 60411af

Please sign in to comment.