# Patch summary for nltk_contrib/stringcomp.py (three hunks):
#
#   1. stringcomp() docstring: the reference URL for the paper "Faster
#      Algorithm of String Comparison" changes from
#      http://front.math.ucdavis.edu/0112.6022 to
#      https://arxiv.org/abs/cs/0112022. No other docstring text changes.
#   2. stringcomp() body: `if fx_removed == False:` becomes
#      `if not fx_removed:`. The visible code only ever assigns True to
#      fx_removed; its initial value lies outside this hunk, so the two
#      spellings are presumably equivalent here -- confirm it is always a bool.
#   3. demo(): Python 2 `print` statements using %-formatting become print()
#      calls using str.format ("%.2f" -> "{0:0.2f}" / "{1:0.2f}", still two
#      decimals), the compound statement `max_score = 0.0 ; best_match = None`
#      is split into two statements, and the single blank line before the
#      `if __name__ == "__main__":` guard becomes two blank lines.
#
# NOTE(review): this patch was recovered from a copy whose newlines had been
# collapsed. Line breaks were rebuilt from the hunk line counts (7/7, 7/7,
# 20/22); leading indentation and any trailing whitespace on blank lines are
# assumptions -- verify against the target file, or apply with whitespace
# tolerance, before relying on it.
diff --git a/nltk_contrib/stringcomp.py b/nltk_contrib/stringcomp.py
index 5c2b2ee..d2c7e0c 100644
--- a/nltk_contrib/stringcomp.py
+++ b/nltk_contrib/stringcomp.py
@@ -18,7 +18,7 @@ def stringcomp (fx, fy):
     This is an implementation of the string comparison algorithm (also known
     as "string similarity") published by Qi Xiao Yang, Sung Sam Yuan, Li Zhao,
     Lu Chun and Sun Peng in a paper called "Faster Algorithm of String
-    Comparison" ( http://front.math.ucdavis.edu/0112.6022 ). Please note that,
+    Comparison" ( https://arxiv.org/abs/cs/0112022 ). Please note that,
     however, this implementation presents some relevant differences that
     will lead to different numerical results (read the comments for more
     details).
@@ -74,7 +74,7 @@ def stringcomp (fx, fy):
                     index = fy.find(pattern)
                     if index != -1:
                         ssnc += (2.*length)**2
-                        if fx_removed == False:
+                        if not fx_removed:
                             fx = fx[:i] + fx[i+length:]
                             fx_removed = True
                         fy = fy[:index] + fy[index+length:]
@@ -89,20 +89,22 @@ def stringcomp (fx, fy):
 
 
 def demo ():
-    print "Comparison between 'python' and 'python': %.2f" % stringcomp("python", "python")
-    print "Comparison between 'python' and 'Python': %.2f" % stringcomp("python", "Python")
-    print "Comparison between 'NLTK' and 'NTLK': %.2f" % stringcomp("NLTK", "NTLK")
-    print "Comparison between 'abc' and 'def': %.2f" % stringcomp("abc", "def")
-
-    print "Word most similar to 'australia' in list ['canada', 'brazil', 'egypt', 'thailand', 'austria']:"
-    max_score = 0.0 ; best_match = None
+    print("Comparison between 'python' and 'python': {0:0.2f}".format(stringcomp("python", "python")))
+    print("Comparison between 'python' and 'Python': {0:0.2f}".format(stringcomp("python", "Python")))
+    print("Comparison between 'NLTK' and 'NTLK': {0:0.2f}".format(stringcomp("NLTK", "NTLK")))
+    print("Comparison between 'abc' and 'def': {0:0.2f}".format(stringcomp("abc", "def")))
+
+    print("Word most similar to 'australia' in list ['canada', 'brazil', 'egypt', 'thailand', 'austria']:")
+    max_score = 0.0
+    best_match = None
     for country in ["canada", "brazil", "egypt", "thailand", "austria"]:
         score = stringcomp("australia", country)
         if score > max_score:
             best_match = country
             max_score = score
-        print "(comparison between 'australia' and '%s': %.2f)" % (country, score)
-    print "Word most similar to 'australia' is '%s' (score: %.2f)" % (best_match, max_score)
-
+        print("(comparison between 'australia' and '{0}': {1:0.2f})".format(country, score))
+    print("Word most similar to 'australia' is '{0}' (score: {1:0.2f})".format(best_match, max_score))
+
+
 if __name__ == "__main__":
     demo()