forked from gotthespirit/SpellChecker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsuggAlgo.py
executable file
·80 lines (79 loc) · 4.32 KB
/
suggAlgo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# This module takes a word that is not in the dictionary and finds suitable suggestions for it.
# Made by - Pratik Shastri.
class wrongWord:
    """A misspelled word together with the word list used to correct it.

    Typical use: construct the object, call readDict() to load the word
    list, call sortLen() to bucket it by length, then call suggWords().

    Attributes:
        wWord: the word to find suggestions for.
        fname: path of the whitespace-separated word-list file.
        dictionary: every token read by readDict(); after sortLen() only the
            tokens whose length equals len(wWord).
        lenLst1: tokens whose length differs from len(wWord) by exactly 1.
        lenLst2: tokens whose length differs from len(wWord) by exactly 2.
        wLen: len(wWord), cached at construction time.
    """

    def __init__(self, wWord, fname):
        """Store the word and the word-list path; no file access happens here."""
        self.wWord = wWord
        self.fname = fname
        self.dictionary = []
        self.lenLst1 = []
        self.lenLst2 = []
        self.wLen = len(wWord)

    def readDict(self):
        """Read the word-list file and store its whitespace-separated tokens."""
        # Read-only mode is enough (no write permission needed on the file),
        # and the with-block closes the handle even if read() raises.
        with open(self.fname, "r") as handle:
            self.dictionary = handle.read().split()

    def sortLen(self):
        """Bucket the loaded tokens by how far their length is from len(wWord).

        Tokens of equal length stay in self.dictionary, tokens one character
        longer or shorter go to self.lenLst1, and tokens two characters
        longer or shorter go to self.lenLst2.  All other tokens are dropped.
        Because self.dictionary is overwritten, call this once per readDict().
        """
        target = len(self.wWord)
        same, offByOne, offByTwo = [], [], []
        buckets = {0: same, 1: offByOne, 2: offByTwo}
        for token in self.dictionary:
            bucket = buckets.get(abs(len(token) - target))
            if bucket is not None:
                bucket.append(token)
        self.lenLst1 = offByOne
        self.lenLst2 = offByTwo
        self.dictionary = same

    def _sharesLongSubstring(self, token):
        """Return True if token's leading wLen, leading wLen - 1 or
        trailing (from index 1) characters occur inside wWord."""
        n = self.wLen
        return (token[0:n] in self.wWord
                or token[0:n - 1] in self.wWord
                or token[1:n] in self.wWord)

    def _mismatches(self, token):
        """Return how many of wWord's positions do not match token.

        zip() stops at the shorter string, so positions missing from a
        shorter token count as mismatches instead of raising IndexError.
        """
        matches = sum(1 for a, b in zip(self.wWord, token) if a == b)
        return self.wLen - matches

    def _candidates(self):
        """Yield candidate tokens in priority order; tokens may repeat."""
        first = self.wWord[:1]  # Slicing avoids IndexError on an empty word.
        # 1. Same length, same first letter, and a long shared substring.
        for token in self.dictionary:
            if token[:1] == first and self._sharesLongSubstring(token):
                yield token
        # 2. Same length and a long shared substring, any first letter.
        for token in self.dictionary:
            if self._sharesLongSubstring(token):
                yield token
        # 3. Length differs by 1, then by 2: one word contains the other.
        for bucket in (self.lenLst1, self.lenLst2):
            for token in bucket:
                if token in self.wWord or self.wWord in token:
                    yield token
        # 4. Same length with at most k mismatching positions, for k = 0..4;
        #    within each k, words sharing the first letter come first.
        #    Mismatch counts do not depend on k, so compute them once.
        scored = [(token, self._mismatches(token)) for token in self.dictionary]
        for k in range(5):
            for token, missed in scored:
                if missed <= k and token[:1] == first:
                    yield token
            for token, missed in scored:
                if missed <= k:
                    yield token

    def suggWords(self, maxSugg=20):
        """Return up to maxSugg distinct suggestions for wWord, best first.

        Expects readDict() and sortLen() to have been called.  Candidates
        are taken in this order: same-length words sharing the first letter
        and a substring of length wLen - 1 (or more), same-length words
        sharing such a substring, words one character longer or shorter that
        contain or are contained in wWord, words two characters longer or
        shorter with the same property, and finally same-length words with
        at most 0, 1, 2, 3 and 4 mismatching positions.

        Args:
            maxSugg: maximum number of suggestions to return (default 20).

        Returns:
            A list of at most maxSugg tokens with no repeated entries.
        """
        sugg = []
        if maxSugg <= 0:
            return sugg
        seen = set()  # The word list may repeat tokens; suggest each once.
        for token in self._candidates():
            if token in seen:
                continue
            seen.add(token)
            sugg.append(token)
            if len(sugg) >= maxSugg:  # Enforce the cap after every append.
                break
        return sugg
if __name__ == "__main__":  # Module-specific code for manual test cases.
    # raw_input exists only on Python 2; fall back to input on Python 3 so
    # the script runs on both.
    try:
        readWord = raw_input
    except NameError:
        readWord = input
    inpt = readWord()  # Reads a word from standard input.
    wW = wrongWord(inpt, "big.txt")  # wW object is created from the wrongWord class.
    wW.readDict()  # Dictionary file is read.
    wW.sortLen()  # Dictionary words are bucketed according to their length.
    suggestions = wW.suggWords()  # Suggestions list is made.
    # Parenthesised single-argument print gives the same output on Python 2
    # and Python 3, and keeps the module importable on Python 3.
    print(suggestions)