forked from MLOPEZMALET/Lexicology2019
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvector1.solution.py
64 lines (45 loc) · 1.5 KB
/
vector1.solution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import sys
import re
import math
import numpy as np
def normalize(word):
    """Return *word* in lower case with basic punctuation removed.

    The characters stripped are the full stop, comma, question mark, colon,
    semicolon, double quote, single quote and hyphen. Every other character
    is kept unchanged.
    """
    lowered = word.lower()
    punctuation = re.compile(r"[\.\,\?\:\;\"\'\-]")
    return punctuation.sub(r"", lowered)
# The list of words used both as target words (w) and as context words (c),
# one per line of "words.lst". The context manager closes the file as soon
# as loading finishes.
with open("words.lst", "r") as f:
    vocabulary = [line.rstrip() for line in f]

# Size the matrix M from the vocabulary: one row and one column per word.
# Initialise the numpy matrix with zeros.
M = np.zeros([len(vocabulary), len(vocabulary)])
def main():
    """Count word co-occurrences in a corpus and answer one interactive query.

    Reads the text file named by ``sys.argv[1]`` line by line. For every
    token whose normalized form is in ``vocabulary``, each *other*
    in-vocabulary token at most five positions away on the same line adds one
    to ``M[target, context]``. The user is then asked for two words and the
    matching cell of ``M`` is printed (or a message if a word is unknown or
    fewer than two words were typed). Finally a row-normalized copy ``N`` and
    a column-wise L2-normalized copy ``N2`` of ``M`` are computed.

    Relies on the module-level ``vocabulary``, ``M`` and ``normalize``;
    ``M`` is updated in place.
    """
    window = 5  # number of neighbours considered on each side of a word

    # Map each word to its first position in `vocabulary` (the same index
    # list.index() would return) so lookups do not rescan the list.
    index = {}
    for position, word in enumerate(vocabulary):
        index.setdefault(word, position)

    with open(sys.argv[1], "r") as corpus:
        for line in corpus:
            # Resolve every token once; None marks out-of-vocabulary tokens.
            ids = [index.get(normalize(token)) for token in line.split()]
            for k, target in enumerate(ids):
                if target is None:
                    continue
                # Clamp the window to the bounds of the line.
                first = max(0, k - window)
                last = min(len(ids), k + window + 1)
                for i in range(first, last):
                    context = ids[i]
                    # A word is not counted as its own context.
                    if i != k and context is not None:
                        M[target, context] += 1

    user = input("Entrez deux mots: ")
    ws = user.split()
    if len(ws) < 2:
        # Without this guard, indexing ws below would raise IndexError.
        print("Veuillez entrer deux mots.")
    else:
        try:
            row = index[normalize(ws[0])]
            col = index[normalize(ws[1])]
        except KeyError:
            print("Je ne trouve pas ce mot.")
        else:
            print(M[row, col])

    # N: each row of M divided by that row's sum. Rows that sum to zero are
    # left as zeros instead of producing NaN from a 0/0 division.
    row_sums = M.sum(axis=1, keepdims=True)
    N = np.divide(M, row_sums, out=np.zeros_like(M), where=row_sums != 0)

    # N2: each column of M divided by that column's Euclidean norm, with the
    # same protection for all-zero columns.
    col_norms = np.linalg.norm(M, axis=0, keepdims=True)
    N2 = np.divide(M, col_norms, out=np.zeros_like(M), where=col_norms != 0)
    # NOTE: neither N nor N2 is used further in this function.
# Run the program only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()