-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfind_fwords.py
124 lines (89 loc) · 3 KB
/
find_fwords.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def extractType(line):
    """Return the trailing token of each payload field of ``line``.

    ``line`` is split on ``'|'``. Every resulting field that is not equal
    to one of the first three fields is split on tabs, and the last
    tab-separated piece (stripped of surrounding whitespace) is collected.

    NOTE: the first three fields are excluded by *value*, not by position,
    so a later field whose text is identical to one of them (for example
    an empty field) is skipped as well.

    Returns:
        A list of strings, in the order the fields appear in ``line``.
    """
    sw = line.split('|')
    # Slicing never raises, so lines with fewer than three fields simply
    # yield an empty result.
    header_fields = sw[:3]
    return [
        field.split('\t')[-1].strip()
        for field in sw
        if field not in header_fields
    ]
def createLexs(flist):
    """Build the Swedish, Bliss and Aras lexicons from an iterable of lines.

    Blank lines are ignored. Every other line is split on tabs: the text
    before the first ``'||'`` in the first column (stripped) is the key
    ``cc``, and the second column is a type code selecting the lexicon:

    * ``'6,0 | 6'``   -> Swedish word(s)
    * ``'5,0 | 5'``   -> Bliss file(s)
    * ``'20,0 | 20'`` -> Aras file(s)

    Lines with any other type code are skipped. For a matching line the
    list returned by ``extractType(line)`` is appended under ``cc``.

    Args:
        flist: iterable of text lines (e.g. an open file).

    Returns:
        A tuple ``(cc_swe, cc_bliss, cc_aras)`` of dicts, each mapping a
        ``cc`` key to a list holding one ``extractType`` result per
        matching line.

    Raises:
        IndexError: if a non-blank line contains no tab, i.e. has no
            second column.
    """
    cc_swe = {}
    cc_bliss = {}
    cc_aras = {}
    # Type code in the second column -> lexicon that receives the entry.
    lex_by_type = {
        '6,0 | 6': cc_swe,
        '5,0 | 5': cc_bliss,
        '20,0 | 20': cc_aras,
    }
    for line in flist:
        line = line.strip()
        if not line:
            continue
        things = line.split('\t')
        cc = things[0].split('||')[0].strip()
        lex = lex_by_type.get(things[1])
        if lex is None:
            continue
        entries = extractType(line)
        # setdefault replaces dict.has_key(), which no longer exists in
        # Python 3, and works unchanged on Python 2.
        lex.setdefault(cc, []).append(entries)
    return cc_swe, cc_bliss, cc_aras
def _flatten_groups(groups):
    """Concatenate a list of lists into one flat list, preserving order."""
    return [item for group in groups for item in group]


def matchTypes(cc_swe, cc_bliss, cc_aras):
    """Map every Swedish entry to the Bliss and Aras entries sharing its key.

    For each key of ``cc_swe`` the Swedish entries are flattened. If the
    same key exists in ``cc_bliss`` (respectively ``cc_aras``), all of its
    flattened entries are appended to the result list of every Swedish
    entry. Entries are accumulated across keys and are not de-duplicated.

    Args:
        cc_swe: dict mapping a key to a list of lists of Swedish entries.
        cc_bliss: dict mapping a key to a list of lists of Bliss entries.
        cc_aras: dict mapping a key to a list of lists of Aras entries.

    Returns:
        A tuple ``(swe_bliss, swe_aras)`` of dicts mapping a Swedish entry
        to the list of Bliss / Aras entries collected for it.
    """
    swe_bliss = {}
    swe_aras = {}
    # Both targets are filled by the same procedure, so run it once per
    # (source lexicon, result dict) pair instead of duplicating the code.
    pairings = ((cc_bliss, swe_bliss), (cc_aras, swe_aras))
    for key, swe_groups in cc_swe.items():
        swe_entries = _flatten_groups(swe_groups)
        for source, result in pairings:
            # "in" replaces dict.has_key(), which was removed in Python 3.
            if key not in source:
                continue
            matched = _flatten_groups(source[key])
            for entry in swe_entries:
                result.setdefault(entry, []).extend(matched)
    return swe_bliss, swe_aras
# Build the lookup tables once, at import time, from 'merged_cro_120702.txt'.
# The context manager closes the file as soon as it has been parsed; the
# name ``flist`` stays bound (to the closed file object) afterwards.
with open('merged_cro_120702.txt', 'r') as flist:
    swelex, blisslex, araslex = createLexs(flist)
swebliss, swearas = matchTypes(swelex, blisslex, araslex)
def wordToBliss(word):
    """Look up ``word`` in the module-level ``swebliss`` table.

    Returns the list stored for ``word``, or ``None`` when there is none.
    """
    bliss_entries = swebliss.get(word)
    return bliss_entries
def translate(word, lang="bliss"):
    """Look up ``word`` in the table selected by ``lang``.

    ``lang`` may be ``"bliss"`` (the default, uses ``swebliss``) or
    ``"aras"`` (uses ``swearas``). Returns the list stored for ``word``,
    or ``None`` when the word is missing or ``lang`` is any other value.
    """
    if lang == "bliss":
        table = swebliss
    elif lang == "aras":
        table = swearas
    else:
        return None
    return table.get(word)
if __name__ == "__main__":
    # Sample keys for manual experiments; the list is only defined here.
    wordlist = [
        'hur.vb.01',
        'var.vb.01',
        'mig.nn.03',
        'sig.pnm.02',
        'eller.prep.02',
        'endera.vb.01',
    ]
    # NOTE: For the Wordnet-CCF look-up, it might be better to use my files
    # as the lexicon instead: look up the wordnet index number and extract
    # the CCF from there?