-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_lexicons.py
More file actions
executable file
·25 lines (22 loc) · 1.24 KB
/
generate_lexicons.py
File metadata and controls
executable file
·25 lines (22 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from collections import OrderedDict, Counter
import numpy as np
import feat_gen
import glob
def gen():
    """Convert the raw lexicon word lists into saved lookup dictionaries.

    Every path matched by ``data/lexicon/*`` whose base name is not in the
    exclusion set below is read line by line.  Each line is stripped of
    surrounding whitespace and lower-cased, then stored as a key (with the
    value ``1``) in an ``OrderedDict``, so keys keep first-seen order and
    duplicates collapse.  The dictionary is then written with ``np.save``
    under ``lexicon_features/<base name>``; ``np.save`` appends ``.npy``
    when the name does not already end with it.

    All paths are relative to the current working directory.  The output
    directory is created if it does not exist.  Nothing is returned.
    """
    import os  # local import: only this function needs it

    # Base names inside data/lexicon/ that must not be converted.
    excluded = frozenset([
        'cap.10', 'cap.100', 'cap.500',
        'firstname.10', 'firstname.100', 'firstname.500', 'firstname.1000',
        'lastname.10', 'lastname.100', 'lastname.500', 'lastname.1000',
        'lower.100', 'lower.500', 'lower.1000', 'lower.5000',
        'dictionaries.conf',
    ])

    out_dir = "lexicon_features"
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for filename in glob.glob("data/lexicon/*"):
        # Use the real base name rather than slicing off a fixed-length
        # prefix, so the result does not depend on the path separator.
        name = os.path.basename(filename)
        if name in excluded:
            continue

        # A plain local dictionary: writing through vars()/locals() is not
        # reliable inside a function (each call returns an independent
        # snapshot on Python 3.13+).
        lexicon = OrderedDict()
        with open(filename, "r") as f:
            for word in f:
                lexicon[word.strip().lower()] = 1

        # np.save stores the dict as a pickled 0-d object array; load it back
        # with np.load(path, allow_pickle=True).item().
        np.save(os.path.join(out_dir, name), lexicon)
if __name__ == "__main__":
    # Run the lexicon conversion only when executed as a script,
    # not when this module is imported.
    gen()