-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake.py
42 lines (35 loc) · 1.21 KB
/
make.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import nltk
from nltk.corpus import wordnet as wn
from tqdm import tqdm
import re
# Matches strings made up solely of lowercase ASCII letters; used with
# ``fullmatch`` so a trailing newline can never slip through the way it
# can with ``^...$`` and ``match``.
_LOWER_ASCII_WORD = re.compile(r"[a-z]+")

# WordNet part-of-speech tag -> output file, in the order the files are written.
OUTPUT_FILES = (
    ("n", "nouns.txt"),
    ("a", "adjectives.txt"),
    ("v", "verbs.txt"),
)


def is_candidate(word: str, min_len: int = 4, max_len: int = 6) -> bool:
    """Return True if *word* is a short, all-lowercase ASCII word.

    A word qualifies when it consists only of the letters a-z and its
    length lies in the inclusive range ``min_len``..``max_len``. Anything
    containing uppercase letters, digits, underscores, whitespace or
    non-ASCII characters is rejected.

    >>> is_candidate("apple")
    True
    >>> is_candidate("apple_2")
    False
    """
    if not min_len <= len(word) <= max_len:
        return False
    return _LOWER_ASCII_WORD.fullmatch(word) is not None


def select_words(names, min_len: int = 4, max_len: int = 6) -> list:
    """Return the sorted, de-duplicated candidates found in *names*.

    *names* is any iterable of strings; each one is tested with
    :func:`is_candidate` using the given length bounds.
    """
    return sorted({name for name in names if is_candidate(name, min_len, max_len)})


def count_possibilities(word_lists) -> int:
    """Return the number of ways to pick one word from each list.

    This is the product of the list lengths, so it is 0 as soon as any
    list is empty and 1 when *word_lists* itself is empty.
    """
    total = 1
    for word_list in word_lists:
        total *= len(word_list)
    return total


def write_word_list(path: str, words) -> None:
    """Write *words* to *path*, one word per line, replacing any existing file."""
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(word + "\n" for word in words)


def main() -> None:
    """Build the noun, verb and adjective word lists from WordNet.

    Downloads the ``wordnet`` and ``omw-1.4`` NLTK resources, collects the
    lemma names of every synset for each part of speech, keeps the ones
    accepted by :func:`is_candidate`, prints the size of each list and the
    number of noun/verb/adjective combinations, and writes the lists to
    ``nouns.txt``, ``adjectives.txt`` and ``verbs.txt``.
    """
    nltk.download("wordnet")
    nltk.download("omw-1.4")

    words = {}
    for category in ("n", "v", "a"):
        # Materialise the synsets first so the progress bar has a length
        # to report against.
        synsets = list(wn.all_synsets(category))
        lemma_names = (
            name for synset in tqdm(synsets) for name in synset.lemma_names()
        )
        words[category] = select_words(lemma_names)

    for category, word_list in words.items():
        print(category, len(word_list))

    # Derived from the lists actually built, so the figure cannot drift
    # out of sync with the WordNet data or the filter.
    print(f"{count_possibilities(words.values()):_} possibilities")

    for category, path in OUTPUT_FILES:
        write_word_list(path, words[category])


if __name__ == "__main__":
    main()