Skip to content

Commit 7adde21

Browse files
committed
Add Examples
0 parents  commit 7adde21

30 files changed

+10883
-0
lines changed

__init__.py

Whitespace-only changes.

composer/__init__.py

Whitespace-only changes.

composer/definition.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
from typing import List, Dict, Set
2+
3+
from models.composer import Composer
4+
from models.definer import Definer, NotFoundException
5+
from models.reader import Reader
6+
from models.tokenizer import Tokenizer
7+
from models.middleware import Middleware
8+
from models.word import WordDefinition
9+
from models.writer import Writer
10+
11+
12+
class DefinitionComposer(Composer):
    """
    Compose word definitions from an input source.

    Pipeline: read raw text -> tokenize -> normalize (lowercase +
    deduplicate) -> filter tokens through each middleware in order ->
    look up every surviving word with the definer. Words the definer
    cannot resolve are reported and skipped rather than aborting the run.
    """

    def __init__(self, tokenizer: Tokenizer, middlewares: List[Middleware], definer: Definer):
        """
        Definition composer constructor

        :param tokenizer: tokenizes the input
        :param middlewares: validates tokenized input whether it is acceptable;
            applied in list order, each further shrinking the word set
        :param definer: defines validated input words
        """
        self.tokenizer: Tokenizer = tokenizer
        self.middlewares: List[Middleware] = middlewares
        self.definer: Definer = definer

    def compose(self, reader: Reader) -> Dict[str, List[WordDefinition]]:
        """
        Compose definitions for the reader's content.

        :param reader: source of the raw text to process
        :return: mapping of word -> definitions; words the definer raises
            NotFoundException for are omitted from the result
        """
        tokenized = self.tokenizer.tokenize(reader.read())
        print("number of tokens: " + str(len(tokenized)))
        normalized = self.__normalize_data(tokenized)
        print("number of tokens after normalization: " + str(len(normalized)))

        for middleware in self.middlewares:
            # NOTE(review): `excluded` is printed verbatim in a
            # "{} words excluded" message, so it presumably is a count —
            # confirm against Middleware.validate's return contract.
            normalized, excluded = middleware.validate(normalized)
            print("{} words excluded by {}".format(excluded, middleware.__class__.__name__))

        print("words remained: {}".format(len(normalized)))
        definitions = {}  # dict literal: idiomatic over dict()
        for word in normalized:
            try:
                definitions[word] = self.definer.define(word)
                print("word `{word}` is defined".format(word=word))
            except NotFoundException:
                # Best-effort: an unresolvable word is logged and skipped,
                # not treated as a fatal error.
                print("word `{word}` is not found".format(word=word))

        return definitions

    def compose_write(self, reader: Reader, writer: Writer) -> None:
        """
        Compose definitions from the reader and hand the result to the writer.

        :param reader: source of the raw text to process
        :param writer: sink that receives the composed definitions dict
        """
        writer.write(self.compose(reader))

    @staticmethod
    def __normalize_data(tokenized: List[str]) -> Set[str]:
        """
        Normalize tokens: lowercase each word and deduplicate via a set.

        :param tokenized: raw token list from the tokenizer
        :return: set of distinct lowercased words
        """
        return set(word.lower() for word in tokenized)

data/atomic-habits.pdf

6.23 MB
Binary file not shown.

0 commit comments

Comments
 (0)