from typing import Dict, List, Set

from models.composer import Composer
from models.definer import Definer, NotFoundException
from models.middleware import Middleware
from models.reader import Reader
from models.tokenizer import Tokenizer
from models.word import WordDefinition
from models.writer import Writer


class DefinitionComposer(Composer):
    """
    Composes word definitions: tokenizes the input from a Reader, filters
    the tokens through a chain of middlewares, and resolves each remaining
    word through a Definer.
    """

    def __init__(self, tokenizer: Tokenizer, middlewares: List[Middleware], definer: Definer):
        """
        :param tokenizer: splits the raw input into tokens
        :param middlewares: filters applied in order; each keeps the
            acceptable words and reports how many it excluded
        :param definer: resolves a word to its list of definitions
        """
        self.tokenizer: Tokenizer = tokenizer
        self.middlewares: List[Middleware] = middlewares
        self.definer: Definer = definer

    def compose(self, reader: Reader) -> Dict[str, List[WordDefinition]]:
        """
        Tokenize the reader's input, normalize and filter it, and return a
        mapping from each successfully defined word to its definitions.
        """
        tokenized = self.tokenizer.tokenize(reader.read())
        print(f"number of tokens: {len(tokenized)}")
        normalized = self.__normalize_data(tokenized)
        print(f"number of tokens after normalization: {len(normalized)}")

        # Each middleware prunes the word set and reports how many words it excluded.
        for middleware in self.middlewares:
            normalized, excluded = middleware.validate(normalized)
            print(f"{excluded} words excluded by {middleware.__class__.__name__}")

        print(f"words remaining: {len(normalized)}")
        definitions: Dict[str, List[WordDefinition]] = {}
        for word in normalized:
            try:
                definitions[word] = self.definer.define(word)
                print(f"word `{word}` is defined")
            except NotFoundException:
                # Words the definer cannot resolve are skipped.
                print(f"word `{word}` is not found")

        return definitions

    def compose_write(self, reader: Reader, writer: Writer) -> None:
        """
        Compose the definitions and write them out via the given writer.
        """
        writer.write(self.compose(reader))

    @staticmethod
    def __normalize_data(tokenized: List[str]) -> Set[str]:
        """
        Lowercase all tokens and deduplicate them into a set.
        """
        return {word.lower() for word in tokenized}
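

# --- Usage sketch (illustrative, not part of the module above) ---------------
# A minimal end-to-end run of the pipeline. Every concrete class below
# (StringReader, WhitespaceTokenizer, MinLengthMiddleware, StaticDefiner) is a
# hypothetical stand-in: it assumes the models.* base classes can be subclassed
# with exactly the method signatures DefinitionComposer relies on
# (Reader.read, Tokenizer.tokenize, Middleware.validate, Definer.define).

class StringReader(Reader):
    """Hypothetical reader that serves a fixed in-memory string."""

    def __init__(self, text: str):
        self.text = text

    def read(self) -> str:
        return self.text


class WhitespaceTokenizer(Tokenizer):
    """Hypothetical tokenizer that splits the input on whitespace."""

    def tokenize(self, text: str) -> List[str]:
        return text.split()


class MinLengthMiddleware(Middleware):
    """Hypothetical middleware that drops words shorter than three characters."""

    def validate(self, words: Set[str]):
        kept = {word for word in words if len(word) >= 3}
        return kept, len(words) - len(kept)


class StaticDefiner(Definer):
    """Hypothetical definer backed by an in-memory dictionary."""

    def __init__(self, entries: Dict[str, List[WordDefinition]]):
        self.entries = entries

    def define(self, word: str) -> List[WordDefinition]:
        if word not in self.entries:
            raise NotFoundException(word)  # assumes the default exception constructor
        return self.entries[word]


if __name__ == "__main__":
    composer = DefinitionComposer(
        tokenizer=WhitespaceTokenizer(),
        middlewares=[MinLengthMiddleware()],
        definer=StaticDefiner({"quick": [], "brown": []}),  # empty definitions for brevity
    )
    # `quick` and `brown` resolve; `the` and `fox` pass the length filter but
    # are reported as not found by the definer.
    print(composer.compose(StringReader("The quick brown fox")))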