Skip to content

Commit 521e46a

Browse files
committed
Make explain command filters more robust
1 parent 46ce1b1 commit 521e46a

File tree

2 files changed

+48
-5
lines changed

2 files changed

+48
-5
lines changed

explain.py

+27-5
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def get_arguments() -> argparse.Namespace:
3939
'-ot', '--output-type', help='the output file type, Default = json', type=str,
4040
choices=['json', 'csv'], default='json'
4141
)
42+
parser.add_argument('--csv-write-header', help='write header in csv output', action='store_true')
4243
parser.add_argument(
4344
'--cefr-level', help='minimum word\'s cefr level to consider, default = B1', type=str,
4445
choices=['A1', 'A2', 'B1', 'B2', 'C1', 'C2'], default='B1'
@@ -50,6 +51,24 @@ def get_arguments() -> argparse.Namespace:
5051
parser.add_argument(
5152
'-v', '--verbose', help='verbose mode', default=False, action='store_true'
5253
)
54+
55+
parser.add_argument(
56+
'--disable-meaningful-words-filter',
57+
help='by default we try to filter out meaningless words, this option disable it',
58+
action='store_true'
59+
)
60+
61+
parser.add_argument(
62+
'--char-limiter-min-length',
63+
help='minimum length of the word to consider, default = 3',
64+
type=int, default=3
65+
)
66+
67+
parser.add_argument(
68+
'--disable-char-limiter-filter',
69+
help='by default we try to filter out words with too few characters, this option disable it',
70+
action='store_true'
71+
)
5372
args = parser.parse_args()
5473
return args
5574

@@ -89,13 +108,16 @@ def get_tokenizer(_: argparse.Namespace) -> Tokenizer:
89108

90109

91110
def get_middlewares(args: argparse.Namespace) -> List[Middleware]:
    """Build the word-filtering middlewares selected by the command line.

    Every candidate middleware is paired with a flag telling whether it is
    enabled; only the enabled ones are returned, in declaration order.

    :param args: parsed command-line arguments. Reads
        ``char_limiter_min_length``, ``disable_char_limiter_filter``,
        ``disable_meaningful_words_filter``, ``cefr_level`` and
        ``ignore_unknown_cefr``.
    :return: the list of enabled middlewares.
    """
    rules = [
        (Number(), True),
        (
            CharLengthValidator(min_length=args.char_limiter_min_length),
            not args.disable_char_limiter_filter,
        ),
        (MeaningfulWords(), not args.disable_meaningful_words_filter),
        (
            # NOTE(review): filter_unknowns is the negation of
            # --ignore-unknown-cefr; confirm this polarity against CEFRLimiter.
            CEFRLimiter(
                min_cefr=args.cefr_level,
                filter_unknowns=not args.ignore_unknown_cefr,
            ),
            # The CEFR limiter has no "disable" switch of its own, so it is
            # always active. It must not be tied to the meaningful-words
            # switch, otherwise disabling that filter would silently drop
            # CEFR filtering as well.
            True,
        ),
    ]

    return [middleware for middleware, enabled in rules if enabled]
120+
99121

100122
def get_definer(_: argparse.Namespace) -> Definer:
101123
return MultiSourceDefinerWithStorage(

readers/voice_speech_recognition.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from speech_recognition import AudioData, Recognizer
2+
3+
from models.reader import Reader
4+
import speech_recognition as sr
5+
6+
7+
class VoiceRecognition(Reader):
    """Reader that produces text by transcribing an audio file."""

    def __init__(self, file_name: str):
        """Remember the audio file to read and create the recognizer.

        :param file_name: path of the audio file to transcribe.
        """
        super().__init__()
        self.__file_name: str = file_name
        self.__r: Recognizer = sr.Recognizer()

    def read(self) -> str:
        """Record the whole audio file and return the recognized text."""
        # Hand the path itself to AudioFile so that it owns the underlying
        # file and closes it when the ``with`` block exits; wrapping the path
        # in a bare ``open(...)`` would leave that handle unclosed.
        with sr.AudioFile(self.__file_name) as audio_file:
            print("reading audio file...")
            audio_data = self.__r.record(audio_file)
            print("extracting text from audio file...")
            return self.__recognize(audio_data)

    def __recognize(self, audio_data: AudioData) -> str:
        """Transcribe the recorded audio with the recognizer's Google backend."""
        return self.__r.recognize_google(audio_data)

0 commit comments

Comments
 (0)