Skip to content

Commit 1cf4629

Browse files
committed
use black
1 parent 0fb328a commit 1cf4629

File tree

11 files changed

+1035
-1033
lines changed

11 files changed

+1035
-1033
lines changed

.flake8

Lines changed: 0 additions & 9 deletions
This file was deleted.

.github/workflows/python-app.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ jobs:
2323
- name: Install dependencies
2424
run: |
2525
python -m pip install --upgrade pip
26-
pip install flake8 pytest
26+
pip install black pytest
2727
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28-
- name: Lint with flake8
28+
- name: Autoformat with black
2929
run: |
30-
flake8
30+
black .
3131
- name: Test with pytest
3232
run: |
3333
pytest -s

.pre-commit-config.yaml

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,15 @@ repos:
2020
- id: check-merge-conflict
2121
- id: check-yaml
2222
- id: debug-statements
23-
- id: end-of-file-fixer
24-
- id: mixed-line-ending
25-
files: \.(py|md)$
26-
args: [--fix=lf]
2723
- id: requirements-txt-fixer
28-
- id: trailing-whitespace
2924

3025
- repo: https://github.com/asottile/pyupgrade
3126
rev: v2.7.2
3227
hooks:
3328
- id: pyupgrade
3429
args: [--py36-plus]
3530

36-
- repo: https://gitlab.com/pycqa/flake8
37-
rev: 3.8.4
38-
hooks:
39-
- id: flake8
31+
# - repo: https://github.com/ambv/black
32+
# rev: stable
33+
# hooks:
34+
# - id: black

CONTRIBUTING.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
# All contributions
2-
Keep the code clean with `flake8`. Use `pre-commit` to ensure the checks are executed automatically upon commit. To set up `pre-commit`:
2+
Use `pre-commit` to ensure the checks are executed automatically upon commit. To set up `pre-commit`:
33
```
44
pip install pre-commit
55
pre-commit install
66
```
77

8+
Code style: `black`
9+
810
# When changing autocorrection logic
911
Run this command before and after changes, and paste the output in the comments:
1012
```

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
[![Downloads](https://pepy.tech/badge/autocorrect?label=PyPI%20downloads)](https://pepy.tech/project/autocorrect)
44
[![Average time to resolve an issue](http://isitmaintained.com/badge/resolution/fsondej/autocorrect.svg)](http://isitmaintained.com/project/fsondej/autocorrect "Average time to resolve an issue")
55
[![Percentage of issues still open](http://isitmaintained.com/badge/open/fsondej/autocorrect.svg)](http://isitmaintained.com/project/fsondej/autocorrect "Percentage of issues still open")
6+
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
67

78
Spelling corrector in python. Currently supports English, Polish, Turkish, Russian, Ukrainian, Czech, Portuguese and Spanish, but you can easily add new languages.
89

autocorrect/__init__.py

Lines changed: 37 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,42 +10,44 @@
1010
from autocorrect.constants import word_regexes, urls
1111
from autocorrect.typos import Word
1212

13-
1413
PATH = os.path.abspath(os.path.dirname(__file__))
1514

1615

1716
# credit: https://stackoverflow.com/questions/43370284
1817
class ProgressBar:
1918
def __init__(self):
2019
self.old_percent = 0
21-
print('_' * 50)
20+
print("_" * 50)
2221

2322
def download_progress_hook(self, count, blockSize, totalSize):
2423
percent = int(count * blockSize * 100 / totalSize)
2524
if percent >= 2 + self.old_percent:
2625
self.old_percent = percent
2726
# print(percent, '%')
28-
print('>', end='')
27+
print(">", end="")
2928
sys.stdout.flush()
3029
if percent == 100:
31-
print('\ndone!')
30+
print("\ndone!")
3231

3332

34-
def load_from_tar(lang, file_name='word_count.json'):
33+
def load_from_tar(lang, file_name="word_count.json"):
3534
archive_name = os.path.join(PATH, f"data/{lang}.tar.gz")
3635

3736
if lang not in word_regexes:
38-
supported_langs = ', '.join(word_regexes.keys())
37+
supported_langs = ", ".join(word_regexes.keys())
3938
raise NotImplementedError(
40-
textwrap.dedent(f"""
39+
textwrap.dedent(
40+
f"""
4141
language '{lang}' not supported
4242
supported languages: {supported_langs}
4343
you can easily add new languages by following instructions at
4444
https://github.com/fsondej/autocorrect/tree/master#adding-new-languages
45-
"""))
45+
"""
46+
)
47+
)
4648

4749
if not os.path.isfile(archive_name):
48-
print('dictionary for this language not found, downloading...')
50+
print("dictionary for this language not found, downloading...")
4951
for url in urls[lang]:
5052
progress = ProgressBar()
5153
try:
@@ -57,50 +59,49 @@ def load_from_tar(lang, file_name='word_count.json'):
5759
error_message = str(ex)
5860
if error_message is not None:
5961
raise ConnectionError(
60-
error_message +
61-
'\nFix your network connection, or manually download \n{}'
62-
'\nand put it in \nPATH_TO_REPO/autocorrect/data/'.format(url))
62+
error_message
63+
+ "\nFix your network connection, or manually download \n{}"
64+
"\nand put it in \nPATH_TO_REPO/autocorrect/data/".format(url)
65+
)
6366

64-
with closing(tarfile.open(archive_name, 'r:gz')) as tarf:
67+
with closing(tarfile.open(archive_name, "r:gz")) as tarf:
6568
with closing(tarf.extractfile(file_name)) as file:
6669
return json.load(file)
6770

6871

6972
class Speller:
70-
def __init__(self, lang='en', threshold=0, nlp_data=None, fast=False):
73+
def __init__(self, lang="en", threshold=0, nlp_data=None, fast=False):
7174
self.lang = lang
7275
self.threshold = threshold
7376
self.nlp_data = load_from_tar(lang) if nlp_data is None else nlp_data
7477
self.fast = fast
7578

7679
if threshold > 0:
7780
# print(f'Original number of words: {len(self.nlp_data)}')
78-
self.nlp_data = {k: v for k, v in self.nlp_data.items()
79-
if v >= threshold}
81+
self.nlp_data = {k: v for k, v in self.nlp_data.items() if v >= threshold}
8082
# print(f'After applying threshold: {len(self.nlp_data)}')
8183

8284
def existing(self, words):
8385
"""{'the', 'teh'} => {'the'}"""
84-
return {word for word in words
85-
if word in self.nlp_data}
86+
return {word for word in words if word in self.nlp_data}
8687

8788
def get_candidates(self, word):
8889
w = Word(word, self.lang)
8990
if self.fast:
90-
candidates = (self.existing([word]) or
91-
self.existing(w.typos()) or
92-
[word])
91+
candidates = self.existing([word]) or self.existing(w.typos()) or [word]
9392
else:
94-
candidates = (self.existing([word]) or
95-
self.existing(w.typos()) or
96-
self.existing(w.double_typos()) or
97-
[word])
93+
candidates = (
94+
self.existing([word])
95+
or self.existing(w.typos())
96+
or self.existing(w.double_typos())
97+
or [word]
98+
)
9899
return [(self.nlp_data.get(c, 0), c) for c in candidates]
99100

100101
def autocorrect_word(self, word):
101102
"""most likely correction for everything up to a double typo"""
102-
if word == '':
103-
return ''
103+
if word == "":
104+
return ""
104105

105106
candidates = self.get_candidates(word)
106107

@@ -116,9 +117,11 @@ def autocorrect_word(self, word):
116117
return best_word
117118

118119
def autocorrect_sentence(self, sentence):
119-
return re.sub(word_regexes[self.lang],
120-
lambda match: self.autocorrect_word(match.group(0)),
121-
sentence)
120+
return re.sub(
121+
word_regexes[self.lang],
122+
lambda match: self.autocorrect_word(match.group(0)),
123+
sentence,
124+
)
122125

123126
__call__ = autocorrect_sentence
124127

@@ -129,8 +132,10 @@ def __init__(self):
129132
self.speller = None
130133

131134
def __call__(self, sentence):
132-
print('autocorrect.spell is deprecated, \
133-
use autocorrect.Speller instead')
135+
print(
136+
"autocorrect.spell is deprecated, \
137+
use autocorrect.Speller instead"
138+
)
134139
if self.speller is None:
135140
self.speller = Speller()
136141
return self.speller(sentence)

autocorrect/constants.py

Lines changed: 47 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,55 @@
11
word_regexes = {
2-
'en': r'[A-Za-z]+',
3-
'pl': r'[A-Za-zęĘóÓąĄśŚłŁżŻźŹćĆńŃ]+',
4-
'ru': r'[АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя]+',
5-
'uk': r'[АаБбВвГгҐґДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя]+',
6-
'tr': r'[a-zA-ZçÇğĞüÜöÖşŞıİ]+',
7-
'es': r'[A-Za-zÁáÉéÍíÓóÚúÜüÑñ]+',
8-
'pt': r'[a-zA-ZãáàâçéêíõóôúüÃÁÀÂÇÉÊÍÕÓÔÚÜ]+',
9-
'cs': r'[AÁBCČDĎEÉĚFGH(Ch)IÍJKLMNŇOÓPQRŘSŠTŤUÚŮVWXYÝZŽaábcčdďeéěfgh(ch)iíjklmnňoópqrřsštťuúůvwxyýzž]+',
2+
"en": r"[A-Za-z]+",
3+
"pl": r"[A-Za-zęĘóÓąĄśŚłŁżŻźŹćĆńŃ]+",
4+
"ru": r"[АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя]+",
5+
"uk": r"[АаБбВвГгҐґДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя]+",
6+
"tr": r"[a-zA-ZçÇğĞüÜöÖşŞıİ]+",
7+
"es": r"[A-Za-zÁáÉéÍíÓóÚúÜüÑñ]+",
8+
"pt": r"[a-zA-ZãáàâçéêíõóôúüÃÁÀÂÇÉÊÍÕÓÔÚÜ]+",
9+
"cs": r"[AÁBCČDĎEÉĚFGH(Ch)IÍJKLMNŇOÓPQRŘSŠTŤUÚŮVWXYÝZŽaábcčdďeéěfgh(ch)iíjklmnňoópqrřsštťuúůvwxyýzž]+",
1010
}
1111

12-
1312
alphabets = {
14-
'en': 'abcdefghijklmnopqrstuvwxyz',
15-
'pl': 'abcdefghijklmnopqrstuvwxyzęóąśłżźćń',
16-
'ru': 'шиюынжсяплзухтвкйеобмцьёгдщэарчфъ',
17-
'uk': 'фагксщроємшплуьцнжхїйювязтибґідеч',
18-
'tr': 'abcçdefgğhıijklmnoöprsştuüvyzqwxÇĞİÜÖ',
19-
'es': 'abcdefghijklmnopqrstuvwxyzáéíóúüñ',
20-
'pt': 'abcdefghijklmnopqrstuvwxyzãáàâçéêíõóôúü',
21-
'cs': 'aábcčdďeéěfgh(ch)iíjklmnňoópqrřsštťuúůvwxyýzž',
13+
"en": "abcdefghijklmnopqrstuvwxyz",
14+
"pl": "abcdefghijklmnopqrstuvwxyzęóąśłżźćń",
15+
"ru": "шиюынжсяплзухтвкйеобмцьёгдщэарчфъ",
16+
"uk": "фагксщроємшплуьцнжхїйювязтибґідеч",
17+
"tr": "abcçdefgğhıijklmnoöprsştuüvyzqwxÇĞİÜÖ",
18+
"es": "abcdefghijklmnopqrstuvwxyzáéíóúüñ",
19+
"pt": "abcdefghijklmnopqrstuvwxyzãáàâçéêíõóôúü",
20+
"cs": "aábcčdďeéěfgh(ch)iíjklmnňoópqrřsštťuúůvwxyýzž",
2221
}
2322

2423
urls = {
25-
'en': [
26-
'https://dl.dropboxusercontent.com/s/grxjmtw4db814g1/en.tar.gz?dl=0'],
27-
'pl': [
28-
'https://dl.dropboxusercontent.com/s/40orabi1l3dfqpp/pl.tar.gz?dl=0'],
29-
'ru': [
30-
'https://dl.dropboxusercontent.com/s/mpas7xqn8yl3wej/ru.tar.gz?dl=0',
31-
'https://dl.dropboxusercontent.com/s/6tzfxy34xx34mm7/ru.tar.gz?dl=0',
32-
'https://siasky.net/AABaSQMcxgHp7LJ-YHs1IWqn4uxa8q17fGET-IaNbGgSnQ'],
33-
'uk': [
34-
'https://dl.dropboxusercontent.com/s/s64ot0l4lj3a0ec/uk.tar.gz?dl=0',
35-
'https://dl.dropboxusercontent.com/s/b76p4sc1lld96lw/uk.tar.gz?dl=0',
36-
'https://siasky.net/AADdpauxvMwjieU3n5qaMbjAeCYR9T-vK6L1OEXRTOgr6g'],
37-
'tr': [
38-
'https://dl.dropboxusercontent.com/s/mj2d3t158ucwhwx/tr.tar.gz?dl=0',
39-
'https://dl.dropboxusercontent.com/s/1wy01nq5fpq8iay/tr.tar.gz?dl=0',
40-
'https://siasky.net/AABWRhJ-7NVoo2vaTgSs6HNhwGmFgCgYzg0q_0d-eqgCeA'],
41-
'es': [
42-
'https://dl.dropboxusercontent.com/s/jh0212sou1qbs7t/es.tar.gz?dl=0',
43-
'https://dl.dropboxusercontent.com/s/k6g5vj3x0rx7mjz/es.tar.gz?dl=0',
44-
'https://siasky.net/_ArsYbh-vpFWosvzEuQQZnPrOt2XggjDQfkvDwTFu5MQoA'],
45-
'cs': [
46-
'https://dl.dropboxusercontent.com/s/8ptuuh8kcr3kufy/cs.tar.gz?dl=0',
47-
'https://dl.dropboxusercontent.com/s/369wplqb0w2ax21/cs.tar.gz?dl=0',
48-
'https://siasky.net/AAC6lW1ShlSRUeiFnr4_2bmw6sznlZsWvmhDhyQy_-g2wA'],
49-
'pt': [
50-
'https://dl.dropboxusercontent.com/s/6xnko882tsjgeaw/pt.tar.gz?dl=0',
51-
'https://siasky.net/PAOmY66v3ggXpqNtbHQU_hb7ARNOL_Lv3LcTwFMMWmdzVw'],
24+
"en": ["https://dl.dropboxusercontent.com/s/grxjmtw4db814g1/en.tar.gz?dl=0"],
25+
"pl": ["https://dl.dropboxusercontent.com/s/40orabi1l3dfqpp/pl.tar.gz?dl=0"],
26+
"ru": [
27+
"https://dl.dropboxusercontent.com/s/mpas7xqn8yl3wej/ru.tar.gz?dl=0",
28+
"https://dl.dropboxusercontent.com/s/6tzfxy34xx34mm7/ru.tar.gz?dl=0",
29+
"https://siasky.net/AABaSQMcxgHp7LJ-YHs1IWqn4uxa8q17fGET-IaNbGgSnQ",
30+
],
31+
"uk": [
32+
"https://dl.dropboxusercontent.com/s/s64ot0l4lj3a0ec/uk.tar.gz?dl=0",
33+
"https://dl.dropboxusercontent.com/s/b76p4sc1lld96lw/uk.tar.gz?dl=0",
34+
"https://siasky.net/AADdpauxvMwjieU3n5qaMbjAeCYR9T-vK6L1OEXRTOgr6g",
35+
],
36+
"tr": [
37+
"https://dl.dropboxusercontent.com/s/mj2d3t158ucwhwx/tr.tar.gz?dl=0",
38+
"https://dl.dropboxusercontent.com/s/1wy01nq5fpq8iay/tr.tar.gz?dl=0",
39+
"https://siasky.net/AABWRhJ-7NVoo2vaTgSs6HNhwGmFgCgYzg0q_0d-eqgCeA",
40+
],
41+
"es": [
42+
"https://dl.dropboxusercontent.com/s/jh0212sou1qbs7t/es.tar.gz?dl=0",
43+
"https://dl.dropboxusercontent.com/s/k6g5vj3x0rx7mjz/es.tar.gz?dl=0",
44+
"https://siasky.net/_ArsYbh-vpFWosvzEuQQZnPrOt2XggjDQfkvDwTFu5MQoA",
45+
],
46+
"cs": [
47+
"https://dl.dropboxusercontent.com/s/8ptuuh8kcr3kufy/cs.tar.gz?dl=0",
48+
"https://dl.dropboxusercontent.com/s/369wplqb0w2ax21/cs.tar.gz?dl=0",
49+
"https://siasky.net/AAC6lW1ShlSRUeiFnr4_2bmw6sznlZsWvmhDhyQy_-g2wA",
50+
],
51+
"pt": [
52+
"https://dl.dropboxusercontent.com/s/6xnko882tsjgeaw/pt.tar.gz?dl=0",
53+
"https://siasky.net/PAOmY66v3ggXpqNtbHQU_hb7ARNOL_Lv3LcTwFMMWmdzVw",
54+
],
5255
}

autocorrect/typos.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@
2121

2222
class Word:
2323
"""container for word-based methods"""
24-
__slots__ = ['slices', 'word', 'alphabet'] # optimization
2524

26-
def __init__(self, word, lang='en'):
25+
__slots__ = ["slices", "word", "alphabet"] # optimization
26+
27+
def __init__(self, word, lang="en"):
2728
"""
2829
Generate slices to assist with typo
2930
definitions.
@@ -33,41 +34,38 @@ def __init__(self, word, lang='en'):
3334
3435
"""
3536
slice_range = range(len(word) + 1)
36-
self.slices = tuple((word[:i], word[i:])
37-
for i in slice_range)
37+
self.slices = tuple((word[:i], word[i:]) for i in slice_range)
3838
self.word = word
3939
self.alphabet = alphabets[lang]
4040

4141
def _deletes(self):
4242
"""th"""
4343
for a, b in self.slices[:-1]:
44-
yield ''.join((a, b[1:]))
44+
yield "".join((a, b[1:]))
4545

4646
def _transposes(self):
4747
"""teh"""
4848
for a, b in self.slices[:-2]:
49-
yield ''.join((a, b[1], b[0], b[2:]))
49+
yield "".join((a, b[1], b[0], b[2:]))
5050

5151
def _replaces(self):
5252
"""tge"""
5353
for a, b in self.slices[:-1]:
5454
for c in self.alphabet:
55-
yield ''.join((a, c, b[1:]))
55+
yield "".join((a, c, b[1:]))
5656

5757
def _inserts(self):
5858
"""thwe"""
5959
for a, b in self.slices:
6060
for c in self.alphabet:
61-
yield ''.join((a, c, b))
61+
yield "".join((a, c, b))
6262

6363
def typos(self):
6464
"""letter combinations one typo away from word"""
65-
return chain(self._deletes(),
66-
self._transposes(),
67-
self._replaces(),
68-
self._inserts())
65+
return chain(
66+
self._deletes(), self._transposes(), self._replaces(), self._inserts()
67+
)
6968

7069
def double_typos(self):
7170
"""letter combinations two typos away from word"""
72-
return chain.from_iterable(
73-
Word(e1).typos() for e1 in self.typos())
71+
return chain.from_iterable(Word(e1).typos() for e1 in self.typos())

0 commit comments

Comments (0)