
Commit 96cb79e

Merge pull request #23 from dc-aichara/add_flake
Add Flake8 Code Checker
2 parents d1d5990 + 1a26af8 commit 96cb79e

5 files changed: +66 -38 lines changed

.flake8

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+[flake8]
+ignore = W503
+exclude = .git,__pycache__,build,peters_code,.ipynb_checkpoints,setup.py
+max-complexity = 15
+per-file-ignores =
+    # imported but unused
+    __init__.py: F401
+max-line-length = 80
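W503 ("line break before binary operator") is the pycodestyle check that conflicts with Black's preferred line-wrapping, which is presumably why it is ignored alongside the Black step already in CI, and the per-file rule lets re-exports in __init__.py pass. A minimal, hypothetical sketch of the pattern F401 would otherwise flag (not copied from this repo's __init__.py):

# __init__.py (hypothetical illustration): a re-export imports a name without
# using it in this module, which flake8 reports as F401 "imported but unused";
# the per-file-ignores entry above silences exactly that warning here.
from .contextualSpellCheck import ContextualSpellCheck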

.github/workflows/python-package.yml

Lines changed: 6 additions & 0 deletions
@@ -33,6 +33,12 @@ jobs:
         python -m spacy download en_core_web_sm
     - name: Black Code Formatter
       run: black . --check
+    - name: Flake Code Checker
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings.
+        flake8 . --count --exit-zero --statistics
     - name: Test with pytest
       run: |
         pytest
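The new step runs flake8 twice: a strict pass that fails the build only on syntax errors and undefined names (the E9/F63/F7/F82 selectors), then an advisory pass that reports every finding but always exits zero. A rough local equivalent, assuming flake8 from requirements.txt is installed, could be scripted as:

import subprocess

# Strict gate: raises CalledProcessError if syntax errors or undefined names exist.
subprocess.run(
    ["flake8", ".", "--count", "--select=E9,F63,F7,F82",
     "--show-source", "--statistics"],
    check=True,
)

# Advisory pass: prints counts and statistics for all findings but never fails.
subprocess.run(["flake8", ".", "--count", "--exit-zero", "--statistics"], check=False)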

contextualSpellCheck/contextualSpellCheck.py

File mode changed from 100644 to 100755
Lines changed: 14 additions & 13 deletions
@@ -1,14 +1,14 @@
-import spacy
-import torch
-import editdistance
-from datetime import datetime
-import os
 import copy
-import warnings, logging
+import logging
+import os
+import warnings
+from datetime import datetime

+import editdistance
+import spacy
+import torch
 from spacy.tokens import Doc, Token, Span
 from spacy.vocab import Vocab
-
 from transformers import AutoModelForMaskedLM, AutoTokenizer


@@ -43,19 +43,20 @@ def __init__(
             Defaults to False.
         """
         if (
-            (type(vocab_path) != type(""))
-            or (type(debug) != type(True))
-            or (type(performance) != type(True))
+            not isinstance(vocab_path, str)
+            or not isinstance(debug, type(True))
+            or not isinstance(performance, type(True))
         ):
             raise TypeError(
                 "Please check datatype provided. vocab_path should be str,"
                 " debug and performance should be bool"
             )
         try:
             int(float(max_edit_dist))
-        except ValueError as identifier:
+        except ValueError:
             raise ValueError(
-                f"cannot convert {max_edit_dist} to int. Please provide a valid integer"
+                f"cannot convert {max_edit_dist} to int. Please provide a "
+                f"valid integer "
             )

         if vocab_path != "":
@@ -182,7 +183,7 @@ def check(self, query="", spacy_model="en_core_web_sm"):
             (str, `Doc`): returns updated query (if no oov words then "")
             and updated Doc Object
         """
-        if type(query) != str and len(query) == 0:
+        if not isinstance(query, str) and len(query) == 0:
             return "Invalid query, expected non empty `str` but passed", query

         nlp = spacy.load(spacy_model, disable=["tagger", "parser"])
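Beyond the import reordering, the substantive change in this file swaps type()-equality comparisons for isinstance checks, the idiomatic form that also accepts subclasses and avoids pycodestyle's E721 complaint about direct type comparisons. A standalone sketch of the difference (illustrative, not part of the patch):

vocab_path = "vocab.txt"

# Old style removed by this commit: compare concrete types directly.
old_style_match = type(vocab_path) == type("")

# New style: isinstance reads more clearly and also matches str subclasses.
new_style_match = isinstance(vocab_path, str)

assert old_style_match and new_style_match

# Side note: type(True) is bool, so the patch's isinstance(debug, type(True))
# is just isinstance(debug, bool) spelled indirectly.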

contextualSpellCheck/tests/test_contextualSpellCheck.py

Lines changed: 34 additions & 25 deletions
@@ -1,7 +1,8 @@
 import pytest
 import spacy
 from pytest import approx
-import warnings, os
+import warnings
+import os

 from ..contextualSpellCheck import ContextualSpellCheck

@@ -18,7 +19,8 @@
     "inputSentence, misspell",
     [
         (
-            "Income was $9.4 million compared to the prior year of $2.7 million.",
+            "Income was $9.4 million \
+compared to the prior year of $2.7 million.",
             [],
         ),
         ("who is Rajat Goel?", []),
@@ -43,8 +45,8 @@ def test_no_misspellIdentify(inputSentence, misspell):
 def test_type_misspellIdentify(inputSentence, misspell):
     print("Start type correction test for spelling mistake identification\n")
     doc = nlp(inputSentence)
-    assert type(checker.misspell_identify(doc)[0]) == type(misspell)
-    assert type(checker.misspell_identify(doc)[1]) == type(doc)
+    assert isinstance(checker.misspell_identify(doc)[0], type(misspell))
+    assert isinstance(checker.misspell_identify(doc)[1], type(doc))
     assert checker.misspell_identify(doc)[1] == doc


@@ -62,8 +64,8 @@ def test_identify_misspellIdentify(inputSentence, misspell):
     print("Start misspell word identifation test\n")
     doc = nlp(inputSentence)
     checkerReturn = checker.misspell_identify(doc)[0]
-    assert type(checkerReturn) == list
-    ## Changed the approach after v0.1.0
+    assert isinstance(checkerReturn, list)
+    # Changed the approach after v0.1.0
     assert [tok.text_with_ws for tok in checkerReturn] == [
         doc[i].text_with_ws for i in misspell
     ]
@@ -142,9 +144,9 @@ def test_skipURL_misspellIdentify(inputSentence, misspell):
 def test_type_candidateGenerator(inputSentence, misspell):
     doc = nlp(inputSentence)
     misspell, doc = checker.misspell_identify(doc)
-    assert type(checker.candidate_generator(doc, misspell)) == tuple
-    assert type(checker.candidate_generator(doc, misspell)[0]) == type(doc)
-    assert type(checker.candidate_generator(doc, misspell)[1]) == dict
+    assert isinstance(checker.candidate_generator(doc, misspell), tuple)
+    assert isinstance(checker.candidate_generator(doc, misspell)[0], type(doc))
+    assert isinstance(checker.candidate_generator(doc, misspell)[1], dict)


 @pytest.mark.parametrize(
@@ -203,7 +205,7 @@ def test_identify_candidateGenerator(inputSentence, misspell):
     doc = nlp(inputSentence)
     (misspellings, doc) = checker.misspell_identify(doc)
     doc, suggestions = checker.candidate_generator(doc, misspellings)
-    ## changed after v1.0 because of deepCopy creatng issue with ==
+    # changed after v1.0 because of deepCopy creatng issue with ==
     # gold_suggestions = {doc[key]: value for key, value in misspell.items()}
     assert [tok.i for tok in suggestions] == [key for key in misspell.keys()]
     assert [suggString for suggString in suggestions.values()] == [
@@ -226,7 +228,7 @@ def test_identify_candidateGenerator(inputSentence, misspell):
 def test_extension_candidateGenerator(inputSentence, misspell):
     doc = nlp(inputSentence)
     (misspellings, doc) = checker.misspell_identify(doc)
-    suggestions = checker.candidate_generator(doc, misspellings)
+    checker.candidate_generator(doc, misspellings)
     assert doc._.performed_spellCheck == misspell


@@ -321,8 +323,9 @@ def test_ranking_candidateRanking(inputSentence, misspell):
     (misspellings, doc) = checker.misspell_identify(doc)
     doc, suggestions = checker.candidate_generator(doc, misspellings)
     selectedWord = checker.candidate_ranking(doc, suggestions)
-    ## changes made after v0.1
-    # assert selectedWord == {doc[key]: value for key, value in misspell.items()}
+    # changes made after v0.1
+    # assert selectedWord ==
+    # {doc[key]: value for key, value in misspell.items()}
     assert [tok.i for tok in selectedWord.keys()] == [
         tok for tok in misspell.keys()
     ]
@@ -378,8 +381,8 @@ def test_doc_extensions():
             ("%", 0.00041),
         ],
     }
-    assert doc._.contextual_spellCheck == True
-    assert doc._.performed_spellCheck == True
+    assert doc._.contextual_spellCheck
+    assert doc._.performed_spellCheck
     # updated after v0.1
     assert [tok.i for tok in doc._.suggestions_spellCheck.keys()] == [
         tok.i for tok in gold_suggestion.keys()
@@ -422,7 +425,7 @@ def test_doc_extensions():
 def test_span_extensions():
     try:
         nlp.add_pipe(checker)
-    except:
+    except BaseException:
         print("contextual SpellCheck already in pipeline")
     doc = nlp(
         "Income was $9.4 milion compared to the prior year of $2.7 milion."
@@ -446,7 +449,7 @@ def test_span_extensions():
         doc[5]: [],
     }

-    assert doc[2:6]._.get_has_spellCheck == True
+    assert doc[2:6]._.get_has_spellCheck
     # splitting components to make use of approx function
     print(doc[2:6]._.score_spellCheck)
     print(gold_score)
@@ -472,7 +475,8 @@ def test_span_extensions():
         abs=1e-4,
     )

-    # assert doc[2:6]._.score_spellCheck == approx(gold_score,rel=1e-4, abs=1e-4)
+    # assert doc[2:6]._.score_spellCheck ==
+    # approx(gold_score,rel=1e-4, abs=1e-4)
     nlp.remove_pipe("contextual spellchecker")


@@ -497,7 +501,7 @@ def test_token_extension():
         ("USD", 0.00113),
     ]

-    assert doc[4]._.get_require_spellCheck == True
+    assert doc[4]._.get_require_spellCheck
     assert doc[4]._.get_suggestion_spellCheck == gold_suggestions
     # Match words and score separately to incorporate approx fn in pytest
     assert [word_score[0] for word_score in doc[4]._.score_spellCheck] == [
@@ -525,13 +529,14 @@ def test_warning():
     # warnings.simplefilter("always")
     # Trigger a warning.

-    assert doc[4]._.get_require_spellCheck == False
+    assert not doc[4]._.get_require_spellCheck
     assert doc[4]._.get_suggestion_spellCheck == ""
     assert doc[4]._.score_spellCheck == []
     # Verify Warning
     assert issubclass(w[-1].category, UserWarning)
     assert (
-        "Position of tokens modified by downstream element in pipeline eg. merge_entities"
+        "Position of tokens modified by downstream \
+element in pipeline eg. merge_entities"
         in str(w[-1].message)
     )

@@ -546,14 +551,16 @@ def test_warning():
         ContextualSpellCheck(vocab_path=True)
     assert (
         e
-        == "Please check datatype provided. vocab_path should be str, debug and performance should be bool"
+        == "Please check datatype provided. \
+vocab_path should be str, debug and performance should be bool"
     )
     max_edit_distance = "non_int_or_float"
     with pytest.raises(ValueError) as e:
         ContextualSpellCheck(max_edit_dist=max_edit_distance)
     assert (
         e
-        == f"cannot convert {max_edit_distance} to int. Please provide a valid integer"
+        == f"cannot convert {max_edit_distance} to int. \
+Please provide a valid integer"
     )

     try:
@@ -582,8 +589,10 @@ def test_bert_model_name():
     model_name = "a_random_model"
     error_message = (
         f"Can't load config for '{model_name}'. Make sure that:\n\n"
-        f"- '{model_name}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n"
-        f"- or '{model_name}' is the correct path to a directory containing a config.json file\n\n"
+        f"- '{model_name}' is a correct model identifier listed on \
+'https://huggingface.co/models'\n\n"
+        f"- or '{model_name}' is the correct path to a directory \
+containing a config.json file\n\n"
     )

     with pytest.raises(OSError) as e:
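The test changes are mostly flake8-driven style fixes: "== True" / "== False" comparisons become plain truthiness asserts (E712), a bare "except:" becomes "except BaseException:" (E722), "##" comments and over-long lines are trimmed, and an unused assignment is dropped (F841). A tiny illustrative sketch of the first two rules, not taken from the suite:

flag, done = True, False

# E712: comparisons to True/False are flagged; plain truthiness asserts are
# equivalent and give the same pytest failure output.
assert flag        # instead of: assert flag == True
assert not done    # instead of: assert done == False

# E722: a bare "except:" is flagged; naming BaseException keeps the same
# catch-everything behaviour while satisfying the checker.
try:
    value = int("not a number")
except BaseException:
    value = 0
assert value == 0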

requirements.txt

Lines changed: 4 additions & 0 deletions
@@ -10,3 +10,7 @@ mecab-python3==0.996.5
 ipadic==1.0.0
 unidic-lite==1.0.6

+# Code formatting
+flake8==3.8.3
+black==20.8b1
+

0 commit comments
