
Commit 96cb79e

Merge pull request #23 from dc-aichara/add_flake
Add Flake8 Code Checker
2 parents d1d5990 + 1a26af8 commit 96cb79e

5 files changed: +66 -38 lines changed

.flake8

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+[flake8]
+ignore = W503
+exclude = .git,__pycache__,build,peters_code,.ipynb_checkpoints,setup.py
+max-complexity = 15
+per-file-ignores =
+    # imported but unused
+    __init__.py: F401
+max-line-length = 80
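W503 ("line break before binary operator") is the pycodestyle check that conflicts with Black's preferred line-wrapping, which is presumably why it is ignored alongside the Black step already in CI, and the per-file rule lets re-exports in __init__.py pass. A minimal, hypothetical sketch of the pattern F401 would otherwise flag (not copied from this repo's __init__.py):

# __init__.py (hypothetical illustration): a re-export imports a name without
# using it in this module, which flake8 reports as F401 "imported but unused";
# the per-file-ignores entry above silences exactly that warning here.
from .contextualSpellCheck import ContextualSpellCheck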

.github/workflows/python-package.yml

Lines changed: 6 additions & 0 deletions
@@ -33,6 +33,12 @@ jobs:
         python -m spacy download en_core_web_sm
     - name: Black Code Formatter
       run: black . --check
+    - name: Flake Code Checker
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings.
+        flake8 . --count --exit-zero --statistics
     - name: Test with pytest
       run: |
         pytest
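The new step runs flake8 twice: a strict pass that fails the build only on syntax errors and undefined names (the E9/F63/F7/F82 selectors), then an advisory pass that reports every finding but always exits zero. A rough local equivalent, assuming flake8 from requirements.txt is installed, could be scripted as:

import subprocess

# Strict gate: raises CalledProcessError if syntax errors or undefined names exist.
subprocess.run(
    ["flake8", ".", "--count", "--select=E9,F63,F7,F82",
     "--show-source", "--statistics"],
    check=True,
)

# Advisory pass: prints counts and statistics for all findings but never fails.
subprocess.run(["flake8", ".", "--count", "--exit-zero", "--statistics"], check=False)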

contextualSpellCheck/contextualSpellCheck.py

File mode changed from 100644 to 100755
Lines changed: 14 additions & 13 deletions
@@ -1,14 +1,14 @@
-import spacy
-import torch
-import editdistance
-from datetime import datetime
-import os
 import copy
-import warnings, logging
+import logging
+import os
+import warnings
+from datetime import datetime

+import editdistance
+import spacy
+import torch
 from spacy.tokens import Doc, Token, Span
 from spacy.vocab import Vocab
-
 from transformers import AutoModelForMaskedLM, AutoTokenizer


@@ -43,19 +43,20 @@ def __init__(
             Defaults to False.
         """
         if (
-            (type(vocab_path) != type(""))
-            or (type(debug) != type(True))
-            or (type(performance) != type(True))
+            not isinstance(vocab_path, str)
+            or not isinstance(debug, type(True))
+            or not isinstance(performance, type(True))
         ):
             raise TypeError(
                 "Please check datatype provided. vocab_path should be str,"
                 " debug and performance should be bool"
             )
         try:
             int(float(max_edit_dist))
-        except ValueError as identifier:
+        except ValueError:
             raise ValueError(
-                f"cannot convert {max_edit_dist} to int. Please provide a valid integer"
+                f"cannot convert {max_edit_dist} to int. Please provide a "
+                f"valid integer "
             )

         if vocab_path != "":
@@ -182,7 +183,7 @@ def check(self, query="", spacy_model="en_core_web_sm"):
             (str, `Doc`): returns updated query (if no oov words then "")
             and updated Doc Object
         """
-        if type(query) != str and len(query) == 0:
+        if not isinstance(query, str) and len(query) == 0:
             return "Invalid query, expected non empty `str` but passed", query

         nlp = spacy.load(spacy_model, disable=["tagger", "parser"])
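Beyond the import reordering, the substantive change in this file swaps type()-equality comparisons for isinstance checks, the idiomatic form that also accepts subclasses and avoids pycodestyle's E721 complaint about direct type comparisons. A standalone sketch of the difference (illustrative, not part of the patch):

vocab_path = "vocab.txt"

# Old style removed by this commit: compare concrete types directly.
old_style_match = type(vocab_path) == type("")

# New style: isinstance reads more clearly and also matches str subclasses.
new_style_match = isinstance(vocab_path, str)

assert old_style_match and new_style_match

# Side note: type(True) is bool, so the patch's isinstance(debug, type(True))
# is just isinstance(debug, bool) spelled indirectly.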

contextualSpellCheck/tests/test_contextualSpellCheck.py

Lines changed: 34 additions & 25 deletions
@@ -1,7 +1,8 @@
 import pytest
 import spacy
 from pytest import approx
-import warnings, os
+import warnings
+import os

 from ..contextualSpellCheck import ContextualSpellCheck

@@ -18,7 +19,8 @@
     "inputSentence, misspell",
     [
         (
-            "Income was $9.4 million compared to the prior year of $2.7 million.",
+            "Income was $9.4 million \
+compared to the prior year of $2.7 million.",
             [],
         ),
         ("who is Rajat Goel?", []),
@@ -43,8 +45,8 @@ def test_no_misspellIdentify(inputSentence, misspell):
 def test_type_misspellIdentify(inputSentence, misspell):
     print("Start type correction test for spelling mistake identification\n")
     doc = nlp(inputSentence)
-    assert type(checker.misspell_identify(doc)[0]) == type(misspell)
-    assert type(checker.misspell_identify(doc)[1]) == type(doc)
+    assert isinstance(checker.misspell_identify(doc)[0], type(misspell))
+    assert isinstance(checker.misspell_identify(doc)[1], type(doc))
     assert checker.misspell_identify(doc)[1] == doc


@@ -62,8 +64,8 @@ def test_identify_misspellIdentify(inputSentence, misspell):
     print("Start misspell word identifation test\n")
     doc = nlp(inputSentence)
     checkerReturn = checker.misspell_identify(doc)[0]
-    assert type(checkerReturn) == list
-    ## Changed the approach after v0.1.0
+    assert isinstance(checkerReturn, list)
+    # Changed the approach after v0.1.0
     assert [tok.text_with_ws for tok in checkerReturn] == [
         doc[i].text_with_ws for i in misspell
     ]
@@ -142,9 +144,9 @@ def test_skipURL_misspellIdentify(inputSentence, misspell):
 def test_type_candidateGenerator(inputSentence, misspell):
     doc = nlp(inputSentence)
     misspell, doc = checker.misspell_identify(doc)
-    assert type(checker.candidate_generator(doc, misspell)) == tuple
-    assert type(checker.candidate_generator(doc, misspell)[0]) == type(doc)
-    assert type(checker.candidate_generator(doc, misspell)[1]) == dict
+    assert isinstance(checker.candidate_generator(doc, misspell), tuple)
+    assert isinstance(checker.candidate_generator(doc, misspell)[0], type(doc))
+    assert isinstance(checker.candidate_generator(doc, misspell)[1], dict)


 @pytest.mark.parametrize(
@@ -203,7 +205,7 @@ def test_identify_candidateGenerator(inputSentence, misspell):
     doc = nlp(inputSentence)
     (misspellings, doc) = checker.misspell_identify(doc)
     doc, suggestions = checker.candidate_generator(doc, misspellings)
-    ## changed after v1.0 because of deepCopy creatng issue with ==
+    # changed after v1.0 because of deepCopy creatng issue with ==
     # gold_suggestions = {doc[key]: value for key, value in misspell.items()}
     assert [tok.i for tok in suggestions] == [key for key in misspell.keys()]
     assert [suggString for suggString in suggestions.values()] == [
@@ -226,7 +228,7 @@ def test_identify_candidateGenerator(inputSentence, misspell):
 def test_extension_candidateGenerator(inputSentence, misspell):
     doc = nlp(inputSentence)
     (misspellings, doc) = checker.misspell_identify(doc)
-    suggestions = checker.candidate_generator(doc, misspellings)
+    checker.candidate_generator(doc, misspellings)
     assert doc._.performed_spellCheck == misspell


@@ -321,8 +323,9 @@ def test_ranking_candidateRanking(inputSentence, misspell):
     (misspellings, doc) = checker.misspell_identify(doc)
     doc, suggestions = checker.candidate_generator(doc, misspellings)
     selectedWord = checker.candidate_ranking(doc, suggestions)
-    ## changes made after v0.1
-    # assert selectedWord == {doc[key]: value for key, value in misspell.items()}
+    # changes made after v0.1
+    # assert selectedWord ==
+    # {doc[key]: value for key, value in misspell.items()}
     assert [tok.i for tok in selectedWord.keys()] == [
         tok for tok in misspell.keys()
     ]
@@ -378,8 +381,8 @@ def test_doc_extensions():
             ("%", 0.00041),
         ],
     }
-    assert doc._.contextual_spellCheck == True
-    assert doc._.performed_spellCheck == True
+    assert doc._.contextual_spellCheck
+    assert doc._.performed_spellCheck
     # updated after v0.1
     assert [tok.i for tok in doc._.suggestions_spellCheck.keys()] == [
         tok.i for tok in gold_suggestion.keys()
@@ -422,7 +425,7 @@ def test_doc_extensions():
 def test_span_extensions():
     try:
         nlp.add_pipe(checker)
-    except:
+    except BaseException:
         print("contextual SpellCheck already in pipeline")
     doc = nlp(
         "Income was $9.4 milion compared to the prior year of $2.7 milion."
@@ -446,7 +449,7 @@ def test_span_extensions():
         doc[5]: [],
     }

-    assert doc[2:6]._.get_has_spellCheck == True
+    assert doc[2:6]._.get_has_spellCheck
     # splitting components to make use of approx function
     print(doc[2:6]._.score_spellCheck)
     print(gold_score)
@@ -472,7 +475,8 @@ def test_span_extensions():
         abs=1e-4,
     )

-    # assert doc[2:6]._.score_spellCheck == approx(gold_score,rel=1e-4, abs=1e-4)
+    # assert doc[2:6]._.score_spellCheck ==
+    # approx(gold_score,rel=1e-4, abs=1e-4)
     nlp.remove_pipe("contextual spellchecker")


@@ -497,7 +501,7 @@ def test_token_extension():
         ("USD", 0.00113),
     ]

-    assert doc[4]._.get_require_spellCheck == True
+    assert doc[4]._.get_require_spellCheck
     assert doc[4]._.get_suggestion_spellCheck == gold_suggestions
     # Match words and score separately to incorporate approx fn in pytest
     assert [word_score[0] for word_score in doc[4]._.score_spellCheck] == [
@@ -525,13 +529,14 @@ def test_warning():
     # warnings.simplefilter("always")
     # Trigger a warning.

-    assert doc[4]._.get_require_spellCheck == False
+    assert not doc[4]._.get_require_spellCheck
     assert doc[4]._.get_suggestion_spellCheck == ""
     assert doc[4]._.score_spellCheck == []
     # Verify Warning
     assert issubclass(w[-1].category, UserWarning)
     assert (
-        "Position of tokens modified by downstream element in pipeline eg. merge_entities"
+        "Position of tokens modified by downstream \
+element in pipeline eg. merge_entities"
         in str(w[-1].message)
     )

@@ -546,14 +551,16 @@ def test_warning():
         ContextualSpellCheck(vocab_path=True)
     assert (
         e
-        == "Please check datatype provided. vocab_path should be str, debug and performance should be bool"
+        == "Please check datatype provided. \
+vocab_path should be str, debug and performance should be bool"
     )
     max_edit_distance = "non_int_or_float"
     with pytest.raises(ValueError) as e:
         ContextualSpellCheck(max_edit_dist=max_edit_distance)
     assert (
         e
-        == f"cannot convert {max_edit_distance} to int. Please provide a valid integer"
+        == f"cannot convert {max_edit_distance} to int. \
+Please provide a valid integer"
     )

     try:
@@ -582,8 +589,10 @@ def test_bert_model_name():
     model_name = "a_random_model"
     error_message = (
         f"Can't load config for '{model_name}'. Make sure that:\n\n"
-        f"- '{model_name}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n"
-        f"- or '{model_name}' is the correct path to a directory containing a config.json file\n\n"
+        f"- '{model_name}' is a correct model identifier listed on \
+'https://huggingface.co/models'\n\n"
+        f"- or '{model_name}' is the correct path to a directory \
+containing a config.json file\n\n"
     )

     with pytest.raises(OSError) as e:
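The test changes are mostly flake8-driven style fixes: "== True" / "== False" comparisons become plain truthiness asserts (E712), a bare "except:" becomes "except BaseException:" (E722), "##" comments and over-long lines are trimmed, and an unused assignment is dropped (F841). A tiny illustrative sketch of the first two rules, not taken from the suite:

flag, done = True, False

# E712: comparisons to True/False are flagged; plain truthiness asserts are
# equivalent and give the same pytest failure output.
assert flag        # instead of: assert flag == True
assert not done    # instead of: assert done == False

# E722: a bare "except:" is flagged; naming BaseException keeps the same
# catch-everything behaviour while satisfying the checker.
try:
    value = int("not a number")
except BaseException:
    value = 0
assert value == 0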

requirements.txt

Lines changed: 4 additions & 0 deletions
@@ -10,3 +10,7 @@ mecab-python3==0.996.5
 ipadic==1.0.0
 unidic-lite==1.0.6

+# Code formatting
+flake8==3.8.3
+black==20.8b1
+

0 commit comments
