11import pytest
22import spacy
33from pytest import approx
4- import warnings , os
4+ import warnings
5+ import os
56
67from ..contextualSpellCheck import ContextualSpellCheck
78
1819 "inputSentence, misspell" ,
1920 [
2021 (
21- "Income was $9.4 million compared to the prior year of $2.7 million." ,
22+ "Income was $9.4 million \
23+ compared to the prior year of $2.7 million." ,
2224 [],
2325 ),
2426 ("who is Rajat Goel?" , []),
@@ -43,8 +45,8 @@ def test_no_misspellIdentify(inputSentence, misspell):
def test_type_misspellIdentify(inputSentence, misspell):
    """Check the types of the pair returned by ``checker.misspell_identify``.

    The first element must be an instance of the same type as the expected
    ``misspell`` value, and the second element must be the very ``doc`` that
    was passed in (same type and equal to it).
    """
    print("Start type correction test for spelling mistake identification\n ")
    doc = nlp(inputSentence)
    # Run the identification step once and inspect that single result,
    # instead of re-invoking the checker for every assertion.
    result = checker.misspell_identify(doc)
    assert isinstance(result[0], type(misspell))
    assert isinstance(result[1], type(doc))
    assert result[1] == doc
4951
5052
@@ -62,8 +64,8 @@ def test_identify_misspellIdentify(inputSentence, misspell):
6264 print ("Start misspell word identifation test\n " )
6365 doc = nlp (inputSentence )
6466 checkerReturn = checker .misspell_identify (doc )[0 ]
65- assert type (checkerReturn ) == list
66- ## Changed the approach after v0.1.0
67+ assert isinstance (checkerReturn , list )
68+ # Changed the approach after v0.1.0
6769 assert [tok .text_with_ws for tok in checkerReturn ] == [
6870 doc [i ].text_with_ws for i in misspell
6971 ]
@@ -142,9 +144,9 @@ def test_skipURL_misspellIdentify(inputSentence, misspell):
def test_type_candidateGenerator(inputSentence, misspell):
    """Check the return types of ``checker.candidate_generator``.

    The call must return a tuple whose first element has the same type as
    the input ``doc`` and whose second element is a ``dict``.
    """
    doc = nlp(inputSentence)
    # The parametrized ``misspell`` value is deliberately replaced by the
    # checker's own output; only return types are verified here.
    misspell, doc = checker.misspell_identify(doc)
    # Generate candidates once and run every type assertion on that result,
    # rather than calling the generator again for each check.
    result = checker.candidate_generator(doc, misspell)
    assert isinstance(result, tuple)
    assert isinstance(result[0], type(doc))
    assert isinstance(result[1], dict)
148150
149151
150152@pytest .mark .parametrize (
@@ -203,7 +205,7 @@ def test_identify_candidateGenerator(inputSentence, misspell):
203205 doc = nlp (inputSentence )
204206 (misspellings , doc ) = checker .misspell_identify (doc )
205207 doc , suggestions = checker .candidate_generator (doc , misspellings )
206- ## changed after v1.0 because of deepCopy creatng issue with ==
208+ # changed after v1.0 because of deepCopy creating issue with ==
207209 # gold_suggestions = {doc[key]: value for key, value in misspell.items()}
208210 assert [tok .i for tok in suggestions ] == [key for key in misspell .keys ()]
209211 assert [suggString for suggString in suggestions .values ()] == [
@@ -226,7 +228,7 @@ def test_identify_candidateGenerator(inputSentence, misspell):
def test_extension_candidateGenerator(inputSentence, misspell):
    """Check ``doc._.performed_spellCheck`` after candidate generation.

    Runs misspelling identification and candidate generation on the parsed
    sentence, then compares the doc's ``performed_spellCheck`` extension
    against the expected ``misspell`` value.
    """
    parsed = nlp(inputSentence)
    flagged, parsed = checker.misspell_identify(parsed)
    # Only the doc extension is inspected afterwards, so the generator's
    # return value is intentionally discarded.
    checker.candidate_generator(parsed, flagged)
    assert parsed._.performed_spellCheck == misspell
231233
232234
@@ -321,8 +323,9 @@ def test_ranking_candidateRanking(inputSentence, misspell):
321323 (misspellings , doc ) = checker .misspell_identify (doc )
322324 doc , suggestions = checker .candidate_generator (doc , misspellings )
323325 selectedWord = checker .candidate_ranking (doc , suggestions )
324- ## changes made after v0.1
325- # assert selectedWord == {doc[key]: value for key, value in misspell.items()}
326+ # changes made after v0.1
327+ # assert selectedWord ==
328+ # {doc[key]: value for key, value in misspell.items()}
326329 assert [tok .i for tok in selectedWord .keys ()] == [
327330 tok for tok in misspell .keys ()
328331 ]
@@ -378,8 +381,8 @@ def test_doc_extensions():
378381 ("%" , 0.00041 ),
379382 ],
380383 }
381- assert doc ._ .contextual_spellCheck == True
382- assert doc ._ .performed_spellCheck == True
384+ assert doc ._ .contextual_spellCheck
385+ assert doc ._ .performed_spellCheck
383386 # updated after v0.1
384387 assert [tok .i for tok in doc ._ .suggestions_spellCheck .keys ()] == [
385388 tok .i for tok in gold_suggestion .keys ()
@@ -422,7 +425,7 @@ def test_doc_extensions():
422425def test_span_extensions ():
423426 try :
424427 nlp .add_pipe (checker )
425- except :
428+ except BaseException :
426429 print ("contextual SpellCheck already in pipeline" )
427430 doc = nlp (
428431 "Income was $9.4 milion compared to the prior year of $2.7 milion."
@@ -446,7 +449,7 @@ def test_span_extensions():
446449 doc [5 ]: [],
447450 }
448451
449- assert doc [2 :6 ]._ .get_has_spellCheck == True
452+ assert doc [2 :6 ]._ .get_has_spellCheck
450453 # splitting components to make use of approx function
451454 print (doc [2 :6 ]._ .score_spellCheck )
452455 print (gold_score )
@@ -472,7 +475,8 @@ def test_span_extensions():
472475 abs = 1e-4 ,
473476 )
474477
475- # assert doc[2:6]._.score_spellCheck == approx(gold_score,rel=1e-4, abs=1e-4)
478+ # assert doc[2:6]._.score_spellCheck ==
479+ # approx(gold_score,rel=1e-4, abs=1e-4)
476480 nlp .remove_pipe ("contextual spellchecker" )
477481
478482
@@ -497,7 +501,7 @@ def test_token_extension():
497501 ("USD" , 0.00113 ),
498502 ]
499503
500- assert doc [4 ]._ .get_require_spellCheck == True
504+ assert doc [4 ]._ .get_require_spellCheck
501505 assert doc [4 ]._ .get_suggestion_spellCheck == gold_suggestions
502506 # Match words and score separately to incorporate approx fn in pytest
503507 assert [word_score [0 ] for word_score in doc [4 ]._ .score_spellCheck ] == [
@@ -525,13 +529,14 @@ def test_warning():
525529 # warnings.simplefilter("always")
526530 # Trigger a warning.
527531
528- assert doc [4 ]._ .get_require_spellCheck == False
532+ assert not doc [4 ]._ .get_require_spellCheck
529533 assert doc [4 ]._ .get_suggestion_spellCheck == ""
530534 assert doc [4 ]._ .score_spellCheck == []
531535 # Verify Warning
532536 assert issubclass (w [- 1 ].category , UserWarning )
533537 assert (
534- "Position of tokens modified by downstream element in pipeline eg. merge_entities"
538+ "Position of tokens modified by downstream \
539+ element in pipeline eg. merge_entities"
535540 in str (w [- 1 ].message )
536541 )
537542
@@ -546,14 +551,16 @@ def test_warning():
546551 ContextualSpellCheck (vocab_path = True )
547552 assert (
548553 e
549- == "Please check datatype provided. vocab_path should be str, debug and performance should be bool"
554+ == "Please check datatype provided. \
555+ vocab_path should be str, debug and performance should be bool"
550556 )
551557 max_edit_distance = "non_int_or_float"
552558 with pytest .raises (ValueError ) as e :
553559 ContextualSpellCheck (max_edit_dist = max_edit_distance )
554560 assert (
555561 e
556- == f"cannot convert { max_edit_distance } to int. Please provide a valid integer"
562+ == f"cannot convert { max_edit_distance } to int. \
563+ Please provide a valid integer"
557564 )
558565
559566 try :
@@ -582,8 +589,10 @@ def test_bert_model_name():
582589 model_name = "a_random_model"
583590 error_message = (
584591 f"Can't load config for '{ model_name } '. Make sure that:\n \n "
585- f"- '{ model_name } ' is a correct model identifier listed on 'https://huggingface.co/models'\n \n "
586- f"- or '{ model_name } ' is the correct path to a directory containing a config.json file\n \n "
592+ f"- '{ model_name } ' is a correct model identifier listed on \
593+ 'https://huggingface.co/models'\n \n "
594+ f"- or '{ model_name } ' is the correct path to a directory \
595+ containing a config.json file\n \n "
587596 )
588597
589598 with pytest .raises (OSError ) as e :
0 commit comments