Skip to content

Commit b6dab1e

Browse files
committed
Move codespell:ignore check into Spellchecker
This makes the spellchecker API honor inline `codespell:ignore` comments by default, so API callers automatically skip the same declared false positives that the command-line tool already filters.
1 parent ee9502b commit b6dab1e

File tree

2 files changed

+28
-20
lines changed

2 files changed

+28
-20
lines changed

codespell_lib/_codespell.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@
6161
"(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
6262
"\\b[\\w.%+-]+@[\\w.-]+\\b)"
6363
)
64-
inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
6564
USAGE = """
6665
\t%prog [OPTIONS] [file1 file2 ... fileN]
6766
"""
@@ -885,20 +884,10 @@ def parse_file(
885884
if not line or line in exclude_lines:
886885
continue
887886

888-
extra_words_to_ignore = set()
889-
match = inline_ignore_regex.search(line)
890-
if match:
891-
extra_words_to_ignore = set(
892-
filter(None, (match.group("words") or "").split(","))
893-
)
894-
if not extra_words_to_ignore:
895-
continue
896-
897887
fixed_words = set()
898888
asked_for = set()
899889

900-
issues = spellchecker.spellcheck_line(line, line_tokenizer, extra_words_to_ignore=extra_words_to_ignore)
901-
for issue in issues:
890+
for issue in spellchecker.spellcheck_line(line, line_tokenizer):
902891
misspelling = issue.misspelling
903892
word = issue.word
904893
lword = issue.lword

codespell_lib/spellchecker.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,17 @@
1616
Copyright (C) 2011 ProFUSION embedded systems
1717
"""
1818

19-
import re
2019
import os
20+
import re
2121
from typing import (
22-
Dict,
23-
Sequence,
2422
Container,
25-
Optional,
23+
Dict,
24+
FrozenSet,
25+
Generic,
2626
Iterable,
27+
Optional,
2728
Protocol,
28-
Generic,
29+
Sequence,
2930
TypeVar,
3031
)
3132

@@ -108,6 +109,8 @@
108109

109110
_builtin_default_as_tuple = tuple(_builtin_default.split(","))
110111

112+
_inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
113+
111114

112115
class UnknownBuiltinDictionaryError(ValueError):
113116
def __init__(self, name: str) -> None:
@@ -206,12 +209,21 @@ def __init__(
206209
if builtin_dictionaries:
207210
self.load_builtin_dictionaries(builtin_dictionaries)
208211

212+
def _parse_inline_ignore(self, line: str) -> Optional[FrozenSet[str]]:
213+
inline_ignore_match = _inline_ignore_regex.search(line)
214+
if inline_ignore_match:
215+
words = frozenset(
216+
filter(None, (inline_ignore_match.group("words") or "").split(","))
217+
)
218+
return words if words else None
219+
return frozenset()
220+
209221
def spellcheck_line(
210222
self,
211223
line: str,
212224
tokenizer: LineTokenizer[T_co],
213225
*,
214-
extra_words_to_ignore: Container[str] = frozenset()
226+
respect_inline_ignore: bool = True,
215227
) -> Iterable[DetectedMisspelling[T_co]]:
216228
"""Tokenize and spellcheck a line
217229
@@ -220,12 +232,19 @@ def spellcheck_line(
220232
221233
:param line: The line to spellcheck.
222234
:param tokenizer: A callable that will tokenize the line
223-
:param extra_words_to_ignore: Extra words to ignore for this particular line
224-
(such as content from a `codespell:ignore` comment)
235+
:param respect_inline_ignore: Whether to check the line for
236+
`codespell:ignore` instructions
237+
:returns: An iterable of discovered typos.
225238
"""
226239
misspellings = self._misspellings
227240
ignore_words_cased = self.ignore_words_cased
228241

242+
extra_words_to_ignore = (
243+
self._parse_inline_ignore(line) if respect_inline_ignore else frozenset()
244+
)
245+
if extra_words_to_ignore is None:
246+
return
247+
229248
for token in tokenizer(line):
230249
word = token.group()
231250
if word in ignore_words_cased:

0 commit comments

Comments
 (0)