Skip to content

Commit de9a5db

Browse files
committed
support --ignore-words-case-sensitive option.
Signed-off-by: Tomoya Fujita <[email protected]>
1 parent 5d95320 commit de9a5db

File tree

3 files changed

+79
-9
lines changed

3 files changed

+79
-9
lines changed

README.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,10 @@ You can select the optional dictionaries with the ``--builtin`` option.
119119
Ignoring words
120120
--------------
121121

122-
When ignoring false positives, note that spelling errors are *case-insensitive* but words to ignore are *case-sensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``, but to ignore it you must pass ``wrod`` (to match the case of the dictionary entry).
122+
When ignoring false positives, note that spelling errors are *case-insensitive*.
123+
By default, only lowercase words to ignore are *case-insensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``; to ignore it, you can pass ``wrod``.
124+
Non-lowercase words to ignore are *case-sensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``. To ignore only that capitalization, pass ``Wrod``; the lowercase typo ``wrod`` will still be reported.
125+
If you want all words to ignore, including lowercase ones, to be handled in a *case-sensitive* way, use the ``--ignore-words-case-sensitive`` flag.
123126

124127
The words to ignore can be passed in two ways:
125128

codespell_lib/_codespell.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,13 @@ def parse_options(
476476
'the dictionary file. If set to "*", all '
477477
"misspelling in URIs and emails will be ignored.",
478478
)
479+
parser.add_argument(
480+
"--ignore-words-case-sensitive",
481+
action="store_true",
482+
default=False,
483+
help="all ignore words in the ignore-words arguments in a case-sensitive way. "
484+
"By default, lowercase words to ignore are handled in a case-insensitive way.",
485+
)
479486
parser.add_argument(
480487
"-r",
481488
"--regex",
@@ -697,18 +704,25 @@ def parse_options(
697704

698705

699706
def process_ignore_words(
700-
words: Iterable[str], ignore_words: Set[str], ignore_words_cased: Set[str]
707+
words: Iterable[str],
708+
ignore_words: Set[str],
709+
ignore_words_cased: Set[str],
710+
ignore_words_case_sensitive: bool = False,
701711
) -> None:
702712
for word in words:
703713
word = word.strip()
704-
if word == word.lower():
714+
if ignore_words_case_sensitive:
715+
# all ignore words are handled in a case-sensitive way
716+
ignore_words_cased.add(word)
717+
elif word == word.lower():
718+
# lowercase words to ignore are handled in a case-insensitive way
705719
ignore_words.add(word)
706720
else:
707721
ignore_words_cased.add(word)
708722

709723

710724
def parse_ignore_words_option(
711-
ignore_words_option: List[str],
725+
ignore_words_option: List[str], ignore_words_case_sensitive: bool = False
712726
) -> Tuple[Set[str], Set[str]]:
713727
ignore_words: Set[str] = set()
714728
ignore_words_cased: Set[str] = set()
@@ -718,6 +732,7 @@ def parse_ignore_words_option(
718732
(word.strip() for word in comma_separated_words.split(",")),
719733
ignore_words,
720734
ignore_words_cased,
735+
ignore_words_case_sensitive,
721736
)
722737
return (ignore_words, ignore_words_cased)
723738

@@ -728,11 +743,17 @@ def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None:
728743

729744

730745
def build_ignore_words(
731-
filename: str, ignore_words: Set[str], ignore_words_cased: Set[str]
746+
filename: str,
747+
ignore_words: Set[str],
748+
ignore_words_cased: Set[str],
749+
ignore_word_case_sensitive: bool = False,
732750
) -> None:
733751
with open(filename, encoding="utf-8") as f:
734752
process_ignore_words(
735-
(line.strip() for line in f), ignore_words, ignore_words_cased
753+
(line.strip() for line in f),
754+
ignore_words,
755+
ignore_words_cased,
756+
ignore_word_case_sensitive,
736757
)
737758

738759

@@ -1173,7 +1194,7 @@ def main(*args: str) -> int:
11731194
ignore_multiline_regex = None
11741195

11751196
ignore_words, ignore_words_cased = parse_ignore_words_option(
1176-
options.ignore_words_list
1197+
options.ignore_words_list, options.ignore_words_case_sensitive
11771198
)
11781199
if options.ignore_words:
11791200
ignore_words_files = flatten_clean_comma_separated_arguments(
@@ -1185,7 +1206,12 @@ def main(*args: str) -> int:
11851206
parser,
11861207
f"ERROR: cannot find ignore-words file: {ignore_words_file}",
11871208
)
1188-
build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased)
1209+
build_ignore_words(
1210+
ignore_words_file,
1211+
ignore_words,
1212+
ignore_words_cased,
1213+
options.ignore_words_case_sensitive,
1214+
)
11891215

11901216
uri_regex = options.uri_regex or uri_regex_def
11911217
try:
@@ -1197,7 +1223,11 @@ def main(*args: str) -> int:
11971223
)
11981224

11991225
uri_ignore_words = set(
1200-
itertools.chain(*parse_ignore_words_option(options.uri_ignore_words_list))
1226+
itertools.chain(
1227+
*parse_ignore_words_option(
1228+
options.uri_ignore_words_list, options.ignore_words_case_sensitive
1229+
)
1230+
)
12011231
)
12021232

12031233
dictionaries = flatten_clean_comma_separated_arguments(options.dictionary or ["-"])

codespell_lib/tests/test_basic.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,13 +389,50 @@ def test_ignore_words_with_cases(
389389
assert cs.main("-LMIS,Mis", bad_name) == 1
390390
assert cs.main("-I", fname, "-f", bad_name) == 1
391391
assert cs.main("-LMIS,Mis", "-f", bad_name) == 1
392+
# Only lowercase ignore words work in a case-insensitive manner
392393
fname.write_text("mis")
393394
assert cs.main("-I", fname, bad_name) == 0
394395
assert cs.main("-Lmis", bad_name) == 0
395396
assert cs.main("-I", fname, "-f", bad_name) == 0
396397
assert cs.main("-Lmis", "-f", bad_name) == 0
397398

398399

400+
def test_ignore_words_with_case_sensitive(
401+
tmp_path: Path,
402+
capsys: pytest.CaptureFixture[str],
403+
) -> None:
404+
"""Test --ignore-words-case-sensitive for -I and -L options."""
405+
bad_name = tmp_path / "MIS.txt"
406+
bad_name.write_text(
407+
"1 MIS (Management Information System) 1\n2 Les Mis (1980 musical) 2\n3 mis 3\n"
408+
)
409+
assert cs.main(bad_name) == 3
410+
assert cs.main(bad_name, "-f") == 4
411+
fname = tmp_path / "ignore.txt"
412+
413+
fname.write_text("miS")
414+
assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 3
415+
assert cs.main("--ignore-words-case-sensitive", "-LmiS", bad_name) == 3
416+
assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 4
417+
assert cs.main("--ignore-words-case-sensitive", "-LmiS", "-f", bad_name) == 4
418+
# with the flag, lowercase ignore words also work in a case-sensitive manner
419+
fname.write_text("mis")
420+
assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 2
421+
assert cs.main("--ignore-words-case-sensitive", "-Lmis", bad_name) == 2
422+
assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 3
423+
assert cs.main("--ignore-words-case-sensitive", "-Lmis", "-f", bad_name) == 3
424+
fname.write_text("MIS")
425+
assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 2
426+
assert cs.main("--ignore-words-case-sensitive", "-LMIS", bad_name) == 2
427+
assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 2
428+
assert cs.main("--ignore-words-case-sensitive", "-LMIS", "-f", bad_name) == 2
429+
fname.write_text("MIS\nMis")
430+
assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 1
431+
assert cs.main("--ignore-words-case-sensitive", "-LMIS,Mis", bad_name) == 1
432+
assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 1
433+
assert cs.main("--ignore-words-case-sensitive", "-LMIS,Mis", "-f", bad_name) == 1
434+
435+
399436
def test_ignore_word_list(
400437
tmp_path: Path,
401438
capsys: pytest.CaptureFixture[str],

0 commit comments

Comments
 (0)