Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 152 additions & 59 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,19 @@
import re
import sys
import textwrap
from typing import Any, Dict, List, Match, Optional, Pattern, Sequence, Set, Tuple
from multiprocessing import Pool
from typing import (
Any,
Dict,
Generator,
List,
Match,
Optional,
Pattern,
Sequence,
Set,
Tuple,
)

# autogenerated by setuptools_scm
from ._version import __version__ as VERSION # type: ignore # noqa: N812
Expand Down Expand Up @@ -455,6 +467,20 @@ def parse_options(
"should match the to-be-excluded lines exactly",
)

# -J/--jobs: opt-in parallel processing; the default of 0 means no
# parallelization, as stated in the help text below.
parser.add_argument(
    "-J",
    "--jobs",
    action="store",
    type=int,
    default=0,
    # Every bullet ends with "\n" so that adjacent bullets are not fused
    # together (previously "(default)- positive integer") in the help text.
    help="set number of jobs to parallelize processing - one "
    "subprocess per file:\n"
    "- 0: no parallelization (default)\n"
    "- positive integer: number of sub-processes to use\n"
    "- -1: use all available CPUs\n"
    "Interactive mode is not compatible with parallel processing",
)

parser.add_argument(
"-i",
"--interactive",
Expand Down Expand Up @@ -1021,12 +1047,58 @@ def parse_file(
return bad_count


class _FileParser:
    """Callable that binds every parse_file() argument except the filename.

    An instance stores the arguments that are the same for every file and,
    when called with a filename, forwards them to parse_file() together
    with that filename. It is a top-level class so that it can stand in
    for a closure over parse_file().
    """

    def __init__(
        self,
        colors: TermColors,
        summary: Optional[Summary],
        misspellings: Dict[str, Misspelling],
        exclude_lines: Set[str],
        file_opener: FileOpener,
        word_regex: Pattern[str],
        ignore_word_regex: Optional[Pattern[str]],
        uri_regex: Pattern[str],
        uri_ignore_words: Set[str],
        context: Optional[Tuple[int, int]],
        options: argparse.Namespace,
    ) -> None:
        """Store the arguments shared by every parse_file() call."""
        # Reporting helpers.
        self.colors, self.summary = colors, summary
        # Dictionary data and exclusions.
        self.misspellings, self.exclude_lines = misspellings, exclude_lines
        self.file_opener = file_opener
        # Word and URI matching.
        self.word_regex, self.ignore_word_regex = word_regex, ignore_word_regex
        self.uri_regex, self.uri_ignore_words = uri_regex, uri_ignore_words
        # Output context and the parsed command-line options.
        self.context, self.options = context, options

    def __call__(self, filename: str) -> int:
        """Return the result of parse_file() for *filename*."""
        # Order must match the positional parameters of parse_file().
        shared_args = (
            self.colors,
            self.summary,
            self.misspellings,
            self.exclude_lines,
            self.file_opener,
            self.word_regex,
            self.ignore_word_regex,
            self.uri_regex,
            self.uri_ignore_words,
            self.context,
            self.options,
        )
        return parse_file(filename, *shared_args)


def _script_main() -> int:
    """Setuptools entry point: pass the command-line arguments to main()."""
    # Drop argv[0] (the program name); main() takes only the arguments.
    cli_args = sys.argv[1:]
    return main(*cli_args)


def main(*args: str) -> int:
def main(*args: str) -> int: # noqa: C901,PLR0915,PLR0911
"""Contains flow control"""
try:
options, parser, used_cfg_files = parse_options(args)
Expand Down Expand Up @@ -1138,6 +1210,25 @@ def main(*args: str) -> int:
else:
summary = None

if options.jobs and options.interactive:
print(
"ERROR: do not enable parallelization in interactive mode",
file=sys.stderr,
)
# no point in calling parser.print_help() here - it would just bury the ERROR
return EX_USAGE

jobs = options.jobs
if jobs == -1:
jobs = os.cpu_count()
elif jobs < -1:
print(
f"ERROR: invalid number of jobs: {jobs}",
file=sys.stderr,
)
parser.print_help()
return EX_USAGE

context = None
if options.context is not None:
if (options.before_context is not None) or (options.after_context is not None):
Expand Down Expand Up @@ -1176,66 +1267,68 @@ def main(*args: str) -> int:
)
return EX_USAGE

bad_count = 0
for filename in sorted(options.files):
# ignore hidden files
if is_hidden(filename, options.check_hidden):
continue

if os.path.isdir(filename):
for root, dirs, files in os.walk(filename):
if glob_match.match(root): # skip (absolute) directories
dirs.clear()
continue
if is_hidden(root, options.check_hidden): # dir itself hidden
continue
for file_ in sorted(files):
# ignore hidden files in directories
if is_hidden(file_, options.check_hidden):
continue
if glob_match.match(file_): # skip files
def _find_files() -> Generator[str, None, None]:
"""Yields filename for the parsing"""
for filename in sorted(options.files):
# ignore hidden files
if is_hidden(filename, options.check_hidden):
continue

if os.path.isdir(filename):
for root, dirs, files in os.walk(filename):
if glob_match.match(root): # skip (absolute) directories
dirs.clear()
continue
fname = os.path.join(root, file_)
if glob_match.match(fname): # skip paths
if is_hidden(root, options.check_hidden): # dir itself hidden
continue
bad_count += parse_file(
fname,
colors,
summary,
misspellings,
exclude_lines,
file_opener,
word_regex,
ignore_word_regex,
uri_regex,
uri_ignore_words,
context,
options,
)
for file_ in sorted(files):
# ignore hidden files in directories
if is_hidden(file_, options.check_hidden):
continue
if glob_match.match(file_): # skip files
continue
fname = os.path.join(root, file_)
if glob_match.match(fname): # skip paths
continue
yield fname

# skip (relative) directories
dirs[:] = [
dir_
for dir_ in dirs
if not glob_match.match(dir_)
and not is_hidden(dir_, options.check_hidden)
]

elif not glob_match.match(filename): # skip files
yield filename

# bind the shared arguments so that each job only needs to be given a filename
file_parser = _FileParser(
colors,
summary,
misspellings,
exclude_lines,
file_opener,
word_regex,
ignore_word_regex,
uri_regex,
uri_ignore_words,
context,
options,
)

# skip (relative) directories
dirs[:] = [
dir_
for dir_ in dirs
if not glob_match.match(dir_)
and not is_hidden(dir_, options.check_hidden)
]

elif not glob_match.match(filename): # skip files
bad_count += parse_file(
filename,
colors,
summary,
misspellings,
exclude_lines,
file_opener,
word_regex,
ignore_word_regex,
uri_regex,
uri_ignore_words,
context,
options,
)
if jobs:
# parse_file would be in subprocess(es)
with Pool(jobs) as pool:
results = pool.map(file_parser, _find_files())
for result in results:
if isinstance(result, Exception):
raise result
bad_count = sum(results)
else:
# serial
bad_count = sum(map(file_parser, _find_files()))

if summary:
print("\n-------8<-------\nSUMMARY:")
Expand Down