|
| 1 | +#!/usr/bin/env python3 |
| 2 | +import subprocess |
| 3 | +import os |
| 4 | +import sys |
| 5 | +import json |
| 6 | +import glob |
| 7 | +import argparse |
| 8 | +from pathlib import Path |
| 9 | + |
def _parse_hunk_range(token):
    """Return ``(start, count)`` for a hunk range token such as ``+12,3``.

    A missing count means 1 (``+12``).  An unparsable count yields 0 so the
    caller still gets the start line; an unparsable start raises ValueError.
    """
    start_text, _, count_text = token[1:].partition(',')
    start = int(start_text)
    try:
        count = int(count_text) if count_text else 1
    except ValueError:
        count = 0
    return start, count


def parse_diff_content(diff_text):
    """Parse unified diff text and collect the lines it adds.

    Args:
        diff_text: Unified diff text, with or without context lines.

    Returns:
        A dict mapping each target file (the path of its ``+++`` header, minus
        a leading ``b/`` and anything after a tab) to a list of
        ``(line_number, text)`` tuples, one per added line.  ``line_number``
        is the position in the new file and ``text`` is the line without its
        leading ``+``.
    """
    file_changes = {}
    current_file = None
    current_line = None
    # Lines of the old/new file that the current hunk still owes us, taken
    # from its "@@ -a,b +c,d @@" header.  While either is positive we are
    # inside a hunk body and every line is content, never a header.
    old_left = 0
    new_left = 0

    # Split on "\n" only: str.splitlines() would also break lines on form
    # feeds and other separators that may legitimately occur in content.
    for raw_line in diff_text.split('\n'):
        line = raw_line.rstrip('\r')

        if line.startswith('@@'):
            tokens = line.split()
            new_token = next((t for t in tokens if t.startswith('+')), None)
            old_token = next((t for t in tokens if t.startswith('-')), None)
            old_left = new_left = 0
            if new_token:
                try:
                    current_line, new_left = _parse_hunk_range(new_token)
                except ValueError:
                    current_line = None
                else:
                    if old_token:
                        try:
                            old_left = _parse_hunk_range(old_token)[1]
                        except ValueError:
                            old_left = 0
            continue

        if old_left > 0 or new_left > 0:
            if line.startswith('+'):
                # Added line, even when its own text begins with "++".
                if current_file:
                    file_changes.setdefault(current_file, []).append(
                        (current_line, line[1:]))
                current_line += 1
                new_left -= 1
            elif line.startswith('-'):
                old_left -= 1
            elif not line.startswith('\\'):
                # Context line (or an empty line whose leading space was
                # stripped): present in both files, so it advances the
                # new-file line counter.  "\ No newline at end of file"
                # markers are skipped.
                current_line += 1
                old_left -= 1
                new_left -= 1
            continue

        if line.startswith('+++'):
            # File header; drop a trailing tab/timestamp and the "b/" prefix.
            current_file = line[4:].split('\t', 1)[0]
            if current_file.startswith('b/'):
                current_file = current_file[2:]
        elif line.startswith('+'):
            # Added line beyond the counts announced by the hunk header
            # (hand-written or truncated diffs): still record it.
            if current_file and current_line is not None:
                file_changes.setdefault(current_file, []).append(
                    (current_line, line[1:]))
                current_line += 1
    return file_changes
| 33 | + |
def is_detached_head():
    """Report whether Git's HEAD is detached.

    Runs ``git symbolic-ref --quiet HEAD`` with its output discarded and
    returns True whenever that command exits with a non-zero status.
    """
    probe = ['git', 'symbolic-ref', '--quiet', 'HEAD']
    exit_code = subprocess.run(
        probe,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    ).returncode
    return exit_code != 0
| 40 | + |
def get_diff_files(base_branch="master", includes=None, excludes=None):
    """List files changed relative to ``origin/<base_branch>``, filtered by globs.

    When HEAD is detached, ``base_branch`` is fetched from ``origin`` first.

    Args:
        base_branch: Branch name; the diff is taken against
            ``origin/<base_branch>``.
        includes: Glob pattern or iterable of glob patterns, evaluated
            relative to the current working directory.  Only changed files
            matched by at least one pattern are kept.  ``None`` disables
            include filtering.
        excludes: Glob pattern or iterable of glob patterns whose matches are
            removed from the result.  ``None`` excludes nothing.

    Returns:
        A sorted list of the changed file paths that passed the filters.
    """
    # ``is_debug`` is only bound once main() has run; default to quiet so the
    # function is usable when imported.
    debug = globals().get("is_debug", False)

    if is_detached_head():
        if debug:
            print(f"Detached HEAD detected. Fetching '{base_branch}'...")
        subprocess.run(['git', 'fetch', 'origin', base_branch], text=True)

    cmd = ['git', 'diff', '--name-only', f"origin/{base_branch}"]
    if debug:
        print(f"git diff cmd: '{cmd}'")
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        # Report the failure instead of letting it look like "no changes".
        print(f"git diff against 'origin/{base_branch}' failed: "
              f"{result.stderr.strip()}", file=sys.stderr)

    changed_files = set(result.stdout.strip().splitlines())
    if debug:
        print(f"All files '{changed_files}'")

    # Resolve the working directory once; glob matches are made relative to it.
    root = Path(".").resolve()

    def glob_relative(patterns):
        """Expand glob patterns to a set of '/'-separated paths under ``root``."""
        if isinstance(patterns, str):
            patterns = [patterns]
        paths = set()
        for pattern in patterns:
            for path in glob.glob(pattern, recursive=True):
                try:
                    # as_posix() matches the forward slashes git prints.
                    paths.add(Path(path).resolve().relative_to(root).as_posix())
                except ValueError:
                    continue  # ignore files that resolve outside the directory
        return paths

    if includes is not None:
        changed_files &= glob_relative(includes)
    if excludes:
        changed_files -= glob_relative(excludes)

    changed_files = sorted(changed_files)
    if debug:
        print(f"Filtered files {changed_files}")

    return changed_files
| 73 | + |
| 74 | + |
def get_diff_lines(file, base_branch="master"):
    """Return the lines added to ``file`` relative to ``origin/<base_branch>``.

    Args:
        file: Path of the file to diff, passed to git after ``--``.
        base_branch: Branch name; the diff is taken against
            ``origin/<base_branch>``.

    Returns:
        The ``(line_number, text)`` tuples produced by ``parse_diff_content``
        for the zero-context diff of this file; empty when nothing was added.
    """
    # ``is_debug`` is only bound once main() has run; default to quiet.
    debug = globals().get("is_debug", False)

    cmd = ['git', 'diff', '-U0', f"origin/{base_branch}", '--', file]
    if debug:
        print(f"Diff cmd '{cmd}'")
    result = subprocess.run(cmd, capture_output=True, text=True)

    changes = parse_diff_content(result.stdout)
    if file in changes:
        return changes[file]
    # The diff is restricted to ``file``, so every parsed entry belongs to it
    # even when git wrote the header path differently (trailing tab for names
    # with spaces, quoting for non-ASCII names).
    return [entry for entries in changes.values() for entry in entries]
| 80 | + |
| 81 | + |
def run_spell_checker(lines, cmd="aspell list", dictionary_words=None):
    """Run an external spell checker over ``lines`` and locate the flagged words.

    The text of all lines is piped to ``cmd`` in one go; every line of the
    command's output is taken as one misspelled word.  Each input line is then
    scanned for those words so they can be reported with a line number.

    Args:
        lines: Iterable of ``(line_number, text)`` tuples.
        cmd: Spell checker command line, split with shell-like quoting rules.
        dictionary_words: Optional collection of words that are never reported.

    Returns:
        A list of ``(line_number, word, stripped_line)`` tuples, one per
        occurrence of a flagged word, in input order.
    """
    import re
    import shlex

    lines = list(lines)
    if not lines:
        return []  # nothing to check; do not spawn the checker

    # ``is_debug`` is only bound once main() has run; default to quiet.
    if globals().get("is_debug", False):
        print(f"Aspell cmd '{cmd}'")

    # shlex keeps quoted arguments (e.g. paths with spaces) intact.
    process = subprocess.Popen(shlex.split(cmd), stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE, text=True)
    out, _ = process.communicate(input='\n'.join(text for _, text in lines))
    misspelled = set(out.strip().splitlines())
    allowed = dictionary_words or ()

    # Runs of letters, optionally joined by apostrophes ("dosn't"), so that
    # punctuation separates words instead of gluing them together.
    word_pattern = re.compile(r"[^\W\d_]+(?:'[^\W\d_]+)*")

    annotations = []
    for lineno, line in lines:
        context = line.strip()
        for token in word_pattern.findall(line):
            if token in misspelled:
                flagged = [token]
            else:
                # The checker may have reported only one side of an apostrophe.
                flagged = [part for part in token.split("'") if part in misspelled]
            for word in flagged:
                if word not in allowed:
                    annotations.append((lineno, word, context))
    return annotations
| 97 | + |
| 98 | + |
def _escape_annotation_data(value):
    """Escape text for the message part of a GitHub workflow command."""
    return (str(value)
            .replace('%', '%25')
            .replace('\r', '%0D')
            .replace('\n', '%0A'))


def _escape_annotation_property(value):
    """Escape text for a ``key=value`` property of a GitHub workflow command."""
    return (_escape_annotation_data(value)
            .replace(':', '%3A')
            .replace(',', '%2C'))


def emit_github_annotations(file, annotations):
    """Print one GitHub ``::error`` workflow command per annotation.

    Args:
        file: Path reported in the annotation's ``file`` property.
        annotations: Iterable of ``(line_number, word, context)`` tuples.

    The file name and message are percent-escaped so that ``%``, line breaks,
    ``,`` and ``:`` cannot break the command syntax.
    """
    file_property = _escape_annotation_property(file)
    for lineno, word, context in annotations:
        message = _escape_annotation_data(
            f"Possible typo: '{word}' in line: {context}")
        print(f"::error file={file_property},line={lineno}::{message}")
| 102 | + |
| 103 | + |
def emit_console_output(file, annotations):
    """Print each annotation as a ``file:line: typo: ...`` console line.

    Args:
        file: Path shown at the start of every printed line.
        annotations: Iterable of ``(line_number, word, context)`` tuples.
    """
    for annotation in annotations:
        lineno, word, context = annotation
        message = f"{file}:{lineno}: typo: '{word}' in: {context}"
        print(message)
| 107 | + |
| 108 | + |
def main():
    """Command-line entry point: spell check added lines and report typos.

    The text to check comes from, in order of precedence: ``--input-string``
    (a raw string or a diff), ``--diff-file`` (a unified diff on disk), or the
    Git diff of the working tree against ``origin/<base-branch>`` restricted
    by the include/exclude globs.  Most options default to the matching
    ``INPUT_*`` environment variable.

    Exits with status 1 when a typo is found or the diff file is missing.
    """
    parser = argparse.ArgumentParser(
        description="Spell check only changed lines in Git diffs or a diff file."
    )
    parser.add_argument('--base-branch',
                        default=os.getenv("INPUT_BASE_BRANCH", "master"),
                        help='Branch to diff against (ignored if --diff-file is set)')
    parser.add_argument('--include',
                        default=os.getenv("INPUT_INCLUDE",
                                          '["**/*.md","**/*.txt",'
                                          '"**/*.rst","**/*.json",'
                                          '"**/*.yaml","**/*.yml",'
                                          '"**/*.ini","**/*.tex",'
                                          '"**/*.html","**/*.xml",'
                                          '"**/*.xhtml", "**/*.csv"]'),
                        help='JSON list of glob patterns to include')
    parser.add_argument('--exclude', default=os.getenv("INPUT_EXCLUDE", '[]'),
                        help='JSON list of glob patterns to exclude')
    parser.add_argument('--cmd',
                        default=os.getenv("INPUT_CMD",
                                          "aspell --mode=sgml"
                                          " --add-sgml-skip=code,pre,style,script,command,literal,ulink,parameter,filename,programlisting"
                                          " --lang=en list"),
                        help='Spell checker command; it reads text on stdin and '
                             'prints one misspelled word per line '
                             '(default: aspell in SGML mode, English)')
    parser.add_argument('--dictionary', default=os.getenv("INPUT_DICTIONARY", ""),
                        help='Space-separated list of allowed words')
    parser.add_argument('--console-output', action='store_true',
                        help='Emit console output instead of GitHub-style error annotations')
    parser.add_argument('--diff-file', help='Path to a unified diff file (optional)')
    parser.add_argument("--input-string",
                        help="Raw text string to spellcheck directly (optional)")
    parser.add_argument("--debug", action='store_true',
                        help="Print the commands that are run and the files considered")

    args = parser.parse_args()

    global is_debug
    # Environment values are strings, so "false" or "0" must not count as on.
    env_debug = os.getenv("INPUT_DEBUG", "").strip().lower()
    is_debug = args.debug or env_debug not in ("", "0", "false", "no", "off")

    try:
        includes = json.loads(args.include)
        excludes = json.loads(args.exclude)
    except json.JSONDecodeError as err:
        parser.error(f"--include/--exclude must be JSON lists of glob patterns: {err}")
    dictionary_words = set(args.dictionary.split()) if args.dictionary else set()

    if args.input_string:
        # Interpret backslash escapes such as "\n" without mangling non-ASCII
        # text: characters outside Latin-1 are first turned into escapes that
        # unicode_escape restores unchanged.
        try:
            lines = (args.input_string
                     .encode("latin-1", "backslashreplace")
                     .decode("unicode_escape"))
        except UnicodeDecodeError as err:
            parser.error(f"--input-string contains an invalid escape sequence: {err}")
        if lines.lstrip().startswith('diff'):
            file_changes = parse_diff_content(lines)
        else:
            file_changes = {
                "<stdin>": [(i + 1, line) for i, line in enumerate(lines.splitlines())]
            }
    elif args.diff_file:
        if not os.path.isfile(args.diff_file):
            print(f"❌ Diff file not found: {args.diff_file}")
            sys.exit(1)
        with open(args.diff_file, 'r', encoding='utf-8', errors='replace') as f:
            diff_text = f.read()
        file_changes = parse_diff_content(diff_text)
    else:
        files = get_diff_files(args.base_branch, includes, excludes)
        if not files:
            print("✅ No files to check.")
            return
        file_changes = {f: get_diff_lines(f, args.base_branch) for f in files}

    emit = emit_console_output if args.console_output else emit_github_annotations
    any_issues = False
    for file, lines in file_changes.items():
        annotations = run_spell_checker(lines, args.cmd, dictionary_words)
        if annotations:
            any_issues = True
            emit(file, annotations)

    if any_issues:
        sys.exit(1)
    else:
        print("✅ No typos found.")
| 183 | + |
| 184 | + |
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments