Skip to content

Commit 246fea0

Browse files
committed
Initial version of the diff spell checking service.
This action is meant to be run on pull requests and to annotate only the newly introduced typos.
1 parent f54e725 commit 246fea0

File tree

8 files changed

+374
-1
lines changed

8 files changed

+374
-1
lines changed

.github/workflows/release.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
name: Release
2+
3+
on:
4+
push:
5+
tags:
6+
- 'v*'
7+
8+
jobs:
9+
release:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v4
13+
14+
- name: Create Release
15+
uses: softprops/action-gh-release@v2
16+
with:
17+
generate_release_notes: true

.github/workflows/test.yml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name: Test git-spell-check
2+
3+
on:
4+
push:
5+
branches: [main]
6+
pull_request:
7+
8+
jobs:
9+
test:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v4
13+
- name: Install aspell
14+
run: |
15+
sudo apt-get install -y aspell aspell-en
16+
- name: Set up Python
17+
uses: actions/setup-python@v5
18+
with:
19+
python-version: 3.11
20+
- name: Install dependencies
21+
run: pip install -r requirements.txt
22+
- name: Run tests
23+
run: pytest
24+
- uses: compiler-research/git-spell-check@master
25+
- name: Setup tmate session
26+
if: ${{ !cancelled() && runner.debug }}
27+
uses: mxschmitt/action-tmate@v3
28+
# When debugging increase to a suitable value!
29+
timeout-minutes: 30

README.md

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,22 @@
1-
# git-spell-check
1+
# git-spell-check
2+
3+
Run a spell checker **only on changed lines in pull requests**.
4+
5+
## Features
6+
7+
- Only checks added/modified lines
8+
- Ignores custom words
9+
- Works with any spell checker (default: `aspell`)
10+
- Uses GitHub annotations for inline results
11+
12+
## Usage
13+
14+
```yaml
15+
- uses: compiler-research/git-spell-check@v1
16+
with:
17+
base_branch: origin/master
18+
include: '["**/*.md"]'
19+
exclude: '["CHANGELOG.md"]'
20+
cmd: "aspell list"
21+
dictionary: "Compiler-Research GitHub Actions"
22+
```

action.yml

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
name: git-spell-check
2+
description: "Runs a spell checker only on the changed lines in a pull request"
3+
4+
inputs:
5+
base_branch:
6+
description: "Base branch to diff against"
7+
required: false
8+
default: ${{ github.event.pull_request.base.ref || github.event.repository.default_branch }}
9+
include:
10+
description: "JSON array of glob patterns to include"
11+
required: false
12+
default: |
13+
[
14+
"**/*.md",
15+
"**/*.txt",
16+
"**/*.rst",
17+
"**/*.json",
18+
"**/*.yaml",
19+
"**/*.yml",
20+
"**/*.ini",
21+
"**/*.tex",
22+
"**/*.html",
23+
"**/*.xml",
24+
"**/*.xhtml",
25+
"**/*.csv"
26+
]
27+
exclude:
28+
description: "JSON array of glob patterns to exclude"
29+
required: false
30+
default: '[]'
31+
cmd:
32+
description: "Spell checker command to run"
33+
required: false
34+
default: |
35+
aspell --mode=sgml
36+
--add-sgml-skip=code,pre,style,script,command,literal,ulink,parameter,filename,programlisting
37+
--lang=en
38+
list
39+
dictionary:
40+
description: "Custom words to ignore (space-separated)"
41+
required: false
42+
default: ""
43+
debug:
44+
description: "Debug output"
45+
required: false
46+
default: ${{ github.debug }}
47+
48+
runs:
49+
using: "composite"
50+
steps:
51+
- name: Set up Python
52+
uses: actions/setup-python@v5
53+
with:
54+
python-version: "3.x"
55+
56+
- name: Run git-spell-check
57+
shell: bash
58+
env: # GitHub does not pass the default values to the action script.
59+
INPUT_BASE_BRANCH: ${{ inputs.base_branch }}
60+
INPUT_INCLUDE: ${{ inputs.include }}
61+
INPUT_EXCLUDE: ${{ inputs.exclude }}
62+
INPUT_CMD: ${{ inputs.cmd }}
63+
INPUT_DICTIONARY: ${{ inputs.dictionary }}
64+
INPUT_DEBUG: ${{ inputs.debug }}
65+
run: |
66+
python ${{ github.action_path }}/git-spell-check.py

git-spell-check.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
#!/usr/bin/env python3
2+
import subprocess
3+
import os
4+
import sys
5+
import json
6+
import glob
7+
import argparse
8+
from pathlib import Path
9+
10+
def parse_diff_content(diff_text):
    """Parse unified diff text and collect the added lines of every file.

    Args:
        diff_text: A unified diff (for example the output of ``git diff``)
            as a single string.

    Returns:
        A dict mapping each file path (taken from the ``+++`` header, with a
        leading ``b/`` removed) to a list of ``(line_number, text)`` tuples,
        one per added line. ``line_number`` is the position of the line in
        the new version of the file; ``text`` is the line without its
        leading ``+`` marker.
    """
    file_changes = {}
    current_file = None
    current_line = None

    for line in diff_text.splitlines():
        if line.startswith('+++'):
            current_file = line[4:]
            if current_file.startswith('b/'):
                current_file = current_file[2:]
            # Line numbers of the previous file must not leak into this one;
            # they become valid again at the next hunk header.
            current_line = None
        elif line.startswith('@@'):
            # Hunk header "@@ -old[,count] +new[,count] @@": the "+new" field
            # is the new-file line number of the first line in the hunk.
            match = next((m for m in line.split() if m.startswith('+')), None)
            if match:
                try:
                    current_line = int(match.split(',')[0][1:])
                except ValueError:
                    current_line = None
        elif line.startswith('+'):
            if current_file and current_line is not None:
                file_changes.setdefault(current_file, []).append((current_line, line[1:]))
                current_line += 1
        elif line.startswith(' ') and current_line is not None:
            # Context lines are part of the new file as well, so they advance
            # the counter; removed ("-") lines do not.
            current_line += 1
    return file_changes
33+
34+
def is_detached_head():
    """Tell whether Git reports HEAD as something other than a symbolic ref.

    Runs ``git symbolic-ref --quiet HEAD`` with all output discarded and
    treats any non-zero exit status as "detached".
    """
    exit_status = subprocess.call(
        ['git', 'symbolic-ref', '--quiet', 'HEAD'],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return exit_status != 0
40+
41+
def _strip_origin_prefix(base_branch):
    """Return the bare branch name, accepting both ``master`` and ``origin/master``."""
    prefix = "origin/"
    if base_branch.startswith(prefix):
        return base_branch[len(prefix):]
    return base_branch


def _debug_enabled():
    """Return the module-level ``is_debug`` flag, or False when it was never set."""
    return bool(globals().get("is_debug", False))


def get_diff_files(base_branch="master", includes=None, excludes=None):
    """List the files that differ from ``origin/<base_branch>`` and pass the filters.

    Args:
        base_branch: Branch to diff against, with or without an ``origin/``
            prefix.
        includes: Glob patterns a file must match; ``None`` disables the
            include filter so every changed file is kept.
        excludes: Glob patterns of files to drop; ``None`` excludes nothing.

    Returns:
        Sorted list of changed file paths as printed by
        ``git diff --name-only``.
    """
    branch = _strip_origin_prefix(base_branch)
    debug = _debug_enabled()

    if is_detached_head():
        if debug: print(f"Detached HEAD detected. Fetching '{branch}'...")
        subprocess.run(['git', 'fetch', 'origin', branch], text=True)

    cmd = ['git', 'diff', '--name-only', f"origin/{branch}"]
    if debug: print(f"git diff cmd: '{cmd}'")
    result = subprocess.run(cmd, capture_output=True, text=True)

    changed_files = set(result.stdout.strip().splitlines())
    if debug: print(f"All files '{changed_files}'")

    # NOTE(review): glob results are made relative to the current directory,
    # which is assumed to be the repository root that git reports paths
    # against -- confirm for callers running from a subdirectory.
    repo_root = Path(".").resolve()

    def glob_relative(patterns):
        """Expand glob patterns into POSIX-style paths relative to repo_root."""
        paths = set()
        for pattern in patterns or ():
            for path in glob.glob(pattern, recursive=True):
                try:
                    rel = Path(path).resolve().relative_to(repo_root)
                except ValueError:
                    continue  # ignore files outside repo
                # git prints forward slashes on every platform.
                paths.add(rel.as_posix())
        return paths

    if includes is not None:
        changed_files &= glob_relative(includes)
    changed_files = sorted(changed_files - glob_relative(excludes))
    if debug: print(f"Filtered files {changed_files}")

    return changed_files


def get_diff_lines(file, base_branch="master"):
    """Return the ``(line_number, text)`` pairs added to *file* relative to the base branch.

    The diff is taken with ``-U0`` against ``origin/<base_branch>`` and
    parsed by ``parse_diff_content``; a file without additions yields ``[]``.
    """
    cmd = ['git', 'diff', '-U0', f"origin/{_strip_origin_prefix(base_branch)}", '--', file]
    if _debug_enabled(): print(f"Diff cmd '{cmd}'")
    result = subprocess.run(cmd, capture_output=True, text=True)
    return parse_diff_content(result.stdout).get(file, [])
80+
81+
82+
def run_spell_checker(lines, cmd="aspell list", dictionary_words=None):
    """Spell check the given lines and locate every reported word in them.

    Args:
        lines: List of ``(line_number, text)`` tuples to check.
        cmd: Spell checker command line. The texts are written to its stdin,
            one per line, and its stdout is read as one misspelled word per
            line. The command is split with shell-like quoting rules, so
            quoted arguments stay intact.
        dictionary_words: Optional collection of words that are never
            reported.

    Returns:
        List of ``(line_number, word, stripped_line)`` tuples, one per
        occurrence of a reported word, in line order and left to right
        within a line.
    """
    # Function-scope imports: these modules are only needed here.
    import re
    import shlex

    if globals().get("is_debug"): print(f"Aspell cmd '{cmd}'")
    if not lines:
        return []  # nothing to check, so do not spawn the checker at all

    completed = subprocess.run(
        shlex.split(cmd),
        input='\n'.join(text for _, text in lines),
        stdout=subprocess.PIPE,
        text=True,
    )
    reported = {word.strip() for word in completed.stdout.splitlines() if word.strip()}
    flagged = reported - set(dictionary_words or ())
    if not flagged:
        return []

    # Search for the checker's words inside each line instead of re-tokenizing
    # the line ourselves: words glued to punctuation ("werd-foo", "(werd)")
    # or containing an apostrophe are then still found. Longer words come
    # first so the alternation prefers the longest match at a position.
    alternatives = "|".join(
        re.escape(word) for word in sorted(flagged, key=lambda w: (-len(w), w))
    )
    # A match must not be preceded or followed by another letter.
    pattern = re.compile(r"(?<![^\W\d_])(?:" + alternatives + r")(?![^\W\d_])")

    annotations = []
    for lineno, line in lines:
        context = line.strip()
        annotations.extend((lineno, hit.group(0), context) for hit in pattern.finditer(line))
    return annotations
97+
98+
99+
def _escape_command_data(value):
    """Escape a workflow-command message: ``%``, CR and LF would otherwise corrupt it."""
    return str(value).replace('%', '%25').replace('\r', '%0D').replace('\n', '%0A')


def _escape_command_property(value):
    """Escape a workflow-command property value, where ``:`` and ``,`` are delimiters too."""
    return _escape_command_data(value).replace(':', '%3A').replace(',', '%2C')


def emit_github_annotations(file, annotations):
    """Print one GitHub ``::error`` workflow command per annotation.

    Args:
        file: Path of the file the annotations belong to.
        annotations: Iterable of ``(line_number, word, context)`` tuples.
    """
    for lineno, word, context in annotations:
        message = f"Possible typo: '{word}' in line: {context}"
        print(f"::error file={_escape_command_property(file)},line={lineno}"
              f"::{_escape_command_data(message)}")
102+
103+
104+
def emit_console_output(file, annotations):
    """Print each annotation as a plain ``file:line: typo: ...`` console line.

    Args:
        file: Path shown at the start of every printed line.
        annotations: Iterable of ``(line_number, word, context)`` tuples.
    """
    reports = (
        f"{file}:{entry[0]}: typo: '{entry[1]}' in: {entry[2]}"
        for entry in annotations
    )
    for report in reports:
        print(report)
107+
108+
109+
def _env_flag(name):
    """Interpret environment variable *name* as a boolean switch.

    Only ``1``, ``true``, ``yes`` and ``on`` (case-insensitive) enable the
    flag, so values such as ``"false"`` or ``"0"`` do not count as set.
    """
    return os.getenv(name, "").strip().lower() in ("1", "true", "yes", "on")


def main():
    """Command-line entry point: spell check added lines and report typos.

    The lines to check come from, in order of precedence, ``--input-string``
    (raw text or an inline diff), ``--diff-file`` (a unified diff on disk),
    or a ``git diff`` against ``--base-branch``. Option defaults are read
    from the ``INPUT_*`` environment variables set by the GitHub action.

    Exits with status 1 when a typo was reported or the diff file is
    missing; otherwise returns normally.
    """
    global is_debug

    parser = argparse.ArgumentParser(
        description="Spell check only changed lines in Git diffs or a diff file."
    )
    parser.add_argument('--base-branch', default=os.getenv("INPUT_BASE_BRANCH",
                                                           "master"),
                        help='Branch to diff against (ignored if --diff-file is set)')
    parser.add_argument('--include', default=os.getenv("INPUT_INCLUDE",
                                                       '["**/*.md","**/*.txt",'
                                                       '"**/*.rst","**/*.json",'
                                                       '"**/*.yaml","**/*.yml",'
                                                       '"**/*.ini","**/*.tex",'
                                                       '"**/*.html","**/*.xml",'
                                                       '"**/*.xhtml", "**/*.csv"]'),
                        help='JSON list of glob patterns to include')
    parser.add_argument('--exclude', default=os.getenv("INPUT_EXCLUDE", '[]'),
                        help='JSON list of glob patterns to exclude')
    parser.add_argument('--cmd', default=os.getenv("INPUT_CMD",
                                                   "aspell --mode=sgml"
                                                   " --add-sgml-skip=code,pre,style,script,command,literal,ulink,parameter,filename,programlisting"
                                                   " --lang=en list"),
                        help='Spell checker command; reads text on stdin and '
                             'prints misspelled words (default: aspell in sgml mode)')
    parser.add_argument('--dictionary', default=os.getenv("INPUT_DICTIONARY", ""),
                        help='Space-separated list of allowed words')
    parser.add_argument('--console-output', action='store_true',
                        help='Emit console output instead of GitHub-style error annotations')
    parser.add_argument('--diff-file', help='Path to a unified diff file (optional)')
    parser.add_argument("--input-string", help="Raw text string to spellcheck directly (optional)")
    parser.add_argument("--debug", action='store_true',
                        help="Print debugging output (also enabled by INPUT_DEBUG)")

    args = parser.parse_args()

    # Shared with the helper functions through the module namespace.
    is_debug = _env_flag("INPUT_DEBUG") or args.debug

    includes = json.loads(args.include)
    excludes = json.loads(args.exclude)
    dictionary_words = set(args.dictionary.split()) if args.dictionary else set()

    if args.input_string:
        # Expand backslash escapes such as "\n" typed on the command line.
        # Encoding with latin-1 + backslashreplace keeps non-ASCII characters
        # intact; going through UTF-8 bytes would turn them into mojibake.
        lines = args.input_string.encode("latin-1", "backslashreplace").decode("unicode_escape")
        if lines.lstrip().startswith('diff'):
            file_changes = parse_diff_content(lines)
        else:
            file_changes = {"<stdin>": [(i + 1, line) for i, line in enumerate(lines.splitlines())]}
    elif args.diff_file:
        if not os.path.isfile(args.diff_file):
            print(f"❌ Diff file not found: {args.diff_file}")
            sys.exit(1)
        # Explicit encoding so the result does not depend on the locale;
        # undecodable bytes are replaced instead of aborting the run.
        with open(args.diff_file, 'r', encoding='utf-8', errors='replace') as f:
            diff_text = f.read()
        file_changes = parse_diff_content(diff_text)
    else:
        files = get_diff_files(args.base_branch, includes, excludes)
        if not files:
            print("✅ No files to check.")
            return
        file_changes = {f: get_diff_lines(f, args.base_branch) for f in files}

    emit = emit_console_output if args.console_output else emit_github_annotations
    any_issues = False
    for file, lines in file_changes.items():
        annotations = run_spell_checker(lines, args.cmd, dictionary_words)
        if annotations:
            any_issues = True
            emit(file, annotations)

    if any_issues:
        sys.exit(1)
    print("✅ No typos found.")


if __name__ == "__main__":
    main()

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pytest
2+

test/fixtures/example.diff

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
diff --git a/example.md b/example.md
2+
index e69de29..4b825dc 100644
3+
--- a/example.md
4+
+++ b/example.md
5+
@@ -0,2 +1,3 @@
6+
This should be ignored
7+
-Thisss should be ignored, too
8+
+Thiss line has spelling errors.

test/test_spellcheck.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import subprocess
2+
import tempfile
3+
import os
4+
import pytest
5+
from pathlib import Path
6+
7+
# Location of git-spell-check.py: two directory levels up from this test file.
SCRIPT_PATH = Path(__file__).parent.parent / "git-spell-check.py"
8+
9+
def run_script(args=None, input_text=None):
    """Run git-spell-check.py in a subprocess and return the completed process.

    Args:
        args: Optional sequence of command-line arguments; each item is
            converted with ``str`` so ``Path`` objects may be passed.
        input_text: Optional text fed to the script's stdin.

    Returns:
        The ``subprocess.CompletedProcess`` with stdout and stderr captured
        as text.
    """
    # ``None`` instead of a mutable ``[]`` default, which would be shared
    # between calls.
    cmd = ["python3", str(SCRIPT_PATH)] + [str(arg) for arg in (args or [])]
    return subprocess.run(cmd, input=input_text, capture_output=True, text=True)
13+
14+
def test_diff_string_input():
    """An inline diff passed via --input-string only flags words on added lines."""
    example_diff = """
diff --git a/file.md b/file.md
index abc123..def456 100644
--- a/file.md
+++ b/file.md
@@ -1 +1,2 @@
This is a sentence with a speling mistake.
+Another bad werd here.
"""
    stdout = run_script(["--input-string", example_diff]).stdout
    # "speling" is not on an added line, so it must not be reported.
    assert "speling" not in stdout
    assert "werd" in stdout
    assert "::error" in stdout  # GitHub annotation format
28+
29+
def test_file_input(tmp_path):
    """A diff read via --diff-file reports typos from its added line only."""
    fixture = Path(__file__).parent / "fixtures/example.diff"
    stdout = run_script(["--diff-file", fixture]).stdout
    assert "Thiss" in stdout
    assert "shold" not in stdout
    # "Thisss" only occurs on the removed line of the fixture.
    assert "Thisss" not in stdout
35+
36+
def test_empty_string():
    """Whitespace-only input succeeds and prints the "no typos" message."""
    outcome = run_script(["--input-string", " "])
    assert outcome.returncode == 0
    assert outcome.stdout.strip() == "✅ No typos found."
40+
41+
def test_help_flag():
    """--help exits successfully and prints a usage message."""
    outcome = run_script(["--help"])
    assert outcome.returncode == 0
    assert "usage" in outcome.stdout.lower()

0 commit comments

Comments
 (0)