Skip to content

Commit f8abe89

Browse files
committed
fix: OPE-182 + OPE-183 -- large repo resilience + exception list readability
OPE-182a: RecursionError on complex files now caught gracefully - dependency_analyzer.py now catches RecursionError explicitly - Logs warning and continues (was crashing visibly on posthog api.ts) - RecursionError is an Exception subclass (via RuntimeError), so 'except Exception' also catches it; the dedicated handler downgrades the log from error to warning OPE-182b: File limit raised 10K->25K + visible warning instead of silent truncation - MAX_FILES: 10,000 -> 25,000 (PostHog has 19K, most repos <10K) - When limit hit: warning shown in detection summary to the user - models.py: CodebaseDNA.analysis_warnings list added - CLI _show_detection_summary: prints all analysis_warnings in yellow OPE-183: Exception list no longer an unreadable blob - When <=10 exceptions: listed inline as before - When >10 exceptions: shows top 10 + grep command to find the rest - PostHog had 105 exceptions in one line -- now shows top 10 + search hint - Never-create-new-exception-class rule added automatically when >10 found Tests: 457 passing
1 parent 5a6dd04 commit f8abe89

8 files changed

Lines changed: 43 additions & 5 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "saar"
7-
version = "0.5.2"
7+
version = "0.5.3"
88
description = "Extract the essence of your codebase. Auto-generate AGENTS.md, CLAUDE.md, .cursorrules and more."
99
readme = "README.md"
1010
license = "MIT"

saar/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Saar -- extract the essence of your codebase."""
22

3-
__version__ = "0.5.2"
3+
__version__ = "0.5.3"

saar/cli.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,11 @@ def _show_detection_summary(dna, console, no_interview: bool) -> bool:
530530
table.add_row(label, f"[cyan]{value}[/cyan]")
531531

532532
console.print(table)
533+
534+
# show analysis warnings (file limit, recursion skips, etc.) -- OPE-182
535+
for warning in getattr(dna, "analysis_warnings", []):
536+
console.print(f" [yellow]⚠[/yellow] [dim]{warning}[/dim]")
537+
533538
console.print()
534539

535540
if not interactive:

saar/dependency_analyzer.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ def analyze_file(self, file_path: str) -> Dict:
135135
"language": language,
136136
"import_count": len(imports),
137137
}
138+
except RecursionError:
139+
logger.warning("Skipped %s (recursion limit -- file too complex)", file_path)
140+
return {"file": file_path, "imports": [], "language": language}
138141
except Exception as e:
139142
logger.error("Error analyzing %s: %s", file_path, e)
140143
return {"file": file_path, "imports": [], "language": language}

saar/extractor.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ class DNAExtractor:
8787
}
8888

8989
MAX_FILE_SIZE = 1024 * 1024 # 1MB
90-
MAX_FILES = 10000 # raised -- large monorepos can have many files after exclusions
90+
MAX_FILES = 25000 # generous limit -- PostHog has 19K, most repos <10K
9191

9292
RULES_FILES = [
9393
"CLAUDE.md",
@@ -117,6 +117,7 @@ def __init__(self) -> None:
117117
self._file_cache: Dict[Path, str] = {}
118118
self._stats = {"files_read": 0, "files_skipped": 0, "read_errors": 0}
119119
self._active_skip_dirs = set(self.SKIP_DIRS)
120+
self._file_limit_hit = False # set True when MAX_FILES cap is reached
120121

121122
# -- file I/O ---------------------------------------------------------
122123

@@ -222,6 +223,8 @@ def _discover_files(self, repo_path: Path) -> Tuple[List[Path], List[Path]]:
222223
continue
223224
total = len(app_files) + len(test_files)
224225
if total >= self.MAX_FILES:
226+
# Record on self that the cap was hit so the CLI can warn the user
227+
self._file_limit_hit = True
225228
logger.warning("Hit max file limit (%d)", self.MAX_FILES)
226229
break
227230
if self._is_test_file(item):
@@ -1395,6 +1398,16 @@ def extract(
13951398
"DNA extraction complete: %.2fs, %d files read, %d skipped",
13961399
elapsed, self._stats["files_read"], self._stats["files_skipped"],
13971400
)
1401+
1402+
# surface file limit warning directly on DNA (OPE-182)
1403+
if self._file_limit_hit:
1404+
total_files = len(app_files) + len(test_files)
1405+
dna.analysis_warnings.append(
1406+
f"Large repo: analysed {total_files:,} files (cap={self.MAX_FILES:,}). "
1407+
"Some files may be missing from critical-files and exception detection. "
1408+
"Use --exclude to focus on specific directories."
1409+
)
1410+
13981411
return dna
13991412

14001413
def _enrich_with_style(self, dna: CodebaseDNA, repo_path: str) -> None:

saar/formatters/agents_md.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,22 @@ def render_agents_md(dna: CodebaseDNA) -> str:
224224
if ep.exception_classes or ep.http_exception_usage or ep.logging_on_error:
225225
lines.append("\n## Error Handling\n")
226226
if ep.exception_classes:
227-
lines.append(f"- Use domain exceptions: `{', '.join(ep.exception_classes)}`")
227+
all_exc = ep.exception_classes
228+
total = len(all_exc)
229+
if total <= 10:
230+
# few enough -- list them all inline
231+
lines.append(f"- Use domain exceptions: `{', '.join(all_exc)}`")
232+
else:
233+
# too many to list -- show top 10 + search hint (OPE-183)
234+
top = all_exc[:10]
235+
lines.append(
236+
f"- Use domain exceptions ({total} total). Most common: "
237+
f"`{', '.join(top)}`"
238+
)
239+
lines.append(
240+
"- Never create new exception classes without checking: "
241+
"`grep -r 'class.*Error\\|class.*Exception' . --include='*.py' -l`"
242+
)
228243
if ep.http_exception_usage:
229244
lines.append("- Use HTTPException for API errors")
230245
if ep.logging_on_error:

saar/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ class CodebaseDNA:
165165
# canonical examples -- most-imported file per category (OPE-142)
166166
# Each entry: {"category": str, "file": str, "import_count": int, "reason": str}
167167
canonical_examples: List = field(default_factory=list)
168+
# analysis warnings -- shown in detection summary (e.g. file limit hit)
169+
analysis_warnings: List[str] = field(default_factory=list)
168170

169171
def to_dict(self) -> Dict:
170172
return asdict(self)

tests/test_cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class TestCLI:
1313
def test_version(self):
1414
result = runner.invoke(app, ["--version"])
1515
assert result.exit_code == 0
16-
assert "0.5.2" in result.stdout
16+
assert "0.5.3" in result.stdout
1717

1818
def test_help(self):
1919
result = runner.invoke(app, ["--help"])

0 commit comments

Comments
 (0)