From 996523f1d2fc9a702a5f3f5602bc73f5188be4b7 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Tue, 12 May 2026 19:22:05 +0200 Subject: [PATCH 01/30] auth: add User-Agent header + PlatformUnavailableError on non-JSON response from /api/auth/client-id --- UNRELEASED.md | 2 + .../interfaces/cli/auth_command.py | 88 +++++++++++++++---- 2 files changed, 74 insertions(+), 16 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index e93f97c..da9c812 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -1,8 +1,10 @@ # Unreleased ### Added +- auth: Typed `PlatformUnavailableError` raised when `/api/auth/client-id` returns a non-JSON body, replacing the silent fall-through that surfaced as a misleading "OAuth not configured" message. ### Changed +- auth: Set explicit `User-Agent: reporails-cli/ (auth)` header on platform and GitHub requests so identifiable CLI traffic can be allow-listed at the edge. ### Fixed diff --git a/src/reporails_cli/interfaces/cli/auth_command.py b/src/reporails_cli/interfaces/cli/auth_command.py index d9016d6..18923a4 100644 --- a/src/reporails_cli/interfaces/cli/auth_command.py +++ b/src/reporails_cli/interfaces/cli/auth_command.py @@ -27,12 +27,28 @@ # GitHub OAuth App Client ID — public, embedded in CLI. # This is NOT a secret. GitHub Device Flow requires the client ID # to be available client-side. -GITHUB_CLIENT_ID = "" # Set when GitHub OAuth App is created +GITHUB_CLIENT_ID = "" # Always sourced from the platform — see _resolve_client_id() below. # Reporails platform URL — configurable for local dev DEFAULT_PLATFORM_URL = "https://reporails.com" +class PlatformUnavailableError(Exception): + """The Reporails platform's auth surface couldn't be reached or returned an unexpected response.""" + + +def _user_agent() -> str: + """User-Agent string for outbound auth requests to the platform. 
+ + Stable, identifiable UA lets edge allow/Skip rules target CLI traffic by + User-Agent — important when bot mitigation is tightened and clients + classified as "definitely automated" otherwise hit a JS challenge. + """ + from reporails_cli import __version__ + + return f"reporails-cli/{__version__} (auth)" + + def _credentials_path() -> Path: """Path to credentials file.""" return Path.home() / ".reporails" / "credentials.yml" @@ -83,18 +99,42 @@ def _get_platform_url() -> str: def _resolve_client_id(base_url: str) -> str: - """Resolve the GitHub OAuth client ID, trying embedded constant then platform.""" + """Resolve the GitHub OAuth client ID, trying embedded constant then platform. + + Raises PlatformUnavailableError when the platform endpoint is reachable but + returns a non-JSON body (typical of an edge challenge page) — the original + code silently swallowed this into an empty client_id and surfaced it as a + misleading "OAuth not configured" message. + """ import httpx - client_id = GITHUB_CLIENT_ID - if not client_id: - try: - resp = httpx.get(f"{base_url}/api/auth/client-id", timeout=5.0) - resp.raise_for_status() - client_id = resp.json().get("client_id", "") - except (httpx.HTTPError, OSError, ValueError): - pass - return client_id + if GITHUB_CLIENT_ID: + return GITHUB_CLIENT_ID + + headers = {"User-Agent": _user_agent(), "Accept": "application/json"} + try: + resp = httpx.get(f"{base_url}/api/auth/client-id", timeout=5.0, headers=headers) + except (httpx.HTTPError, OSError) as exc: + logger.warning("Platform unreachable for client-id resolution: %s", exc) + raise PlatformUnavailableError( + f"Cannot reach Reporails platform at {base_url}: {exc}", + ) from exc + + if resp.status_code != 200: + logger.warning("Platform returned HTTP %s for client-id", resp.status_code) + raise PlatformUnavailableError( + f"Reporails platform returned HTTP {resp.status_code} for client-id endpoint.", + ) + + try: + return str(resp.json().get("client_id", "")) + except 
ValueError as exc: + logger.warning("Platform returned non-JSON for client-id: %s", resp.text[:200]) + raise PlatformUnavailableError( + "Reporails platform returned a non-JSON body for the client-id endpoint — " + "likely a Cloudflare challenge or proxy error page. Check your network or " + "contact support@reporails.com.", + ) from exc def _poll_github_token(client_id: str, device_code: str, interval: int) -> str | None: @@ -112,7 +152,7 @@ def _poll_github_token(client_id: str, device_code: str, interval: int) -> str | "device_code": device_code, "grant_type": "urn:ietf:params:oauth:grant-type:device_code", }, - headers={"Accept": "application/json"}, + headers={"Accept": "application/json", "User-Agent": _user_agent()}, timeout=10.0, ) result = poll.json() @@ -179,11 +219,16 @@ def login( import httpx base_url = platform_url or _get_platform_url() - client_id = _resolve_client_id(base_url) + try: + client_id = _resolve_client_id(base_url) + except PlatformUnavailableError as exc: + console.print(f" [red]Reporails platform unavailable:[/] {exc}") + raise typer.Exit(1) from exc if not client_id: console.print( - " [red]GitHub OAuth not configured.[/] Set GITHUB_CLIENT_ID in the CLI or configure the platform.", + " [red]GitHub OAuth not configured on the platform.[/] " + "The /api/auth/client-id endpoint returned an empty client_id.", ) raise typer.Exit(1) @@ -202,7 +247,7 @@ def login( resp = httpx.post( "https://github.com/login/device/code", data={"client_id": client_id, "scope": "read:user user:email"}, - headers={"Accept": "application/json"}, + headers={"Accept": "application/json", "User-Agent": _user_agent()}, timeout=10.0, ) resp.raise_for_status() @@ -230,11 +275,22 @@ def login( exchange = httpx.post( f"{base_url}/api/auth/cli-exchange", json={"github_token": github_token}, + headers={"Accept": "application/json", "User-Agent": _user_agent()}, timeout=10.0, ) exchange.raise_for_status() payload = exchange.json() - except (httpx.HTTPError, OSError, 
ValueError) as exc: + except ValueError as exc: + logger.warning( + "Platform returned non-JSON for cli-exchange: %s", + exchange.text[:200], + ) + console.print( + " [red]Platform returned a non-JSON response[/] (likely a Cloudflare " + "challenge page). Contact support@reporails.com.", + ) + raise typer.Exit(1) from exc + except (httpx.HTTPError, OSError) as exc: console.print(f" [red]Failed to exchange token:[/] {exc}") raise typer.Exit(1) from exc From e103f62977ad774839d15dc823bf5d84f580cc88 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Sun, 17 May 2026 23:09:03 +0200 Subject: [PATCH 02/30] Add per-capability targeting + focus/listing/top-rules to ails check Refs #24 --- UNRELEASED.md | 5 + .../core/classify/capability_paths.py | 168 ++++++++ .../core/classify/focus_expansion.py | 102 +++++ src/reporails_cli/formatters/json.py | 24 ++ src/reporails_cli/formatters/text/focus.py | 358 ++++++++++++++++++ .../formatters/text/scorecard.py | 52 +++ src/reporails_cli/interfaces/cli/main.py | 211 ++++++++++- tests/unit/test_capability_paths.py | 134 +++++++ tests/unit/test_focus_expansion.py | 91 +++++ 9 files changed, 1129 insertions(+), 16 deletions(-) create mode 100644 src/reporails_cli/core/classify/capability_paths.py create mode 100644 src/reporails_cli/core/classify/focus_expansion.py create mode 100644 src/reporails_cli/formatters/text/focus.py create mode 100644 tests/unit/test_capability_paths.py create mode 100644 tests/unit/test_focus_expansion.py diff --git a/UNRELEASED.md b/UNRELEASED.md index da9c812..447bfe6 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -2,9 +2,14 @@ ### Added - auth: Typed `PlatformUnavailableError` raised when `/api/auth/client-id` returns a non-JSON body, replacing the silent fall-through that surfaced as a misleading "OAuth not configured" message. 
+- check: Per-capability targeting — `ails check <capability> <name>` resolves to a focused report on one capability target (skill, rule, agents, main, etc.), and `ails check <capability>` lists available targets with per-target scores. Capability vocabulary is read from the detected agent's `framework/rules/<agent>/config.yml` `file_types:`; supports singular and plural forms (skill/skills, rule/rules, agent/agents). +- check: Focus-mode output layout for capability runs — single-file score, findings grouped by rule with line refs, "Next" action pointer toward the highest-frequency rule. Subagent targets expand to include skills declared in their `skills:` frontmatter. +- check: `Top rules (by finding count)` block in the whole-repo scorecard, ranked across all findings. +- check: `top_rules` array in `-f json` output; `focus` envelope in capability-mode JSON describes the targeted capability, name, agent, and paths. ### Changed - auth: Set explicit `User-Agent: reporails-cli/ (auth)` header on platform and GitHub requests so identifiable CLI traffic can be allow-listed at the edge. +- check: `[PATH]` positional argument is now `[ARG1] [ARG2]` — `ARG1` is sniffed as a capability keyword first, falling through to existing path semantics. No behaviour change for `ails check`, `ails check .`, or `ails check <path>`. ### Fixed diff --git a/src/reporails_cli/core/classify/capability_paths.py b/src/reporails_cli/core/classify/capability_paths.py new file mode 100644 index 0000000..0be1c76 --- /dev/null +++ b/src/reporails_cli/core/classify/capability_paths.py @@ -0,0 +1,168 @@ +"""Capability path resolver — reverse lookup from (agent, capability, name) to path. + +Per-capability targeting (`ails check skill backlog`) needs the inverse of +file classification: given a capability keyword from the agent's +``file_types:`` config and an optional name, resolve to the canonical file +path(s) under the project. 
+ +The capability vocabulary is whatever the detected agent's +``framework/rules//config.yml`` declares — no Claude-specific labels +in this module. +""" + +from __future__ import annotations + +import glob +from collections.abc import Callable +from pathlib import Path + +from reporails_cli.core.classify import load_file_types +from reporails_cli.core.platform.dto.models import FileTypeDeclaration + +_CAPABILITY_SINGULAR_TO_PLURAL: dict[str, str] = { + "skill": "skills", + "rule": "rules", + "agent": "agents", + "command": "commands", +} + + +def available_capabilities(agent: str, project_root: Path | None = None) -> list[str]: + """Return capability names the given agent declares in its config.yml.""" + return [decl.name for decl in load_file_types(agent, project_root=project_root)] + + +def canonicalize_capability(arg: str, agent: str, project_root: Path | None = None) -> str | None: + """Map a user-facing capability keyword (singular or plural) to the agent's config key, or None.""" + if not arg: + return None + decls = available_capabilities(agent, project_root) + if arg in decls: + return arg + plural = _CAPABILITY_SINGULAR_TO_PLURAL.get(arg) + if plural and plural in decls: + return plural + return None + + +def is_capability_keyword(arg: str, agent: str, project_root: Path | None = None) -> bool: + """Sniff helper: does `arg` match a capability name for `agent`? + + Accepts singular (`skill`) or plural (`skills`) forms. Used by + `ails check` to decide whether the first positional argument is a + capability keyword (route to focus / listing) or a filesystem path + (existing behavior). + """ + if not arg or "/" in arg or arg.startswith("."): + return False + return canonicalize_capability(arg, agent, project_root) is not None + + +def list_capability_targets( + agent: str, + capability: str, + project_root: Path, +) -> list[Path]: + """Enumerate files matching `capability` for `agent` under `project_root`. 
+ + Globs the project-scope patterns from the agent's ``file_types:`` + declaration. Returns absolute paths. Returns an empty list when the + agent has no `capability` declared. + """ + decl = _find_declaration(agent, capability, project_root) + if decl is None: + return [] + return _glob_patterns(decl.patterns, project_root) + + +def resolve_capability( + agent: str, + capability: str, + name: str, + project_root: Path, +) -> Path | None: + """Resolve `(agent, capability, name)` to a canonical file path. + + Lists all targets for the capability, then filters by `name` using a + capability-aware extractor: + + - `skills` / `nested_context` / `child_instruction`: parent directory name + (e.g. `.claude/skills/backlog/SKILL.md` → `backlog`). + - `rules` / `agents` / `commands` / `config`: file stem + (`.claude/rules/git.md` → `git`). + - `main` / `override`: filename match against `name` (rarely used + with an explicit name). + + Returns the first match, or None when no candidate matches. + """ + candidates = list_capability_targets(agent, capability, project_root) + extractor = _name_extractor_for(capability) + for candidate in candidates: + if extractor(candidate) == name: + return candidate + return None + + +def _find_declaration( + agent: str, + capability: str, + project_root: Path, +) -> FileTypeDeclaration | None: + for decl in load_file_types(agent, project_root=project_root): + if decl.name == capability: + return decl + return None + + +def _glob_patterns(patterns: tuple[str, ...], project_root: Path) -> list[Path]: + """Expand glob patterns under project_root. Skips user/managed-scope patterns. + + The `FileTypeDeclaration.patterns` tuple comes from `_extract_patterns` + in `core/discovery/agents.py`, which collects project + user + managed + scope patterns. For per-capability targeting we only want files inside + the project tree — drop patterns that start with `~/`, an absolute + path outside `project_root`, or `/etc/`-style managed locations. 
+ + Symlink handling: paths are kept in their pre-resolve form so a project + symlink (e.g. `.claude/` linked to a hub directory) surfaces files + under the project's path even though the underlying inode is + elsewhere. Duplicate physical files (same inode reached via multiple + symlinks) are deduped via the resolved path. + """ + seen_resolved: set[Path] = set() + out: list[Path] = [] + for pattern in patterns: + if _is_external_pattern(pattern): + continue + for match in glob.glob(str(project_root / pattern), recursive=True): + path = Path(match) + if not path.is_file(): + continue + resolved = path.resolve() + if resolved in seen_resolved: + continue + seen_resolved.add(resolved) + out.append(path) + return out + + +def _is_external_pattern(pattern: str) -> bool: + if pattern.startswith(("~", "/")): + return True + return len(pattern) >= 2 and pattern[1] == ":" + + +def _name_extractor_for(capability: str) -> Callable[[Path], str]: + """Return a function path → name appropriate for the capability shape.""" + parent_dir_caps = {"skills", "nested_context", "child_instruction"} + if capability in parent_dir_caps: + return _parent_dir_name + return _file_stem + + +def _parent_dir_name(path: Path) -> str: + return path.parent.name + + +def _file_stem(path: Path) -> str: + return path.stem diff --git a/src/reporails_cli/core/classify/focus_expansion.py b/src/reporails_cli/core/classify/focus_expansion.py new file mode 100644 index 0000000..2bc3489 --- /dev/null +++ b/src/reporails_cli/core/classify/focus_expansion.py @@ -0,0 +1,102 @@ +"""Focus-mode expansion for capability targeting. + +When per-capability targeting points at a subagent +(`ails check agents rule-writer`), the subagent's effective instruction +set includes any skills the subagent preloads. This module reads the +subagent file's frontmatter and resolves declared skills to their +canonical paths, so focus mode renders the subagent and its preloaded +skills together. 
+ +The expansion is agent-aware: ``framework/capabilities_matrix.yml`` +declares which agents have both ``subagents`` and ``skills`` +capabilities, and the cross-agent `skills:` frontmatter convention is +shared across those agents (per the 2026-05-10 +``ails-check-targeted-scope`` seed). +""" + +from __future__ import annotations + +import logging +from pathlib import Path + +import yaml + +from reporails_cli.core.classify.capability_paths import resolve_capability + +logger = logging.getLogger(__name__) + +_SKILL_PRELOAD_FRONTMATTER_KEY = "skills" + + +def expand_focus( + focus_paths: set[Path], + agent: str, + project_root: Path, +) -> set[Path]: + """Expand `focus_paths` to include preloaded skills for any subagent in the set. + + Reads each focus path's YAML frontmatter and looks for a `skills:` + field listing skill names. Each declared skill is resolved through + `resolve_capability(agent, "skills", name, project_root)` and added + to the expanded set. Paths that aren't subagents (no `skills:` + field, no frontmatter, or not a known agents file) pass through + unchanged. + """ + expanded: set[Path] = set(focus_paths) + for path in focus_paths: + for skill_name in _read_preloaded_skills(path): + resolved = resolve_capability(agent, "skills", skill_name, project_root) + if resolved is not None: + expanded.add(resolved) + else: + logger.debug( + "expand_focus: skill %r declared in %s not resolved for agent %s", + skill_name, + path, + agent, + ) + return expanded + + +def _read_preloaded_skills(path: Path) -> list[str]: + """Return skill names declared in `path`'s YAML frontmatter `skills:` field. + + Returns [] for files without frontmatter, without a `skills:` field, + or when the field is not a list of strings. 
+ """ + raw = _load_frontmatter_field(path, _SKILL_PRELOAD_FRONTMATTER_KEY) + if isinstance(raw, list): + return [str(item) for item in raw if isinstance(item, str)] + if isinstance(raw, str): + return [s.strip() for s in raw.split(",") if s.strip()] + return [] + + +def _load_frontmatter_field(path: Path, key: str) -> object: + """Read `path`'s YAML frontmatter and return the value at `key`, or None.""" + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + logger.debug("expand_focus: cannot read %s: %s", path, exc) + return None + frontmatter = _extract_frontmatter(text) + if frontmatter is None: + return None + try: + data = yaml.safe_load(frontmatter) or {} + except yaml.YAMLError as exc: + logger.debug("expand_focus: bad frontmatter in %s: %s", path, exc) + return None + if not isinstance(data, dict): + return None + return data.get(key) + + +def _extract_frontmatter(text: str) -> str | None: + """Return the YAML between the leading `---` fences, or None.""" + if not text.startswith("---"): + return None + end = text.find("\n---", 3) + if end == -1: + return None + return text[3:end].strip() diff --git a/src/reporails_cli/formatters/json.py b/src/reporails_cli/formatters/json.py index 9344158..4916b27 100644 --- a/src/reporails_cli/formatters/json.py +++ b/src/reporails_cli/formatters/json.py @@ -279,4 +279,28 @@ def format_combined_result(result: Any, ruleset_map: Any = None) -> dict[str, An {"name": s.name, "score": s.score, "file_count": s.file_count, "finding_count": s.finding_count} for s in surfaces ] + data["top_rules"] = _aggregate_top_rules(result.findings) return data + + +def _aggregate_top_rules(findings: Any, limit: int = 10) -> list[dict[str, Any]]: + """Group findings by rule and return the top `limit` by count. + + Each entry: `{"rule": "", "count": , "severity": ""}`. + Severity is the worst (error > warning > info) seen for that rule. 
+ Used by both the JSON envelope and the text scorecard so the + aggregation has one source of truth. + """ + severity_rank = {"error": 0, "warning": 1, "info": 2} + buckets: dict[str, dict[str, Any]] = {} + for f in findings: + rule = f.rule + bucket = buckets.setdefault(rule, {"count": 0, "severity": f.severity}) + bucket["count"] += 1 + if severity_rank.get(f.severity, 3) < severity_rank.get(bucket["severity"], 3): + bucket["severity"] = f.severity + ordered = sorted( + ({"rule": rule, "count": b["count"], "severity": b["severity"]} for rule, b in buckets.items()), + key=lambda r: (-r["count"], r["rule"]), + ) + return ordered[:limit] diff --git a/src/reporails_cli/formatters/text/focus.py b/src/reporails_cli/formatters/text/focus.py new file mode 100644 index 0000000..1ef5f75 --- /dev/null +++ b/src/reporails_cli/formatters/text/focus.py @@ -0,0 +1,358 @@ +"""Focus-mode renderer for per-capability `ails check`. + +When a capability target resolves to a small set of files, the per-group +scorecard is overkill — the operator wants every finding for those files +grouped by rule, plus a "next action" pointer. This renderer is the +output of `ails check `. +""" + +from __future__ import annotations + +from collections import Counter +from collections.abc import Callable +from pathlib import Path +from typing import Any + +from rich.console import Console + +from reporails_cli.formatters.text.display_constants import get_term_width +from reporails_cli.formatters.text.scorecard import ( + _RULE_SEVERITY_LABEL, + _RULE_SEVERITY_RANK, +) + +console = Console() + + +def print_focus_result( + result: Any, + capability: str, + name: str, + agent: str, + focus_paths: set[Path], + project_root: Path, + elapsed_ms: float, + ruleset_map: Any = None, +) -> None: + """Render the focus-mode output block. + + Layout: + Reporails — () + + + Score: X.X / 10 ▓▓▓... 
+ + + Findings by rule (N): + RULE_ID xcount severity message + line refs + + Cross-file: involving this file … (when present) + + Next: fix RULE_ID (xcount) — highest-frequency warning. + """ + rel_paths = sorted(_to_rel(p, project_root) for p in focus_paths) + findings = [f for f in result.findings if f.file in {str(p) for p in rel_paths}] + + header = f"[bold]Reporails[/bold] — {capability} {name}".rstrip() + if agent: + header += f" ([dim]{agent}[/dim])" + console.print() + console.print(header) + console.print() + + if len(rel_paths) == 1: + _render_single_file(rel_paths[0], findings, result, ruleset_map) + else: + _render_multi_file(rel_paths, findings, result, ruleset_map) + + _render_findings_by_rule(findings) + _render_cross_file_for_focus(result, rel_paths) + _render_next_action(findings) + + if elapsed_ms: + console.print() + console.print(f" [dim]({elapsed_ms / 1000:.1f}s)[/dim]") + + +def _render_single_file( + rel_path: Path, + findings: list[Any], + result: Any, + ruleset_map: Any, +) -> None: + file_atoms = _atoms_for_file(ruleset_map, rel_path) + score = _focus_score(findings, len(file_atoms), result) + bar = _bar(score) + console.print(f" [bold]{rel_path}[/bold]") + color = "green" if score >= 7.0 else "yellow" if score >= 4.0 else "red" + console.print(f" Score: [{color} bold]{score:.1f}[/{color} bold] / 10 [dim]{bar}[/dim]") + summary = _atom_summary(file_atoms) + if summary: + console.print(f" [dim]{summary}[/dim]") + + +def _render_multi_file( + rel_paths: list[Path], + findings: list[Any], + result: Any, + ruleset_map: Any, +) -> None: + per_file: dict[str, list[Any]] = {} + for f in findings: + per_file.setdefault(f.file, []).append(f) + name_w = max((len(str(p)) for p in rel_paths), default=20) + for rel_path in rel_paths: + key = str(rel_path) + file_findings = per_file.get(key, []) + file_atoms = _atoms_for_file(ruleset_map, rel_path) + score = _focus_score(file_findings, len(file_atoms), result) + color = "green" if score >= 7.0 else 
"yellow" if score >= 4.0 else "red" + count = len(file_findings) + console.print( + f" [bold]{key:<{name_w}}[/bold] {count:>3} findings Score: [{color} bold]{score:.1f}[/{color} bold]" + ) + + +def _render_findings_by_rule(findings: list[Any]) -> None: + if not findings: + console.print("\n [green]✓[/green] No findings.") + return + by_rule = _group_by_rule(findings) + tw = get_term_width() + console.print() + console.print(f" [bold]Findings by rule ({len(findings)}):[/bold]") + rule_w = max((len(r) for r in by_rule), default=12) + for rule_id in _order_rules(by_rule): + items = by_rule[rule_id] + severity = _worst_severity(items) + label = _RULE_SEVERITY_LABEL.get(severity, severity) + message = _shorten(items[0].message, tw - rule_w - 24) + console.print(f" [bold]{rule_id:<{rule_w}}[/bold] (x{len(items)}) {label} {message}") + lines = [f.line for f in items if f.line] + if lines: + console.print(f" [dim]L{', L'.join(str(line) for line in sorted(set(lines))[:12])}[/dim]") + + +def _render_cross_file_for_focus(result: Any, rel_paths: list[Path]) -> None: + str_paths = {str(p) for p in rel_paths} + pairs = [cf for cf in (result.cross_file or ()) if cf.file_1 in str_paths or cf.file_2 in str_paths] + if not pairs: + return + n_conflicts = sum(1 for cf in pairs if cf.finding_type == "conflict") + n_reps = sum(1 for cf in pairs if cf.finding_type == "repetition") + bits = [] + if n_conflicts: + bits.append(f"{n_conflicts} conflict" + ("s" if n_conflicts > 1 else "")) + if n_reps: + bits.append(f"{n_reps} repetition" + ("s" if n_reps > 1 else "")) + console.print() + console.print(f" Cross-file: {', '.join(bits)} involving this focus.") + console.print(" [dim]Run `ails check` for the full graph.[/dim]") + + +def _render_next_action(findings: list[Any]) -> None: + if not findings: + return + by_rule = _group_by_rule(findings) + ranked = sorted( + by_rule.items(), + key=lambda kv: (_RULE_SEVERITY_RANK.get(_worst_severity(kv[1]), 3), -len(kv[1])), + ) + if not ranked: + 
return + rule_id, items = ranked[0] + severity = _worst_severity(items) + severity_word = "error" if severity == "error" else "warning" if severity == "warning" else "finding" + console.print() + console.print( + f" [bold]Next:[/bold] fix [bold]{rule_id}[/bold] (x{len(items)}) — highest-frequency {severity_word}." + ) + + +def _atoms_for_file(ruleset_map: Any, rel_path: Path) -> list[Any]: + if ruleset_map is None: + return [] + key = str(rel_path) + return [a for a in getattr(ruleset_map, "atoms", ()) if a.file_path == key] + + +def _atom_summary(atoms: list[Any]) -> str: + if not atoms: + return "" + charge_counts = Counter(a.charge for a in atoms) + directives = charge_counts.get("DIRECTIVE", 0) + charge_counts.get("IMPERATIVE", 0) + constraints = charge_counts.get("CONSTRAINT", 0) + ambiguous = charge_counts.get("AMBIGUOUS", 0) + n_prose = charge_counts.get("NEUTRAL", 0) + total = max(len(atoms), 1) + prose_pct = round(100 * n_prose / total) + parts = [] + if directives: + parts.append(f"{directives} directive") + if constraints: + parts.append(f"{constraints} constraint") + if ambiguous: + parts.append(f"{ambiguous} ambiguous") + parts.append(f"{prose_pct}% prose") + return " · ".join(parts) + + +def _focus_score(findings: list[Any], n_atoms: int, result: Any) -> float: + if not findings: + return 10.0 + severity_counts = Counter(f.severity for f in findings) + errors = severity_counts.get("error", 0) + warnings = severity_counts.get("warning", 0) + infos = severity_counts.get("info", 0) + + # Reuse compute_score's shape: band base + severity penalty / atom denom. 
+ has_quality = result.quality is not None and bool(getattr(result.quality, "compliance_band", "")) + base = 6.0 + if has_quality: + band = result.quality.compliance_band + base = 8.5 if band == "HIGH" else 5.5 if band == "MODERATE" else 3.0 + denom = max(n_atoms, errors + warnings + infos, 1) + penalty = min(4.0, (errors / denom) * 30) + min(2.0, (warnings / denom) * 2) + return float(round(max(0.0, min(10.0, base - penalty)), 1)) + + +def _bar(score: float) -> str: + bar_width = min(20, get_term_width() - 26) + filled = round(bar_width * score / 10) + return "▓" * filled + "░" * (bar_width - filled) + + +def _group_by_rule(findings: list[Any]) -> dict[str, list[Any]]: + out: dict[str, list[Any]] = {} + for f in findings: + out.setdefault(f.rule, []).append(f) + return out + + +def _worst_severity(items: list[Any]) -> str: + return str(min(items, key=lambda f: _RULE_SEVERITY_RANK.get(f.severity, 3)).severity) + + +def _order_rules(by_rule: dict[str, list[Any]]) -> list[str]: + return sorted( + by_rule, + key=lambda r: (_RULE_SEVERITY_RANK.get(_worst_severity(by_rule[r]), 3), -len(by_rule[r]), r), + ) + + +def _shorten(text: str, width: int) -> str: + if width <= 8: + return text + snippet = text.split(".")[0].split("—")[0].strip() + if len(snippet) <= width: + return snippet + return snippet[: width - 1] + "…" + + +def print_listing_result( + result: Any, + capability: str, + agent: str, + candidate_paths: list[Path], + project_root: Path, + ruleset_map: Any = None, +) -> None: + """Render listing mode: capability + per-target scores. + + Output when the operator runs `ails check skill` (no name): + Reporails — skills (, N found) + + … + Run: ails check to focus on one. 
+ """ + rels = [_to_rel(p, project_root) for p in candidate_paths] + name_extractor = _name_extractor_for_capability(capability) + + console.print() + console.print(f"[bold]Reporails[/bold] — {capability} ([dim]{agent}[/dim], {len(rels)} found)") + console.print() + + if not rels: + console.print(f" [dim]No {capability} files found for agent {agent}.[/dim]") + return + + per_file = _findings_per_file(result.findings) + rows = [] + for path in rels: + key = str(path) + file_findings = per_file.get(key, []) + atoms = _atoms_for_file(ruleset_map, path) + score = _focus_score(file_findings, len(atoms), result) + rows.append((name_extractor(path), key, score)) + + name_w = max((len(name) for name, _, _ in rows), default=12) + path_w = max((len(p) for _, p, _ in rows), default=20) + for name, key, score in sorted(rows, key=lambda r: r[0]): + color = "green" if score >= 7.0 else "yellow" if score >= 4.0 else "red" + console.print( + f" [bold]{name:<{name_w}}[/bold] [dim]{key:<{path_w}}[/dim] [{color}]{score:.1f}[/{color}] / 10" + ) + + console.print() + console.print(f" [dim]Run:[/dim] ails check {capability} ") + + +def _findings_per_file(findings: Any) -> dict[str, list[Any]]: + out: dict[str, list[Any]] = {} + for f in findings: + out.setdefault(f.file, []).append(f) + return out + + +def _name_extractor_for_capability(capability: str) -> Callable[[Path], str]: + parent_dir_caps = {"skills", "nested_context", "child_instruction"} + if capability in parent_dir_caps: + return lambda p: p.parent.name + return lambda p: p.stem + + +def filter_result_to_focus(result: Any, focus_paths: set[Path], project_root: Path) -> Any: + """Return a new CombinedResult containing only findings + cross-file pairs in the focus. + + Used by JSON / GitHub / focus text rendering so the envelope reflects + just the targeted file(s) and the score/Top-rules block can be + recomputed from the focused findings. 
+ """ + from dataclasses import replace as _replace + + from reporails_cli.core.platform.runtime.merger import CombinedStats + + rel_keys = {str(_to_rel(p, project_root)) for p in focus_paths} + filtered_findings = tuple(f for f in result.findings if f.file in rel_keys) + filtered_cross = tuple(cf for cf in result.cross_file if cf.file_1 in rel_keys or cf.file_2 in rel_keys) + severity_counts = Counter(f.severity for f in filtered_findings) + stats = CombinedStats( + total_findings=len(filtered_findings), + errors=severity_counts.get("error", 0), + warnings=severity_counts.get("warning", 0), + infos=severity_counts.get("info", 0), + cross_file_conflicts=sum(1 for c in filtered_cross if c.finding_type == "conflict"), + cross_file_repetitions=sum(1 for c in filtered_cross if c.finding_type == "repetition"), + m_probe_count=result.stats.m_probe_count, + client_check_count=result.stats.client_check_count, + server_diagnostic_count=result.stats.server_diagnostic_count, + ) + return _replace(result, findings=filtered_findings, cross_file=filtered_cross, stats=stats) + + +def _to_rel(path: Path, project_root: Path) -> Path: + """Return path relative to project_root WITHOUT resolving symlinks. + + Symlinks may point outside the project (e.g. hub-symlinked skills); + resolving would push the path outside `project_root` and force the + fallback. Use textual prefix stripping instead. 
+ """ + try: + return path.relative_to(project_root) + except ValueError: + pass + try: + return Path(path).resolve().relative_to(project_root.resolve()) + except ValueError: + return path diff --git a/src/reporails_cli/formatters/text/scorecard.py b/src/reporails_cli/formatters/text/scorecard.py index b0b1577..528c902 100644 --- a/src/reporails_cli/formatters/text/scorecard.py +++ b/src/reporails_cli/formatters/text/scorecard.py @@ -330,6 +330,56 @@ def _render_cross_file_counts(result: Any) -> None: console.print(f" {' \u00b7 '.join(cf_parts)}") +_RULE_SEVERITY_RANK = {"error": 0, "warning": 1, "info": 2} +_RULE_SEVERITY_LABEL = {"error": "[red]err [/red]", "warning": "[yellow]warn[/yellow]", "info": "info"} + + +def _aggregate_top_rules(findings: Any, limit: int = 4) -> list[tuple[str, int, str, str]]: + """Return up to `limit` rules ranked by finding count. + + Each entry: (rule_id, count, severity, sample_message). Severity is the + worst severity (error > warning > info) recorded for that rule across + the findings list; sample_message is the first finding's message, + truncated for the scorecard column. 
+ """ + buckets: dict[str, dict[str, Any]] = {} + for f in findings: + bucket = buckets.setdefault( + f.rule, + {"count": 0, "severity": f.severity, "message": f.message}, + ) + bucket["count"] += 1 + if _RULE_SEVERITY_RANK.get(f.severity, 3) < _RULE_SEVERITY_RANK.get(bucket["severity"], 3): + bucket["severity"] = f.severity + rows = [(rule, b["count"], b["severity"], b["message"]) for rule, b in buckets.items()] + rows.sort(key=lambda r: (-r[1], r[0])) + return rows[:limit] + + +def _render_top_rules(result: Any) -> None: + """Render the Top-rules block in the whole-repo scorecard.""" + if not result.findings: + return + rows = _aggregate_top_rules(result.findings) + if not rows: + return + tw = get_term_width() + console.print() + console.print(" Top rules (by finding count):") + rule_w = max((len(r[0]) for r in rows), default=12) + max_count = max(r[1] for r in rows) + count_w = len(str(max_count)) + 1 # for the x prefix + # 4 (indent) + rule_w + 1 + count_w + 1 + 6 (severity label cell) + 2 (gap) + fixed = 4 + rule_w + 1 + count_w + 1 + 6 + 2 + snippet_w = max(20, tw - fixed - 2) + for rule, count, severity, message in rows: + label = _RULE_SEVERITY_LABEL.get(severity, severity) + snippet = message.split(".")[0].split("—")[0].strip() + if len(snippet) > snippet_w: + snippet = snippet[: snippet_w - 1] + "…" + console.print(f" {rule:<{rule_w}} x{count:<{count_w}} {label} {snippet}") + + def _render_results_summary( result: Any, has_quality: bool, # noqa: ARG001 — kept for API stability @@ -392,6 +442,8 @@ def print_scorecard( if surface_health: _render_surface_health(surface_health) + _render_top_rules(result) + visible_findings, pro_total = _render_results_summary(result, has_quality, hint_errors, hint_warnings) # CTA for free tier diff --git a/src/reporails_cli/interfaces/cli/main.py b/src/reporails_cli/interfaces/cli/main.py index b069bed..d513735 100644 --- a/src/reporails_cli/interfaces/cli/main.py +++ b/src/reporails_cli/interfaces/cli/main.py @@ -65,7 
+65,10 @@ def _explain_rules_paths(rules: list[str] | None) -> list[Path] | None: @app.command(rich_help_panel="Commands") def check( # noqa: C901 # pylint: disable=too-many-locals - path: str = typer.Argument(".", help="File or directory to validate"), + arg1: str = typer.Argument( + ".", help="File/directory to validate, OR a capability keyword (skill, rule, agents, main, ...)" + ), + arg2: str = typer.Argument(None, help="Capability target name when arg1 is a capability keyword"), format: str = typer.Option(None, "--format", "-f", help="Output format: text, json, github"), agent: str = typer.Option("", "--agent", help="Agent type (e.g., claude, copilot)"), exclude_dirs: list[str] = typer.Option(None, "--exclude-dirs", help="Directories to exclude"), # noqa: B008 @@ -73,19 +76,58 @@ def check( # noqa: C901 # pylint: disable=too-many-locals strict: bool = typer.Option(False, "--strict", help="Exit code 1 if violations found"), verbose: bool = typer.Option(False, "--verbose", "-v", help="Show details"), ) -> None: - """Validate AI instruction files against reporails rules.""" + """Validate AI instruction files against reporails rules. + + Per-capability targeting: `ails check skill ` focuses output on a + single skill; `ails check skill` (no name) lists skills with per-target + scores. Capability vocabulary comes from the detected agent's + `framework/rules//config.yml` `file_types:` keys. 
+ """ from contextlib import nullcontext + from reporails_cli.core.classify.capability_paths import canonicalize_capability from reporails_cli.core.discovery.agents import detect_agents, get_all_instruction_files from reporails_cli.core.lint.client_checks import run_client_checks from reporails_cli.core.lint.rule_runner import run_content_quality_checks, run_m_probes from reporails_cli.core.platform.adapters.api_client import AilsClient from reporails_cli.core.platform.config.config import get_project_config from reporails_cli.core.platform.runtime.merger import merge_results - from reporails_cli.formatters import json as json_formatter + from reporails_cli.formatters.text.focus import filter_result_to_focus + + # Capability-vs-path sniffing: if arg1 matches a capability keyword for + # the detected agent, route to focus / listing mode. Otherwise treat + # arg1 as a path (existing behavior). + project_root = Path.cwd().resolve() + capability_mode = False + capability = "" + capability_name = "" + if arg1 and arg1 not in (".", "./"): + # The agent for sniffing is whichever the user passed or the project default; + # full agent resolution happens after we know we're in path mode. 
+ config_probe = None + try: + config_probe = get_project_config(project_root) + except (OSError, ValueError): + config_probe = None + sniff_agent = agent or (config_probe.default_agent if config_probe else "") + if not sniff_agent: + from reporails_cli.core.discovery.agents import detect_agents as _detect + + for det in _detect(project_root): + sniff_agent = det.agent_type.id + break + canonical = canonicalize_capability(arg1, sniff_agent, project_root) if sniff_agent else None + if canonical is not None: + capability_mode = True + capability = canonical + capability_name = arg2 or "" + target = project_root + else: + target = Path(arg1).resolve() + else: + target = Path(arg1 or ".").resolve() - target = Path(path).resolve() - if not target.exists(): + if not capability_mode and not target.exists(): console.print(f"[red]Error:[/red] Path not found: {target}") raise typer.Exit(2) @@ -199,24 +241,161 @@ def check( # noqa: C901 # pylint: disable=too-many-locals ) elapsed_ms = (time.perf_counter() - start_time) * 1000 - # 7. Format and display - if output_format == "json": - data = json_formatter.format_combined_result(result, ruleset_map=ruleset_map) - data["elapsed_ms"] = round(elapsed_ms, 1) - print(json.dumps(data, indent=2)) - elif output_format == "github": - from reporails_cli.formatters import github as github_formatter + # 7. Compute focus paths for per-capability mode + focus_paths, listing_candidates = _resolve_focus_targets( + capability_mode, capability, capability_name, effective_agent, project_root + ) - print(github_formatter.format_combined_annotations(result)) - else: - print_text_result(result, elapsed_ms, ascii, verbose, ruleset_map=ruleset_map, funnel_error=funnel_error) + # 8. 
Display dispatch + display_result = filter_result_to_focus(result, focus_paths, project_root) if focus_paths else result + _dispatch_output( + output_format, + display_result, + result, + ruleset_map, + elapsed_ms, + capability_mode, + capability, + capability_name, + effective_agent, + focus_paths, + listing_candidates, + project_root, + ascii, + verbose, + funnel_error, + ) _show_agent_auto_detect_hint(effective_agent, output_format, assumed, mixed, detected) - if strict and result.findings: + if _should_exit_strict(strict, capability_mode, focus_paths, project_root, result): raise typer.Exit(1) +def _resolve_focus_targets( + capability_mode: bool, + capability: str, + capability_name: str, + effective_agent: str, + project_root: Path, +) -> tuple[set[Path], list[Path]]: + """Compute focus_paths (single-target) or listing_candidates (no name) for capability mode.""" + from reporails_cli.core.classify.capability_paths import ( + available_capabilities, + list_capability_targets, + resolve_capability, + ) + from reporails_cli.core.classify.focus_expansion import expand_focus + + if not capability_mode: + return set(), [] + if capability not in available_capabilities(effective_agent, project_root): + console.print( + f"[red]Error:[/red] capability [bold]{capability}[/bold] is not declared " + f"for agent [bold]{effective_agent}[/bold]. " + f"Available: {', '.join(available_capabilities(effective_agent, project_root)) or '(none)'}" + ) + raise typer.Exit(2) + if not capability_name: + return set(), list_capability_targets(effective_agent, capability, project_root) + resolved = resolve_capability(effective_agent, capability, capability_name, project_root) + if resolved is None: + available = list_capability_targets(effective_agent, capability, project_root) + console.print( + f"[red]Error:[/red] no {capability} named [bold]{capability_name}[/bold] " + f"for agent [bold]{effective_agent}[/bold] under {project_root}." 
+ ) + if available: + console.print(f"[dim]Found {len(available)} {capability}(s) — run `ails check {capability}` to list.[/dim]") + raise typer.Exit(2) + focus_paths = {resolved} + if capability == "agents": + focus_paths = expand_focus(focus_paths, effective_agent, project_root) + return focus_paths, [] + + +def _focus_paths_to_strings(focus_paths: set[Path], project_root: Path) -> set[str]: + return {str(p.relative_to(project_root)) if p.is_relative_to(project_root) else str(p) for p in focus_paths} + + +def _dispatch_output( + output_format: str, + display_result: Any, + full_result: Any, + ruleset_map: Any, + elapsed_ms: float, + capability_mode: bool, + capability: str, + capability_name: str, + effective_agent: str, + focus_paths: set[Path], + listing_candidates: list[Path], + project_root: Path, + ascii_mode: bool, + verbose: bool, + funnel_error: Any, +) -> None: + """Route formatted output to JSON / GitHub / focus / listing / default text.""" + from reporails_cli.formatters import json as json_formatter + from reporails_cli.formatters.text.focus import print_focus_result, print_listing_result + + if output_format == "json": + data = json_formatter.format_combined_result(display_result, ruleset_map=ruleset_map) + data["elapsed_ms"] = round(elapsed_ms, 1) + if capability_mode: + data["focus"] = { + "capability": capability, + "name": capability_name, + "agent": effective_agent, + "paths": sorted(_focus_paths_to_strings(focus_paths, project_root)), + } + print(json.dumps(data, indent=2)) + return + if output_format == "github": + from reporails_cli.formatters import github as github_formatter + + print(github_formatter.format_combined_annotations(display_result)) + return + if capability_mode and capability_name: + print_focus_result( + display_result, + capability=capability, + name=capability_name, + agent=effective_agent, + focus_paths=focus_paths, + project_root=project_root, + elapsed_ms=elapsed_ms, + ruleset_map=ruleset_map, + ) + return + if 
capability_mode: + print_listing_result( + full_result, + capability=capability, + agent=effective_agent, + candidate_paths=listing_candidates, + project_root=project_root, + ruleset_map=ruleset_map, + ) + return + print_text_result(full_result, elapsed_ms, ascii_mode, verbose, ruleset_map=ruleset_map, funnel_error=funnel_error) + + +def _should_exit_strict( + strict: bool, + capability_mode: bool, + focus_paths: set[Path], + project_root: Path, + result: Any, +) -> bool: + if not strict: + return False + if capability_mode and focus_paths: + rel_keys = _focus_paths_to_strings(focus_paths, project_root) + return any(f.file in rel_keys for f in result.findings) + return bool(result.findings) + + def _suppress_ml_noise() -> None: """Suppress sentence-transformers/HF stderr noise.""" import logging as _logging diff --git a/tests/unit/test_capability_paths.py b/tests/unit/test_capability_paths.py new file mode 100644 index 0000000..67a729d --- /dev/null +++ b/tests/unit/test_capability_paths.py @@ -0,0 +1,134 @@ +"""Unit tests for capability_paths — per-capability targeting plumbing.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from reporails_cli.core.classify.capability_paths import ( + available_capabilities, + canonicalize_capability, + is_capability_keyword, + list_capability_targets, + resolve_capability, +) + + +def _make_skill(root: Path, name: str) -> Path: + skill_dir = root / ".claude" / "skills" / name + skill_dir.mkdir(parents=True, exist_ok=True) + skill_file = skill_dir / "SKILL.md" + skill_file.write_text(f"# {name}\n\nA skill.\n", encoding="utf-8") + return skill_file + + +def _make_rule(root: Path, name: str) -> Path: + rules_dir = root / ".claude" / "rules" + rules_dir.mkdir(parents=True, exist_ok=True) + rule_file = rules_dir / f"{name}.md" + rule_file.write_text(f"# {name}\n\nA rule.\n", encoding="utf-8") + return rule_file + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def 
test_available_capabilities_for_claude_contains_expected_keys() -> None: + caps = available_capabilities("claude") + assert "main" in caps + assert "skills" in caps + assert "rules" in caps + assert "agents" in caps + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_canonicalize_singular_to_plural() -> None: + assert canonicalize_capability("skill", "claude") == "skills" + assert canonicalize_capability("rule", "claude") == "rules" + assert canonicalize_capability("agent", "claude") == "agents" + assert canonicalize_capability("skills", "claude") == "skills" # already canonical + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_canonicalize_unknown_capability_returns_none() -> None: + assert canonicalize_capability("nonsense", "claude") is None + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_canonicalize_unknown_agent_returns_none() -> None: + assert canonicalize_capability("skill", "nonexistent-agent") is None + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_is_capability_keyword_rejects_paths() -> None: + assert not is_capability_keyword(".", "claude") + assert not is_capability_keyword("./src/", "claude") + assert not is_capability_keyword("src/foo.md", "claude") + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_is_capability_keyword_accepts_known_capability() -> None: + assert is_capability_keyword("skill", "claude") + assert is_capability_keyword("skills", "claude") + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_list_capability_targets_globs_skills(tmp_path: Path) -> None: + _make_skill(tmp_path, "alpha") + _make_skill(tmp_path, "beta") + targets = list_capability_targets("claude", "skills", tmp_path) + names = sorted(p.parent.name for p in targets) + assert names == ["alpha", "beta"] + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_list_capability_targets_globs_rules(tmp_path: Path) -> None: + _make_rule(tmp_path, "git") + _make_rule(tmp_path, "testing") + targets 
= list_capability_targets("claude", "rules", tmp_path) + stems = sorted(p.stem for p in targets) + assert stems == ["git", "testing"] + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_resolve_capability_skill_by_parent_dir_name(tmp_path: Path) -> None: + expected = _make_skill(tmp_path, "alpha") + resolved = resolve_capability("claude", "skills", "alpha", tmp_path) + assert resolved is not None + assert resolved.parent.name == expected.parent.name + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_resolve_capability_rule_by_stem(tmp_path: Path) -> None: + expected = _make_rule(tmp_path, "git") + resolved = resolve_capability("claude", "rules", "git", tmp_path) + assert resolved is not None + assert resolved.stem == expected.stem + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_resolve_capability_missing_returns_none(tmp_path: Path) -> None: + _make_skill(tmp_path, "alpha") + assert resolve_capability("claude", "skills", "nonexistent", tmp_path) is None + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_list_capability_targets_unknown_capability_returns_empty(tmp_path: Path) -> None: + assert list_capability_targets("claude", "no-such-cap", tmp_path) == [] + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_canonicalize_handles_empty_string() -> None: + assert canonicalize_capability("", "claude") is None diff --git a/tests/unit/test_focus_expansion.py b/tests/unit/test_focus_expansion.py new file mode 100644 index 0000000..7f5633b --- /dev/null +++ b/tests/unit/test_focus_expansion.py @@ -0,0 +1,91 @@ +"""Unit tests for focus_expansion — subagent→skill preload resolution.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from reporails_cli.core.classify.focus_expansion import expand_focus + + +def _make_agent_file(root: Path, name: str, skills: list[str] | None = None) -> Path: + agents_dir = root / ".claude" / "agents" + agents_dir.mkdir(parents=True, 
exist_ok=True) + agent_file = agents_dir / f"{name}.md" + fm_lines = [ + "---", + f"name: {name}", + "description: Test agent", + ] + if skills is not None: + fm_lines.append(f"skills: {skills}") + fm_lines.append("---") + fm_lines.append("") + fm_lines.append("# Body") + agent_file.write_text("\n".join(fm_lines), encoding="utf-8") + return agent_file + + +def _make_skill(root: Path, name: str) -> Path: + skill_dir = root / ".claude" / "skills" / name + skill_dir.mkdir(parents=True, exist_ok=True) + skill_file = skill_dir / "SKILL.md" + skill_file.write_text(f"# {name}\n\nA skill.\n", encoding="utf-8") + return skill_file + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_expand_focus_includes_declared_skills(tmp_path: Path) -> None: + agent = _make_agent_file(tmp_path, "rule-writer", skills=["write-rule", "refine-rule"]) + write_rule = _make_skill(tmp_path, "write-rule") + refine_rule = _make_skill(tmp_path, "refine-rule") + expanded = expand_focus({agent}, "claude", tmp_path) + assert agent in expanded + assert write_rule in expanded + assert refine_rule in expanded + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_expand_focus_passes_through_when_no_skills_declared(tmp_path: Path) -> None: + agent = _make_agent_file(tmp_path, "simple") + expanded = expand_focus({agent}, "claude", tmp_path) + assert expanded == {agent} + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_expand_focus_skips_unresolved_skill_names(tmp_path: Path) -> None: + agent = _make_agent_file(tmp_path, "agent", skills=["does-not-exist"]) + expanded = expand_focus({agent}, "claude", tmp_path) + assert expanded == {agent} + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_expand_focus_handles_no_frontmatter(tmp_path: Path) -> None: + agents_dir = tmp_path / ".claude" / "agents" + agents_dir.mkdir(parents=True) + agent = agents_dir / "no-fm.md" + agent.write_text("# Just a body\n\nNo frontmatter here.\n", encoding="utf-8") + expanded = 
expand_focus({agent}, "claude", tmp_path) + assert expanded == {agent} + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_expand_focus_handles_string_skills_field(tmp_path: Path) -> None: + agents_dir = tmp_path / ".claude" / "agents" + agents_dir.mkdir(parents=True) + agent = agents_dir / "agent.md" + agent.write_text( + "---\nname: agent\nskills: write-rule, refine-rule\n---\n\nbody", + encoding="utf-8", + ) + _make_skill(tmp_path, "write-rule") + _make_skill(tmp_path, "refine-rule") + expanded = expand_focus({agent}, "claude", tmp_path) + names = {p.parent.name for p in expanded if p.name == "SKILL.md"} + assert names == {"write-rule", "refine-rule"} From 481cf0460a35a27b11afaff679e5f12c9f46b004 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Sun, 17 May 2026 23:09:46 +0200 Subject: [PATCH 03/30] Add configurable rule thresholds + generic file class via Markdown link-reachability --- UNRELEASED.md | 2 + .../scope-fields-in-frontmatter/checks.yml | 7 + .../core/scope-fields-in-frontmatter/rule.md | 14 ++ src/reporails_cli/core/classify/__init__.py | 29 ++++ .../core/classify/generic_type.py | 37 +++++ .../core/classify/link_walker.py | 123 +++++++++++++++++ src/reporails_cli/core/lint/regex/runner.py | 87 +++++++++++- src/reporails_cli/core/lint/rule_runner.py | 46 ++++++- .../core/platform/config/config.py | 12 ++ .../core/platform/dto/results.py | 9 ++ .../fixtures/generic-classification/CLAUDE.md | 11 ++ .../notes/architecture.md | 6 + .../generic-classification/notes/cycle-a.md | 3 + .../generic-classification/notes/cycle-b.md | 3 + tests/unit/test_link_walker.py | 123 +++++++++++++++++ tests/unit/test_min_lines_gate.py | 129 ++++++++++++++++++ 16 files changed, 630 insertions(+), 11 deletions(-) create mode 100644 src/reporails_cli/core/classify/generic_type.py create mode 100644 src/reporails_cli/core/classify/link_walker.py create mode 100644 tests/fixtures/generic-classification/CLAUDE.md create mode 100644 
tests/fixtures/generic-classification/notes/architecture.md create mode 100644 tests/fixtures/generic-classification/notes/cycle-a.md create mode 100644 tests/fixtures/generic-classification/notes/cycle-b.md create mode 100644 tests/unit/test_link_walker.py create mode 100644 tests/unit/test_min_lines_gate.py diff --git a/UNRELEASED.md b/UNRELEASED.md index 447bfe6..47240a9 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -6,6 +6,8 @@ - check: Focus-mode output layout for capability runs — single-file score, findings grouped by rule with line refs, "Next" action pointer toward the highest-frequency rule. Subagent targets expand to include skills declared in their `skills:` frontmatter. - check: `Top rules (by finding count)` block in the whole-repo scorecard, ranked across all findings. - check: `top_rules` array in `-f json` output; `focus` envelope in capability-mode JSON describes the targeted capability, name, agent, and paths. +- check: Size-aware `CORE:S:0013 scope-fields-in-frontmatter` — rule no longer fires on rules below 30 lines (default). Override per-project via `.ails/config.yml: rule_thresholds.CORE:S:0013.min_lines`. Generic mechanism in deterministic check runner — `min_lines:` arg on any deterministic check + per-rule override. +- check: `generic` file class via Markdown link-reachability — opt-in via `.ails/config.yml: generic_scanning: true`. When on, the classifier BFS-walks outgoing links from each instruction file and assigns `file_type: "generic"` (with `loading: on_demand`) to reached in-tree `.md` files. Cycle-safe, depth-bounded (3 hops), tree-bound, agent-agnostic. Rule routing uses existing `FileMatch.type` — no rule-schema change. Default off everywhere. ### Changed - auth: Set explicit `User-Agent: reporails-cli/ (auth)` header on platform and GitHub requests so identifiable CLI traffic can be allow-listed at the edge. 
diff --git a/framework/rules/core/scope-fields-in-frontmatter/checks.yml b/framework/rules/core/scope-fields-in-frontmatter/checks.yml index 44e7814..8f98d22 100644 --- a/framework/rules/core/scope-fields-in-frontmatter/checks.yml +++ b/framework/rules/core/scope-fields-in-frontmatter/checks.yml @@ -6,4 +6,11 @@ checks: type: deterministic pattern-regex: '(?i)\b(scope|globs|applies.?to)\s*:' expect: present + # Size gate: skip files below this many lines. Scope declarations are + # boilerplate for tiny rules; the noise reduction is the value here. + # Override per-project via `.ails/config.yml`: + # rule_thresholds: + # CORE:S:0013: + # min_lines: + min_lines: 30 message: "Missing scope fields in frontmatter — declare scope, globs, or path targeting" diff --git a/framework/rules/core/scope-fields-in-frontmatter/rule.md b/framework/rules/core/scope-fields-in-frontmatter/rule.md index b553fa9..5e21dc3 100644 --- a/framework/rules/core/scope-fields-in-frontmatter/rule.md +++ b/framework/rules/core/scope-fields-in-frontmatter/rule.md @@ -43,6 +43,20 @@ title: Python Style Use `ruff` for formatting Python files. ~~~~ +## Threshold + +The check is gated by file size: rules below 30 lines do not fire. Tiny rules consumed in one narrow context get little benefit from explicit scope fields, and the noise drowns out genuine missing-scope cases on load-bearing rules. + +Override the threshold per project in `.ails/config.yml`: + +```yaml +rule_thresholds: + CORE:S:0013: + min_lines: 50 +``` + +Setting `min_lines: 0` removes the gate and restores fire-on-every-rule behavior. + ## Limitations Checks for scope-related frontmatter fields (`scope`, `globs`, `applies_to`). Does not validate whether the declared scope is correct. 
diff --git a/src/reporails_cli/core/classify/__init__.py b/src/reporails_cli/core/classify/__init__.py index 0053842..720d668 100644 --- a/src/reporails_cli/core/classify/__init__.py +++ b/src/reporails_cli/core/classify/__init__.py @@ -376,6 +376,7 @@ def classify_files( scan_root: Path, files: list[Path], file_types: list[FileTypeDeclaration], + generic_scanning: bool = False, ) -> list[ClassifiedFile]: """Classify files against type declarations. First pattern match wins. @@ -386,11 +387,18 @@ def classify_files( For freeform files, content_format is detected from file content. + When `generic_scanning` is True, after pattern-based classification + the classifier walks Markdown links from each classified file and + assigns `file_type: "generic"` to any in-tree `.md` files reachable + via those links that aren't already classified. See `link_walker.py` + and REQ-025 Phase C for the rationale. + Args: scan_root: Project root / cwd-equivalent for relative paths and ancestor-chain anchoring. files: Files to classify file_types: Type declarations from agent config + generic_scanning: When True, extend with link-reachability pass Returns: List of ClassifiedFile for matched files @@ -430,9 +438,30 @@ def classify_files( ) ) break # First valid match wins + + if generic_scanning: + classified.extend(_classify_generic_via_links(scan_root, classified)) + return classified +def _classify_generic_via_links( + scan_root: Path, + classified: list[ClassifiedFile], +) -> list[ClassifiedFile]: + """BFS Markdown links from classified files; classify reachable `.md` as `generic`. + + Lazy-imported to avoid pulling the walker module when generic scanning + is off (the default). 
+ """ + from reporails_cli.core.classify.generic_type import make_generic_classified + from reporails_cli.core.classify.link_walker import walk_markdown_links + + start_paths = {cf.path for cf in classified} + reached = walk_markdown_links(start_paths, scan_root, start_paths) + return [make_generic_classified(p) for p in sorted(reached)] + + def match_files( classified: list[ClassifiedFile], match: FileMatch, diff --git a/src/reporails_cli/core/classify/generic_type.py b/src/reporails_cli/core/classify/generic_type.py new file mode 100644 index 0000000..4e22b97 --- /dev/null +++ b/src/reporails_cli/core/classify/generic_type.py @@ -0,0 +1,37 @@ +"""Synthesizer for the `generic` file class — link-reached files (REQ-025 Phase C). + +The `generic` class is not declared in any agent config (it's not +agent-specific). When `generic_scanning: true` is set, the classifier +walks Markdown links from already-classified files and assigns `generic` +to the reached `.md` files. This module supplies the synthetic +`FileTypeDeclaration` and the `ClassifiedFile` constructor for those +hits. + +`loading: on_demand` is the load mode: linked files are not always-in-context +(link presence does not imply the agent eagerly loads them), so they +default out of `base` cross-file analysis. Operators that want a linked +file treated as base context can override per-project. 
+""" + +from __future__ import annotations + +from pathlib import Path + +from reporails_cli.core.platform.dto.models import ClassifiedFile + +GENERIC_TYPE_NAME = "generic" + + +def make_generic_classified(path: Path) -> ClassifiedFile: + """Return a `ClassifiedFile` with `file_type: generic` and on-demand loading.""" + return ClassifiedFile( + path=path, + file_type=GENERIC_TYPE_NAME, + properties={ + "format": "freeform", + "scope": "path_scoped", + "loading": "on_demand", + "lifecycle": "static", + "maintainer": "human", + }, + ) diff --git a/src/reporails_cli/core/classify/link_walker.py b/src/reporails_cli/core/classify/link_walker.py new file mode 100644 index 0000000..e71751c --- /dev/null +++ b/src/reporails_cli/core/classify/link_walker.py @@ -0,0 +1,123 @@ +"""Markdown link-reachability walker for the `generic` file class. + +REQ-025 Phase C: when `generic_scanning: true` is set in `.ails/config.yml`, +the classifier extends its file-type assignment by BFS-walking outgoing +Markdown links from each classified instruction file. Files reached +transitively that live in the project tree but aren't already classified +get `file_type: "generic"`. This catches carryovers, ADRs, sys/ docs, +knowledge docs, learning entries, and per-agent memory entries that an +agent reads as instruction input but that don't have their own canonical +capability path. + +The walker is agent-agnostic by construction — it doesn't read agent +configs or hardcode per-agent paths. Anything an existing classified file +points at via a relative `[text](path.md)` or reference-style link is in +scope; anything outside the project tree is skipped. +""" + +from __future__ import annotations + +import logging +import re +from pathlib import Path + +logger = logging.getLogger(__name__) + +# Inline `[text](path)` — the `path` group is the second `(...)`. +# Allows internal escapes; rejects URLs (anything with `://`) at the caller. 
+_INLINE_LINK_RE = re.compile(r"\[(?:[^\]]+)\]\(([^)]+)\)") + +# Reference-definition `[ref]: path` — used to back reference-style links. +_REF_DEFINITION_RE = re.compile(r"^\s*\[(?:[^\]]+)\]:\s*(\S+)", re.MULTILINE) + + +def walk_markdown_links( + start_paths: set[Path], + project_root: Path, + classified_paths: set[Path], + max_depth: int = 3, +) -> set[Path]: + """BFS outgoing Markdown links from `start_paths`; return newly reached `.md` paths. + + Files reachable from `start_paths` that: + - live inside `project_root`, + - have a `.md` suffix, + - are not already in `classified_paths`, + - haven't been visited yet, + are returned. The walk is bounded by `max_depth` link hops. + + Cycle-safe via `visited` set; out-of-tree links are silently skipped. + """ + visited: set[Path] = {p.resolve() for p in start_paths if p.exists()} + classified_resolved = {p.resolve() for p in classified_paths} + project_root_resolved = project_root.resolve() + + frontier: list[tuple[Path, int]] = [(p, 0) for p in start_paths if p.exists()] + found: set[Path] = set() + + while frontier: + current, depth = frontier.pop(0) + if depth >= max_depth: + continue + for linked in _outgoing_md_links(current): + resolved = linked.resolve() + if resolved in visited: + continue + visited.add(resolved) + if resolved in classified_resolved: + continue + if not _is_in_tree(resolved, project_root_resolved): + continue + if not resolved.is_file(): + continue + found.add(resolved) + frontier.append((resolved, depth + 1)) + + return found + + +def _outgoing_md_links(file_path: Path) -> list[Path]: + """Extract relative `.md` link targets from `file_path`. + + Returns absolute paths (file_path's directory joined with the link + target). Filters HTTP(s) URLs, anchor-only refs, and non-`.md` links. 
+ """ + try: + text = file_path.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + logger.debug("link_walker: cannot read %s: %s", file_path, exc) + return [] + + targets: list[str] = [m.group(1).strip() for m in _INLINE_LINK_RE.finditer(text)] + targets.extend(m.group(1).strip() for m in _REF_DEFINITION_RE.finditer(text)) + + out: list[Path] = [] + base_dir = file_path.parent + for target in targets: + cleaned = _strip_anchor(target) + if not cleaned or _looks_like_url(cleaned): + continue + if not cleaned.endswith(".md"): + continue + resolved = (base_dir / cleaned).resolve() + out.append(resolved) + return out + + +def _strip_anchor(target: str) -> str: + """Drop trailing `#anchor` and surrounding whitespace from a link target.""" + if "#" in target: + target = target.split("#", 1)[0] + return target.strip() + + +def _looks_like_url(target: str) -> bool: + return "://" in target or target.startswith("mailto:") + + +def _is_in_tree(path: Path, project_root: Path) -> bool: + try: + path.relative_to(project_root) + except ValueError: + return False + return True diff --git a/src/reporails_cli/core/lint/regex/runner.py b/src/reporails_cli/core/lint/regex/runner.py index f19986e..45abac8 100644 --- a/src/reporails_cli/core/lint/regex/runner.py +++ b/src/reporails_cli/core/lint/regex/runner.py @@ -395,12 +395,15 @@ def run_validation( return _scan_all_targets(scan_targets, scan_root, universal, by_pattern, exclude_dirs) -def _load_check_expectations(yml_paths: list[Path]) -> tuple[dict[str, str], dict[str, str]]: - """Load expect and message values from check definitions.""" +def _load_check_expectations( + yml_paths: list[Path], +) -> tuple[dict[str, str], dict[str, str], dict[str, int]]: + """Load expect, message, and min_lines values from check definitions.""" import yaml expect_map: dict[str, str] = {} message_map: dict[str, str] = {} + min_lines_map: dict[str, int] = {} for yml_path in yml_paths: if not yml_path.exists(): continue @@ -414,9 
+417,12 @@ def _load_check_expectations(yml_paths: list[Path]) -> tuple[dict[str, str], dic cid = check_def.get("id", "") expect_map[cid] = check_def.get("expect", "present") message_map[cid] = check_def.get("message", "") + ml = check_def.get("min_lines") + if isinstance(ml, (int, float)) and ml > 0: + min_lines_map[cid] = int(ml) except Exception: # yaml.YAMLError or OSError; skip unreadable files continue - return expect_map, message_map + return expect_map, message_map, min_lines_map def _collect_sarif_matches( @@ -463,9 +469,18 @@ def _emit_expect_findings( matched_pairs: set[tuple[str, str]], match_details: dict[tuple[str, str], tuple[int, str]], scanned_files: list[str], + min_lines_map: dict[str, int] | None = None, + scan_root: Path | None = None, ) -> list[LocalFinding]: - """Convert expect/match results to LocalFinding list.""" + """Convert expect/match results to LocalFinding list. + + When a check declares `min_lines`, files below that line count are + skipped — neither marked as failing nor as passing. Used by rules + like `CORE:S:0013 scope-fields-in-frontmatter` where the scope + declaration is boilerplate for tiny files. 
+ """ findings: list[LocalFinding] = [] + min_lines_map = min_lines_map or {} for check_id, expect in expect_map.items(): parts = check_id.split(".") rule_id = f"{parts[0]}:{parts[1]}:{parts[2]}" if len(parts) >= 3 else check_id @@ -475,6 +490,7 @@ def _emit_expect_findings( else "" ) msg = message_map.get(check_id, "") + min_lines = min_lines_map.get(check_id, 0) if expect == "absent": for file_path in scanned_files: if (check_id, file_path) in matched_pairs: @@ -503,19 +519,44 @@ def _emit_expect_findings( ) for file_path in scanned_files if (check_id, file_path) not in matched_pairs + and not _file_below_min_lines(file_path, min_lines, scan_root) ) return findings +def _file_below_min_lines(rel_path: str, min_lines: int, scan_root: Path | None) -> bool: + """Return True when `rel_path` exists under `scan_root` with fewer than `min_lines` lines. + + Files we cannot read are treated as not-below (the deterministic check + still fires, matching pre-`min_lines` behaviour). + """ + if min_lines <= 0 or scan_root is None: + return False + full = scan_root / rel_path + try: + return len(full.read_text(encoding="utf-8", errors="replace").splitlines()) < min_lines + except OSError: + return False + + def run_checks( yml_paths: list[Path], target: Path, instruction_files: list[Path] | None = None, exclude_dirs: list[str] | None = None, body_only_paths: set[Path] | None = None, + min_lines_overrides: dict[str, int] | None = None, ) -> list[LocalFinding]: - """Execute regex validation and return LocalFinding list.""" - expect_map, message_map = _load_check_expectations(yml_paths) + """Execute regex validation and return LocalFinding list. + + `min_lines_overrides` maps full rule IDs (e.g. `CORE:S:0013`) to + integer minimum line counts; values override defaults declared in the + rule's `checks.yml`. Populated by callers from `.ails/config.yml` + `rule_thresholds`. 
+ """ + expect_map, message_map, min_lines_map = _load_check_expectations(yml_paths) + if min_lines_overrides: + min_lines_map = _apply_min_lines_overrides(min_lines_map, expect_map, min_lines_overrides) sarif = run_validation( yml_paths, target, @@ -525,7 +566,39 @@ def run_checks( ) matched_pairs, match_details = _collect_sarif_matches(sarif) scanned_files = _resolve_scanned_files(target, instruction_files, exclude_dirs) - return _emit_expect_findings(expect_map, message_map, matched_pairs, match_details, scanned_files) + scan_root = target if target.is_dir() else target.parent + return _emit_expect_findings( + expect_map, + message_map, + matched_pairs, + match_details, + scanned_files, + min_lines_map=min_lines_map, + scan_root=scan_root, + ) + + +def _apply_min_lines_overrides( + min_lines_map: dict[str, int], + expect_map: dict[str, str], + overrides: dict[str, int], +) -> dict[str, int]: + """Merge `rule_thresholds[rule_id].min_lines` over per-check defaults. + + The override key is the rule id (e.g. `CORE:S:0013`); the check + id (e.g. `CORE.S.0013.pattern_check`) extends it. Walk `expect_map` + to find which check ids belong to a rule id and overwrite their + min_lines entry. 
+ """ + out = dict(min_lines_map) + for check_id in expect_map: + parts = check_id.split(".") + if len(parts) < 3: + continue + rule_id = f"{parts[0]}:{parts[1]}:{parts[2]}" + if rule_id in overrides: + out[check_id] = int(overrides[rule_id]) + return out def checks_per_file( diff --git a/src/reporails_cli/core/lint/rule_runner.py b/src/reporails_cli/core/lint/rule_runner.py index cf81348..01412d0 100644 --- a/src/reporails_cli/core/lint/rule_runner.py +++ b/src/reporails_cli/core/lint/rule_runner.py @@ -74,6 +74,18 @@ def _collect_deterministic_findings( """ from reporails_cli.core.classify import match_files from reporails_cli.core.lint.regex import run_checks + from reporails_cli.core.platform.config.config import get_project_config + + try: + project_config = get_project_config(project_dir) + thresholds = project_config.rule_thresholds + except (OSError, ValueError): + thresholds = {} + min_lines_overrides: dict[str, int] = {} + for rule_id, args in thresholds.items(): + ml = args.get("min_lines") + if isinstance(ml, int): + min_lines_overrides[rule_id] = ml findings: list[LocalFinding] = [] for rule in rules.values(): @@ -92,7 +104,14 @@ def _collect_deterministic_findings( if not target_files: continue - findings.extend(run_checks([rule.yml_path], project_dir, instruction_files=target_files)) + findings.extend( + run_checks( + [rule.yml_path], + project_dir, + instruction_files=target_files, + min_lines_overrides=min_lines_overrides, + ) + ) return findings @@ -104,14 +123,28 @@ def run_m_probes( """Run M-probe checks (mechanical + deterministic) against instruction files.""" from reporails_cli.core.classify import classify_files, load_file_types from reporails_cli.core.platform.adapters.registry import load_rules + from reporails_cli.core.platform.config.config import get_project_config rules = load_rules(project_root=project_dir, scan_root=project_dir, agent=agent) file_types = load_file_types(agent or "generic") - classified = classify_files(project_dir, 
instruction_files, file_types) + try: + generic_scanning = get_project_config(project_dir).generic_scanning + except (OSError, ValueError): + generic_scanning = False + classified = classify_files(project_dir, instruction_files, file_types, generic_scanning=generic_scanning) + # Extend instruction_files with link-walked generic-class files so + # downstream rules without explicit `match` still see them. + effective_files = list(instruction_files) + if generic_scanning: + known = set(effective_files) + for cf in classified: + if cf.path not in known and cf.file_type == "generic": + effective_files.append(cf.path) + known.add(cf.path) findings: list[LocalFinding] = [] findings.extend(_collect_mechanical_findings(rules, project_dir, classified)) - findings.extend(_collect_deterministic_findings(rules, project_dir, instruction_files, classified)) + findings.extend(_collect_deterministic_findings(rules, project_dir, effective_files, classified)) findings.sort(key=lambda f: (_SEVERITY_ORDER.get(f.severity, 9), f.line)) return findings @@ -131,6 +164,7 @@ def run_content_quality_checks( from reporails_cli.core.classify import classify_files, load_file_types from reporails_cli.core.lint.content_checker import run_content_checks from reporails_cli.core.platform.adapters.registry import load_rules + from reporails_cli.core.platform.config.config import get_project_config from reporails_cli.core.platform.dto.ruleset import RulesetMap as _RulesetMap if not isinstance(ruleset_map, _RulesetMap): @@ -142,6 +176,10 @@ def run_content_quality_checks( classified = [] if instruction_files: file_types = load_file_types(agent or "generic") - classified = classify_files(project_dir, instruction_files, file_types) + try: + generic_scanning = get_project_config(project_dir).generic_scanning + except (OSError, ValueError): + generic_scanning = False + classified = classify_files(project_dir, instruction_files, file_types, generic_scanning=generic_scanning) return 
run_content_checks(ruleset_map, rules, classified) diff --git a/src/reporails_cli/core/platform/config/config.py b/src/reporails_cli/core/platform/config/config.py index bcaa46c..4ecfc48 100644 --- a/src/reporails_cli/core/platform/config/config.py +++ b/src/reporails_cli/core/platform/config/config.py @@ -159,6 +159,16 @@ def _str_dict(key: str) -> dict[str, dict[str, object]]: ovr = data.get("overrides", {}) ovr_dict: dict[str, dict[str, str]] = ovr if isinstance(ovr, dict) else {} + rt = data.get("rule_thresholds", {}) + rt_dict: dict[str, dict[str, int]] = {} + if isinstance(rt, dict): + for rule_id, args in rt.items(): + if isinstance(args, dict): + rt_dict[str(rule_id)] = {str(k): int(v) for k, v in args.items() if isinstance(v, (int, float))} + + generic_scanning_raw = data.get("generic_scanning", False) + generic_scanning = bool(generic_scanning_raw) if isinstance(generic_scanning_raw, bool) else False + config = ProjectConfig( framework_version=fw_str, packages=_str_list("packages"), @@ -168,6 +178,8 @@ def _str_dict(key: str) -> dict[str, dict[str, object]]: default_agent=da_str, agents=_str_dict("agents"), surfaces=_str_dict("surfaces"), + rule_thresholds=rt_dict, + generic_scanning=generic_scanning, ) # Apply global defaults where project doesn't override global_cfg = get_global_config() diff --git a/src/reporails_cli/core/platform/dto/results.py b/src/reporails_cli/core/platform/dto/results.py index 1a20962..7bdf541 100644 --- a/src/reporails_cli/core/platform/dto/results.py +++ b/src/reporails_cli/core/platform/dto/results.py @@ -128,6 +128,15 @@ class ProjectConfig: # pylint: disable=too-many-instance-attributes # Per-surface include/exclude pattern adjustments. Keys are `.`. # Each entry may have `include: [glob...]` and `exclude: [glob...]`. surfaces: dict[str, dict[str, object]] = field(default_factory=dict) + # Per-rule threshold overrides keyed by full rule id (e.g. `CORE:S:0013`). 
+ # Each entry is a `{arg: value}` map applied over the rule's default check args. + # Generic mechanism; not tied to any single rule. + rule_thresholds: dict[str, dict[str, int]] = field(default_factory=dict) + # When True, the classifier extends past pattern-classified files via Markdown + # link-reachability and assigns `file_type: "generic"` to reached `.md` + # files in the project tree. Default off — anonymous tryout sees zero + # generic findings. See REQ-025 Phase C. + generic_scanning: bool = False # ============================================================================= diff --git a/tests/fixtures/generic-classification/CLAUDE.md b/tests/fixtures/generic-classification/CLAUDE.md new file mode 100644 index 0000000..ed90720 --- /dev/null +++ b/tests/fixtures/generic-classification/CLAUDE.md @@ -0,0 +1,11 @@ +# Generic-classification fixture project + +This CLAUDE.md links to `notes/architecture.md` and `notes/cycle-a.md`. Used +by tests/unit/test_link_walker.py to verify that link-reachability extends +classification. + +See [architecture notes](notes/architecture.md) and [cycle a](notes/cycle-a.md). + +Also: and [anchor only](#section) +and [an external markdown](../outside.md) — none of these should be added +as generic files. diff --git a/tests/fixtures/generic-classification/notes/architecture.md b/tests/fixtures/generic-classification/notes/architecture.md new file mode 100644 index 0000000..04e09e6 --- /dev/null +++ b/tests/fixtures/generic-classification/notes/architecture.md @@ -0,0 +1,6 @@ +# Architecture notes + +This file is reached via Markdown link from CLAUDE.md. With +`generic_scanning: true` it gets classified as `file_type: generic`. + +It does NOT link back, so no cycle here. 
diff --git a/tests/fixtures/generic-classification/notes/cycle-a.md b/tests/fixtures/generic-classification/notes/cycle-a.md new file mode 100644 index 0000000..6f0cfe0 --- /dev/null +++ b/tests/fixtures/generic-classification/notes/cycle-a.md @@ -0,0 +1,3 @@ +# Cycle A + +Links to [cycle B](cycle-b.md). diff --git a/tests/fixtures/generic-classification/notes/cycle-b.md b/tests/fixtures/generic-classification/notes/cycle-b.md new file mode 100644 index 0000000..c166521 --- /dev/null +++ b/tests/fixtures/generic-classification/notes/cycle-b.md @@ -0,0 +1,3 @@ +# Cycle B + +Links back to [cycle A](cycle-a.md). Tests cycle handling. diff --git a/tests/unit/test_link_walker.py b/tests/unit/test_link_walker.py new file mode 100644 index 0000000..2785db0 --- /dev/null +++ b/tests/unit/test_link_walker.py @@ -0,0 +1,123 @@ +"""Unit tests for the Markdown link-walker — REQ-025 Phase C.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from reporails_cli.core.classify import classify_files, load_file_types +from reporails_cli.core.classify.link_walker import walk_markdown_links + +FIXTURE_ROOT = Path(__file__).resolve().parent.parent / "fixtures" / "generic-classification" + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_walk_markdown_links_finds_inline_md_targets(tmp_path: Path) -> None: + main = tmp_path / "main.md" + target = tmp_path / "linked.md" + main.write_text("Read [the notes](linked.md).\n", encoding="utf-8") + target.write_text("# notes\n", encoding="utf-8") + reached = walk_markdown_links({main}, tmp_path, {main}) + assert reached == {target.resolve()} + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_walk_markdown_links_skips_urls_and_anchors(tmp_path: Path) -> None: + main = tmp_path / "main.md" + main.write_text( + "URL [example](https://example.com)\nanchor [section](#section)\nmailto [link](mailto:nobody@example.com)\n", + encoding="utf-8", + ) + assert walk_markdown_links({main}, tmp_path, 
{main}) == set() + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_walk_markdown_links_skips_out_of_tree(tmp_path: Path) -> None: + main = tmp_path / "project" / "main.md" + main.parent.mkdir() + outside = tmp_path / "outside.md" + outside.write_text("# outside\n", encoding="utf-8") + main.write_text("Goes [outside](../outside.md).\n", encoding="utf-8") + reached = walk_markdown_links({main}, main.parent, {main}) + assert reached == set() + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_walk_markdown_links_handles_cycle(tmp_path: Path) -> None: + a = tmp_path / "a.md" + b = tmp_path / "b.md" + a.write_text("Goes to [b](b.md).\n", encoding="utf-8") + b.write_text("Goes to [a](a.md).\n", encoding="utf-8") + reached = walk_markdown_links({a}, tmp_path, {a}) + # b is reached; a is already-classified so it does not enter `found`. + assert reached == {b.resolve()} + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_walk_markdown_links_skips_already_classified(tmp_path: Path) -> None: + main = tmp_path / "main.md" + rule = tmp_path / "rule.md" + main.write_text("See [rule](rule.md).\n", encoding="utf-8") + rule.write_text("# rule\n", encoding="utf-8") + # rule is already-classified; walker must not re-include it. 
+ reached = walk_markdown_links({main}, tmp_path, {main, rule}) + assert reached == set() + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_classify_files_with_generic_scanning_off_does_not_walk(tmp_path: Path) -> None: + (tmp_path / "CLAUDE.md").write_text("Read [arch](arch.md).\n", encoding="utf-8") + (tmp_path / "arch.md").write_text("# arch\n", encoding="utf-8") + file_types = load_file_types("claude") + classified = classify_files( + tmp_path, + [tmp_path / "CLAUDE.md", tmp_path / "arch.md"], + file_types, + generic_scanning=False, + ) + types = {cf.path.name: cf.file_type for cf in classified} + assert types.get("CLAUDE.md") == "main" + assert "arch.md" not in types # no generic without scanning + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_classify_files_with_generic_scanning_on_walks_and_classifies(tmp_path: Path) -> None: + (tmp_path / "CLAUDE.md").write_text("Read [arch](arch.md).\n", encoding="utf-8") + (tmp_path / "arch.md").write_text("# arch\n", encoding="utf-8") + file_types = load_file_types("claude") + classified = classify_files( + tmp_path, + [tmp_path / "CLAUDE.md"], + file_types, + generic_scanning=True, + ) + types = {cf.path.name: cf.file_type for cf in classified} + assert types.get("CLAUDE.md") == "main" + assert types.get("arch.md") == "generic" + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_classify_files_generic_loading_is_on_demand(tmp_path: Path) -> None: + """Generic files default to `loading: on_demand` so they don't pollute + base-context cross-file analysis. 
See DIAGNOSTIC.md cross-file matrix.""" + (tmp_path / "CLAUDE.md").write_text("Read [arch](arch.md).\n", encoding="utf-8") + (tmp_path / "arch.md").write_text("# arch\n", encoding="utf-8") + file_types = load_file_types("claude") + classified = classify_files( + tmp_path, + [tmp_path / "CLAUDE.md"], + file_types, + generic_scanning=True, + ) + arch = next((cf for cf in classified if cf.path.name == "arch.md"), None) + assert arch is not None + assert arch.properties.get("loading") == "on_demand" diff --git a/tests/unit/test_min_lines_gate.py b/tests/unit/test_min_lines_gate.py new file mode 100644 index 0000000..2e50c17 --- /dev/null +++ b/tests/unit/test_min_lines_gate.py @@ -0,0 +1,129 @@ +"""Unit tests for the min_lines gate on deterministic checks (REQ-025 Phase B).""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from reporails_cli.core.lint.regex.runner import ( + _apply_min_lines_overrides, + _emit_expect_findings, + _file_below_min_lines, + _load_check_expectations, +) + + +def _write_yml(path: Path, body: str) -> Path: + path.write_text(body, encoding="utf-8") + return path + + +@pytest.mark.unit +@pytest.mark.subsys_lint +def test_load_check_expectations_extracts_min_lines(tmp_path: Path) -> None: + yml = _write_yml( + tmp_path / "checks.yml", + "checks:\n" + "- id: CORE.S.0013.pattern_check\n" + " type: deterministic\n" + " pattern-regex: 'x'\n" + " expect: present\n" + " min_lines: 30\n" + " message: missing scope\n", + ) + expect, message, min_lines = _load_check_expectations([yml]) + assert expect == {"CORE.S.0013.pattern_check": "present"} + assert message == {"CORE.S.0013.pattern_check": "missing scope"} + assert min_lines == {"CORE.S.0013.pattern_check": 30} + + +@pytest.mark.unit +@pytest.mark.subsys_lint +def test_load_check_expectations_defaults_missing_min_lines_to_zero(tmp_path: Path) -> None: + yml = _write_yml( + tmp_path / "checks.yml", + "checks:\n" + "- id: CORE.S.0001.pattern\n" + " type: 
deterministic\n" + " pattern-regex: 'x'\n" + " expect: present\n" + " message: m\n", + ) + _, _, min_lines = _load_check_expectations([yml]) + assert "CORE.S.0001.pattern" not in min_lines # absent = no gate + + +@pytest.mark.unit +@pytest.mark.subsys_lint +def test_apply_min_lines_overrides_uses_rule_id(tmp_path: Path) -> None: + expect_map = {"CORE.S.0013.pattern_check": "present"} + base = {"CORE.S.0013.pattern_check": 30} + overrides = {"CORE:S:0013": 50} + merged = _apply_min_lines_overrides(base, expect_map, overrides) + assert merged["CORE.S.0013.pattern_check"] == 50 + + +@pytest.mark.unit +@pytest.mark.subsys_lint +def test_file_below_min_lines_short_file(tmp_path: Path) -> None: + short = tmp_path / "rule.md" + short.write_text("---\nline 1\nline 2\n---\n# rule\n", encoding="utf-8") + assert _file_below_min_lines("rule.md", 30, tmp_path) is True + + +@pytest.mark.unit +@pytest.mark.subsys_lint +def test_file_below_min_lines_long_file(tmp_path: Path) -> None: + long_file = tmp_path / "rule.md" + long_file.write_text("\n".join(f"line {i}" for i in range(50)), encoding="utf-8") + assert _file_below_min_lines("rule.md", 30, tmp_path) is False + + +@pytest.mark.unit +@pytest.mark.subsys_lint +def test_file_below_min_lines_disabled_when_zero(tmp_path: Path) -> None: + short = tmp_path / "rule.md" + short.write_text("x\n", encoding="utf-8") + assert _file_below_min_lines("rule.md", 0, tmp_path) is False + + +@pytest.mark.unit +@pytest.mark.subsys_lint +def test_emit_expect_findings_skips_short_files_when_present_missing(tmp_path: Path) -> None: + # tiny.md is below threshold; long.md is above. 
+ (tmp_path / "tiny.md").write_text("# tiny\n", encoding="utf-8") + (tmp_path / "long.md").write_text("\n".join(f"L{i}" for i in range(50)), encoding="utf-8") + + expect_map = {"CORE.S.0013.pattern_check": "present"} + message_map = {"CORE.S.0013.pattern_check": "missing scope"} + matched_pairs: set[tuple[str, str]] = set() # neither file matched the pattern + findings = _emit_expect_findings( + expect_map=expect_map, + message_map=message_map, + matched_pairs=matched_pairs, + match_details={}, + scanned_files=["tiny.md", "long.md"], + min_lines_map={"CORE.S.0013.pattern_check": 30}, + scan_root=tmp_path, + ) + files_with_findings = {f.file for f in findings} + assert "tiny.md" not in files_with_findings # gated out + assert "long.md" in files_with_findings # fires normally + + +@pytest.mark.unit +@pytest.mark.subsys_lint +def test_emit_expect_findings_no_gate_when_min_lines_zero(tmp_path: Path) -> None: + (tmp_path / "tiny.md").write_text("# tiny\n", encoding="utf-8") + expect_map = {"CORE.S.0013.pattern_check": "present"} + findings = _emit_expect_findings( + expect_map=expect_map, + message_map={"CORE.S.0013.pattern_check": "missing"}, + matched_pairs=set(), + match_details={}, + scanned_files=["tiny.md"], + min_lines_map=None, + scan_root=tmp_path, + ) + assert len(findings) == 1 From 005fd7eaebdff3039ac7ecf88a9f0642d5954517 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 00:19:51 +0200 Subject: [PATCH 04/30] Fix non-deterministic message text in `client_checks._check_broad_scope` --- UNRELEASED.md | 1 + src/reporails_cli/core/lint/client_checks.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index 47240a9..0cae8f8 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -14,5 +14,6 @@ - check: `[PATH]` positional argument is now `[ARG1] [ARG2]` — `ARG1` is sniffed as a capability keyword first, falling through to existing path semantics. 
No behaviour change for `ails check`, `ails check .`, or `ails check `. ### Fixed +- check: Deterministic message text for the broad-scope client check — `client_checks._check_broad_scope` now sorts the matched broad terms before formatting the message, so output is reproducible across runs regardless of `PYTHONHASHSEED`. The set-iteration order previously caused `"Broad terms (any, integrations)"` vs `"Broad terms (integrations, any)"` drift on identical inputs. ### Removed diff --git a/src/reporails_cli/core/lint/client_checks.py b/src/reporails_cli/core/lint/client_checks.py index 3127e3c..9f73489 100644 --- a/src/reporails_cli/core/lint/client_checks.py +++ b/src/reporails_cli/core/lint/client_checks.py @@ -166,7 +166,7 @@ def _check_broad_scope(charged: list[Atom], filepath: str) -> list[LocalFinding] if not m: continue scope_text = m.group(1).lower() - broad_matches = [w for w in _BROAD_SCOPE_WORDS if w in scope_text] + broad_matches = sorted(w for w in _BROAD_SCOPE_WORDS if w in scope_text) if broad_matches: findings.append( LocalFinding( From a1e7a7af8fd8e72ceee8d131e7596038dcd141d2 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 01:57:46 +0200 Subject: [PATCH 05/30] Fix `detect_features_filesystem` counting user-scope CLAUDE.md as project file --- UNRELEASED.md | 1 + src/reporails_cli/core/discovery/features.py | 22 +++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index 0cae8f8..ca1aaba 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -15,5 +15,6 @@ ### Fixed - check: Deterministic message text for the broad-scope client check — `client_checks._check_broad_scope` now sorts the matched broad terms before formatting the message, so output is reproducible across runs regardless of `PYTHONHASHSEED`. The set-iteration order previously caused `"Broad terms (any, integrations)"` vs `"Broad terms (integrations, any)"` drift on identical inputs. 
+- discovery: `DetectedFeatures.instruction_file_count` and `has_multiple_instruction_files` no longer include user-scope files like `~/.claude/CLAUDE.md`. The claude `main` file_type declares both project and user scope patterns; counting the user-scope file inflated capability gates in `policy/levels.py` (`multiple_files`, `external_references`) and L-level scoring in `policy/capability.py` for any user with a home-directory `CLAUDE.md`. Counts are now scoped to files under `target`; `_find_root_instruction` was already correctly scoped. ### Removed diff --git a/src/reporails_cli/core/discovery/features.py b/src/reporails_cli/core/discovery/features.py index 161807a..4d7cbba 100644 --- a/src/reporails_cli/core/discovery/features.py +++ b/src/reporails_cli/core/discovery/features.py @@ -111,10 +111,18 @@ def detect_features_filesystem(target: Path, agents: list[DetectedAgent] | None if features.has_backbone: features.component_count = _count_components(backbone_path) - # Count instruction files (all agents, not just CLAUDE.md) + # Count instruction files (all agents, not just CLAUDE.md). + # Scope the count to files under `target` — user-level memories like + # `~/.claude/CLAUDE.md` get pulled in by claude's user-scope patterns, + # but they are not part of the project's instruction-file inventory and + # would inflate L-level capability gating (`has_multiple_instruction_files` + # drives the `multiple_files` / `external_references` capability flags + # in `policy/levels.py`). 
all_instruction_files = get_all_instruction_files(target, agents=agents) - features.instruction_file_count = len(all_instruction_files) - features.has_multiple_instruction_files = len(all_instruction_files) > 1 + target_resolved = target.resolve() + project_instruction_files = [f for f in all_instruction_files if _is_under(f, target_resolved)] + features.instruction_file_count = len(project_instruction_files) + features.has_multiple_instruction_files = len(project_instruction_files) > 1 if features.instruction_file_count > 0: features.has_instruction_file = True @@ -148,6 +156,14 @@ def detect_features_filesystem(target: Path, agents: list[DetectedAgent] | None return features +def _is_under(path: Path, root_resolved: Path) -> bool: + """True when `path` resolves to a location under `root_resolved`.""" + try: + return path.resolve().is_relative_to(root_resolved) + except (OSError, ValueError): + return False + + def _find_root_instruction(target: Path, instruction_files: list[Path]) -> Path | None: """Find the root-level instruction file for content analysis.""" for f in instruction_files: From 891d7efd18c9db39154214908c353c5370fa3359 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 06:27:40 +0200 Subject: [PATCH 06/30] Fix memory/subagent_memory discovery enumerating *.md inside directory-glob patterns Refs #27 --- .githooks/pre-commit | 14 +++ UNRELEASED.md | 1 + framework/rules/gemini/config.yml | 19 ++++ src/reporails_cli/core/classify/__init__.py | 19 +++- .../core/discovery/agent_discovery.py | 64 ++++++++++- tests/unit/test_memory_classification.py | 104 ++++++++++++++++++ 6 files changed, 215 insertions(+), 6 deletions(-) create mode 100644 tests/unit/test_memory_classification.py diff --git a/.githooks/pre-commit b/.githooks/pre-commit index 20716e6..89608fe 100755 --- a/.githooks/pre-commit +++ b/.githooks/pre-commit @@ -1,5 +1,19 @@ #!/usr/bin/env bash # Pre-commit hook: require UNRELEASED.md when staging src/ or tests/ changes. 
+# Skipped during rebase or amend operations — those rewrite existing history +# rather than introducing new user-facing changes, so a new UNRELEASED entry +# is not warranted (any user-facing entry was added by the original commit +# being rewritten). + +# Skip during interactive or non-interactive rebase +if [ -d .git/rebase-merge ] || [ -d .git/rebase-apply ]; then + exit 0 +fi + +# Skip during amend (reflog action set by git when amending) +case "$GIT_REFLOG_ACTION" in + *amend*) exit 0 ;; +esac staged=$(git diff --cached --name-only) diff --git a/UNRELEASED.md b/UNRELEASED.md index ca1aaba..d4cae23 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -16,5 +16,6 @@ ### Fixed - check: Deterministic message text for the broad-scope client check — `client_checks._check_broad_scope` now sorts the matched broad terms before formatting the message, so output is reproducible across runs regardless of `PYTHONHASHSEED`. The set-iteration order previously caused `"Broad terms (any, integrations)"` vs `"Broad terms (integrations, any)"` drift on identical inputs. - discovery: `DetectedFeatures.instruction_file_count` and `has_multiple_instruction_files` no longer include user-scope files like `~/.claude/CLAUDE.md`. The claude `main` file_type declares both project and user scope patterns; counting the user-scope file inflated capability gates in `policy/levels.py` (`multiple_files`, `external_references`) and L-level scoring in `policy/capability.py` for any user with a home-directory `CLAUDE.md`. Counts are now scoped to files under `target`; `_find_root_instruction` was already correctly scoped. +- discovery: Directory-glob patterns (trailing slash) in agent configs now enumerate `*.md` files inside the matched directories. Previously `categorize_file_type` bucketed them as `skip`, leaving capability-owned memory files unclassified — the link walker then mis-tagged them `file_type: "generic"`. 
Affects claude `memory` and `subagent_memory` (project + local scopes); files under `.claude/agent-memory//` and `.claude/agent-memory-local//` now correctly classify to `subagent_memory`, unblocking `match: {type: memory}` rule routing. ### Removed diff --git a/framework/rules/gemini/config.yml b/framework/rules/gemini/config.yml index aa96234..04a3d41 100644 --- a/framework/rules/gemini/config.yml +++ b/framework/rules/gemini/config.yml @@ -251,6 +251,25 @@ file_types: vcs: committed maintainer: human + memory: + # Gemini's persistent memory is a SECTION inside ~/.gemini/GEMINI.md, + # not a directory of files. The agent appends saved memories to + # `## Gemini Added Memories`. Per-agent memory_locator (REQ-056 Phase + # 3a) reads this `locator:` block to enumerate memory entries as + # virtual records, distinct from claude's file_set shape. No `scopes:` + # block — discovery skips it gracefully (no patterns to glob); the + # memory_locator module is the sole consumer. + source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/gemini-md.md + format: freeform + scope: global + cardinality: collection + lifecycle: mutable + loading: session_start + locator: + type: file_section + file: ~/.gemini/GEMINI.md + section: "## Gemini Added Memories" + excludes: - CLAUDE:* - CODEX:* diff --git a/src/reporails_cli/core/classify/__init__.py b/src/reporails_cli/core/classify/__init__.py index 720d668..10d02ce 100644 --- a/src/reporails_cli/core/classify/__init__.py +++ b/src/reporails_cli/core/classify/__init__.py @@ -563,16 +563,33 @@ def _first_matching_pattern(rel_path: str, patterns: tuple[str, ...]) -> str | N Used by classify_files so downstream location-mode checks can inspect the specific matched pattern (loose leaf vs path-prefixed) for its location-disambiguation decision. 
+ + Trailing-slash patterns (`.claude/agent-memory/*/`) name a directory + glob whose contents are the file_type's instances; they expand to + `**/*.md` for match purposes so memory entry files inside the + directory tag with the capability's file_type. """ p = PurePosixPath(rel_path) for pattern in patterns: clean = pattern.removeprefix("./") - for variant in _expand_doublestar(clean): + for variant in _expand_doublestar_with_trailing(clean): if p.match(variant): return pattern return None +def _expand_doublestar_with_trailing(pattern: str) -> list[str]: + """Run `_expand_doublestar` after expanding trailing-slash directory globs. + + `.claude/agent-memory/*/` becomes `.claude/agent-memory/*/**/*.md` (and + its doublestar variants) so files inside the matched directory tag + with the file_type whose patterns include that directory glob. + """ + if pattern.endswith("/"): + pattern = pattern + "**/*.md" + return _expand_doublestar(pattern) + + def _expand_doublestar(pattern: str) -> list[str]: """Expand a glob pattern into variants where each ``**/`` matches zero dirs. 
diff --git a/src/reporails_cli/core/discovery/agent_discovery.py b/src/reporails_cli/core/discovery/agent_discovery.py index dc81d44..0a14f79 100644 --- a/src/reporails_cli/core/discovery/agent_discovery.py +++ b/src/reporails_cli/core/discovery/agent_discovery.py @@ -43,12 +43,12 @@ def categorize_file_type(patterns: list[str], properties: dict[str, str]) -> str Uses file_type properties from config.yml: - format: schema_validated -> config - scope: path_scoped -> rule (scoped rule files) - - directory-only or system paths -> skip + - absolute system paths only -> skip + - directory-only patterns -> instruction (memory / subagent_memory: + `glob_file_type_patterns` enumerates `*.md` files inside the matched + directories so `match: {type: memory}` rules can target them) - everything else -> instruction """ - # Skip directory-only patterns (e.g., ".claude/memory/") - if all(p.endswith("/") for p in patterns): - return "skip" # Skip absolute system paths (managed configs) if all(p.startswith(("/", "C:")) for p in patterns): return "skip" @@ -58,7 +58,7 @@ def categorize_file_type(patterns: list[str], properties: dict[str, str]) -> str # Path-scoped markdown -> rule files bucket if properties.get("scope") == "path_scoped": return "rule" - # Everything else (main, skill, override) -> instruction bucket + # Everything else (main, skill, override, memory/subagent_memory) -> instruction return "instruction" @@ -242,6 +242,9 @@ def glob_file_type_patterns( found: list[Path] = [] for pattern in patterns: if pattern.endswith("/"): + # Directory glob (`.claude/agent-memory/*/`, `~/.claude/projects/*/memory/`) + # -> enumerate `*.md` files inside the matched directories. 
+ _glob_directory_entries(pattern, target, found, exclude_dirs) continue if _is_external_pattern(pattern): _glob_external(pattern, target, found) @@ -283,6 +286,57 @@ def _glob_external(pattern: str, target: Path, found: list[Path]) -> None: found.append(expanded) +def _glob_directory_entries( + pattern: str, + target: Path, + found: list[Path], + exclude_dirs: frozenset[str], +) -> None: + """Enumerate `*.md` files inside directories matching a trailing-slash pattern. + + Trailing-slash patterns in agent configs (e.g. `.claude/agent-memory/*/`, + `~/.claude/projects/*/memory/`) describe a directory glob; the files + inside those directories are the file_type's instances. This helper + resolves the directory glob then walks `*.md` files inside each match. + + Used by `memory` and `subagent_memory` capabilities — the only file_types + declared with directory-only patterns. Older releases bucketed these as + `"skip"`, which left the files unclassified and the `link_walker` then + mis-tagged them `generic`. 
+ """ + dir_pattern = pattern.rstrip("/") + if _is_external_pattern(dir_pattern): + expanded_str = str(Path(dir_pattern).expanduser()) + if "/projects/*/" in expanded_str: + project_key = str(target.resolve()).replace("/", "-") + expanded_str = expanded_str.replace("/projects/*/", f"/projects/{project_key}/") + import glob as _glob + + for d in _glob.glob(expanded_str): + base = Path(d) + if base.is_dir(): + found.extend(p for p in base.rglob("*.md") if p.is_file()) + return + + # In-tree pattern: resolve glob relative to target, enumerate .md inside each dir + for base in _resolve_in_tree_dirs(dir_pattern, target, exclude_dirs): + found.extend( + entry for entry in base.rglob("*.md") if entry.is_file() and not is_excluded(entry, target, exclude_dirs) + ) + + +def _resolve_in_tree_dirs( + dir_pattern: str, + target: Path, + exclude_dirs: frozenset[str], +) -> list[Path]: + """Resolve an in-tree directory glob (no trailing slash) to existing directories.""" + import glob as _glob + + candidates = [Path(p) for p in _glob.glob(str(target / dir_pattern))] + return [p for p in candidates if p.is_dir() and not is_excluded(p, target, exclude_dirs)] + + def load_config_file_types( agent_id: str, rules_paths: list[Path] | None = None, diff --git a/tests/unit/test_memory_classification.py b/tests/unit/test_memory_classification.py new file mode 100644 index 0000000..77a1487 --- /dev/null +++ b/tests/unit/test_memory_classification.py @@ -0,0 +1,104 @@ +"""Memory + subagent_memory directory entries classify to their capability type (not `generic`).""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from reporails_cli.core.classify import _parse_file_types, classify_files +from reporails_cli.core.discovery.agent_discovery import ( + categorize_file_type, + discover_from_config, + load_config_file_types, +) + + +@pytest.fixture +def memory_fixture(tmp_path: Path) -> Path: + """Project with .claude/agent-memory//MEMORY.md + local variant + 
CLAUDE.md.""" + (tmp_path / ".claude" / "agent-memory" / "foo").mkdir(parents=True) + (tmp_path / ".claude" / "agent-memory" / "foo" / "MEMORY.md").write_text("# foo memory\n") + (tmp_path / ".claude" / "agent-memory-local" / "bar").mkdir(parents=True) + (tmp_path / ".claude" / "agent-memory-local" / "bar" / "MEMORY.md").write_text("# bar local memory\n") + (tmp_path / "CLAUDE.md").write_text( + "# Project\n\nSee [foo](.claude/agent-memory/foo/MEMORY.md) and " + "[bar](.claude/agent-memory-local/bar/MEMORY.md).\n" + ) + return tmp_path + + +class TestCategorizeFileType: + @pytest.mark.unit + @pytest.mark.subsys_lint + def test_trailing_slash_pattern_is_instruction_not_skip(self) -> None: + """memory + subagent_memory directory-glob patterns must enumerate, not skip.""" + assert ( + categorize_file_type( + [".claude/agent-memory/*/", "~/.claude/agent-memory/*/"], + {"scope": "task_scoped"}, + ) + == "instruction" + ) + + @pytest.mark.unit + @pytest.mark.subsys_lint + def test_absolute_system_paths_still_skip(self) -> None: + """Managed-config absolute paths remain skipped — unchanged behavior.""" + assert categorize_file_type(["/etc/claude-code/CLAUDE.md"], {"format": "freeform"}) == "skip" + + +class TestMemoryClassification: + @pytest.mark.unit + @pytest.mark.subsys_lint + def test_subagent_memory_files_classify_as_subagent_memory(self, memory_fixture: Path) -> None: + """.claude/agent-memory//*.md + .claude/agent-memory-local//*.md + receive file_type=subagent_memory, NOT generic. 
+ """ + result = discover_from_config(memory_fixture, "claude") + assert result is not None + instruction, _rule, _config = result + + fts = load_config_file_types("claude") + assert fts is not None + file_types = _parse_file_types(fts) + classified = classify_files(memory_fixture, instruction, file_types, generic_scanning=True) + + memory_files = { + cf.path.relative_to(memory_fixture).as_posix(): cf.file_type + for cf in classified + if cf.path.is_relative_to(memory_fixture) + } + + assert memory_files.get(".claude/agent-memory/foo/MEMORY.md") == "subagent_memory" + assert memory_files.get(".claude/agent-memory-local/bar/MEMORY.md") == "subagent_memory" + # Sanity: no memory file should land in the generic bucket + assert "generic" not in ( + memory_files.get(".claude/agent-memory/foo/MEMORY.md"), + memory_files.get(".claude/agent-memory-local/bar/MEMORY.md"), + ) + + @pytest.mark.unit + @pytest.mark.subsys_lint + def test_main_still_classifies_to_main(self, memory_fixture: Path) -> None: + """CLAUDE.md at project root still gets file_type=main — fix did not regress main routing.""" + result = discover_from_config(memory_fixture, "claude") + assert result is not None + instruction, _rule, _config = result + + fts = load_config_file_types("claude") + assert fts is not None + file_types = _parse_file_types(fts) + classified = classify_files(memory_fixture, instruction, file_types, generic_scanning=False) + + root_claude = next( + ( + cf + for cf in classified + if cf.path.is_relative_to(memory_fixture) + and cf.path.relative_to(memory_fixture).as_posix() == "CLAUDE.md" + ), + None, + ) + assert root_claude is not None + assert root_claude.file_type == "main" From 437b959e86d55f0a551daf3634209e4a323fa27e Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 06:33:27 +0200 Subject: [PATCH 07/30] Add CORE:S:0056 broken-markdown-link rule + thread CheckResult.annotations between chained mechanical checks Refs #26 --- UNRELEASED.md | 3 + 
.../core/broken-markdown-link/checks.yml | 14 ++ .../rules/core/broken-markdown-link/rule.md | 49 ++++++ .../broken-markdown-link/tests/fail/notes.md | 7 + .../broken-markdown-link/tests/fail/setup.md | 3 + .../broken-markdown-link/tests/pass/notes.md | 8 + .../tests/pass/rules/testing.md | 3 + .../broken-markdown-link/tests/pass/setup.md | 3 + .../rules/core/import-targets-resolve/rule.md | 16 +- .../tests/pass/.claude/rules/example.md | 4 +- .../tests/pass/docs/commit-skill.md | 1 + .../tests/pass/docs/style.md | 1 + src/reporails_cli/core/lint/harness.py | 145 +++++++++++++----- .../core/lint/mechanical/checks.py | 4 + .../core/lint/mechanical/checks_advanced.py | 103 +++++++++++++ .../core/lint/mechanical/runner.py | 21 ++- 16 files changed, 333 insertions(+), 52 deletions(-) create mode 100644 framework/rules/core/broken-markdown-link/checks.yml create mode 100644 framework/rules/core/broken-markdown-link/rule.md create mode 100644 framework/rules/core/broken-markdown-link/tests/fail/notes.md create mode 100644 framework/rules/core/broken-markdown-link/tests/fail/setup.md create mode 100644 framework/rules/core/broken-markdown-link/tests/pass/notes.md create mode 100644 framework/rules/core/broken-markdown-link/tests/pass/rules/testing.md create mode 100644 framework/rules/core/broken-markdown-link/tests/pass/setup.md create mode 100644 framework/rules/core/import-targets-resolve/tests/pass/docs/commit-skill.md create mode 100644 framework/rules/core/import-targets-resolve/tests/pass/docs/style.md diff --git a/UNRELEASED.md b/UNRELEASED.md index d4cae23..4da19f2 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -8,6 +8,8 @@ - check: `top_rules` array in `-f json` output; `focus` envelope in capability-mode JSON describes the targeted capability, name, agent, and paths. - check: Size-aware `CORE:S:0013 scope-fields-in-frontmatter` — rule no longer fires on rules below 30 lines (default). 
Override per-project via `.ails/config.yml: rule_thresholds.CORE:S:0013.min_lines`. Generic mechanism in deterministic check runner — `min_lines:` arg on any deterministic check + per-rule override. - check: `generic` file class via Markdown link-reachability — opt-in via `.ails/config.yml: generic_scanning: true`. When on, the classifier BFS-walks outgoing links from each instruction file and assigns `file_type: "generic"` (with `loading: on_demand`) to reached in-tree `.md` files. Cycle-safe, depth-bounded (3 hops), tree-bound, agent-agnostic. Rule routing uses existing `FileMatch.type` — no rule-schema change. Default off everywhere. +- rules: `CORE:S:0056 broken-markdown-link` — mechanical rule on freeform markdown files. Discovers `[text](path)` + `[ref]: path` link targets in each file via `extract_markdown_links`, validates each resolves relative to the source file's directory via `check_markdown_link_targets_exist`. Skips URLs, `mailto:`, absolute paths, and anchor-only refs (`#frag`). Severity `medium`, sibling shape to `CORE:S:0024 import-targets-resolve`. +- check: Mechanical check engine threads `CheckResult.annotations` from a rule's discover-stage check into the args of its subsequent validate-stage check (`extract_imports` -> `check_import_targets_exist`, `extract_markdown_links` -> `check_markdown_link_targets_exist`). Annotation accumulator is per-rule; pass and fail fixtures accumulate independently in the harness. Closes a latent gap where the validate stage always saw an empty annotations dict and silently passed. ### Changed - auth: Set explicit `User-Agent: reporails-cli/ (auth)` header on platform and GitHub requests so identifiable CLI traffic can be allow-listed at the edge. @@ -17,5 +19,6 @@ - check: Deterministic message text for the broad-scope client check — `client_checks._check_broad_scope` now sorts the matched broad terms before formatting the message, so output is reproducible across runs regardless of `PYTHONHASHSEED`. 
The set-iteration order previously caused `"Broad terms (any, integrations)"` vs `"Broad terms (integrations, any)"` drift on identical inputs. - discovery: `DetectedFeatures.instruction_file_count` and `has_multiple_instruction_files` no longer include user-scope files like `~/.claude/CLAUDE.md`. The claude `main` file_type declares both project and user scope patterns; counting the user-scope file inflated capability gates in `policy/levels.py` (`multiple_files`, `external_references`) and L-level scoring in `policy/capability.py` for any user with a home-directory `CLAUDE.md`. Counts are now scoped to files under `target`; `_find_root_instruction` was already correctly scoped. - discovery: Directory-glob patterns (trailing slash) in agent configs now enumerate `*.md` files inside the matched directories. Previously `categorize_file_type` bucketed them as `skip`, leaving capability-owned memory files unclassified — the link walker then mis-tagged them `file_type: "generic"`. Affects claude `memory` and `subagent_memory` (project + local scopes); files under `.claude/agent-memory//` and `.claude/agent-memory-local//` now correctly classify to `subagent_memory`, unblocking `match: {type: memory}` rule routing. +- check: `import-targets-resolve` (CORE:S:0024) fixture and rule body switched from incorrect `@import ` syntax (which extracted `@import` as the path) to canonical `@` syntax matching the `@[\w./-]+` regex in `extract_imports`. The pre-existing fixture silently passed because the engine's annotation-threading was broken; both are now correct. 
### Removed diff --git a/framework/rules/core/broken-markdown-link/checks.yml b/framework/rules/core/broken-markdown-link/checks.yml new file mode 100644 index 0000000..e9b54dd --- /dev/null +++ b/framework/rules/core/broken-markdown-link/checks.yml @@ -0,0 +1,14 @@ +checks: +- id: CORE.S.0056.file-in-scope + type: mechanical + check: file_exists + args: + path: "**/*.md" +- id: CORE.S.0056.extract-links + type: mechanical + check: extract_markdown_links + args: + path: "**/*.md" +- id: CORE.S.0056.targets-exist + type: mechanical + check: check_markdown_link_targets_exist diff --git a/framework/rules/core/broken-markdown-link/rule.md b/framework/rules/core/broken-markdown-link/rule.md new file mode 100644 index 0000000..74d569c --- /dev/null +++ b/framework/rules/core/broken-markdown-link/rule.md @@ -0,0 +1,49 @@ +--- +id: CORE:S:0056 +slug: broken-markdown-link +title: Markdown Link Targets Resolve +category: structure +type: mechanical +severity: medium +backed_by: [] +match: {format: freeform} +--- + +# Markdown Link Targets Resolve + +Markdown links in instruction files must resolve to existing files. Broken targets create phantom context — the agent sees the link directive but the referenced content never loads, so the file silently underdelivers compared to what its prose promises. + +## Antipatterns + +- **Renamed file without updating the link**: Moving `docs/setup.md` to `docs/getting-started.md` but leaving `[Setup](docs/setup.md)` in another file. The `extract_markdown_links` check finds the reference and `check_markdown_link_targets_exist` fails because the path no longer resolves. +- **Typo in relative path**: Writing `[Rules](.claude/rules/git-rules.md)` instead of `[Rules](.claude/rules/git.md)`. The link silently fails — the surrounding prose still reads as if the target loaded. 
+- **Link crossing repository boundary**: Referencing `[Notes](../../other-repo/CLAUDE.md)` which exists in the author's local checkout but not in CI or other contributors' working trees. +- **Reference-style definition pointing nowhere**: Defining `[setup]: docs/old-setup.md` at the bottom of the file after the target was deleted. The definition is still parsed even when no inline `[setup]` consumes it. + +## Pass / Fail + +### Pass + +~~~~markdown +# Project Setup + +See [Getting Started](docs/getting-started.md) for the install steps and +the [testing rules](.claude/rules/testing.md) for the QA gate. + +[setup]: docs/getting-started.md +~~~~ + +### Fail + +~~~~markdown +# Project Setup + +See [Getting Started](docs/old-setup.md) for the install steps and +the [testing rules](.claude/rules/deleted-rule.md) for the QA gate. + +[setup]: docs/old-setup.md +~~~~ + +## Limitations + +Discovers `[text](path)` inline links and `[ref]: path` reference definitions, then resolves each target relative to the source file's directory. Skips URLs (`://`, `mailto:`), absolute paths (`/foo`), and anchor-only references (`#frag`). Does not validate that anchors exist within the target file, does not exclude links that appear inside fenced code blocks (lookups span the entire file content, so example links in code fences are validated too), and does not check external URLs for reachability. diff --git a/framework/rules/core/broken-markdown-link/tests/fail/notes.md b/framework/rules/core/broken-markdown-link/tests/fail/notes.md new file mode 100644 index 0000000..6fc453f --- /dev/null +++ b/framework/rules/core/broken-markdown-link/tests/fail/notes.md @@ -0,0 +1,7 @@ +# Fail Fixture + +See [Setup Guide](setup.md) for install steps and the +[Deleted Rules](rules/deleted-rule.md) for the QA gate. The +[Old Migration Notes](docs/old-migration.md) cover the legacy path. 
+ +[setup]: setup.md diff --git a/framework/rules/core/broken-markdown-link/tests/fail/setup.md b/framework/rules/core/broken-markdown-link/tests/fail/setup.md new file mode 100644 index 0000000..4535ca9 --- /dev/null +++ b/framework/rules/core/broken-markdown-link/tests/fail/setup.md @@ -0,0 +1,3 @@ +# Setup + +Install dependencies with `uv sync`. diff --git a/framework/rules/core/broken-markdown-link/tests/pass/notes.md b/framework/rules/core/broken-markdown-link/tests/pass/notes.md new file mode 100644 index 0000000..28894a3 --- /dev/null +++ b/framework/rules/core/broken-markdown-link/tests/pass/notes.md @@ -0,0 +1,8 @@ +# Pass Fixture + +See [Setup Guide](setup.md) for install steps and the +[Testing Rules](rules/testing.md) for the QA gate. External docs live at +the [Anthropic site](https://www.anthropic.com) and may be reached via +[email](mailto:team@example.com). + +[setup]: setup.md diff --git a/framework/rules/core/broken-markdown-link/tests/pass/rules/testing.md b/framework/rules/core/broken-markdown-link/tests/pass/rules/testing.md new file mode 100644 index 0000000..be49e83 --- /dev/null +++ b/framework/rules/core/broken-markdown-link/tests/pass/rules/testing.md @@ -0,0 +1,3 @@ +# Testing + +Run `uv run poe qa_fast` before committing. diff --git a/framework/rules/core/broken-markdown-link/tests/pass/setup.md b/framework/rules/core/broken-markdown-link/tests/pass/setup.md new file mode 100644 index 0000000..4535ca9 --- /dev/null +++ b/framework/rules/core/broken-markdown-link/tests/pass/setup.md @@ -0,0 +1,3 @@ +# Setup + +Install dependencies with `uv sync`. diff --git a/framework/rules/core/import-targets-resolve/rule.md b/framework/rules/core/import-targets-resolve/rule.md index cda919f..2f12b56 100644 --- a/framework/rules/core/import-targets-resolve/rule.md +++ b/framework/rules/core/import-targets-resolve/rule.md @@ -15,9 +15,9 @@ Import references in instruction files must resolve to existing files. 
Broken im ## Antipatterns -- **Renamed file without updating imports**: Moving `docs/setup.md` to `docs/getting-started.md` but leaving `@import docs/setup.md` in another file. The `extract_imports` check finds the reference and `check_import_targets_exist` fails because the path no longer resolves. -- **Relative path from wrong directory**: Writing `@import ../shared/config.md` when the file structure requires `@import ../../shared/config.md`. The path resolution check verifies the target exists relative to the project root. -- **Import referencing a directory instead of a file**: Writing `@import docs/specs/` instead of `@import docs/specs/pipeline.md`. The check expects file paths, not directory paths. +- **Renamed file without updating imports**: Moving `docs/setup.md` to `docs/getting-started.md` but leaving `@docs/setup.md` in another file. The `extract_imports` check finds the reference and `check_import_targets_exist` fails because the path no longer resolves. +- **Relative path from wrong directory**: Writing `@../shared/config.md` when the file structure requires `@../../shared/config.md`. The path resolution check verifies the target exists relative to the project root. +- **Import referencing a directory instead of a file**: Writing `@docs/specs/` instead of `@docs/specs/pipeline.md`. The check expects file paths, not directory paths. ## Pass / Fail @@ -26,8 +26,8 @@ Import references in instruction files must resolve to existing files. Broken im ~~~~markdown # Project Setup -@import docs/getting-started.md -@import .claude/rules/testing-design.md +@docs/getting-started.md +@.claude/rules/testing-design.md ~~~~ ### Fail @@ -35,10 +35,10 @@ Import references in instruction files must resolve to existing files. 
Broken im ~~~~markdown # Project Setup -@import docs/old-setup.md -@import .claude/rules/deleted-rule.md +@docs/old-setup.md +@.claude/rules/deleted-rule.md ~~~~ ## Limitations -Extracts `@import` references and verifies each target file exists on disk. Does not validate the content of imported files, detect circular imports, or check import syntax correctness. +Extracts `@` references via `extract_imports` (regex `@[\w./-]+`) and verifies each target file exists on disk via `check_import_targets_exist`. Does not validate the content of imported files, detect circular imports, or check whether the regex captured trailing punctuation in inline references. diff --git a/framework/rules/core/import-targets-resolve/tests/pass/.claude/rules/example.md b/framework/rules/core/import-targets-resolve/tests/pass/.claude/rules/example.md index 4b64d6d..452e082 100644 --- a/framework/rules/core/import-targets-resolve/tests/pass/.claude/rules/example.md +++ b/framework/rules/core/import-targets-resolve/tests/pass/.claude/rules/example.md @@ -1,2 +1,2 @@ -@import .claude/rules/style.md -@import .claude/skills/commit/SKILL.md +@docs/style.md +@docs/commit-skill.md diff --git a/framework/rules/core/import-targets-resolve/tests/pass/docs/commit-skill.md b/framework/rules/core/import-targets-resolve/tests/pass/docs/commit-skill.md new file mode 100644 index 0000000..aff06c3 --- /dev/null +++ b/framework/rules/core/import-targets-resolve/tests/pass/docs/commit-skill.md @@ -0,0 +1 @@ +# Commit skill diff --git a/framework/rules/core/import-targets-resolve/tests/pass/docs/style.md b/framework/rules/core/import-targets-resolve/tests/pass/docs/style.md new file mode 100644 index 0000000..ae957a1 --- /dev/null +++ b/framework/rules/core/import-targets-resolve/tests/pass/docs/style.md @@ -0,0 +1 @@ +# Style diff --git a/src/reporails_cli/core/lint/harness.py b/src/reporails_cli/core/lint/harness.py index d322c76..fe35a38 100644 --- a/src/reporails_cli/core/lint/harness.py +++ 
b/src/reporails_cli/core/lint/harness.py @@ -289,10 +289,21 @@ def _run_mechanical_check( check: dict[str, Any], fixture_root: Path, classified_files: list[ClassifiedFile], + extra_args: dict[str, Any] | None = None, ) -> CheckResult: - """Run a single mechanical check against a fixture directory.""" + """Run a single mechanical check against a fixture directory. + + `extra_args` carries annotations from prior mechanical checks in the + same rule chain (e.g. `discovered_markdown_links` flowing from + `extract_markdown_links` into `check_markdown_link_targets_exist`). + Merged below the check's own declared args so checks.yml entries + always win over upstream annotations. + """ check_name = check.get("check", "") or check.get("name", "") - args = check.get("args", {}) or {} + args: dict[str, Any] = {} + if extra_args: + args.update(extra_args) + args.update(check.get("args", {}) or {}) fn = MECHANICAL_CHECKS.get(check_name) if fn is None: @@ -302,7 +313,7 @@ def _run_mechanical_check( result = fn(fixture_root, args, classified_files) if check.get("expect", "present") == "absent": - result = CheckResult(passed=not result.passed, message=result.message) + result = CheckResult(passed=not result.passed, message=result.message, annotations=result.annotations) return result # type: ignore[no-any-return] @@ -742,29 +753,7 @@ def run_rule( scaffolded_pass, scaffolded_fail, effective_pass_dir, effective_fail_dir = _prepare_scaffolds(rule, file_types) try: - for check in rule.checks: - check_id = check.get("id", "unknown") - check_type = check.get("type", "unknown") - expect = check.get("expect", "present") - - # === Pass fixture: ALL checks must pass === - if rule.has_pass_fixture: - passed, run = _check_fixture( - check, check_id, check_type, expect, effective_pass_dir, rule, file_types, "pass" - ) - result.check_runs.append(run) - if not passed: - result.status = HarnessStatus.FAILED - - # === Fail fixture: at least ONE check must detect a violation === - if 
rule.has_fail_fixture: - violation, run = _check_fixture_for_violation( - check, check_id, check_type, expect, effective_fail_dir, rule, file_types - ) - result.check_runs.append(run) - if violation: - fail_violation_found = True - + fail_violation_found = _run_rule_checks(rule, file_types, effective_pass_dir, effective_fail_dir, result) if rule.has_fail_fixture and not fail_violation_found: result.status = HarnessStatus.FAILED result.messages.append("Fail fixture: no check detected a violation") @@ -777,6 +766,67 @@ def run_rule( return result +def _run_rule_checks( + rule: RuleInfo, + file_types: list[FileTypeDeclaration], + effective_pass_dir: Path, + effective_fail_dir: Path, + result: HarnessResult, +) -> bool: + """Iterate the rule's checks against pass + fail fixtures. + + Threads per-fixture annotation accumulators so chained checks like + `extract_markdown_links` -> `check_markdown_link_targets_exist` see + each other's `discovered_*` annotations. Pass + fail accumulators are + independent so a pass annotation never bleeds into the fail chain. + Returns whether the fail fixture saw at least one violation. 
+ """ + fail_violation_found = False + pass_extra: dict[str, Any] = {} + fail_extra: dict[str, Any] = {} + for check in rule.checks: + check_id = check.get("id", "unknown") + check_type = check.get("type", "unknown") + expect = check.get("expect", "present") + + if rule.has_pass_fixture: + passed, run, raw = _check_fixture( + check, + check_id, + check_type, + expect, + effective_pass_dir, + rule, + file_types, + "pass", + extra_args=pass_extra, + ) + result.check_runs.append(run) + if not passed: + result.status = HarnessStatus.FAILED + if raw is not None and raw.annotations: + pass_extra.update(raw.annotations) + + if rule.has_fail_fixture: + violation, run, raw = _check_fixture_for_violation( + check, + check_id, + check_type, + expect, + effective_fail_dir, + rule, + file_types, + extra_args=fail_extra, + ) + result.check_runs.append(run) + if violation: + fail_violation_found = True + if raw is not None and raw.annotations: + fail_extra.update(raw.annotations) + + return fail_violation_found + + def _check_fixture( check: dict[str, Any], check_id: str, @@ -786,22 +836,32 @@ def _check_fixture( rule: RuleInfo, file_types: list[FileTypeDeclaration], fixture_name: str, -) -> tuple[bool, CheckRun]: - """Run a check against a pass fixture. Returns (passed, CheckRun).""" + extra_args: dict[str, Any] | None = None, +) -> tuple[bool, CheckRun, CheckResult | None]: + """Run a check against a pass fixture. Returns (passed, CheckRun, raw_result). + + `raw_result` is the mechanical-check `CheckResult` (with annotations), + or None for non-mechanical paths; the caller threads annotations into + subsequent checks via the `extra_args` parameter. 
+ """ if check_type == "mechanical": classified = _classify_fixture(fixture_root, file_types) - cr = _run_mechanical_check(check, fixture_root, classified) - return cr.passed, CheckRun(check_id, check_type, fixture_name, cr.passed, cr.message) + cr = _run_mechanical_check(check, fixture_root, classified, extra_args=extra_args) + return cr.passed, CheckRun(check_id, check_type, fixture_name, cr.passed, cr.message), cr if check_type == "deterministic": ok, count, msg = _run_deterministic_check(rule.checks_yml, check, fixture_root) passed = (ok and count > 0) if expect == "present" else (ok and count == 0) - return passed, CheckRun(check_id, check_type, fixture_name, passed, msg) + return passed, CheckRun(check_id, check_type, fixture_name, passed, msg), None if check_type == "content_query": - return True, CheckRun(check_id, check_type, fixture_name, True, "content_query — skipped (requires mapper)") + return ( + True, + CheckRun(check_id, check_type, fixture_name, True, "content_query — skipped (requires mapper)"), + None, + ) - return False, CheckRun(check_id, check_type, fixture_name, False, f"unknown check type: {check_type}") + return False, CheckRun(check_id, check_type, fixture_name, False, f"unknown check type: {check_type}"), None def _check_fixture_for_violation( @@ -812,23 +872,28 @@ def _check_fixture_for_violation( fixture_root: Path, rule: RuleInfo, file_types: list[FileTypeDeclaration], -) -> tuple[bool, CheckRun]: - """Run a check against a fail fixture. Returns (violation_found, CheckRun).""" + extra_args: dict[str, Any] | None = None, +) -> tuple[bool, CheckRun, CheckResult | None]: + """Run a check against a fail fixture. Returns (violation_found, CheckRun, raw_result). + + Same annotation-threading contract as `_check_fixture`: the raw + `CheckResult` propagates so accumulator state survives chained checks. 
+ """ if check_type == "mechanical": classified = _classify_fixture(fixture_root, file_types) - cr = _run_mechanical_check(check, fixture_root, classified) + cr = _run_mechanical_check(check, fixture_root, classified, extra_args=extra_args) violation = not cr.passed - return violation, CheckRun(check_id, check_type, "fail", True, cr.message) + return violation, CheckRun(check_id, check_type, "fail", True, cr.message), cr if check_type == "deterministic": ok, count, msg = _run_deterministic_check(rule.checks_yml, check, fixture_root) violation = (ok and count == 0) if expect == "present" else (ok and count > 0) - return violation, CheckRun(check_id, check_type, "fail", True, msg) + return violation, CheckRun(check_id, check_type, "fail", True, msg), None if check_type == "content_query": - return False, CheckRun(check_id, check_type, "fail", True, "content_query — skipped (requires mapper)") + return False, CheckRun(check_id, check_type, "fail", True, "content_query — skipped (requires mapper)"), None - return False, CheckRun(check_id, check_type, "fail", False, f"unknown check type: {check_type}") + return False, CheckRun(check_id, check_type, "fail", False, f"unknown check type: {check_type}"), None # ── Batch runner ──────────────────────────────────────────────────── diff --git a/src/reporails_cli/core/lint/mechanical/checks.py b/src/reporails_cli/core/lint/mechanical/checks.py index cc9c179..c1a6c19 100644 --- a/src/reporails_cli/core/lint/mechanical/checks.py +++ b/src/reporails_cli/core/lint/mechanical/checks.py @@ -249,11 +249,13 @@ def byte_size( from reporails_cli.core.lint.mechanical.checks_advanced import ( # noqa: E402 aggregate_byte_size, check_import_targets_exist, + check_markdown_link_targets_exist, content_absent, count_at_least, count_at_most, directory_file_types, extract_imports, + extract_markdown_links, file_absent, filename_matches_pattern, frontmatter_extra_keys, @@ -288,6 +290,8 @@ def byte_size( "count_at_most": count_at_most, 
# Markdown link extraction — mirrors `link_walker._INLINE_LINK_RE` /
# `_REF_DEFINITION_RE` so the broken-target rule and the generic-class
# classifier agree on what counts as a Markdown link.
_INLINE_LINK_RE = re.compile(r"\[(?:[^\]]+)\]\(([^)]+)\)")
_REF_DEFINITION_RE = re.compile(r"^\s*\[(?:[^\]]+)\]:\s*(\S+)", re.MULTILINE)

# Optional CommonMark link title trailing the destination, e.g.
# `[text](setup.md "Setup Guide")`. The inline regex captures everything up to
# the closing paren, so the title has to be removed before path resolution.
_LINK_TITLE_RE = re.compile(r"""\s+(?:"[^"]*"|'[^']*')\s*$""")


def _is_external_link(target: str) -> bool:
    """Return True for URLs (`scheme://`), `mailto:` links, and pure anchor refs (`#frag`)."""
    if "://" in target or target.startswith("mailto:"):
        return True
    return target.startswith("#")


def _strip_anchor(target: str) -> str:
    """Drop a trailing `#fragment` (if any) and surrounding whitespace."""
    if "#" in target:
        target = target.split("#", 1)[0]
    return target.strip()


def _clean_link_target(raw: str) -> str:
    """Reduce a captured link destination to the bare path to resolve.

    Removes, in order: a trailing quoted link title, the angle brackets of a
    `<destination>` form, and a `#fragment`. Returns `""` when nothing but an
    anchor (or whitespace) remains.
    """
    target = _LINK_TITLE_RE.sub("", raw.strip())
    if len(target) >= 2 and target.startswith("<") and target.endswith(">"):
        target = target[1:-1]
    return _strip_anchor(target)


def _select_link_entries(args: dict[str, Any]) -> list[Any]:
    """Pick the link annotations out of a check's merged args.

    Prefers the `discovered_markdown_links` key. The runner merges every
    upstream annotation into `args`, so "first list wins" could hand this
    check another discover step's output (e.g. `discovered_imports`). Falls
    back to the first list-valued arg for callers that pass the entries under
    a different key.
    """
    named = args.get("discovered_markdown_links")
    if isinstance(named, list):
        return named
    for value in args.values():
        if isinstance(value, list):
            return value
    return []


def extract_markdown_links(
    root: Path,
    args: dict[str, Any],
    classified_files: list[ClassifiedFile],
) -> CheckResult:
    """Discover `[text](path)` + `[ref]: path` link targets in target files.

    Annotates `discovered_markdown_links` as a list of `"<source>::<target>"`
    entries, where `<source>` is the linking file (relative to `root` when it
    lives under it) and `<target>` is the cleaned link destination; the
    validate step splits on `::` to resolve each target against the source
    file's parent directory.

    Filters URLs (`://`, `mailto:`), bare anchor refs (`#frag`), and absolute
    paths (`/foo`). Link titles, `<...>` destination brackets, and trailing
    anchors on otherwise-valid links are stripped.

    The regex constants mirror those in `core/classify/link_walker.py` so the
    broken-target rule and the generic-class classifier agree on which spans
    are links.

    Always returns a passing result; unreadable or non-file targets are
    skipped silently.
    """
    annotations: list[str] = []
    for match in _get_target_files(args, classified_files, root):
        if not match.is_file():
            continue
        try:
            text = match.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        rel = match.relative_to(root).as_posix() if match.is_relative_to(root) else str(match)
        raw_targets = [m.group(1) for m in _INLINE_LINK_RE.finditer(text)]
        raw_targets += [m.group(1) for m in _REF_DEFINITION_RE.finditer(text)]
        for raw in raw_targets:
            cleaned = _clean_link_target(raw)
            if not cleaned or _is_external_link(cleaned):
                continue
            # Absolute paths (`/foo/bar.md`) are user-system paths, not
            # project-relative; treat as out-of-scope.
            if cleaned.startswith("/"):
                continue
            annotations.append(f"{rel}::{cleaned}")
    if annotations:
        return CheckResult(
            passed=True,
            message=f"Found {len(annotations)} markdown link(s)",
            annotations={"discovered_markdown_links": annotations},
        )
    return CheckResult(passed=True, message="No markdown links found")


def check_markdown_link_targets_exist(
    root: Path,
    args: dict[str, Any],
    _classified_files: list[ClassifiedFile],
) -> CheckResult:
    """Verify each discovered markdown link resolves to an existing path.

    Reads `discovered_markdown_links` from args (annotations produced by
    `extract_markdown_links`, `"<source>::<target>"` entries). Each target is
    resolved relative to the source file's parent directory, or relative to
    `root` when the source file itself does not exist. Entries that are not
    strings or carry no `::` separator are ignored.

    Fails with a message listing up to five broken links; passes when every
    target exists or there is nothing to check.
    """
    entries = _select_link_entries(args)
    if not entries:
        return CheckResult(passed=True, message="No markdown links to check")
    missing: list[str] = []
    for raw in entries:
        if not isinstance(raw, str) or "::" not in raw:
            continue
        src_rel, target = raw.split("::", 1)
        src_path = root / src_rel
        base_dir = src_path.parent if src_path.exists() else root
        candidate = (base_dir / target).resolve()
        if not candidate.exists():
            missing.append(f"{src_rel} -> {target}")
    if missing:
        return CheckResult(
            passed=False,
            message=f"Broken markdown link(s): {'; '.join(missing[:5])}",
        )
    return CheckResult(passed=True, message=f"All {len(entries)} markdown link(s) resolve")
@@ -42,7 +47,10 @@ def dispatch_single_check( logger.warning("Unknown mechanical check: %s (rule %s)", check.check, rule.id) return None, None - args: dict[str, Any] = dict(check.args or {}) + args: dict[str, Any] = {} + if extra_args: + args.update(extra_args) + args.update(check.args or {}) # Inject rule match type so checks can scope to the rule's file targets. if rule.match is not None and rule.match.type and "_targets" not in args: @@ -105,12 +113,21 @@ def run_mechanical_checks( matched = classified_files location = resolve_location(rule, matched, target) + # Annotations accumulated across mechanical checks in this rule's chain. + # `extract_imports` writes `discovered_imports` into its CheckResult; + # `check_import_targets_exist` later in the same rule reads it via + # the extra_args injection in `dispatch_single_check`. + accumulated_args: dict[str, Any] = {} for check in rule.checks: if check.type != "mechanical": continue - violation, _result = dispatch_single_check(check, rule, target, matched, location) + violation, result = dispatch_single_check( + check, rule, target, matched, location, extra_args=accumulated_args + ) if violation: violations.append(violation) + if result is not None and result.annotations: + accumulated_args.update(result.annotations) return violations From 4336d54608abb45694ec4ad8ce998b50f5c15299 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 06:37:18 +0200 Subject: [PATCH 08/30] =?UTF-8?q?Add=20memory=5Flocator.py=20=E2=80=94=20p?= =?UTF-8?q?er-agent=20memory=20entry=20locator=20used=20by=20L3=20memory?= =?UTF-8?q?=20rules?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refs #27 --- UNRELEASED.md | 1 + .../core/discovery/memory_locator.py | 162 ++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 src/reporails_cli/core/discovery/memory_locator.py diff --git a/UNRELEASED.md b/UNRELEASED.md index 4da19f2..5e37133 100644 --- a/UNRELEASED.md +++ 
b/UNRELEASED.md @@ -10,6 +10,7 @@ - check: `generic` file class via Markdown link-reachability — opt-in via `.ails/config.yml: generic_scanning: true`. When on, the classifier BFS-walks outgoing links from each instruction file and assigns `file_type: "generic"` (with `loading: on_demand`) to reached in-tree `.md` files. Cycle-safe, depth-bounded (3 hops), tree-bound, agent-agnostic. Rule routing uses existing `FileMatch.type` — no rule-schema change. Default off everywhere. - rules: `CORE:S:0056 broken-markdown-link` — mechanical rule on freeform markdown files. Discovers `[text](path)` + `[ref]: path` link targets in each file via `extract_markdown_links`, validates each resolves relative to the source file's directory via `check_markdown_link_targets_exist`. Skips URLs, `mailto:`, absolute paths, and anchor-only refs (`#frag`). Severity `medium`, sibling shape to `CORE:S:0024 import-targets-resolve`. - check: Mechanical check engine threads `CheckResult.annotations` from a rule's discover-stage check into the args of its subsequent validate-stage check (`extract_imports` -> `check_import_targets_exist`, `extract_markdown_links` -> `check_markdown_link_targets_exist`). Annotation accumulator is per-rule; pass and fail fixtures accumulate independently in the harness. Closes a latent gap where the validate stage always saw an empty annotations dict and silently passed. +- check: Per-agent memory entry locator at `src/reporails_cli/core/discovery/memory_locator.py` — data-driven adapter that enumerates memory entries per agent (claude: `*.md` files inside `~/.claude/projects/*/memory/`, `.claude/agent-memory//`, `.claude/agent-memory-local//`; gemini: `## Gemini Added Memories` section inside `~/.gemini/GEMINI.md`). Returns `MemoryEntry` records with `agent`, `path`, optional `section`, and `body`. Consumed by the L3 memory rules without per-agent branches. 
logger = logging.getLogger(__name__)

# Opening or closing marker of a fenced code block (``` or ~~~, at most three
# leading spaces). Lines between a pair of markers are code, so a `# comment`
# there must not be read as a Markdown heading.
_FENCE_RE = re.compile(r"^ {0,3}(`{3,}|~{3,})")


@dataclass(frozen=True)
class MemoryEntry:
    """One enumerable memory record per agent's memory locator.

    `path` is the file holding the entry (always a real Path). `section`
    is the markdown heading whose body holds the entry (None for
    file_set agents like claude). `body` is the entry text — for
    file_set agents this is the file content; for file_section agents
    this is the section content within the file.
    """

    agent: str
    path: Path
    section: str | None
    body: str


def memory_entries_for_agent(agent: str, project_root: Path) -> list[MemoryEntry]:
    """Enumerate memory entries declared by an agent's config.

    Looks at the `memory` and `subagent_memory` file types of the agent's
    config and collects entries from each one that is declared as a mapping.

    Returns `[]` when the agent has no memory surface OR the surface
    exists but holds no entries. Callers should treat empty lists as
    "nothing to validate", not "agent unknown".
    """
    file_types = load_config_file_types(agent)
    if not file_types:
        return []
    entries: list[MemoryEntry] = []
    for capability in ("memory", "subagent_memory"):
        spec = file_types.get(capability)
        if not isinstance(spec, dict):
            continue
        entries.extend(_entries_from_spec(agent, capability, spec, project_root))
    return entries


def _entries_from_spec(
    agent: str,
    capability: str,
    spec: dict[str, Any],
    project_root: Path,
) -> list[MemoryEntry]:
    """Dispatch on locator type — `file_section` locator vs scope patterns."""
    locator = spec.get("locator")
    if isinstance(locator, dict) and locator.get("type") == "file_section":
        return _entries_from_file_section(agent, locator)
    return _entries_from_directory_globs(agent, capability, spec, project_root)


def _entries_from_file_section(agent: str, locator: dict[str, Any]) -> list[MemoryEntry]:
    """Extract section content from a single file.

    `file` is the path to read (supports `~/` expansion). `section` is
    the literal section heading (e.g. `"## Gemini Added Memories"`).
    Returns a single MemoryEntry with the section body when the section
    exists and is non-empty; otherwise an empty list. A missing or
    unreadable file also yields an empty list (read errors are logged at
    debug level).
    """
    file_str = str(locator.get("file") or "")
    section_str = str(locator.get("section") or "")
    if not file_str or not section_str:
        return []
    path = Path(file_str).expanduser()
    if not path.is_file():
        return []
    try:
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        logger.debug("memory_locator: cannot read %s: %s", path, exc)
        return []
    body = _extract_section(text, section_str)
    if not body.strip():
        return []
    return [MemoryEntry(agent=agent, path=path, section=section_str, body=body)]


def _extract_section(text: str, section_heading: str) -> str:
    """Return the body of `section_heading` — up to the next heading of equal/higher level.

    Treats Markdown ATX headings (`#`, `##`, etc.). Match is line-anchored
    against the literal `section_heading` string; case-sensitive. Returns
    `""` when `section_heading` is not itself an ATX heading or never occurs.

    Lines inside fenced code blocks are never treated as headings: they can
    neither start the section nor end it, so a `# comment` in a code sample
    stays part of the body.
    """
    heading_level = _heading_level(section_heading)
    if heading_level == 0:
        return ""
    target = section_heading.strip()
    in_section = False
    open_fence = ""  # marker of the currently open code fence, "" when outside one
    out: list[str] = []
    for line in text.splitlines(keepends=True):
        fence_match = _FENCE_RE.match(line)
        marker = fence_match.group(1) if fence_match else ""
        if open_fence:
            # A fence closes on a bare marker of the same character that is at
            # least as long as the opening one.
            closes = (
                bool(marker)
                and marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and line.strip() == marker
            )
            if closes:
                open_fence = ""
            if in_section:
                out.append(line)
            continue
        if marker:
            open_fence = marker
            if in_section:
                out.append(line)
            continue
        if not in_section:
            if line.strip() == target:
                in_section = True
            continue
        # Stop at the next equal-or-higher level heading
        next_level = _heading_level(line)
        if 0 < next_level <= heading_level:
            break
        out.append(line)
    return "".join(out)


def _heading_level(line: str) -> int:
    """Return the ATX heading level (1-6) of `line`, or 0 when it is not a heading."""
    match = re.match(r"^(#{1,6})\s+\S", line)
    return len(match.group(1)) if match else 0


def _entries_from_directory_globs(
    agent: str,
    capability: str,
    spec: dict[str, Any],
    project_root: Path,
) -> list[MemoryEntry]:
    """Enumerate memory files matched by the spec's scope patterns.

    Collects every `patterns` list found under `spec["scopes"]` and hands
    them to `agent_discovery.glob_file_type_patterns`, so file enumeration
    is delegated to the same helper the rest of discovery uses. Each
    readable path becomes one MemoryEntry with `section=None`; unreadable
    paths are skipped.
    """
    scopes = spec.get("scopes")
    if not isinstance(scopes, dict):
        return []
    patterns: list[str] = []
    for scope in scopes.values():
        if not isinstance(scope, dict):
            continue
        ps = scope.get("patterns")
        if isinstance(ps, list):
            patterns.extend(str(p) for p in ps)
    if not patterns:
        return []
    # Pass empty properties — directory-glob dispatch in glob_file_type_patterns
    # only needs the patterns themselves for trailing-slash enumeration.
    paths = glob_file_type_patterns(project_root, patterns, properties={})
    entries: list[MemoryEntry] = []
    for path in paths:
        try:
            body = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        entries.append(MemoryEntry(agent=agent, path=path, section=None, body=body))
    # Log capability provenance so debugging can attribute entries to the right surface
    logger.debug("memory_locator: %s/%s -> %d entries", agent, capability, len(entries))
    return entries
Affects claude `memory` and `subagent_memory` (project + local scopes); files under `.claude/agent-memory//` and `.claude/agent-memory-local//` now correctly classify to `subagent_memory`, unblocking `match: {type: memory}` rule routing. - check: `import-targets-resolve` (CORE:S:0024) fixture and rule body switched from incorrect `@import ` syntax (which extracted `@import` as the path) to canonical `@` syntax matching the `@[\w./-]+` regex in `extract_imports`. The pre-existing fixture silently passed because the engine's annotation-threading was broken; both are now correct. +- check: Mapper daemon now stays attached across `ails check` invocations instead of forcing every run to load ML models in-process. Three issues in `core/mapper/daemon.py`: `is_daemon_running` requires the socket file to exist alongside the PID (a stuck `ails check`-turned-daemon used to keep its PID alive indefinitely, fooling every new run into seeing a "running" daemon and falling back); `_become_daemon`'s FD-close loop narrowed to FIFO/pipe FDs via `S_ISFIFO` instead of indiscriminate `range(3, 1024)` — closing all FDs killed numpy / onnxruntime compiled-extension FDs imported pre-fork, breaking the daemon's first `map_ruleset` with `ImportError: import numpy failed`; SIGPIPE set to `SIG_IGN` in `_daemon_main` so a client disconnect mid-response can't terminate the daemon via the default signal handler. Warm `ails check` against a 27-file sample now runs ~5.6 s daemon-attached instead of falling through to ~8-9 s in-process. 
def is_daemon_running() -> bool:
    """Report whether a usable daemon is currently up.

    A daemon counts as running only when the PID recorded in the PID file
    answers a signal-0 probe and the socket file is present. Leftovers from
    a dead or half-started daemon are removed on the way out so the next
    start can fork a fresh one.
    """
    pid_path = _pid_path()
    socket_path = _socket_path()
    if not pid_path.exists():
        return False

    try:
        # Signal 0 performs the existence/permission check without delivering anything.
        os.kill(int(pid_path.read_text().strip()), 0)
    except (ValueError, OSError):
        # Unparseable PID, or the probe failed (ProcessLookupError and
        # PermissionError are both OSError subclasses): PID and socket are stale.
        stale = (pid_path, socket_path)
    else:
        if socket_path.exists():
            return True
        # PID alive but accept loop has shut down (or never bound).
        # Reap so the next start_daemon forks a fresh one.
        stale = (pid_path,)

    for leftover in stale:
        leftover.unlink(missing_ok=True)
    return False
import resource + import stat as _stat max_fd = resource.getrlimit(resource.RLIMIT_NOFILE)[0] import contextlib for fd in range(3, min(max_fd, 1024)): - with contextlib.suppress(OSError): - os.close(fd) + try: + if _stat.S_ISFIFO(os.fstat(fd).st_mode): + with contextlib.suppress(OSError): + os.close(fd) + except OSError: + continue # Redirect std streams to /dev/null devnull = os.open(os.devnull, os.O_RDWR) @@ -259,6 +268,10 @@ def _handle_signal(_signum: int, _frame: object) -> None: signal.signal(signal.SIGTERM, _handle_signal) signal.signal(signal.SIGINT, _handle_signal) + # SIGPIPE on a half-closed client connection must not kill the daemon — + # Python's socket layer raises BrokenPipeError, caught by the per-request + # `except Exception` in the accept loop. Default SIGPIPE handler terminates. + signal.signal(signal.SIGPIPE, signal.SIG_IGN) sock_path = _socket_path() sock_path.unlink(missing_ok=True) From 21d2b8fec23ebae29ecf215e8de2677f138ab49a Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 09:47:09 +0200 Subject: [PATCH 10/30] Widen CORE:S:0024 + CORE:S:0056 match scope to [freeform, frontmatter] Refs #26 --- UNRELEASED.md | 2 + .../rules/core/broken-markdown-link/rule.md | 2 +- .../tests/fail/with-frontmatter.md | 11 +++ .../tests/pass/with-frontmatter.md | 11 +++ .../rules/core/import-targets-resolve/rule.md | 2 +- .../core/discovery/agent_discovery.py | 56 ++++++++++++--- tests/unit/test_symlink_detection.py | 69 +++++++++++++++++++ 7 files changed, 141 insertions(+), 12 deletions(-) create mode 100644 framework/rules/core/broken-markdown-link/tests/fail/with-frontmatter.md create mode 100644 framework/rules/core/broken-markdown-link/tests/pass/with-frontmatter.md diff --git a/UNRELEASED.md b/UNRELEASED.md index 6b7ad1c..3017b98 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -22,5 +22,7 @@ - discovery: Directory-glob patterns (trailing slash) in agent configs now enumerate `*.md` files inside the matched directories. 
Previously `categorize_file_type` bucketed them as `skip`, leaving capability-owned memory files unclassified — the link walker then mis-tagged them `file_type: "generic"`. Affects claude `memory` and `subagent_memory` (project + local scopes); files under `.claude/agent-memory//` and `.claude/agent-memory-local//` now correctly classify to `subagent_memory`, unblocking `match: {type: memory}` rule routing. - check: `import-targets-resolve` (CORE:S:0024) fixture and rule body switched from incorrect `@import ` syntax (which extracted `@import` as the path) to canonical `@` syntax matching the `@[\w./-]+` regex in `extract_imports`. The pre-existing fixture silently passed because the engine's annotation-threading was broken; both are now correct. - check: Mapper daemon now stays attached across `ails check` invocations instead of forcing every run to load ML models in-process. Three issues in `core/mapper/daemon.py`: `is_daemon_running` requires the socket file to exist alongside the PID (a stuck `ails check`-turned-daemon used to keep its PID alive indefinitely, fooling every new run into seeing a "running" daemon and falling back); `_become_daemon`'s FD-close loop narrowed to FIFO/pipe FDs via `S_ISFIFO` instead of indiscriminate `range(3, 1024)` — closing all FDs killed numpy / onnxruntime compiled-extension FDs imported pre-fork, breaking the daemon's first `map_ruleset` with `ImportError: import numpy failed`; SIGPIPE set to `SIG_IGN` in `_daemon_main` so a client disconnect mid-response can't terminate the daemon via the default signal handler. Warm `ails check` against a 27-file sample now runs ~5.6 s daemon-attached instead of falling through to ~8-9 s in-process. +- discovery: `walk_glob` in `core/discovery/agent_discovery.py` now follows symlinked directories during descendant traversal so files inside symlinked subdirs are visible to whole-repo discovery. 
Cycle protection via canonical inode tracking ensures each physical directory is entered at most once. Aligns whole-repo discovery with the `glob.glob(..., recursive=True)` behavior used by per-capability listing. +- rules: `CORE:S:0024 import-targets-resolve` and `CORE:S:0056 broken-markdown-link` now declare `match: {format: [freeform, frontmatter]}` so they fire on SKILL.md / `.claude/agents/*.md` / `.claude/rules/*.md` files. Prior `{format: freeform}` constraint excluded frontmatter-bearing instruction files from import-resolution and broken-link coverage even though the agent schema characterizes those file types as `format: [frontmatter, freeform]`. ### Removed diff --git a/framework/rules/core/broken-markdown-link/rule.md b/framework/rules/core/broken-markdown-link/rule.md index 74d569c..aa27864 100644 --- a/framework/rules/core/broken-markdown-link/rule.md +++ b/framework/rules/core/broken-markdown-link/rule.md @@ -6,7 +6,7 @@ category: structure type: mechanical severity: medium backed_by: [] -match: {format: freeform} +match: {format: [freeform, frontmatter]} --- # Markdown Link Targets Resolve diff --git a/framework/rules/core/broken-markdown-link/tests/fail/with-frontmatter.md b/framework/rules/core/broken-markdown-link/tests/fail/with-frontmatter.md new file mode 100644 index 0000000..ae2011e --- /dev/null +++ b/framework/rules/core/broken-markdown-link/tests/fail/with-frontmatter.md @@ -0,0 +1,11 @@ +--- +title: Skill With Broken Link +description: Fail fixture — frontmatter format with a broken Markdown link target. +--- + +# Skill With Broken Link + +See [Setup Guide](setup.md) for install steps and the +[Deleted Rules](rules/deleted-rule.md) for the QA gate. 
+ +[setup]: setup.md diff --git a/framework/rules/core/broken-markdown-link/tests/pass/with-frontmatter.md b/framework/rules/core/broken-markdown-link/tests/pass/with-frontmatter.md new file mode 100644 index 0000000..4a4125d --- /dev/null +++ b/framework/rules/core/broken-markdown-link/tests/pass/with-frontmatter.md @@ -0,0 +1,11 @@ +--- +title: Skill With Working Links +description: Pass fixture — frontmatter format with resolvable Markdown links. +--- + +# Skill With Working Links + +See [Setup Guide](setup.md) for install steps and the +[Testing Rules](rules/testing.md) for the QA gate. + +[setup]: setup.md diff --git a/framework/rules/core/import-targets-resolve/rule.md b/framework/rules/core/import-targets-resolve/rule.md index 2f12b56..9110ded 100644 --- a/framework/rules/core/import-targets-resolve/rule.md +++ b/framework/rules/core/import-targets-resolve/rule.md @@ -6,7 +6,7 @@ category: structure type: mechanical severity: medium backed_by: [developer-context-cursor-study] -match: {format: freeform} +match: {format: [freeform, frontmatter]} --- # Import Targets Resolve diff --git a/src/reporails_cli/core/discovery/agent_discovery.py b/src/reporails_cli/core/discovery/agent_discovery.py index 0a14f79..36d1941 100644 --- a/src/reporails_cli/core/discovery/agent_discovery.py +++ b/src/reporails_cli/core/discovery/agent_discovery.py @@ -6,6 +6,7 @@ from __future__ import annotations +import contextlib import logging import os from pathlib import Path @@ -80,6 +81,9 @@ def walk_glob(root: Path, filename: str, exclude_dirs: frozenset[str]) -> list[P subtrees during traversal instead of filtering afterwards. Uses os.scandir for efficient directory traversal. + Follows symlinked directories; canonical inode paths in `visited_real` + break cycles so each physical directory is entered at most once. + Match is case-SENSITIVE per agent implementations. 
def _walk_entry_is_file(entry: os.DirEntry[str]) -> bool:
    """Tell whether a scandir entry is a regular file, following symlinks.

    When the check itself raises `OSError` (broken/circular symlink), the
    entry is still reported if it is a symlink, so downstream code can
    surface the error properly.
    """
    try:
        is_regular = entry.is_file(follow_symlinks=True)
    except OSError:
        is_regular = entry.is_symlink()
    return is_regular


def _walk_should_descend(entry: os.DirEntry[str], skip: frozenset[str], visited_real: set[str]) -> bool:
    """Decide whether the walk should push this entry onto its stack.

    Directory symlinks are followed. The canonical (`realpath`) location of
    every accepted directory is recorded in `visited_real`, which is mutated
    in place; a directory whose canonical path is already recorded is
    rejected, which breaks symlink cycles and de-duplicates aliases.
    Entries named in `skip`, non-directories, and entries that raise
    `OSError` while being inspected are rejected.
    """
    if entry.name in skip:
        return False
    try:
        descendable = entry.is_dir(follow_symlinks=True)
        canonical = os.path.realpath(entry.path) if descendable else ""
    except OSError:
        return False
    if not descendable or canonical in visited_real:
        return False
    visited_real.add(canonical)
    return True
None: + """An `a -> b -> a` directory cycle must terminate the walk.""" + from reporails_cli.core.discovery.agent_discovery import walk_glob + + root = tmp_path / "root" + a = root / "a" + b = root / "b" + a.mkdir(parents=True) + b.mkdir() + (a / "SKILL.md").write_text("# A\n") + + # Cycle: a/loop -> b, b/loop -> a + os.symlink(str(b), str(a / "loop")) + os.symlink(str(a), str(b / "loop")) + + results = walk_glob(root, "SKILL.md", frozenset()) + + # Must find the file exactly once despite the cycle; must not hang. + assert len(results) == 1 + assert results[0].name == "SKILL.md" + + @pytest.mark.unit + @pytest.mark.subsys_lint + def test_dedupes_two_symlinks_to_same_target(self, tmp_path: Path) -> None: + """Two surface paths symlinking to the same canonical dir → file + appears once (canonical inode tracked in `visited_real`).""" + from reporails_cli.core.discovery.agent_discovery import walk_glob + + canonical = tmp_path / "canonical" / "shared" + canonical.mkdir(parents=True) + (canonical / "SKILL.md").write_text("# Shared\n") + + project = tmp_path / "project" + project.mkdir() + os.symlink(str(canonical), str(project / "via_a")) + os.symlink(str(canonical), str(project / "via_b")) + + results = walk_glob(project, "SKILL.md", frozenset()) + + assert len(results) == 1 From f0922c528a616cc43cbf947a11e5f8f41453c4aa Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 12:29:32 +0200 Subject: [PATCH 11/30] discovery: Sync per-agent memory file_types with upstream (gemini directory-glob, codex tombstone) Refs #27 --- UNRELEASED.md | 3 ++ framework/rules/claude/config.yml | 4 ++ framework/rules/codex/config.yml | 27 +++++++++++++ framework/rules/gemini/config.yml | 64 +++++++++++++++++++------------ 4 files changed, 74 insertions(+), 24 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index 3017b98..f6f3ddc 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -11,12 +11,15 @@ - rules: `CORE:S:0056 broken-markdown-link` — mechanical rule on freeform 
markdown files. Discovers `[text](path)` + `[ref]: path` link targets in each file via `extract_markdown_links`, validates each resolves relative to the source file's directory via `check_markdown_link_targets_exist`. Skips URLs, `mailto:`, absolute paths, and anchor-only refs (`#frag`). Severity `medium`, sibling shape to `CORE:S:0024 import-targets-resolve`. - check: Mechanical check engine threads `CheckResult.annotations` from a rule's discover-stage check into the args of its subsequent validate-stage check (`extract_imports` -> `check_import_targets_exist`, `extract_markdown_links` -> `check_markdown_link_targets_exist`). Annotation accumulator is per-rule; pass and fail fixtures accumulate independently in the harness. Closes a latent gap where the validate stage always saw an empty annotations dict and silently passed. - check: Per-agent memory entry locator at `src/reporails_cli/core/discovery/memory_locator.py` — data-driven adapter that enumerates memory entries per agent (claude: `*.md` files inside `~/.claude/projects/*/memory/`, `.claude/agent-memory//`, `.claude/agent-memory-local//`; gemini: `## Gemini Added Memories` section inside `~/.gemini/GEMINI.md`). Returns `MemoryEntry` records with `agent`, `path`, optional `section`, and `body`. Consumed by the L3 memory rules without per-agent branches. +- codex: New `memory` file_type declared as a tombstone — `~/.codex/memories/` holds generated state controlled via the `/memories` slash command and `config.toml` keys (`memories.generate_memories`, `memories.use_memories`, `memories.disable_on_external_context`, etc.), not user-authored markdown. No patterns to glob; surfaces in the agent registry but invites no rule pressure. ### Changed - auth: Set explicit `User-Agent: reporails-cli/ (auth)` header on platform and GitHub requests so identifiable CLI traffic can be allow-listed at the edge. 
- check: `[PATH]` positional argument is now `[ARG1] [ARG2]` — `ARG1` is sniffed as a capability keyword first, falling through to existing path semantics. No behaviour change for `ails check`, `ails check .`, or `ails check <path>`. ### Fixed +- gemini: `memory` block replaced the retired `## Gemini Added Memories` in-section locator with the current upstream model — private project memory at `~/.gemini/tmp/*/memory/` (`MEMORY.md` + sibling `*.md` notes), mirroring Claude's directory-glob shape. The legacy section header has 0 occurrences in `google-gemini/gemini-cli` source; the locator was targeting a surface that no longer exists. `memory_locator` enumerates entries through the same directory-glob dispatch Claude uses. +- gemini: All `source:` URLs in the agent config now point to the rendered `geminicli.com` docs site instead of GitHub raw markdown links. 13 file_types updated; no behavior change. - check: Deterministic message text for the broad-scope client check — `client_checks._check_broad_scope` now sorts the matched broad terms before formatting the message, so output is reproducible across runs regardless of `PYTHONHASHSEED`. The set-iteration order previously caused `"Broad terms (any, integrations)"` vs `"Broad terms (integrations, any)"` drift on identical inputs. - discovery: `DetectedFeatures.instruction_file_count` and `has_multiple_instruction_files` no longer include user-scope files like `~/.claude/CLAUDE.md`. The claude `main` file_type declares both project and user scope patterns; counting the user-scope file inflated capability gates in `policy/levels.py` (`multiple_files`, `external_references`) and L-level scoring in `policy/capability.py` for any user with a home-directory `CLAUDE.md`. Counts are now scoped to files under `target`; `_find_root_instruction` was already correctly scoped. - discovery: Directory-glob patterns (trailing slash) in agent configs now enumerate `*.md` files inside the matched directories. 
Previously `categorize_file_type` bucketed them as `skip`, leaving capability-owned memory files unclassified — the link walker then mis-tagged them `file_type: "generic"`. Affects claude `memory` and `subagent_memory` (project + local scopes); files under `.claude/agent-memory//` and `.claude/agent-memory-local//` now correctly classify to `subagent_memory`, unblocking `match: {type: memory}` rule routing. diff --git a/framework/rules/claude/config.yml b/framework/rules/claude/config.yml index 8daa5eb..bcf2263 100644 --- a/framework/rules/claude/config.yml +++ b/framework/rules/claude/config.yml @@ -271,6 +271,9 @@ file_types: maintainer: system memory: + # Memory directory holds MEMORY.md (index, eager) + sibling *.md topic + # files (read on-demand). Directory glob matches both with shared + # `loading: session_start`; per-entry loading split is a future move. source: https://code.claude.com/docs/en/memory#auto-memory format: freeform scope: global @@ -288,6 +291,7 @@ file_types: # Per-subagent persistent memory. Scope chosen via `memory:` frontmatter field # on the agent definition (user|project|local) — mutually exclusive per subagent. # MEMORY.md (first 200 lines / 25KB) injected into subagent system prompt at startup. + # Same MEMORY.md-vs-sibling loading-granularity caveat as `memory` above. source: https://code.claude.com/docs/en/sub-agents#enable-persistent-memory format: freeform scope: task_scoped diff --git a/framework/rules/codex/config.yml b/framework/rules/codex/config.yml index c7a0d30..83f1025 100644 --- a/framework/rules/codex/config.yml +++ b/framework/rules/codex/config.yml @@ -230,6 +230,33 @@ file_types: vcs: external maintainer: human + memory: + # Codex memory (`~/.codex/memories/`) is generated state, not a + # user-authored markdown surface. 
Per the official docs: "Codex stores + # memories under your Codex home directory" and "treats these files as + # generated state" — they're populated in the background and shaped by + # the model, not by the user. + # + # Control plane (not directly inspectable on disk): + # - `/memories` slash command in the TUI/app — per-thread enable for + # reading existing memories and feeding new ones. + # - `config.toml` keys: `memories.generate_memories`, + # `memories.use_memories`, `memories.disable_on_external_context`, + # `memories.min_rate_limit_remaining_percent`, `memories.extract_model`, + # `memories.consolidation_model`. + # + # Declared as a tombstone — no `scopes:` block, no patterns to glob. + # This keeps the surface visible in the agent registry (so audits know + # it exists) while inviting no instruction-quality rule pressure. + # `maintainer: agent` and `lifecycle: mutable` mark it as agent-owned + # cache, not human-authored content. + source: https://developers.openai.com/codex/memories + format: opaque + scope: global + cardinality: collection + lifecycle: mutable + loading: session_start + enterprise: # Admin-enforced requirements.toml. Constrains approval policy, sandbox, # MCP allowlists, hooks, command rules, filesystem permissions. diff --git a/framework/rules/gemini/config.yml b/framework/rules/gemini/config.yml index 04a3d41..62e5aba 100644 --- a/framework/rules/gemini/config.yml +++ b/framework/rules/gemini/config.yml @@ -19,7 +19,7 @@ file_types: # All discovered files concatenated. Subdirectory GEMINI.md is "Just-In-Time" # loaded on-demand (`nested_context`). User-scope file at ~/.gemini/GEMINI.md # also doubles as the memory surface (agent appends to ## Gemini Added Memories). 
- source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/gemini-md.md + source: https://geminicli.com/docs/cli/gemini-md/ required: true format: freeform scope: global @@ -43,7 +43,7 @@ file_types: # Subdirectory GEMINI.md files — Just-In-Time loaded when Gemini accesses # files in those directories. scope: nested captures the location-based # subtree applicability without overloading other scope values. - source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/gemini-md.md + source: https://geminicli.com/docs/cli/gemini-md/ format: freeform scope: nested cardinality: hierarchical @@ -58,7 +58,7 @@ file_types: cross_read: # Cross-agent files (AGENTS.md, CONTEXT.md) when context.fileName is configured. - source: https://github.com/google-gemini/gemini-cli/blob/main/docs/reference/configuration.md + source: https://geminicli.com/docs/reference/configuration/ format: freeform scope: global cardinality: chain @@ -73,7 +73,7 @@ file_types: skills: # `.agents/skills/` is the cross-agent alias supported by Gemini. - source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/skills.md + source: https://geminicli.com/docs/cli/skills/ format: [frontmatter, freeform] scope: task_scoped cardinality: hierarchical @@ -94,7 +94,7 @@ file_types: agents: # Subagents — built-ins: codebase_investigator, cli_help, generalist_agent, # browser_agent (experimental). Custom agents invoked via @name syntax. 
- source: https://github.com/google-gemini/gemini-cli/blob/main/docs/core/subagents.md + source: https://geminicli.com/docs/core/subagents/ format: [frontmatter, freeform] scope: task_scoped cardinality: collection @@ -114,7 +114,7 @@ file_types: commands: # TOML format with prompt + description, supports {{args}}, !{shell}, @{file} - source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/custom-commands.md + source: https://geminicli.com/docs/cli/custom-commands/ format: schema_validated scope: task_scoped cardinality: collection @@ -134,7 +134,7 @@ file_types: hooks: # 11 events configured under hooks key in settings.json. - source: https://github.com/google-gemini/gemini-cli/blob/main/docs/hooks/index.md + source: https://geminicli.com/docs/hooks/ format: schema_validated scope: global cardinality: collection @@ -154,7 +154,7 @@ file_types: config: # Gemini supports both JSON and TOML settings files. - source: https://github.com/google-gemini/gemini-cli/blob/main/docs/reference/configuration.md + source: https://geminicli.com/docs/reference/configuration/ format: schema_validated scope: global cardinality: singleton @@ -189,7 +189,7 @@ file_types: mcp: # MCP servers under mcpServers key in settings.json/settings.toml. 
- source: https://github.com/google-gemini/gemini-cli/blob/main/docs/reference/configuration.md + source: https://geminicli.com/docs/reference/configuration/ format: schema_validated scope: global cardinality: collection @@ -209,7 +209,7 @@ file_types: extensions: # Plugin system: bundles MCP servers, commands, skills, hooks, themes - source: https://github.com/google-gemini/gemini-cli/blob/main/docs/extensions/writing-extensions.md + source: https://geminicli.com/docs/extensions/writing-extensions/ format: schema_validated scope: global cardinality: collection @@ -223,7 +223,7 @@ file_types: maintainer: human geminiignore: - source: https://github.com/google-gemini/gemini-cli + source: https://geminicli.com/docs/cli/gemini-ignore/ format: freeform scope: global cardinality: singleton @@ -238,7 +238,7 @@ file_types: system_prompt: # Replace default system prompt entirely - source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/system-prompt.md + source: https://geminicli.com/docs/cli/system-prompt/ format: freeform scope: global cardinality: singleton @@ -252,23 +252,39 @@ file_types: maintainer: human memory: - # Gemini's persistent memory is a SECTION inside ~/.gemini/GEMINI.md, - # not a directory of files. The agent appends saved memories to - # `## Gemini Added Memories`. Per-agent memory_locator (REQ-056 Phase - # 3a) reads this `locator:` block to enumerate memory entries as - # virtual records, distinct from claude's file_set shape. No `scopes:` - # block — discovery skips it gracefully (no patterns to glob); the - # memory_locator module is the sole consumer. - source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/gemini-md.md + # Gemini's memory model is a four-tier hierarchy (see snippets.ts + # `renderOperationalGuidelines` in google-gemini/gemini-cli): + # + # 1. Project Instructions — ./GEMINI.md (this config: `main`, scopes.project) + # 2. Subdir Instructions — ./**/GEMINI.md (this config: `nested_context`) + # 3. 
Private Project Memory — ~/.gemini/tmp//memory/MEMORY.md + # + sibling *.md notes (THIS block) + # 4. Global Personal Memory — ~/.gemini/GEMINI.md (this config: `main`, scopes.user) + # + # Only tier 3 — the private project memory directory — lives under + # `memory`. The other tiers are GEMINI.md context files modeled by + # `main` and `nested_context`. The legacy "Gemini Added Memories" + # section model inside ~/.gemini/GEMINI.md has been retired upstream + # (0 hits in the gemini-cli source for that string) and is no longer + # the memory write target. + # + # The directory glob mirrors Claude's shape — MEMORY.md is the index + # and sibling *.md files are entries. `memory_locator` enumerates them + # via the standard directory-glob dispatch. + # See also https://geminicli.com/docs/reference/memport/ for the @-import + # syntax used to compose GEMINI.md files. + source: https://geminicli.com/docs/cli/tutorials/memory-management/ format: freeform scope: global cardinality: collection lifecycle: mutable loading: session_start - locator: - type: file_section - file: ~/.gemini/GEMINI.md - section: "## Gemini Added Memories" + scopes: + user_project: + patterns: ["~/.gemini/tmp/*/memory/"] + precedence: user + vcs: external + maintainer: hybrid excludes: - CLAUDE:* From 9bf1ddc6606cba36f03183bbbdda5be292d3f2b5 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 14:59:54 +0200 Subject: [PATCH 12/30] classify: attribute link-reached generic files with source verb + linker surface Refs #27 --- UNRELEASED.md | 1 + src/reporails_cli/core/classify/__init__.py | 24 ++- .../core/classify/generic_type.py | 65 +++++--- .../core/classify/link_walker.py | 153 +++++++++++------- src/reporails_cli/core/platform/dto/models.py | 2 + src/reporails_cli/interfaces/cli/main.py | 27 +++- .../link-source-attribution/cycle/CLAUDE.md | 3 + .../link-source-attribution/cycle/a.md | 3 + .../link-source-attribution/cycle/b.md | 3 + .../import-vs-link/CLAUDE.md | 5 + 
.../import-vs-link/b.md | 3 + .../import-vs-link/c.md | 3 + .../main-link/CLAUDE.md | 3 + .../main-link/README.md | 3 + .../.claude/agent-memory/myagent/MEMORY.md | 3 + .../memory-link/notes.md | 3 + .../multi-source/.claude/skills/foo/SKILL.md | 8 + .../multi-source/CLAUDE.md | 3 + .../multi-source/shared.md | 3 + .../skill-link/.claude/skills/foo/SKILL.md | 8 + .../.claude/skills/foo/architecture.md | 3 + .../test_classify_link_attribution.py | 108 +++++++++++++ tests/unit/test_link_walker.py | 34 ++-- tests/unit/test_link_walker_attribution.py | 135 ++++++++++++++++ 24 files changed, 500 insertions(+), 106 deletions(-) create mode 100644 tests/fixtures/link-source-attribution/cycle/CLAUDE.md create mode 100644 tests/fixtures/link-source-attribution/cycle/a.md create mode 100644 tests/fixtures/link-source-attribution/cycle/b.md create mode 100644 tests/fixtures/link-source-attribution/import-vs-link/CLAUDE.md create mode 100644 tests/fixtures/link-source-attribution/import-vs-link/b.md create mode 100644 tests/fixtures/link-source-attribution/import-vs-link/c.md create mode 100644 tests/fixtures/link-source-attribution/main-link/CLAUDE.md create mode 100644 tests/fixtures/link-source-attribution/main-link/README.md create mode 100644 tests/fixtures/link-source-attribution/memory-link/.claude/agent-memory/myagent/MEMORY.md create mode 100644 tests/fixtures/link-source-attribution/memory-link/notes.md create mode 100644 tests/fixtures/link-source-attribution/multi-source/.claude/skills/foo/SKILL.md create mode 100644 tests/fixtures/link-source-attribution/multi-source/CLAUDE.md create mode 100644 tests/fixtures/link-source-attribution/multi-source/shared.md create mode 100644 tests/fixtures/link-source-attribution/skill-link/.claude/skills/foo/SKILL.md create mode 100644 tests/fixtures/link-source-attribution/skill-link/.claude/skills/foo/architecture.md create mode 100644 tests/integration/test_classify_link_attribution.py create mode 100644 
tests/unit/test_link_walker_attribution.py diff --git a/UNRELEASED.md b/UNRELEASED.md index f6f3ddc..c51d932 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -11,6 +11,7 @@ - rules: `CORE:S:0056 broken-markdown-link` — mechanical rule on freeform markdown files. Discovers `[text](path)` + `[ref]: path` link targets in each file via `extract_markdown_links`, validates each resolves relative to the source file's directory via `check_markdown_link_targets_exist`. Skips URLs, `mailto:`, absolute paths, and anchor-only refs (`#frag`). Severity `medium`, sibling shape to `CORE:S:0024 import-targets-resolve`. - check: Mechanical check engine threads `CheckResult.annotations` from a rule's discover-stage check into the args of its subsequent validate-stage check (`extract_imports` -> `check_import_targets_exist`, `extract_markdown_links` -> `check_markdown_link_targets_exist`). Annotation accumulator is per-rule; pass and fail fixtures accumulate independently in the harness. Closes a latent gap where the validate stage always saw an empty annotations dict and silently passed. - check: Per-agent memory entry locator at `src/reporails_cli/core/discovery/memory_locator.py` — data-driven adapter that enumerates memory entries per agent (claude: `*.md` files inside `~/.claude/projects/*/memory/`, `.claude/agent-memory//`, `.claude/agent-memory-local//`; gemini: `## Gemini Added Memories` section inside `~/.gemini/GEMINI.md`). Returns `MemoryEntry` records with `agent`, `path`, optional `section`, and `body`. Consumed by the L3 memory rules without per-agent branches. +- classify: Link-reached generic files now record their source attribution on `ClassifiedFile.properties` — `loading_verb` ({read, imported, auto_loaded, invoked}), `link_source_type` (the linking file's `file_type` — main, rule, skill, agent, memory, subagent_memory, nested_context), `link_source_path` (project-relative paths of the linking files), and `link_depth` (1-3 from the instruction-file seed). 
`FileMatch` gains matching `loading_verb` and `link_source_type` fields for rule routing. Rule applicability for generic files is unchanged in this release. - codex: New `memory` file_type declared as a tombstone — `~/.codex/memories/` holds generated state controlled via the `/memories` slash command and `config.toml` keys (`memories.generate_memories`, `memories.use_memories`, `memories.disable_on_external_context`, etc.), not user-authored markdown. No patterns to glob; surfaces in the agent registry but invites no rule pressure. ### Changed diff --git a/src/reporails_cli/core/classify/__init__.py b/src/reporails_cli/core/classify/__init__.py index 10d02ce..4878366 100644 --- a/src/reporails_cli/core/classify/__init__.py +++ b/src/reporails_cli/core/classify/__init__.py @@ -449,17 +449,29 @@ def _classify_generic_via_links( scan_root: Path, classified: list[ClassifiedFile], ) -> list[ClassifiedFile]: - """BFS Markdown links from classified files; classify reachable `.md` as `generic`. + """BFS Markdown links + `@` imports from classified files. + + Reachable `.md` files are classified as `file_type: generic` with + edge-attribution properties — `link_source_type`, `link_source_path`, + `link_depth`, `loading_verb` — set from the incoming `LinkEdge` set. Lazy-imported to avoid pulling the walker module when generic scanning is off (the default). 
""" from reporails_cli.core.classify.generic_type import make_generic_classified - from reporails_cli.core.classify.link_walker import walk_markdown_links + from reporails_cli.core.classify.link_walker import LinkEdge, walk_markdown_links + + seed_map: dict[Path, str] = {cf.path: cf.file_type for cf in classified} + classified_paths = set(seed_map.keys()) + edges = walk_markdown_links(seed_map, scan_root, classified_paths) + + by_target: dict[Path, list[LinkEdge]] = {} + for edge in edges: + by_target.setdefault(edge.target, []).append(edge) - start_paths = {cf.path for cf in classified} - reached = walk_markdown_links(start_paths, scan_root, start_paths) - return [make_generic_classified(p) for p in sorted(reached)] + return [ + make_generic_classified(target, target_edges, scan_root) for target, target_edges in sorted(by_target.items()) + ] def match_files( @@ -541,6 +553,8 @@ def file_matches(cf: ClassifiedFile, match: FileMatch) -> bool: "vcs", "loading", "precedence", + "loading_verb", + "link_source_type", ): if not _prop_matches(getattr(match, prop), cf.properties.get(prop)): return False diff --git a/src/reporails_cli/core/classify/generic_type.py b/src/reporails_cli/core/classify/generic_type.py index 4e22b97..c9b67c0 100644 --- a/src/reporails_cli/core/classify/generic_type.py +++ b/src/reporails_cli/core/classify/generic_type.py @@ -1,37 +1,56 @@ -"""Synthesizer for the `generic` file class — link-reached files (REQ-025 Phase C). - -The `generic` class is not declared in any agent config (it's not -agent-specific). When `generic_scanning: true` is set, the classifier -walks Markdown links from already-classified files and assigns `generic` -to the reached `.md` files. This module supplies the synthetic -`FileTypeDeclaration` and the `ClassifiedFile` constructor for those -hits. 
- -`loading: on_demand` is the load mode: linked files are not always-in-context -(link presence does not imply the agent eagerly loads them), so they -default out of `base` cross-file analysis. Operators that want a linked -file treated as base context can override per-project. -""" +"""Synthesizer for the `generic` file class (link-reached files).""" from __future__ import annotations from pathlib import Path +from reporails_cli.core.classify.link_walker import LinkEdge from reporails_cli.core.platform.dto.models import ClassifiedFile GENERIC_TYPE_NAME = "generic" +# Source surfaces whose files load eagerly when in context. A generic +# file pointed at by any of these inherits `loading: session_start`. +_EAGER_SOURCES: frozenset[str] = frozenset({"main", "memory", "subagent_memory"}) + + +def make_generic_classified( + path: Path, + edges: list[LinkEdge], + project_root: Path | None = None, +) -> ClassifiedFile: + """Build a `ClassifiedFile` for `path` with `link_*` properties aggregated from `edges`.""" + source_types = sorted({edge.source_type for edge in edges}) + source_paths = sorted({_rel_or_str(edge.source, project_root) for edge in edges}) + verbs = sorted({edge.verb for edge in edges}) + min_depth = min((edge.depth for edge in edges), default=0) + + loading = "session_start" if any(st in _EAGER_SOURCES for st in source_types) else "on_demand" + + properties: dict[str, str | list[str]] = { + "format": "freeform", + "scope": "path_scoped", + "loading": loading, + "lifecycle": "static", + "maintainer": "human", + "link_source_type": source_types, + "link_source_path": source_paths, + "link_depth": str(min_depth), + "loading_verb": verbs, + } -def make_generic_classified(path: Path) -> ClassifiedFile: - """Return a `ClassifiedFile` with `file_type: generic` and on-demand loading.""" return ClassifiedFile( path=path, file_type=GENERIC_TYPE_NAME, - properties={ - "format": "freeform", - "scope": "path_scoped", - "loading": "on_demand", - "lifecycle": "static", 
- "maintainer": "human", - }, + properties=properties, ) + + +def _rel_or_str(source: Path, project_root: Path | None) -> str: + """Project-relative POSIX string when possible, absolute POSIX otherwise.""" + if project_root is not None: + try: + return source.relative_to(project_root.resolve()).as_posix() + except ValueError: + pass + return source.as_posix() diff --git a/src/reporails_cli/core/classify/link_walker.py b/src/reporails_cli/core/classify/link_walker.py index e71751c..e6caaa2 100644 --- a/src/reporails_cli/core/classify/link_walker.py +++ b/src/reporails_cli/core/classify/link_walker.py @@ -1,24 +1,10 @@ -"""Markdown link-reachability walker for the `generic` file class. - -REQ-025 Phase C: when `generic_scanning: true` is set in `.ails/config.yml`, -the classifier extends its file-type assignment by BFS-walking outgoing -Markdown links from each classified instruction file. Files reached -transitively that live in the project tree but aren't already classified -get `file_type: "generic"`. This catches carryovers, ADRs, sys/ docs, -knowledge docs, learning entries, and per-agent memory entries that an -agent reads as instruction input but that don't have their own canonical -capability path. - -The walker is agent-agnostic by construction — it doesn't read agent -configs or hardcode per-agent paths. Anything an existing classified file -points at via a relative `[text](path.md)` or reference-style link is in -scope; anything outside the project tree is skipped. -""" +"""Markdown link-reachability walker for the `generic` file class.""" from __future__ import annotations import logging import re +from dataclasses import dataclass from pathlib import Path logger = logging.getLogger(__name__) @@ -30,80 +16,127 @@ # Reference-definition `[ref]: path` — used to back reference-style links. 
_REF_DEFINITION_RE = re.compile(r"^\s*\[(?:[^\]]+)\]:\s*(\S+)", re.MULTILINE) +# `@` inline include — mirrors the pattern in +# `core/lint/mechanical/checks_advanced.py:extract_imports`/`import_depth`. +# Capture the path without the leading `@`. +_IMPORT_RE = re.compile(r"@([\w./-]+)") + + +@dataclass(frozen=True) +class LinkEdge: + """One `(source, target)` link emitted by `walk_markdown_links`.""" + + target: Path + source: Path + source_type: str + depth: int + verb: str + def walk_markdown_links( - start_paths: set[Path], + start_paths: dict[Path, str], project_root: Path, classified_paths: set[Path], max_depth: int = 3, -) -> set[Path]: - """BFS outgoing Markdown links from `start_paths`; return newly reached `.md` paths. - - Files reachable from `start_paths` that: - - live inside `project_root`, - - have a `.md` suffix, - - are not already in `classified_paths`, - - haven't been visited yet, - are returned. The walk is bounded by `max_depth` link hops. - - Cycle-safe via `visited` set; out-of-tree links are silently skipped. - """ - visited: set[Path] = {p.resolve() for p in start_paths if p.exists()} +) -> list[LinkEdge]: + """BFS outgoing Markdown links + `@` imports from `start_paths`; emit one edge per `(source, target)`.""" classified_resolved = {p.resolve() for p in classified_paths} project_root_resolved = project_root.resolve() - frontier: list[tuple[Path, int]] = [(p, 0) for p in start_paths if p.exists()] - found: set[Path] = set() + seed_resolved: dict[Path, str] = {p.resolve(): ft for p, ft in start_paths.items() if p.exists()} + visited: set[Path] = set(seed_resolved.keys()) + + # Frontier: (resolved_path, depth_already_taken, source_type) + # `depth_already_taken` is the depth at which this node was reached; + # outgoing edges from this node land at depth+1. 
+ frontier: list[tuple[Path, int, str]] = [(resolved, 0, ft) for resolved, ft in seed_resolved.items()] + + # Per-target edges: keyed by (source, target, verb) so each distinct + # (linking file, target, loading verb) tuple contributes one edge — + # a file both linked AND imported by the same source yields two edges. + edges: dict[tuple[Path, Path, str], LinkEdge] = {} while frontier: - current, depth = frontier.pop(0) + current, depth, source_type = frontier.pop(0) if depth >= max_depth: continue - for linked in _outgoing_md_links(current): + next_depth = depth + 1 + for linked, verb in _outgoing_links(current): resolved = linked.resolve() - if resolved in visited: - continue - visited.add(resolved) - if resolved in classified_resolved: - continue if not _is_in_tree(resolved, project_root_resolved): continue if not resolved.is_file(): continue - found.add(resolved) - frontier.append((resolved, depth + 1)) - - return found + if resolved in classified_resolved: + continue + key = (current, resolved, verb) + if key not in edges: + edges[key] = LinkEdge( + target=resolved, + source=current, + source_type=source_type, + depth=next_depth, + verb=verb, + ) + if resolved in visited: + continue + visited.add(resolved) + # The reached file becomes a new frontier node; it carries + # `file_type: "generic"` as the source_type for any links it + # emits onward — once outside the seeded surface set, every + # downstream reach is from a generic file. + frontier.append((resolved, next_depth, "generic")) + return list(edges.values()) -def _outgoing_md_links(file_path: Path) -> list[Path]: - """Extract relative `.md` link targets from `file_path`. - Returns absolute paths (file_path's directory joined with the link - target). Filters HTTP(s) URLs, anchor-only refs, and non-`.md` links. 
- """ +def _outgoing_links(file_path: Path) -> list[tuple[Path, str]]: + """Extract `(target_path, verb)` pairs for `.md` links and `@` imports in `file_path`.""" try: text = file_path.read_text(encoding="utf-8", errors="replace") except OSError as exc: logger.debug("link_walker: cannot read %s: %s", file_path, exc) return [] - targets: list[str] = [m.group(1).strip() for m in _INLINE_LINK_RE.finditer(text)] - targets.extend(m.group(1).strip() for m in _REF_DEFINITION_RE.finditer(text)) - - out: list[Path] = [] base_dir = file_path.parent - for target in targets: - cleaned = _strip_anchor(target) - if not cleaned or _looks_like_url(cleaned): - continue - if not cleaned.endswith(".md"): - continue - resolved = (base_dir / cleaned).resolve() - out.append(resolved) + out: list[tuple[Path, str]] = [] + + for match in _INLINE_LINK_RE.finditer(text): + target = match.group(1).strip() + resolved = _resolve_md_target(base_dir, target) + if resolved is not None: + out.append((resolved, "read")) + + for match in _REF_DEFINITION_RE.finditer(text): + target = match.group(1).strip() + resolved = _resolve_md_target(base_dir, target) + if resolved is not None: + out.append((resolved, "read")) + + for match in _IMPORT_RE.finditer(text): + target = match.group(1).strip() + resolved = _resolve_md_target(base_dir, target) + if resolved is not None: + out.append((resolved, "imported")) + return out +def _outgoing_md_links(file_path: Path) -> list[Path]: + """Return just the `.md` target paths from `_outgoing_links` (back-compat shim).""" + return [target for target, _verb in _outgoing_links(file_path)] + + +def _resolve_md_target(base_dir: Path, target: str) -> Path | None: + """Resolve a raw link target to a `.md` Path, or None if not eligible.""" + cleaned = _strip_anchor(target) + if not cleaned or _looks_like_url(cleaned): + return None + if not cleaned.endswith(".md"): + return None + return (base_dir / cleaned).resolve() + + def _strip_anchor(target: str) -> str: """Drop 
trailing `#anchor` and surrounding whitespace from a link target.""" if "#" in target: diff --git a/src/reporails_cli/core/platform/dto/models.py b/src/reporails_cli/core/platform/dto/models.py index 7eec149..d108969 100644 --- a/src/reporails_cli/core/platform/dto/models.py +++ b/src/reporails_cli/core/platform/dto/models.py @@ -140,6 +140,8 @@ class FileMatch(BaseModel): vcs: list[str] | str | None = None loading: list[str] | str | None = None precedence: list[str] | str | None = None + loading_verb: list[str] | str | None = None + link_source_type: list[str] | str | None = None class Check(BaseModel): diff --git a/src/reporails_cli/interfaces/cli/main.py b/src/reporails_cli/interfaces/cli/main.py index d513735..399d9b2 100644 --- a/src/reporails_cli/interfaces/cli/main.py +++ b/src/reporails_cli/interfaces/cli/main.py @@ -49,7 +49,18 @@ def _serialize_match(match: FileMatch | None) -> dict[str, object]: result: dict[str, object] = {} if match.type is not None: result["type"] = match.type - for prop in ("format", "scope", "cardinality", "lifecycle", "maintainer", "vcs", "loading", "precedence"): + for prop in ( + "format", + "scope", + "cardinality", + "lifecycle", + "maintainer", + "vcs", + "loading", + "precedence", + "loading_verb", + "link_source_type", + ): val = getattr(match, prop) if val is not None: result[prop] = val @@ -66,9 +77,19 @@ def _explain_rules_paths(rules: list[str] | None) -> list[Path] | None: @app.command(rich_help_panel="Commands") def check( # noqa: C901 # pylint: disable=too-many-locals arg1: str = typer.Argument( - ".", help="File/directory to validate, OR a capability keyword (skill, rule, agents, main, ...)" + ".", + help=( + "Capability keyword (memory, skill, rule, agent, main, nested_context, ...) — " + "vocabulary comes from the detected agent's config.yml `file_types:`. " + "Falls back to file/directory path for legacy invocations. Defaults to whole-project scan." 
+ ), + ), + arg2: str = typer.Argument( + None, + help=( + "Target name when arg1 is a capability — e.g. `ails check skill backlog`, `ails check agent docs-auditor`." + ), ), - arg2: str = typer.Argument(None, help="Capability target name when arg1 is a capability keyword"), format: str = typer.Option(None, "--format", "-f", help="Output format: text, json, github"), agent: str = typer.Option("", "--agent", help="Agent type (e.g., claude, copilot)"), exclude_dirs: list[str] = typer.Option(None, "--exclude-dirs", help="Directories to exclude"), # noqa: B008 diff --git a/tests/fixtures/link-source-attribution/cycle/CLAUDE.md b/tests/fixtures/link-source-attribution/cycle/CLAUDE.md new file mode 100644 index 0000000..2ff968e --- /dev/null +++ b/tests/fixtures/link-source-attribution/cycle/CLAUDE.md @@ -0,0 +1,3 @@ +# Main + +Entry to cycle: [a](a.md). diff --git a/tests/fixtures/link-source-attribution/cycle/a.md b/tests/fixtures/link-source-attribution/cycle/a.md new file mode 100644 index 0000000..cce92a1 --- /dev/null +++ b/tests/fixtures/link-source-attribution/cycle/a.md @@ -0,0 +1,3 @@ +# A + +Links to [b](b.md). diff --git a/tests/fixtures/link-source-attribution/cycle/b.md b/tests/fixtures/link-source-attribution/cycle/b.md new file mode 100644 index 0000000..bc84f80 --- /dev/null +++ b/tests/fixtures/link-source-attribution/cycle/b.md @@ -0,0 +1,3 @@ +# B + +Links back to [a](a.md). diff --git a/tests/fixtures/link-source-attribution/import-vs-link/CLAUDE.md b/tests/fixtures/link-source-attribution/import-vs-link/CLAUDE.md new file mode 100644 index 0000000..25328ea --- /dev/null +++ b/tests/fixtures/link-source-attribution/import-vs-link/CLAUDE.md @@ -0,0 +1,5 @@ +# Main + +Imports: @b.md + +See also [c](c.md). 
diff --git a/tests/fixtures/link-source-attribution/import-vs-link/b.md b/tests/fixtures/link-source-attribution/import-vs-link/b.md new file mode 100644 index 0000000..d0d72d3 --- /dev/null +++ b/tests/fixtures/link-source-attribution/import-vs-link/b.md @@ -0,0 +1,3 @@ +# B + +Reached via `@b.md` import. diff --git a/tests/fixtures/link-source-attribution/import-vs-link/c.md b/tests/fixtures/link-source-attribution/import-vs-link/c.md new file mode 100644 index 0000000..ed42ea9 --- /dev/null +++ b/tests/fixtures/link-source-attribution/import-vs-link/c.md @@ -0,0 +1,3 @@ +# C + +Reached via `[c](c.md)` markdown link. diff --git a/tests/fixtures/link-source-attribution/main-link/CLAUDE.md b/tests/fixtures/link-source-attribution/main-link/CLAUDE.md new file mode 100644 index 0000000..69c77df --- /dev/null +++ b/tests/fixtures/link-source-attribution/main-link/CLAUDE.md @@ -0,0 +1,3 @@ +# Main + +See [the readme](README.md) for project layout. diff --git a/tests/fixtures/link-source-attribution/main-link/README.md b/tests/fixtures/link-source-attribution/main-link/README.md new file mode 100644 index 0000000..847df71 --- /dev/null +++ b/tests/fixtures/link-source-attribution/main-link/README.md @@ -0,0 +1,3 @@ +# Readme + +Project overview content reached from CLAUDE.md. diff --git a/tests/fixtures/link-source-attribution/memory-link/.claude/agent-memory/myagent/MEMORY.md b/tests/fixtures/link-source-attribution/memory-link/.claude/agent-memory/myagent/MEMORY.md new file mode 100644 index 0000000..053dfad --- /dev/null +++ b/tests/fixtures/link-source-attribution/memory-link/.claude/agent-memory/myagent/MEMORY.md @@ -0,0 +1,3 @@ +# Memory + +See [external notes](../../../notes.md) — carried across sessions. 
diff --git a/tests/fixtures/link-source-attribution/memory-link/notes.md b/tests/fixtures/link-source-attribution/memory-link/notes.md new file mode 100644 index 0000000..4fbee98 --- /dev/null +++ b/tests/fixtures/link-source-attribution/memory-link/notes.md @@ -0,0 +1,3 @@ +# Notes + +Memory-linked notes content (lives outside the agent-memory dir). diff --git a/tests/fixtures/link-source-attribution/multi-source/.claude/skills/foo/SKILL.md b/tests/fixtures/link-source-attribution/multi-source/.claude/skills/foo/SKILL.md new file mode 100644 index 0000000..f65c921 --- /dev/null +++ b/tests/fixtures/link-source-attribution/multi-source/.claude/skills/foo/SKILL.md @@ -0,0 +1,8 @@ +--- +name: foo +description: Sample skill for multi-source fixture +--- + +# Foo + +Also reads [the shared doc](../../../shared.md). diff --git a/tests/fixtures/link-source-attribution/multi-source/CLAUDE.md b/tests/fixtures/link-source-attribution/multi-source/CLAUDE.md new file mode 100644 index 0000000..ac32f81 --- /dev/null +++ b/tests/fixtures/link-source-attribution/multi-source/CLAUDE.md @@ -0,0 +1,3 @@ +# Main + +Read [the shared doc](shared.md). diff --git a/tests/fixtures/link-source-attribution/multi-source/shared.md b/tests/fixtures/link-source-attribution/multi-source/shared.md new file mode 100644 index 0000000..e33761a --- /dev/null +++ b/tests/fixtures/link-source-attribution/multi-source/shared.md @@ -0,0 +1,3 @@ +# Shared + +Reached from both main and the skill. diff --git a/tests/fixtures/link-source-attribution/skill-link/.claude/skills/foo/SKILL.md b/tests/fixtures/link-source-attribution/skill-link/.claude/skills/foo/SKILL.md new file mode 100644 index 0000000..e8b5014 --- /dev/null +++ b/tests/fixtures/link-source-attribution/skill-link/.claude/skills/foo/SKILL.md @@ -0,0 +1,8 @@ +--- +name: foo +description: Sample skill for link-source-attribution fixture +--- + +# Foo + +Read [architecture](architecture.md) before invoking. 
diff --git a/tests/fixtures/link-source-attribution/skill-link/.claude/skills/foo/architecture.md b/tests/fixtures/link-source-attribution/skill-link/.claude/skills/foo/architecture.md new file mode 100644 index 0000000..4e41f70 --- /dev/null +++ b/tests/fixtures/link-source-attribution/skill-link/.claude/skills/foo/architecture.md @@ -0,0 +1,3 @@ +# Architecture + +Skill-scoped architecture notes reached from SKILL.md. diff --git a/tests/integration/test_classify_link_attribution.py b/tests/integration/test_classify_link_attribution.py new file mode 100644 index 0000000..9e364b1 --- /dev/null +++ b/tests/integration/test_classify_link_attribution.py @@ -0,0 +1,108 @@ +"""Integration coverage for link-source attribution on generic-classified files.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from reporails_cli.core.classify import classify_files, load_file_types +from reporails_cli.core.platform.dto.models import ClassifiedFile + +FIXTURE_ROOT = Path(__file__).resolve().parent.parent / "fixtures" / "link-source-attribution" + + +def _walk_md_files(root: Path) -> list[Path]: + return sorted(p for p in root.rglob("*.md")) + + +def _classify(case: str) -> tuple[Path, list[ClassifiedFile]]: + scan_root = FIXTURE_ROOT / case + files = _walk_md_files(scan_root) + file_types = load_file_types("claude") + classified = classify_files(scan_root, files, file_types, generic_scanning=True) + return scan_root, classified + + +def _generic_by_name(classified: list[ClassifiedFile], name: str) -> ClassifiedFile: + matches = [cf for cf in classified if cf.path.name == name and cf.file_type == "generic"] + assert matches, f"expected one generic-classified file named {name!r}, got none" + return matches[0] + + +@pytest.mark.integration +@pytest.mark.subsys_classify +def test_main_link_attribution() -> None: + """CLAUDE.md -> README.md emits a `main` link_source_type.""" + _root, classified = _classify("main-link") + readme = 
_generic_by_name(classified, "README.md") + assert readme.properties.get("link_source_type") == ["main"] + assert readme.properties.get("loading_verb") == ["read"] + assert readme.properties.get("link_depth") == "1" + sources = readme.properties.get("link_source_path") + assert isinstance(sources, list) and sources == ["CLAUDE.md"] + # main is an eager surface -> linked file inherits session_start. + assert readme.properties.get("loading") == "session_start" + + +@pytest.mark.integration +@pytest.mark.subsys_classify +def test_skill_link_attribution() -> None: + """SKILL.md -> architecture.md emits a `skill` link_source_type.""" + _root, classified = _classify("skill-link") + arch = _generic_by_name(classified, "architecture.md") + assert arch.properties.get("link_source_type") == ["skill"] + assert arch.properties.get("loading_verb") == ["read"] + assert arch.properties.get("link_depth") == "1" + # skill is not in the eager source set -> on_demand. + assert arch.properties.get("loading") == "on_demand" + + +@pytest.mark.integration +@pytest.mark.subsys_classify +def test_memory_link_attribution() -> None: + """subagent_memory MEMORY.md -> notes.md emits a `subagent_memory` source.""" + _root, classified = _classify("memory-link") + notes = _generic_by_name(classified, "notes.md") + assert notes.properties.get("link_source_type") == ["subagent_memory"] + assert notes.properties.get("loading_verb") == ["read"] + # subagent_memory is in the eager source set -> session_start. 
+ assert notes.properties.get("loading") == "session_start" + + +@pytest.mark.integration +@pytest.mark.subsys_classify +def test_multi_source_merges_attribution() -> None: + """Main + skill both link to shared.md -> list contains both surface types.""" + _root, classified = _classify("multi-source") + shared = _generic_by_name(classified, "shared.md") + assert shared.properties.get("link_source_type") == ["main", "skill"] + sources = shared.properties.get("link_source_path") + assert isinstance(sources, list) and "CLAUDE.md" in sources and any("SKILL.md" in s for s in sources) + # main is eager -> session_start dominates the derivation. + assert shared.properties.get("loading") == "session_start" + + +@pytest.mark.integration +@pytest.mark.subsys_classify +def test_cycle_does_not_hang() -> None: + """CLAUDE.md -> a -> b -> a terminates and emits both a and b as generic.""" + _root, classified = _classify("cycle") + names = {cf.path.name: cf for cf in classified if cf.file_type == "generic"} + assert "a.md" in names and "b.md" in names + # a is reached at depth 1 from main and also at depth 3 from b (generic); + # min depth wins. + assert names["a.md"].properties.get("link_depth") == "1" + # b is only reached at depth 2. 
+ assert names["b.md"].properties.get("link_depth") == "2" + + +@pytest.mark.integration +@pytest.mark.subsys_classify +def test_import_vs_link_distinguishes_verb() -> None: + """`@b.md` -> verb=imported; `[c](c.md)` -> verb=read.""" + _root, classified = _classify("import-vs-link") + b = _generic_by_name(classified, "b.md") + c = _generic_by_name(classified, "c.md") + assert b.properties.get("loading_verb") == ["imported"] + assert c.properties.get("loading_verb") == ["read"] diff --git a/tests/unit/test_link_walker.py b/tests/unit/test_link_walker.py index 2785db0..850d12d 100644 --- a/tests/unit/test_link_walker.py +++ b/tests/unit/test_link_walker.py @@ -1,4 +1,4 @@ -"""Unit tests for the Markdown link-walker — REQ-025 Phase C.""" +"""Unit tests for `walk_markdown_links` reach-only behavior (targets derived from edge set).""" from __future__ import annotations @@ -12,6 +12,11 @@ FIXTURE_ROOT = Path(__file__).resolve().parent.parent / "fixtures" / "generic-classification" +def _reached_targets(edges: list) -> set[Path]: + """Collapse a `list[LinkEdge]` to the set of resolved target paths.""" + return {edge.target for edge in edges} + + @pytest.mark.unit @pytest.mark.subsys_classify def test_walk_markdown_links_finds_inline_md_targets(tmp_path: Path) -> None: @@ -19,8 +24,8 @@ def test_walk_markdown_links_finds_inline_md_targets(tmp_path: Path) -> None: target = tmp_path / "linked.md" main.write_text("Read [the notes](linked.md).\n", encoding="utf-8") target.write_text("# notes\n", encoding="utf-8") - reached = walk_markdown_links({main}, tmp_path, {main}) - assert reached == {target.resolve()} + edges = walk_markdown_links({main: "main"}, tmp_path, {main}) + assert _reached_targets(edges) == {target.resolve()} @pytest.mark.unit @@ -31,7 +36,7 @@ def test_walk_markdown_links_skips_urls_and_anchors(tmp_path: Path) -> None: "URL [example](https://example.com)\nanchor [section](#section)\nmailto [link](mailto:nobody@example.com)\n", encoding="utf-8", ) - assert 
walk_markdown_links({main}, tmp_path, {main}) == set() + assert walk_markdown_links({main: "main"}, tmp_path, {main}) == [] @pytest.mark.unit @@ -42,8 +47,8 @@ def test_walk_markdown_links_skips_out_of_tree(tmp_path: Path) -> None: outside = tmp_path / "outside.md" outside.write_text("# outside\n", encoding="utf-8") main.write_text("Goes [outside](../outside.md).\n", encoding="utf-8") - reached = walk_markdown_links({main}, main.parent, {main}) - assert reached == set() + edges = walk_markdown_links({main: "main"}, main.parent, {main}) + assert edges == [] @pytest.mark.unit @@ -53,9 +58,9 @@ def test_walk_markdown_links_handles_cycle(tmp_path: Path) -> None: b = tmp_path / "b.md" a.write_text("Goes to [b](b.md).\n", encoding="utf-8") b.write_text("Goes to [a](a.md).\n", encoding="utf-8") - reached = walk_markdown_links({a}, tmp_path, {a}) - # b is reached; a is already-classified so it does not enter `found`. - assert reached == {b.resolve()} + edges = walk_markdown_links({a: "main"}, tmp_path, {a}) + # b is reached; a is already-classified so it does not enter the edge set. + assert _reached_targets(edges) == {b.resolve()} @pytest.mark.unit @@ -66,8 +71,8 @@ def test_walk_markdown_links_skips_already_classified(tmp_path: Path) -> None: main.write_text("See [rule](rule.md).\n", encoding="utf-8") rule.write_text("# rule\n", encoding="utf-8") # rule is already-classified; walker must not re-include it. - reached = walk_markdown_links({main}, tmp_path, {main, rule}) - assert reached == set() + edges = walk_markdown_links({main: "main"}, tmp_path, {main, rule}) + assert edges == [] @pytest.mark.unit @@ -106,9 +111,8 @@ def test_classify_files_with_generic_scanning_on_walks_and_classifies(tmp_path: @pytest.mark.unit @pytest.mark.subsys_classify -def test_classify_files_generic_loading_is_on_demand(tmp_path: Path) -> None: - """Generic files default to `loading: on_demand` so they don't pollute - base-context cross-file analysis. 
See DIAGNOSTIC.md cross-file matrix.""" +def test_classify_files_generic_loading_is_session_start_when_main_links(tmp_path: Path) -> None: + """A generic file reached from `main` inherits `loading: session_start` (derived from `link_source_type`).""" (tmp_path / "CLAUDE.md").write_text("Read [arch](arch.md).\n", encoding="utf-8") (tmp_path / "arch.md").write_text("# arch\n", encoding="utf-8") file_types = load_file_types("claude") @@ -120,4 +124,4 @@ def test_classify_files_generic_loading_is_on_demand(tmp_path: Path) -> None: ) arch = next((cf for cf in classified if cf.path.name == "arch.md"), None) assert arch is not None - assert arch.properties.get("loading") == "on_demand" + assert arch.properties.get("loading") == "session_start" diff --git a/tests/unit/test_link_walker_attribution.py b/tests/unit/test_link_walker_attribution.py new file mode 100644 index 0000000..8c00831 --- /dev/null +++ b/tests/unit/test_link_walker_attribution.py @@ -0,0 +1,135 @@ +"""Edge-attribution coverage for `walk_markdown_links`. + +One edge per `(source, target)`, depth tracking, verb (read/imported), +cycle termination, and `source_type` propagation from the seed map. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from reporails_cli.core.classify.link_walker import LinkEdge, walk_markdown_links + + +def _edges_to(edges: list[LinkEdge], target: Path) -> list[LinkEdge]: + """Filter edges down to a specific target (paths are already resolved).""" + resolved = target.resolve() + return [e for e in edges if e.target == resolved] + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_emits_one_edge_per_source(tmp_path: Path) -> None: + """File A and file B both link to C → two edges with distinct sources.""" + a = tmp_path / "A.md" + b = tmp_path / "B.md" + c = tmp_path / "C.md" + a.write_text("Read [C](C.md).\n", encoding="utf-8") + b.write_text("Read [C](C.md).\n", encoding="utf-8") + c.write_text("# c\n", encoding="utf-8") + + edges = walk_markdown_links({a: "main", b: "skill"}, tmp_path, {a, b}) + c_edges = _edges_to(edges, c) + assert {e.source for e in c_edges} == {a.resolve(), b.resolve()} + assert {e.source_type for e in c_edges} == {"main", "skill"} + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_depth_tracked_correctly(tmp_path: Path) -> None: + """main -> mid.md -> leaf.md emits edges at depth 1 and depth 2.""" + main = tmp_path / "main.md" + mid = tmp_path / "mid.md" + leaf = tmp_path / "leaf.md" + main.write_text("[m](mid.md)\n", encoding="utf-8") + mid.write_text("[l](leaf.md)\n", encoding="utf-8") + leaf.write_text("# leaf\n", encoding="utf-8") + + edges = walk_markdown_links({main: "main"}, tmp_path, {main}) + by_target = {edge.target: edge for edge in edges} + assert by_target[mid.resolve()].depth == 1 + assert by_target[leaf.resolve()].depth == 2 + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_verb_distinguishes_md_link_vs_import(tmp_path: Path) -> None: + """Same file emits two edges with verbs `read` and `imported`.""" + main = tmp_path / "main.md" + md_target = tmp_path / "a.md" + import_target = tmp_path / "b.md" + 
main.write_text("Link: [a](a.md)\nImport: @b.md\n", encoding="utf-8") + md_target.write_text("# a\n", encoding="utf-8") + import_target.write_text("# b\n", encoding="utf-8") + + edges = walk_markdown_links({main: "main"}, tmp_path, {main}) + verbs_by_target = {edge.target: edge.verb for edge in edges} + assert verbs_by_target[md_target.resolve()] == "read" + assert verbs_by_target[import_target.resolve()] == "imported" + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_cycle_breaks(tmp_path: Path) -> None: + """a <-> b terminates; only the a->b edge is emitted (b->a targets a classified file).""" + a = tmp_path / "a.md" + b = tmp_path / "b.md" + a.write_text("[b](b.md)\n", encoding="utf-8") + b.write_text("[a](a.md)\n", encoding="utf-8") + + # Seed only `a` so `b` is reached and links back to `a`. + edges = walk_markdown_links({a: "main"}, tmp_path, {a}) + # `a` is in classified_paths -> the b->a edge is filtered out. + # Only the a->b edge survives. + assert {(e.source, e.target) for e in edges} == {(a.resolve(), b.resolve())} + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_cycle_with_both_seeded_terminates(tmp_path: Path) -> None: + """Pure cycle protection: both files seeded, walker still terminates.""" + a = tmp_path / "a.md" + b = tmp_path / "b.md" + a.write_text("[b](b.md)\n", encoding="utf-8") + b.write_text("[a](a.md)\n", encoding="utf-8") + + edges = walk_markdown_links({a: "main", b: "main"}, tmp_path, {a, b}) + # Both are in classified_paths -> no generic edges emitted.
+ assert edges == [] + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_source_type_propagates(tmp_path: Path) -> None: + """Seed map's file_type lands on the emitted edge's source_type.""" + skill = tmp_path / "SKILL.md" + readme = tmp_path / "README.md" + skill.write_text("[r](README.md)\n", encoding="utf-8") + readme.write_text("# readme\n", encoding="utf-8") + + edges = walk_markdown_links({skill: "skill"}, tmp_path, {skill}) + assert len(edges) == 1 + assert edges[0].source_type == "skill" + + +@pytest.mark.unit +@pytest.mark.subsys_classify +def test_max_depth_cuts_off(tmp_path: Path) -> None: + """Chain a -> b -> c -> d at max_depth=3 reaches d (depth 3), not beyond.""" + a = tmp_path / "a.md" + b = tmp_path / "b.md" + c = tmp_path / "c.md" + d = tmp_path / "d.md" + e = tmp_path / "e.md" + a.write_text("[b](b.md)\n", encoding="utf-8") + b.write_text("[c](c.md)\n", encoding="utf-8") + c.write_text("[d](d.md)\n", encoding="utf-8") + d.write_text("[e](e.md)\n", encoding="utf-8") + e.write_text("# e\n", encoding="utf-8") + + edges = walk_markdown_links({a: "main"}, tmp_path, {a}, max_depth=3) + reached = {edge.target for edge in edges} + assert d.resolve() in reached + assert e.resolve() not in reached From 61b2deae6cefea646d14d5beec3e6dc105a658cb Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 15:00:10 +0200 Subject: [PATCH 13/30] =?UTF-8?q?docs:=20backfill=200.5.10=20surfaces=20?= =?UTF-8?q?=E2=80=94=20capability=20focus,=20per-rule=20thresholds,=20gene?= =?UTF-8?q?ric=20scanning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refs #24 --- docs/agent-support.md | 6 +++--- docs/configuration.md | 31 +++++++++++++++++++++++++++++-- docs/getting-started.md | 20 ++++++++++++++++++-- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/docs/agent-support.md b/docs/agent-support.md index 3eb6289..d26cef6 100644 --- a/docs/agent-support.md +++ b/docs/agent-support.md @@ -1,8 +1,8 @@ 
--- title: "Agent Support" description: "Which agents are recognized and what's covered" -version: "0.5.6" -last_updated: 2026-05-04 +version: "0.5.10" +last_updated: 2026-05-18 --- # Agent Support @@ -17,7 +17,7 @@ Reporails recognizes the instruction-file conventions of five coding agents and | Codex | `AGENTS.md` (+ optional `AGENTS.override.md`) | `.codex/rules/*.rules` | `.agents/skills/**/SKILL.md` | `.codex/agents/*.toml` | hooks, `.codex/config.toml`, skill metadata (`agents/openai.yaml`) | | Copilot (VS Code) | `.github/copilot-instructions.md` or `**/AGENTS.md` | `.github/instructions/**/*.instructions.md`, `.claude/rules/**/*.md` | `.github/skills/`, `.claude/skills/`, `.agents/skills/` | `.github/agents/*.agent.md` | hooks, prompts, MCP | | Cursor | `**/AGENTS.md` (`.cursorrules` recognized but legacy) | `.cursor/rules/**/*.mdc`, `.cursor/rules/**/*.md` | `.cursor/skills/`, `.claude/skills/`, `.codex/skills/` | `.cursor/agents/*.md`, `.claude/agents/*.md`, `.codex/agents/*.md` | hooks, MCP, managed policy, bugbot rules | -| Gemini | `GEMINI.md` or `**/AGENTS.md` | (no dedicated rules surface) | `.gemini/skills/**/SKILL.md` | `.gemini/agents/*.md` | commands, extensions, settings, hooks | +| Gemini | `GEMINI.md` or `**/AGENTS.md` | (no dedicated rules surface) | `.gemini/skills/**/SKILL.md` | `.gemini/agents/*.md` | commands, extensions, settings, hooks, memory (section inside `~/.gemini/GEMINI.md`), MCP, system_prompt, geminiignore | Many agents intentionally read each other's directories — Cursor's skills column, for example, includes `.claude/skills/` and `.codex/skills/` because Cursor invokes skills regardless of which agent first authored them. The cells above show the most common project-level patterns; user-level and system-level patterns are also recognized — see [What gets scanned](#what-gets-scanned). 
diff --git a/docs/configuration.md b/docs/configuration.md index a3c72d5..4900b67 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,8 +1,8 @@ --- title: "Configuration" description: "Disabling rules, project / global config, exclude paths" -version: "0.5.7" -last_updated: 2026-05-06 +version: "0.5.10" +last_updated: 2026-05-18 --- # Configuration @@ -154,6 +154,33 @@ When `ails config set …` writes `.ails/config.yml`, it also writes `.ails/.git config.local.yml ``` +## Per-rule thresholds + +Some rules ship with a built-in `min_lines` gate so small files do not get flagged for issues that only matter at scale. For example, `CORE:S:0013 scope-fields-in-frontmatter` ships with `min_lines: 30` — a 5-line rule file won't fail it. You can raise or lower the threshold per project under `overrides.rule_thresholds`: + +```yaml +# .ails/config.yml +overrides: + rule_thresholds: + CORE:S:0013: + min_lines: 50 # require 50+ lines before this rule fires + CORE:C:0034: + min_lines: 0 # always fire, even on tiny files +``` + +Any deterministic check that declares a `min_lines:` entry in its `checks.yml` can be tuned this way — see `ails explain <rule-id>` for which rules expose the gate. + +## Generic-class scanning (opt-in) + +By default, `ails check` only validates files that match one of the agent's declared instruction-file patterns. Set `generic_scanning: true` to extend coverage to any reachable Markdown file: + +```yaml +# .ails/config.yml +generic_scanning: true +``` + +When on, the discovery walker follows inline and reference Markdown links out of classified files (bounded depth, cycle-safe, tree-bound), and any reached `.md` file inside your repo gets a `generic` classification. Structural and formatting rules (charge ordering, direction imbalance, formatting hygiene) still fire on these files; main-shape rules (tech stack, MCP docs) do not. Default is off so anonymous tryouts against third-party repos stay quiet.
+ ## Severity overrides Severity is what makes a finding "critical" vs "info". Default severity comes from the rule itself; you can override it per project: diff --git a/docs/getting-started.md b/docs/getting-started.md index f8e2b97..1cec7dc 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -1,8 +1,8 @@ --- title: "Getting Started" description: "Install, first run, what the output means" -version: "0.5.6" -last_updated: 2026-05-04 +version: "0.5.10" +last_updated: 2026-05-18 --- # Getting Started @@ -105,6 +105,22 @@ ails check --agent claude # only run rules scoped to one agent The JSON output groups findings under `files{path: {findings: [...], count: N}}` plus aggregate `stats` and (when present) `cross_file` blocks — see [Configuration → Output format](configuration.md#output-format) for the full shape, including which fields are tier-conditional. +## Focus on one file or capability + +When the whole-repo view is too noisy, name the capability and (optionally) the target: + +```bash +ails check skill backlog # focus on .claude/skills/backlog/SKILL.md +ails check rule git # focus on .claude/rules/git.md +ails check agent rule-writer + # subagent + any skills its frontmatter preloads +ails check skill # listing mode — table of all skills with scores +``` + +The full pipeline still runs (so cross-file rules see the whole project), but only the focused file or capability appears in the output, with findings grouped by rule and a `Next:` action pointer. Listing mode (`ails check <capability>` with no name) prints a per-target score table for that capability under the detected agent. Capability names come from the agent's declared `file_types:` — both singular and plural are accepted. + +The whole-repo summary also shows a `Top rules (by finding count)` block — a fast triage view of which rule classes contribute the most findings across your project.
+ ## Next steps - [Score Guide](score-guide.md) — what the number means in practice From 62df2d0d5bd5b60634b9fb1d379fa82061b7610f Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 15:01:04 +0200 Subject: [PATCH 14/30] tests: wrap E501 docstring in test_symlink_detection TestWalkGlobFollowsSymlinkedDirs --- UNRELEASED.md | 1 + tests/unit/test_symlink_detection.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index c51d932..2195a37 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -28,5 +28,6 @@ - check: Mapper daemon now stays attached across `ails check` invocations instead of forcing every run to load ML models in-process. Three issues in `core/mapper/daemon.py`: `is_daemon_running` requires the socket file to exist alongside the PID (a stuck `ails check`-turned-daemon used to keep its PID alive indefinitely, fooling every new run into seeing a "running" daemon and falling back); `_become_daemon`'s FD-close loop narrowed to FIFO/pipe FDs via `S_ISFIFO` instead of indiscriminate `range(3, 1024)` — closing all FDs killed numpy / onnxruntime compiled-extension FDs imported pre-fork, breaking the daemon's first `map_ruleset` with `ImportError: import numpy failed`; SIGPIPE set to `SIG_IGN` in `_daemon_main` so a client disconnect mid-response can't terminate the daemon via the default signal handler. Warm `ails check` against a 27-file sample now runs ~5.6 s daemon-attached instead of falling through to ~8-9 s in-process. - discovery: `walk_glob` in `core/discovery/agent_discovery.py` now follows symlinked directories during descendant traversal so files inside symlinked subdirs are visible to whole-repo discovery. Cycle protection via canonical inode tracking ensures each physical directory is entered at most once. Aligns whole-repo discovery with the `glob.glob(..., recursive=True)` behavior used by per-capability listing. 
- rules: `CORE:S:0024 import-targets-resolve` and `CORE:S:0056 broken-markdown-link` now declare `match: {format: [freeform, frontmatter]}` so they fire on SKILL.md / `.claude/agents/*.md` / `.claude/rules/*.md` files. Prior `{format: freeform}` constraint excluded frontmatter-bearing instruction files from import-resolution and broken-link coverage even though the agent schema characterizes those file types as `format: [frontmatter, freeform]`. +- tests: Wrapped the `TestWalkGlobFollowsSymlinkedDirs` class docstring in `tests/unit/test_symlink_detection.py` to satisfy `ruff` E501; no behavior change. ### Removed diff --git a/tests/unit/test_symlink_detection.py b/tests/unit/test_symlink_detection.py index 253d25a..9640ddf 100644 --- a/tests/unit/test_symlink_detection.py +++ b/tests/unit/test_symlink_detection.py @@ -247,7 +247,10 @@ def test_extra_targets_none_is_noop(self, tmp_path: Path) -> None: class TestWalkGlobFollowsSymlinkedDirs: - """Regression: `walk_glob` must descend into symlinked directories so files inside them are visible to whole-repo discovery.""" + """Regression: `walk_glob` descends into symlinked directories. + + Files inside symlinked dirs must be visible to whole-repo discovery. + """ @pytest.mark.unit @pytest.mark.subsys_lint From 9e4fc4b3c91868a923886823acbd2a400b4d98b5 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 18:37:06 +0200 Subject: [PATCH 15/30] agents: exclude CORE:S:0024 import-targets-resolve for codex+copilot per per-agent @-import semantics Refs #26 --- UNRELEASED.md | 1 + framework/rules/codex/config.yml | 1 + framework/rules/copilot/config.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/UNRELEASED.md b/UNRELEASED.md index 2195a37..ce0209d 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -17,6 +17,7 @@ ### Changed - auth: Set explicit `User-Agent: reporails-cli/ (auth)` header on platform and GitHub requests so identifiable CLI traffic can be allow-listed at the edge. 
- check: `[PATH]` positional argument is now `[ARG1] [ARG2]` — `ARG1` is sniffed as a capability keyword first, falling through to existing path semantics. No behaviour change for `ails check`, `ails check .`, or `ails check `. +- agents: Added `CORE:S:0024 import-targets-resolve` to `codex` and `copilot` agent `excludes:` lists — neither agent's instruction files support `@` import syntax per their official documentation, so the rule has no antipattern to detect in those agents. ### Fixed - gemini: `memory` block replaced the retired `## Gemini Added Memories` in-section locator with the current upstream model — private project memory at `~/.gemini/tmp/*/memory/` (`MEMORY.md` + sibling `*.md` notes), mirroring Claude's directory-glob shape. The legacy section header has 0 occurrences in `google-gemini/gemini-cli` source; the locator was targeting a surface that no longer exists. `memory_locator` enumerates entries through the same directory-glob dispatch Claude uses. diff --git a/framework/rules/codex/config.yml b/framework/rules/codex/config.yml index 83f1025..22248f8 100644 --- a/framework/rules/codex/config.yml +++ b/framework/rules/codex/config.yml @@ -278,4 +278,5 @@ file_types: excludes: - CLAUDE:* - COPILOT:* + - CORE:S:0024 # import-targets-resolve — Codex AGENTS.md does not honor @ import syntax (single-file format per developers.openai.com/codex/guides/agents-md) - CORE:S:0033 # import-depth-within-limit — Codex AGENTS.md does not support chained @import diff --git a/framework/rules/copilot/config.yml b/framework/rules/copilot/config.yml index 3697bec..f1fc672 100644 --- a/framework/rules/copilot/config.yml +++ b/framework/rules/copilot/config.yml @@ -190,4 +190,5 @@ file_types: excludes: - CLAUDE:* - CODEX:* + - CORE:S:0024 # import-targets-resolve — Copilot instruction files have no documented @ import syntax (per docs.github.com/en/copilot/how-tos/configure-custom-instructions/add-repository-instructions) - CORE:S:0033 # import-depth-within-limit — 
Copilot instructions files do not support chained @import From fbd674e0ceec51eb1c009cb52dd59d725b8e4cdb Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 22:42:18 +0200 Subject: [PATCH 16/30] =?UTF-8?q?rules:=20bump=20severity=20medium?= =?UTF-8?q?=E2=86=92high=20for=20broken-link=20/=20import=20rules?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refs #26 --- UNRELEASED.md | 1 + .../claude/import-depth-within-limit/rule.md | 2 +- .../rules/core/broken-markdown-link/rule.md | 2 +- .../core/import-depth-within-limit/rule.md | 2 +- .../rules/core/import-targets-resolve/rule.md | 2 +- .../cursor/import-depth-within-limit/rule.md | 2 +- src/reporails_cli/core/lint/content_checker.py | 4 +++- src/reporails_cli/core/lint/rule_runner.py | 18 +++++++++++++++++- 8 files changed, 26 insertions(+), 7 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index ce0209d..f76f0dc 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -18,6 +18,7 @@ - auth: Set explicit `User-Agent: reporails-cli/ (auth)` header on platform and GitHub requests so identifiable CLI traffic can be allow-listed at the edge. - check: `[PATH]` positional argument is now `[ARG1] [ARG2]` — `ARG1` is sniffed as a capability keyword first, falling through to existing path semantics. No behaviour change for `ails check`, `ails check .`, or `ails check `. - agents: Added `CORE:S:0024 import-targets-resolve` to `codex` and `copilot` agent `excludes:` lists — neither agent's instruction files support `@` import syntax per their official documentation, so the rule has no antipattern to detect in those agents. +- rules: `CORE:S:0024 import-targets-resolve`, `CORE:S:0033 import-depth-within-limit`, and `CORE:S:0056 broken-markdown-link` severity raised from `medium` to `high` — broken includes, links, and over-depth chains are functional context gaps (referenced content silently fails to load), not stylistic warnings. 
`CLAUDE:S:0010` and `CURSOR:S:0002` per-agent supersedes updated to match. ### Fixed - gemini: `memory` block replaced the retired `## Gemini Added Memories` in-section locator with the current upstream model — private project memory at `~/.gemini/tmp/*/memory/` (`MEMORY.md` + sibling `*.md` notes), mirroring Claude's directory-glob shape. The legacy section header has 0 occurrences in `google-gemini/gemini-cli` source; the locator was targeting a surface that no longer exists. `memory_locator` enumerates entries through the same directory-glob dispatch Claude uses. diff --git a/framework/rules/claude/import-depth-within-limit/rule.md b/framework/rules/claude/import-depth-within-limit/rule.md index 1e1417f..5293bca 100644 --- a/framework/rules/claude/import-depth-within-limit/rule.md +++ b/framework/rules/claude/import-depth-within-limit/rule.md @@ -4,7 +4,7 @@ slug: import-depth-within-limit title: Import Depth Within Limit category: structure type: mechanical -severity: medium +severity: high match: {type: main} supersedes: CORE:S:0033 source: https://code.claude.com/docs/en/memory#import-additional-files diff --git a/framework/rules/core/broken-markdown-link/rule.md b/framework/rules/core/broken-markdown-link/rule.md index aa27864..37904a8 100644 --- a/framework/rules/core/broken-markdown-link/rule.md +++ b/framework/rules/core/broken-markdown-link/rule.md @@ -4,7 +4,7 @@ slug: broken-markdown-link title: Markdown Link Targets Resolve category: structure type: mechanical -severity: medium +severity: high backed_by: [] match: {format: [freeform, frontmatter]} --- diff --git a/framework/rules/core/import-depth-within-limit/rule.md b/framework/rules/core/import-depth-within-limit/rule.md index bf08e02..9ba24e1 100644 --- a/framework/rules/core/import-depth-within-limit/rule.md +++ b/framework/rules/core/import-depth-within-limit/rule.md @@ -4,7 +4,7 @@ slug: import-depth-within-limit title: Import Depth Within Limit category: structure type: mechanical -severity: 
medium +severity: high match: {type: main} source: https://code.claude.com/docs/en/memory#import-additional-files --- diff --git a/framework/rules/core/import-targets-resolve/rule.md b/framework/rules/core/import-targets-resolve/rule.md index 9110ded..b394468 100644 --- a/framework/rules/core/import-targets-resolve/rule.md +++ b/framework/rules/core/import-targets-resolve/rule.md @@ -4,7 +4,7 @@ slug: import-targets-resolve title: Import Targets Resolve category: structure type: mechanical -severity: medium +severity: high backed_by: [developer-context-cursor-study] match: {format: [freeform, frontmatter]} --- diff --git a/framework/rules/cursor/import-depth-within-limit/rule.md b/framework/rules/cursor/import-depth-within-limit/rule.md index 997e445..84507a1 100644 --- a/framework/rules/cursor/import-depth-within-limit/rule.md +++ b/framework/rules/cursor/import-depth-within-limit/rule.md @@ -4,7 +4,7 @@ slug: import-depth-within-limit title: Import Depth Within Limit category: structure type: mechanical -severity: medium +severity: high match: {type: main} supersedes: CORE:S:0033 source: https://cursor.com/docs/rules diff --git a/src/reporails_cli/core/lint/content_checker.py b/src/reporails_cli/core/lint/content_checker.py index 1eaaf2d..80a98ae 100644 --- a/src/reporails_cli/core/lint/content_checker.py +++ b/src/reporails_cli/core/lint/content_checker.py @@ -71,10 +71,12 @@ def _evaluate_check( message = f"Content check failed: {check.query} (expect={check.expect})" display_path = _relative_path(target_files[0] if target_files else primary_file) + from reporails_cli.core.lint.rule_runner import _to_display_severity + return LocalFinding( file=display_path, line=1, - severity=rule.severity.value, + severity=_to_display_severity(rule.severity.value), rule=rule.id, message=message, source="content_query", diff --git a/src/reporails_cli/core/lint/rule_runner.py b/src/reporails_cli/core/lint/rule_runner.py index 01412d0..4598751 100644 --- 
a/src/reporails_cli/core/lint/rule_runner.py +++ b/src/reporails_cli/core/lint/rule_runner.py @@ -22,6 +22,22 @@ _SEVERITY_ORDER = {"error": 0, "warning": 1, "info": 2} +def _to_display_severity(raw: str) -> str: + """Normalize a Severity enum value to the display vocabulary. + + `Rule.severity` and `Check.severity` carry the SARIF-adjacent + `critical`/`high`/`medium`/`low`/`info` vocabulary; the merger and + text formatters only count `error`/`warning`/`info`. Mirrors the + translation already applied to deterministic regex findings in + `core/lint/regex/compiler.py`. + """ + if raw in ("error", "critical", "high"): + return "error" + if raw == "info": + return "info" + return "warning" + + def _collect_mechanical_findings( rules: dict[str, Rule], project_dir: Path, @@ -45,7 +61,7 @@ def _collect_mechanical_findings( LocalFinding( file=file_path, line=line, - severity=v.severity.value, + severity=_to_display_severity(v.severity.value), rule=v.rule_id, message=v.message, source="m_probe", From dda9c361aa4e14b83620cf630ccaf98dce4b2e32 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 22:43:26 +0200 Subject: [PATCH 17/30] classify: capability listing honors exclude_dirs, delegates memory to memory_locator, folds main/memories aliases Refs #24 --- UNRELEASED.md | 3 + .../core/classify/capability_paths.py | 124 ++++++++++++++++-- src/reporails_cli/interfaces/cli/main.py | 23 +++- 3 files changed, 137 insertions(+), 13 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index f76f0dc..3382377 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -13,6 +13,7 @@ - check: Per-agent memory entry locator at `src/reporails_cli/core/discovery/memory_locator.py` — data-driven adapter that enumerates memory entries per agent (claude: `*.md` files inside `~/.claude/projects/*/memory/`, `.claude/agent-memory//`, `.claude/agent-memory-local//`; gemini: `## Gemini Added Memories` section inside `~/.gemini/GEMINI.md`). 
Returns `MemoryEntry` records with `agent`, `path`, optional `section`, and `body`. Consumed by the L3 memory rules without per-agent branches. - classify: Link-reached generic files now record their source attribution on `ClassifiedFile.properties` — `loading_verb` ({read, imported, auto_loaded, invoked}), `link_source_type` (the linking file's `file_type` — main, rule, skill, agent, memory, subagent_memory, nested_context), `link_source_path` (project-relative paths of the linking files), and `link_depth` (1-3 from the instruction-file seed). `FileMatch` gains matching `loading_verb` and `link_source_type` fields for rule routing. Rule applicability for generic files is unchanged in this release. - codex: New `memory` file_type declared as a tombstone — `~/.codex/memories/` holds generated state controlled via the `/memories` slash command and `config.toml` keys (`memories.generate_memories`, `memories.use_memories`, `memories.disable_on_external_context`, etc.), not user-authored markdown. No patterns to glob; surfaces in the agent registry but invites no rule pressure. +- check: Capability-name aliases for `memory|memories`, `subagent_memory|subagent_memories`, `nested_context|nested_contexts`. `ails check memory` (singular) and `ails check memories` (plural) both resolve. The memories alias folds `memory` and `subagent_memory` file_types into one listing; the main alias folds `main` and `nested_context`. Data-driven sing/plural map remains a follow-up. ### Changed - auth: Set explicit `User-Agent: reporails-cli/ (auth)` header on platform and GitHub requests so identifiable CLI traffic can be allow-listed at the edge. @@ -31,5 +32,7 @@ - discovery: `walk_glob` in `core/discovery/agent_discovery.py` now follows symlinked directories during descendant traversal so files inside symlinked subdirs are visible to whole-repo discovery. Cycle protection via canonical inode tracking ensures each physical directory is entered at most once. 
Aligns whole-repo discovery with the `glob.glob(..., recursive=True)` behavior used by per-capability listing. - rules: `CORE:S:0024 import-targets-resolve` and `CORE:S:0056 broken-markdown-link` now declare `match: {format: [freeform, frontmatter]}` so they fire on SKILL.md / `.claude/agents/*.md` / `.claude/rules/*.md` files. Prior `{format: freeform}` constraint excluded frontmatter-bearing instruction files from import-resolution and broken-link coverage even though the agent schema characterizes those file types as `format: [frontmatter, freeform]`. - tests: Wrapped the `TestWalkGlobFollowsSymlinkedDirs` class docstring in `tests/unit/test_symlink_detection.py` to satisfy `ruff` E501; no behavior change. +- discovery: Capability-listing path (`ails check `) now honors `.ails/config.yml: exclude_dirs` — `list_capability_targets` accepts and applies the exclude set, mirroring the whole-repo discovery filter. Previously the listing bypassed the config and surfaced matches inside excluded directories. +- discovery: `ails check memory` (and `memories`) now enumerates `~/.claude/projects//memory/` entries via `memory_locator.memory_entries_for_agent` instead of returning 0 — the glob path silently dropped user-scope patterns starting with `~/`. ### Removed diff --git a/src/reporails_cli/core/classify/capability_paths.py b/src/reporails_cli/core/classify/capability_paths.py index 0be1c76..a946c29 100644 --- a/src/reporails_cli/core/classify/capability_paths.py +++ b/src/reporails_cli/core/classify/capability_paths.py @@ -24,6 +24,20 @@ "rule": "rules", "agent": "agents", "command": "commands", + "memory": "memories", + "subagent_memory": "subagent_memories", + "nested_context": "nested_contexts", +} + +# Capabilities that fold into a primary bucket for the redesigned display. +# `ails check main` resolves to files of either type; `ails check memories` +# enumerates both memory and subagent_memory entries. 
Tuple members are the +# config keys each agent might use — claude declares `child_instruction`, +# most other agents declare `nested_context` for the nested subtree shape. +_CAPABILITY_FOLD: dict[str, tuple[str, ...]] = { + "main": ("main", "nested_context", "child_instruction"), + "memories": ("memory", "subagent_memory"), + "memory": ("memory", "subagent_memory"), } @@ -33,12 +47,21 @@ def available_capabilities(agent: str, project_root: Path | None = None) -> list def canonicalize_capability(arg: str, agent: str, project_root: Path | None = None) -> str | None: - """Map a user-facing capability keyword (singular or plural) to the agent's config key, or None.""" + """Map a user-facing capability keyword (singular or plural) to the agent's config key, or None. + + For fold-source aliases (`memories`, `memory`), returns the alias itself + when any member of the fold tuple is declared by the agent — the + listing path walks the fold tuple. For non-fold aliases, returns the + singular config key declared by the agent. + """ if not arg: return None decls = available_capabilities(agent, project_root) if arg in decls: return arg + fold = _CAPABILITY_FOLD.get(arg) + if fold and any(f in decls for f in fold): + return arg plural = _CAPABILITY_SINGULAR_TO_PLURAL.get(arg) if plural and plural in decls: return plural @@ -62,17 +85,75 @@ def list_capability_targets( agent: str, capability: str, project_root: Path, + exclude_dirs: list[str] | tuple[str, ...] | None = None, ) -> list[Path]: """Enumerate files matching `capability` for `agent` under `project_root`. Globs the project-scope patterns from the agent's ``file_types:`` - declaration. Returns absolute paths. Returns an empty list when the - agent has no `capability` declared. + declaration, honoring `.ails/config.yml: exclude_dirs` via + `exclude_dirs`. Returns absolute paths. Returns an empty list when + the agent has no `capability` declared. 
+ + Fold-source aliases (``main``, ``memories``, ``memory``) union the + enumeration of every member declared by the agent. Memory file_types + whose patterns target ``~/.claude/...`` delegate to + `memory_locator.memory_entries_for_agent` so user-scope entries + surface in the listing. """ - decl = _find_declaration(agent, capability, project_root) - if decl is None: - return [] - return _glob_patterns(decl.patterns, project_root) + out: list[Path] = [] + seen: set[Path] = set() + for ft_name in _resolve_fold(agent, capability, project_root): + decl = _find_declaration(agent, ft_name, project_root) + if decl is None: + continue + if _is_user_scope_memory(ft_name, decl.patterns): + paths = _user_scope_memory_paths(agent, project_root) + else: + paths = _glob_patterns(decl.patterns, project_root, exclude_dirs) + for path in paths: + resolved = _safe_resolve(path) + if resolved in seen: + continue + seen.add(resolved) + out.append(path) + return out + + +def _resolve_fold(agent: str, capability: str, project_root: Path) -> tuple[str, ...]: + """Return the fold tuple for `capability`, restricted to declared types.""" + decls = available_capabilities(agent, project_root) + fold = _CAPABILITY_FOLD.get(capability) + if fold is None: + return (capability,) if capability in decls else () + return tuple(f for f in fold if f in decls) + + +def _is_user_scope_memory(ft_name: str, patterns: tuple[str, ...]) -> bool: + """True when the declared file_type names a user-scope memory directory. + + The capability-listing path can't reach `~/.claude/projects/*/memory/` + via `glob.glob(project_root / pattern)` because the pattern is absolute + once expanded. `memory_locator` already knows how to walk the + per-project memory directory — delegate when the file_type name is + `memory` or `subagent_memory` AND any pattern starts with `~/`. 
+ """ + if ft_name not in ("memory", "subagent_memory"): + return False + return any(p.startswith("~/") for p in patterns) + + +def _user_scope_memory_paths(agent: str, project_root: Path) -> list[Path]: + """Resolve user-scope memory entries via memory_locator.""" + from reporails_cli.core.discovery.memory_locator import memory_entries_for_agent + + return [entry.path for entry in memory_entries_for_agent(agent, project_root)] + + +def _safe_resolve(path: Path) -> Path: + try: + return path.resolve() + except OSError: + return path def resolve_capability( @@ -80,6 +161,7 @@ def resolve_capability( capability: str, name: str, project_root: Path, + exclude_dirs: list[str] | tuple[str, ...] | None = None, ) -> Path | None: """Resolve `(agent, capability, name)` to a canonical file path. @@ -90,12 +172,13 @@ def resolve_capability( (e.g. `.claude/skills/backlog/SKILL.md` → `backlog`). - `rules` / `agents` / `commands` / `config`: file stem (`.claude/rules/git.md` → `git`). + - `memory` / `memories`: file stem (memory entry filename minus `.md`). - `main` / `override`: filename match against `name` (rarely used with an explicit name). Returns the first match, or None when no candidate matches. """ - candidates = list_capability_targets(agent, capability, project_root) + candidates = list_capability_targets(agent, capability, project_root, exclude_dirs) extractor = _name_extractor_for(capability) for candidate in candidates: if extractor(candidate) == name: @@ -114,7 +197,11 @@ def _find_declaration( return None -def _glob_patterns(patterns: tuple[str, ...], project_root: Path) -> list[Path]: +def _glob_patterns( + patterns: tuple[str, ...], + project_root: Path, + exclude_dirs: list[str] | tuple[str, ...] | None = None, +) -> list[Path]: """Expand glob patterns under project_root. Skips user/managed-scope patterns. 
The `FileTypeDeclaration.patterns` tuple comes from `_extract_patterns` @@ -123,6 +210,11 @@ def _glob_patterns(patterns: tuple[str, ...], project_root: Path) -> list[Path]: the project tree — drop patterns that start with `~/`, an absolute path outside `project_root`, or `/etc/`-style managed locations. + `exclude_dirs` mirrors `.ails/config.yml: exclude_dirs` — any matched + path whose ancestor-chain (relative to project_root) contains a + directory name in the set is filtered out so listing-mode matches + full-project discovery. + Symlink handling: paths are kept in their pre-resolve form so a project symlink (e.g. `.claude/` linked to a hub directory) surfaces files under the project's path even though the underlying inode is @@ -130,6 +222,7 @@ def _glob_patterns(patterns: tuple[str, ...], project_root: Path) -> list[Path]: symlinks) are deduped via the resolved path. """ seen_resolved: set[Path] = set() + excl_set = set(exclude_dirs or ()) out: list[Path] = [] for pattern in patterns: if _is_external_pattern(pattern): @@ -138,6 +231,8 @@ def _glob_patterns(patterns: tuple[str, ...], project_root: Path) -> list[Path]: path = Path(match) if not path.is_file(): continue + if _is_under_excluded_dir(path, project_root, excl_set): + continue resolved = path.resolve() if resolved in seen_resolved: continue @@ -146,6 +241,17 @@ def _glob_patterns(patterns: tuple[str, ...], project_root: Path) -> list[Path]: return out +def _is_under_excluded_dir(path: Path, project_root: Path, excl: set[str]) -> bool: + """True when any ancestor dir name (relative to project_root) is in `excl`.""" + if not excl: + return False + try: + rel = path.relative_to(project_root) + except ValueError: + return False + return any(part in excl for part in rel.parts[:-1]) + + def _is_external_pattern(pattern: str) -> bool: if pattern.startswith(("~", "/")): return True diff --git a/src/reporails_cli/interfaces/cli/main.py b/src/reporails_cli/interfaces/cli/main.py index 399d9b2..866a3b1 100644 
--- a/src/reporails_cli/interfaces/cli/main.py +++ b/src/reporails_cli/interfaces/cli/main.py @@ -264,7 +264,7 @@ def check( # noqa: C901 # pylint: disable=too-many-locals # 7. Compute focus paths for per-capability mode focus_paths, listing_candidates = _resolve_focus_targets( - capability_mode, capability, capability_name, effective_agent, project_root + capability_mode, capability, capability_name, effective_agent, project_root, excl ) # 8. Display dispatch @@ -299,6 +299,7 @@ def _resolve_focus_targets( capability_name: str, effective_agent: str, project_root: Path, + exclude_dirs: list[str] | tuple[str, ...] | None = None, ) -> tuple[set[Path], list[Path]]: """Compute focus_paths (single-target) or listing_candidates (no name) for capability mode.""" from reporails_cli.core.classify.capability_paths import ( @@ -310,7 +311,7 @@ def _resolve_focus_targets( if not capability_mode: return set(), [] - if capability not in available_capabilities(effective_agent, project_root): + if not _capability_declared(capability, effective_agent, project_root): console.print( f"[red]Error:[/red] capability [bold]{capability}[/bold] is not declared " f"for agent [bold]{effective_agent}[/bold]. " @@ -318,10 +319,10 @@ def _resolve_focus_targets( ) raise typer.Exit(2) if not capability_name: - return set(), list_capability_targets(effective_agent, capability, project_root) + return set(), list_capability_targets(effective_agent, capability, project_root, exclude_dirs) resolved = resolve_capability(effective_agent, capability, capability_name, project_root) if resolved is None: - available = list_capability_targets(effective_agent, capability, project_root) + available = list_capability_targets(effective_agent, capability, project_root, exclude_dirs) console.print( f"[red]Error:[/red] no {capability} named [bold]{capability_name}[/bold] " f"for agent [bold]{effective_agent}[/bold] under {project_root}." 
@@ -335,6 +336,20 @@ def _resolve_focus_targets( return focus_paths, [] +def _capability_declared(capability: str, effective_agent: str, project_root: Path) -> bool: + """True when `capability` (or any fold-source it resolves to) is declared for the agent.""" + from reporails_cli.core.classify.capability_paths import ( + _CAPABILITY_FOLD, + available_capabilities, + ) + + decls = available_capabilities(effective_agent, project_root) + if capability in decls: + return True + fold = _CAPABILITY_FOLD.get(capability) + return bool(fold and any(f in decls for f in fold)) + + def _focus_paths_to_strings(focus_paths: set[Path], project_root: Path) -> set[str]: return {str(p.relative_to(project_root)) if p.is_relative_to(project_root) else str(p) for p in focus_paths} From 23ac32e330673a15c536af8775f0de6a77310a4a Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 22:55:51 +0200 Subject: [PATCH 18/30] check: capability focus/listing reuse whole-repo display via filtered result + ruleset_map Refs #24 --- UNRELEASED.md | 1 + src/reporails_cli/formatters/text/focus.py | 340 ++------------------- src/reporails_cli/interfaces/cli/main.py | 94 ++---- 3 files changed, 63 insertions(+), 372 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index 3382377..083c614 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -20,6 +20,7 @@ - check: `[PATH]` positional argument is now `[ARG1] [ARG2]` — `ARG1` is sniffed as a capability keyword first, falling through to existing path semantics. No behaviour change for `ails check`, `ails check .`, or `ails check `. - agents: Added `CORE:S:0024 import-targets-resolve` to `codex` and `copilot` agent `excludes:` lists — neither agent's instruction files support `@` import syntax per their official documentation, so the rule has no antipattern to detect in those agents. 
- rules: `CORE:S:0024 import-targets-resolve`, `CORE:S:0033 import-depth-within-limit`, and `CORE:S:0056 broken-markdown-link` severity raised from `medium` to `high` — broken includes, links, and over-depth chains are functional context gaps (referenced content silently fails to load), not stylistic warnings. `CLAUDE:S:0010` and `CURSOR:S:0002` per-agent supersedes updated to match. +- check: Capability focus (`ails check skill backlog`) and capability listing (`ails check skills`) now render with the same shape as the whole-repo `ails check` — same surface sections, file cards, surface-health bars, scorecard — just filtered to the focused subset. The standalone focus and listing layouts in `formatters/text/focus.py` were dropped; both modes go through `print_text_result` with the `CombinedResult` and `RulesetMap` filtered to the focus paths. ### Fixed - gemini: `memory` block replaced the retired `## Gemini Added Memories` in-section locator with the current upstream model — private project memory at `~/.gemini/tmp/*/memory/` (`MEMORY.md` + sibling `*.md` notes), mirroring Claude's directory-glob shape. The legacy section header has 0 occurrences in `google-gemini/gemini-cli` source; the locator was targeting a surface that no longer exists. `memory_locator` enumerates entries through the same directory-glob dispatch Claude uses. diff --git a/src/reporails_cli/formatters/text/focus.py b/src/reporails_cli/formatters/text/focus.py index 1ef5f75..1abe6d5 100644 --- a/src/reporails_cli/formatters/text/focus.py +++ b/src/reporails_cli/formatters/text/focus.py @@ -1,323 +1,24 @@ -"""Focus-mode renderer for per-capability `ails check`. +"""Focus filters for capability-mode `ails check`. -When a capability target resolves to a small set of files, the per-group -scorecard is overkill — the operator wants every finding for those files -grouped by rule, plus a "next action" pointer. This renderer is the -output of `ails check `. 
+Capability mode (`ails check []`) reuses the standard +whole-repo renderer (`print_text_result`); these helpers narrow the +`CombinedResult` and `RulesetMap` to the focused subset of files so the +rendered output has the same shape with fewer rows. """ from __future__ import annotations from collections import Counter -from collections.abc import Callable from pathlib import Path from typing import Any -from rich.console import Console - -from reporails_cli.formatters.text.display_constants import get_term_width -from reporails_cli.formatters.text.scorecard import ( - _RULE_SEVERITY_LABEL, - _RULE_SEVERITY_RANK, -) - -console = Console() - - -def print_focus_result( - result: Any, - capability: str, - name: str, - agent: str, - focus_paths: set[Path], - project_root: Path, - elapsed_ms: float, - ruleset_map: Any = None, -) -> None: - """Render the focus-mode output block. - - Layout: - Reporails — () - - - Score: X.X / 10 ▓▓▓... - - - Findings by rule (N): - RULE_ID xcount severity message - line refs - - Cross-file: involving this file … (when present) - - Next: fix RULE_ID (xcount) — highest-frequency warning. 
- """ - rel_paths = sorted(_to_rel(p, project_root) for p in focus_paths) - findings = [f for f in result.findings if f.file in {str(p) for p in rel_paths}] - - header = f"[bold]Reporails[/bold] — {capability} {name}".rstrip() - if agent: - header += f" ([dim]{agent}[/dim])" - console.print() - console.print(header) - console.print() - - if len(rel_paths) == 1: - _render_single_file(rel_paths[0], findings, result, ruleset_map) - else: - _render_multi_file(rel_paths, findings, result, ruleset_map) - - _render_findings_by_rule(findings) - _render_cross_file_for_focus(result, rel_paths) - _render_next_action(findings) - - if elapsed_ms: - console.print() - console.print(f" [dim]({elapsed_ms / 1000:.1f}s)[/dim]") - - -def _render_single_file( - rel_path: Path, - findings: list[Any], - result: Any, - ruleset_map: Any, -) -> None: - file_atoms = _atoms_for_file(ruleset_map, rel_path) - score = _focus_score(findings, len(file_atoms), result) - bar = _bar(score) - console.print(f" [bold]{rel_path}[/bold]") - color = "green" if score >= 7.0 else "yellow" if score >= 4.0 else "red" - console.print(f" Score: [{color} bold]{score:.1f}[/{color} bold] / 10 [dim]{bar}[/dim]") - summary = _atom_summary(file_atoms) - if summary: - console.print(f" [dim]{summary}[/dim]") - - -def _render_multi_file( - rel_paths: list[Path], - findings: list[Any], - result: Any, - ruleset_map: Any, -) -> None: - per_file: dict[str, list[Any]] = {} - for f in findings: - per_file.setdefault(f.file, []).append(f) - name_w = max((len(str(p)) for p in rel_paths), default=20) - for rel_path in rel_paths: - key = str(rel_path) - file_findings = per_file.get(key, []) - file_atoms = _atoms_for_file(ruleset_map, rel_path) - score = _focus_score(file_findings, len(file_atoms), result) - color = "green" if score >= 7.0 else "yellow" if score >= 4.0 else "red" - count = len(file_findings) - console.print( - f" [bold]{key:<{name_w}}[/bold] {count:>3} findings Score: [{color} bold]{score:.1f}[/{color} bold]" - ) - 
- -def _render_findings_by_rule(findings: list[Any]) -> None: - if not findings: - console.print("\n [green]✓[/green] No findings.") - return - by_rule = _group_by_rule(findings) - tw = get_term_width() - console.print() - console.print(f" [bold]Findings by rule ({len(findings)}):[/bold]") - rule_w = max((len(r) for r in by_rule), default=12) - for rule_id in _order_rules(by_rule): - items = by_rule[rule_id] - severity = _worst_severity(items) - label = _RULE_SEVERITY_LABEL.get(severity, severity) - message = _shorten(items[0].message, tw - rule_w - 24) - console.print(f" [bold]{rule_id:<{rule_w}}[/bold] (x{len(items)}) {label} {message}") - lines = [f.line for f in items if f.line] - if lines: - console.print(f" [dim]L{', L'.join(str(line) for line in sorted(set(lines))[:12])}[/dim]") - - -def _render_cross_file_for_focus(result: Any, rel_paths: list[Path]) -> None: - str_paths = {str(p) for p in rel_paths} - pairs = [cf for cf in (result.cross_file or ()) if cf.file_1 in str_paths or cf.file_2 in str_paths] - if not pairs: - return - n_conflicts = sum(1 for cf in pairs if cf.finding_type == "conflict") - n_reps = sum(1 for cf in pairs if cf.finding_type == "repetition") - bits = [] - if n_conflicts: - bits.append(f"{n_conflicts} conflict" + ("s" if n_conflicts > 1 else "")) - if n_reps: - bits.append(f"{n_reps} repetition" + ("s" if n_reps > 1 else "")) - console.print() - console.print(f" Cross-file: {', '.join(bits)} involving this focus.") - console.print(" [dim]Run `ails check` for the full graph.[/dim]") - - -def _render_next_action(findings: list[Any]) -> None: - if not findings: - return - by_rule = _group_by_rule(findings) - ranked = sorted( - by_rule.items(), - key=lambda kv: (_RULE_SEVERITY_RANK.get(_worst_severity(kv[1]), 3), -len(kv[1])), - ) - if not ranked: - return - rule_id, items = ranked[0] - severity = _worst_severity(items) - severity_word = "error" if severity == "error" else "warning" if severity == "warning" else "finding" - console.print() 
- console.print( - f" [bold]Next:[/bold] fix [bold]{rule_id}[/bold] (x{len(items)}) — highest-frequency {severity_word}." - ) - - -def _atoms_for_file(ruleset_map: Any, rel_path: Path) -> list[Any]: - if ruleset_map is None: - return [] - key = str(rel_path) - return [a for a in getattr(ruleset_map, "atoms", ()) if a.file_path == key] - - -def _atom_summary(atoms: list[Any]) -> str: - if not atoms: - return "" - charge_counts = Counter(a.charge for a in atoms) - directives = charge_counts.get("DIRECTIVE", 0) + charge_counts.get("IMPERATIVE", 0) - constraints = charge_counts.get("CONSTRAINT", 0) - ambiguous = charge_counts.get("AMBIGUOUS", 0) - n_prose = charge_counts.get("NEUTRAL", 0) - total = max(len(atoms), 1) - prose_pct = round(100 * n_prose / total) - parts = [] - if directives: - parts.append(f"{directives} directive") - if constraints: - parts.append(f"{constraints} constraint") - if ambiguous: - parts.append(f"{ambiguous} ambiguous") - parts.append(f"{prose_pct}% prose") - return " · ".join(parts) - - -def _focus_score(findings: list[Any], n_atoms: int, result: Any) -> float: - if not findings: - return 10.0 - severity_counts = Counter(f.severity for f in findings) - errors = severity_counts.get("error", 0) - warnings = severity_counts.get("warning", 0) - infos = severity_counts.get("info", 0) - - # Reuse compute_score's shape: band base + severity penalty / atom denom. 
- has_quality = result.quality is not None and bool(getattr(result.quality, "compliance_band", "")) - base = 6.0 - if has_quality: - band = result.quality.compliance_band - base = 8.5 if band == "HIGH" else 5.5 if band == "MODERATE" else 3.0 - denom = max(n_atoms, errors + warnings + infos, 1) - penalty = min(4.0, (errors / denom) * 30) + min(2.0, (warnings / denom) * 2) - return float(round(max(0.0, min(10.0, base - penalty)), 1)) - - -def _bar(score: float) -> str: - bar_width = min(20, get_term_width() - 26) - filled = round(bar_width * score / 10) - return "▓" * filled + "░" * (bar_width - filled) - - -def _group_by_rule(findings: list[Any]) -> dict[str, list[Any]]: - out: dict[str, list[Any]] = {} - for f in findings: - out.setdefault(f.rule, []).append(f) - return out - - -def _worst_severity(items: list[Any]) -> str: - return str(min(items, key=lambda f: _RULE_SEVERITY_RANK.get(f.severity, 3)).severity) - - -def _order_rules(by_rule: dict[str, list[Any]]) -> list[str]: - return sorted( - by_rule, - key=lambda r: (_RULE_SEVERITY_RANK.get(_worst_severity(by_rule[r]), 3), -len(by_rule[r]), r), - ) - - -def _shorten(text: str, width: int) -> str: - if width <= 8: - return text - snippet = text.split(".")[0].split("—")[0].strip() - if len(snippet) <= width: - return snippet - return snippet[: width - 1] + "…" - - -def print_listing_result( - result: Any, - capability: str, - agent: str, - candidate_paths: list[Path], - project_root: Path, - ruleset_map: Any = None, -) -> None: - """Render listing mode: capability + per-target scores. - - Output when the operator runs `ails check skill` (no name): - Reporails — skills (, N found) - - … - Run: ails check to focus on one. 
- """ - rels = [_to_rel(p, project_root) for p in candidate_paths] - name_extractor = _name_extractor_for_capability(capability) - - console.print() - console.print(f"[bold]Reporails[/bold] — {capability} ([dim]{agent}[/dim], {len(rels)} found)") - console.print() - - if not rels: - console.print(f" [dim]No {capability} files found for agent {agent}.[/dim]") - return - - per_file = _findings_per_file(result.findings) - rows = [] - for path in rels: - key = str(path) - file_findings = per_file.get(key, []) - atoms = _atoms_for_file(ruleset_map, path) - score = _focus_score(file_findings, len(atoms), result) - rows.append((name_extractor(path), key, score)) - - name_w = max((len(name) for name, _, _ in rows), default=12) - path_w = max((len(p) for _, p, _ in rows), default=20) - for name, key, score in sorted(rows, key=lambda r: r[0]): - color = "green" if score >= 7.0 else "yellow" if score >= 4.0 else "red" - console.print( - f" [bold]{name:<{name_w}}[/bold] [dim]{key:<{path_w}}[/dim] [{color}]{score:.1f}[/{color}] / 10" - ) - - console.print() - console.print(f" [dim]Run:[/dim] ails check {capability} ") - - -def _findings_per_file(findings: Any) -> dict[str, list[Any]]: - out: dict[str, list[Any]] = {} - for f in findings: - out.setdefault(f.file, []).append(f) - return out - - -def _name_extractor_for_capability(capability: str) -> Callable[[Path], str]: - parent_dir_caps = {"skills", "nested_context", "child_instruction"} - if capability in parent_dir_caps: - return lambda p: p.parent.name - return lambda p: p.stem - def filter_result_to_focus(result: Any, focus_paths: set[Path], project_root: Path) -> Any: - """Return a new CombinedResult containing only findings + cross-file pairs in the focus. + """Return a CombinedResult restricted to the rows in `focus_paths`. - Used by JSON / GitHub / focus text rendering so the envelope reflects - just the targeted file(s) and the score/Top-rules block can be - recomputed from the focused findings. 
+ Filters findings, cross-file pairs, and per-file analysis so the + standard renderer's surface-health / file-card / scorecard blocks + all reflect the same subset. """ from dataclasses import replace as _replace @@ -326,6 +27,7 @@ def filter_result_to_focus(result: Any, focus_paths: set[Path], project_root: Pa rel_keys = {str(_to_rel(p, project_root)) for p in focus_paths} filtered_findings = tuple(f for f in result.findings if f.file in rel_keys) filtered_cross = tuple(cf for cf in result.cross_file if cf.file_1 in rel_keys or cf.file_2 in rel_keys) + filtered_per_file = tuple(fa for fa in result.per_file_analysis if fa.file in rel_keys) severity_counts = Counter(f.severity for f in filtered_findings) stats = CombinedStats( total_findings=len(filtered_findings), @@ -338,7 +40,27 @@ def filter_result_to_focus(result: Any, focus_paths: set[Path], project_root: Pa client_check_count=result.stats.client_check_count, server_diagnostic_count=result.stats.server_diagnostic_count, ) - return _replace(result, findings=filtered_findings, cross_file=filtered_cross, stats=stats) + return _replace( + result, + findings=filtered_findings, + cross_file=filtered_cross, + stats=stats, + per_file_analysis=filtered_per_file, + ) + + +def filter_ruleset_map_to_paths(ruleset_map: Any, focus_paths: set[Path], project_root: Path) -> Any: + """Return a RulesetMap restricted to `focus_paths` (matching files + their atoms).""" + from dataclasses import replace as _replace + + if ruleset_map is None or not focus_paths: + return ruleset_map + rel_keys = {str(_to_rel(p, project_root)) for p in focus_paths} + abs_keys = {str(p) for p in focus_paths} + keep = rel_keys | abs_keys + filtered_files = tuple(fr for fr in ruleset_map.files if str(fr.path) in keep) + filtered_atoms = tuple(a for a in ruleset_map.atoms if a.file_path in keep) + return _replace(ruleset_map, files=filtered_files, atoms=filtered_atoms) def _to_rel(path: Path, project_root: Path) -> Path: diff --git 
a/src/reporails_cli/interfaces/cli/main.py b/src/reporails_cli/interfaces/cli/main.py index 866a3b1..5637bd3 100644 --- a/src/reporails_cli/interfaces/cli/main.py +++ b/src/reporails_cli/interfaces/cli/main.py @@ -262,29 +262,23 @@ def check( # noqa: C901 # pylint: disable=too-many-locals ) elapsed_ms = (time.perf_counter() - start_time) * 1000 - # 7. Compute focus paths for per-capability mode - focus_paths, listing_candidates = _resolve_focus_targets( + # 7. Compute focus paths for per-capability mode (single-target OR full capability listing) + focus_paths = _resolve_focus_paths( capability_mode, capability, capability_name, effective_agent, project_root, excl ) - # 8. Display dispatch - display_result = filter_result_to_focus(result, focus_paths, project_root) if focus_paths else result + # 8. Display dispatch — filter result + ruleset_map to focus_paths so the + # standard whole-repo renderer shows the same shape with fewer rows. + from reporails_cli.formatters.text.focus import filter_ruleset_map_to_paths + + if focus_paths: + display_result = filter_result_to_focus(result, focus_paths, project_root) + display_map = filter_ruleset_map_to_paths(ruleset_map, focus_paths, project_root) + else: + display_result = result + display_map = ruleset_map _dispatch_output( - output_format, - display_result, - result, - ruleset_map, - elapsed_ms, - capability_mode, - capability, - capability_name, - effective_agent, - focus_paths, - listing_candidates, - project_root, - ascii, - verbose, - funnel_error, + output_format, display_result, display_map, elapsed_ms, focus_paths, project_root, ascii, verbose, funnel_error ) _show_agent_auto_detect_hint(effective_agent, output_format, assumed, mixed, detected) @@ -293,15 +287,21 @@ def check( # noqa: C901 # pylint: disable=too-many-locals raise typer.Exit(1) -def _resolve_focus_targets( +def _resolve_focus_paths( capability_mode: bool, capability: str, capability_name: str, effective_agent: str, project_root: Path, exclude_dirs: 
list[str] | tuple[str, ...] | None = None, -) -> tuple[set[Path], list[Path]]: - """Compute focus_paths (single-target) or listing_candidates (no name) for capability mode.""" +) -> set[Path]: + """Return the set of files to display. + + Whole-repo run (no capability arg): empty set. Capability listing + (`ails check skill`): every declared target for that capability. + Capability focus (`ails check skill backlog`): the single resolved + target (plus skill expansion for agents). + """ from reporails_cli.core.classify.capability_paths import ( available_capabilities, list_capability_targets, @@ -310,7 +310,7 @@ def _resolve_focus_targets( from reporails_cli.core.classify.focus_expansion import expand_focus if not capability_mode: - return set(), [] + return set() if not _capability_declared(capability, effective_agent, project_root): console.print( f"[red]Error:[/red] capability [bold]{capability}[/bold] is not declared " @@ -319,7 +319,7 @@ def _resolve_focus_targets( ) raise typer.Exit(2) if not capability_name: - return set(), list_capability_targets(effective_agent, capability, project_root, exclude_dirs) + return set(list_capability_targets(effective_agent, capability, project_root, exclude_dirs)) resolved = resolve_capability(effective_agent, capability, capability_name, project_root) if resolved is None: available = list_capability_targets(effective_agent, capability, project_root, exclude_dirs) @@ -333,7 +333,7 @@ def _resolve_focus_targets( focus_paths = {resolved} if capability == "agents": focus_paths = expand_focus(focus_paths, effective_agent, project_root) - return focus_paths, [] + return focus_paths def _capability_declared(capability: str, effective_agent: str, project_root: Path) -> bool: @@ -357,34 +357,22 @@ def _focus_paths_to_strings(focus_paths: set[Path], project_root: Path) -> set[s def _dispatch_output( output_format: str, display_result: Any, - full_result: Any, ruleset_map: Any, elapsed_ms: float, - capability_mode: bool, - capability: 
str, - capability_name: str, - effective_agent: str, focus_paths: set[Path], - listing_candidates: list[Path], project_root: Path, ascii_mode: bool, verbose: bool, funnel_error: Any, ) -> None: - """Route formatted output to JSON / GitHub / focus / listing / default text.""" + """Route formatted output to JSON / GitHub / text. Capability mode uses the same text shape.""" from reporails_cli.formatters import json as json_formatter - from reporails_cli.formatters.text.focus import print_focus_result, print_listing_result if output_format == "json": data = json_formatter.format_combined_result(display_result, ruleset_map=ruleset_map) data["elapsed_ms"] = round(elapsed_ms, 1) - if capability_mode: - data["focus"] = { - "capability": capability, - "name": capability_name, - "agent": effective_agent, - "paths": sorted(_focus_paths_to_strings(focus_paths, project_root)), - } + if focus_paths: + data["focus_paths"] = sorted(_focus_paths_to_strings(focus_paths, project_root)) print(json.dumps(data, indent=2)) return if output_format == "github": @@ -392,29 +380,9 @@ def _dispatch_output( print(github_formatter.format_combined_annotations(display_result)) return - if capability_mode and capability_name: - print_focus_result( - display_result, - capability=capability, - name=capability_name, - agent=effective_agent, - focus_paths=focus_paths, - project_root=project_root, - elapsed_ms=elapsed_ms, - ruleset_map=ruleset_map, - ) - return - if capability_mode: - print_listing_result( - full_result, - capability=capability, - agent=effective_agent, - candidate_paths=listing_candidates, - project_root=project_root, - ruleset_map=ruleset_map, - ) - return - print_text_result(full_result, elapsed_ms, ascii_mode, verbose, ruleset_map=ruleset_map, funnel_error=funnel_error) + print_text_result( + display_result, elapsed_ms, ascii_mode, verbose, ruleset_map=ruleset_map, funnel_error=funnel_error + ) def _should_exit_strict( From e30debe997220e9b3050b95f3ab03679bd285f75 Mon Sep 17 
00:00:00 2001 From: cleverhoods Date: Mon, 18 May 2026 23:50:31 +0200 Subject: [PATCH 19/30] =?UTF-8?q?check:=20strip=20focus/listing=20concept?= =?UTF-8?q?=20=E2=80=94=20one=20display,=20capability=20args=20filter=20th?= =?UTF-8?q?e=20input?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refs #24 --- UNRELEASED.md | 2 +- src/reporails_cli/formatters/text/display.py | 58 ++++++++++++++ src/reporails_cli/formatters/text/focus.py | 80 -------------------- src/reporails_cli/interfaces/cli/main.py | 78 ++++++++++--------- 4 files changed, 102 insertions(+), 116 deletions(-) delete mode 100644 src/reporails_cli/formatters/text/focus.py diff --git a/UNRELEASED.md b/UNRELEASED.md index 083c614..4a0129c 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -20,7 +20,7 @@ - check: `[PATH]` positional argument is now `[ARG1] [ARG2]` — `ARG1` is sniffed as a capability keyword first, falling through to existing path semantics. No behaviour change for `ails check`, `ails check .`, or `ails check `. - agents: Added `CORE:S:0024 import-targets-resolve` to `codex` and `copilot` agent `excludes:` lists — neither agent's instruction files support `@` import syntax per their official documentation, so the rule has no antipattern to detect in those agents. - rules: `CORE:S:0024 import-targets-resolve`, `CORE:S:0033 import-depth-within-limit`, and `CORE:S:0056 broken-markdown-link` severity raised from `medium` to `high` — broken includes, links, and over-depth chains are functional context gaps (referenced content silently fails to load), not stylistic warnings. `CLAUDE:S:0010` and `CURSOR:S:0002` per-agent supersedes updated to match. -- check: Capability focus (`ails check skill backlog`) and capability listing (`ails check skills`) now render with the same shape as the whole-repo `ails check` — same surface sections, file cards, surface-health bars, scorecard — just filtered to the focused subset. 
The standalone focus and listing layouts in `formatters/text/focus.py` were dropped; both modes go through `print_text_result` with the `CombinedResult` and `RulesetMap` filtered to the focus paths. +- check: There is one display. Capability args (`ails check `, `ails check `) narrow the input to that subset; the standard whole-repo renderer prints the same shape with fewer rows. The `formatters/text/focus.py` module was dropped; filters live in `display.py` next to the renderer that uses them. ### Fixed - gemini: `memory` block replaced the retired `## Gemini Added Memories` in-section locator with the current upstream model — private project memory at `~/.gemini/tmp/*/memory/` (`MEMORY.md` + sibling `*.md` notes), mirroring Claude's directory-glob shape. The legacy section header has 0 occurrences in `google-gemini/gemini-cli` source; the locator was targeting a surface that no longer exists. `memory_locator` enumerates entries through the same directory-glob dispatch Claude uses. diff --git a/src/reporails_cli/formatters/text/display.py b/src/reporails_cli/formatters/text/display.py index 49f896a..0557a6e 100644 --- a/src/reporails_cli/formatters/text/display.py +++ b/src/reporails_cli/formatters/text/display.py @@ -517,3 +517,61 @@ def _render_funnel_cta(funnel_error: object) -> None: console.print(f" {cta}") console.print(f" [dim]Did you see an error? Let us know: [link={bug_url}][bold]{bug_label}[/bold][/link][/dim]") console.print() + + +def filter_result_to_paths(result: Any, paths: set[Path], project_root: Path) -> Any: + """Return a CombinedResult containing only rows for `paths`. + + Filters findings, cross-file pairs, and per-file analysis so the + surface-health / file-card / scorecard blocks all see the same set. 
+ """ + from dataclasses import replace as _replace + + from reporails_cli.core.platform.runtime.merger import CombinedStats + + rel_keys = {str(_relativize(p, project_root)) for p in paths} + findings = tuple(f for f in result.findings if f.file in rel_keys) + cross = tuple(cf for cf in result.cross_file if cf.file_1 in rel_keys or cf.file_2 in rel_keys) + per_file = tuple(fa for fa in result.per_file_analysis if fa.file in rel_keys) + sev = Counter(f.severity for f in findings) + stats = CombinedStats( + total_findings=len(findings), + errors=sev.get("error", 0), + warnings=sev.get("warning", 0), + infos=sev.get("info", 0), + cross_file_conflicts=sum(1 for c in cross if c.finding_type == "conflict"), + cross_file_repetitions=sum(1 for c in cross if c.finding_type == "repetition"), + m_probe_count=result.stats.m_probe_count, + client_check_count=result.stats.client_check_count, + server_diagnostic_count=result.stats.server_diagnostic_count, + ) + return _replace(result, findings=findings, cross_file=cross, stats=stats, per_file_analysis=per_file) + + +def filter_ruleset_map_to_paths(ruleset_map: Any, paths: set[Path], project_root: Path) -> Any: + """Return a RulesetMap restricted to `paths` (matching files + their atoms).""" + from dataclasses import replace as _replace + + if ruleset_map is None or not paths: + return ruleset_map + keep = {str(_relativize(p, project_root)) for p in paths} | {str(p) for p in paths} + files = tuple(fr for fr in ruleset_map.files if str(fr.path) in keep) + atoms = tuple(a for a in ruleset_map.atoms if a.file_path in keep) + return _replace(ruleset_map, files=files, atoms=atoms) + + +def _relativize(path: Path, project_root: Path) -> Path: + """Return `path` relative to `project_root` without resolving symlinks. + + Symlinks may point outside the project (e.g. hub-symlinked skills); + resolving would push the path outside `project_root` and force the + fallback. Use textual prefix stripping instead. 
+ """ + try: + return path.relative_to(project_root) + except ValueError: + pass + try: + return Path(path).resolve().relative_to(project_root.resolve()) + except ValueError: + return path diff --git a/src/reporails_cli/formatters/text/focus.py b/src/reporails_cli/formatters/text/focus.py deleted file mode 100644 index 1abe6d5..0000000 --- a/src/reporails_cli/formatters/text/focus.py +++ /dev/null @@ -1,80 +0,0 @@ -"""Focus filters for capability-mode `ails check`. - -Capability mode (`ails check []`) reuses the standard -whole-repo renderer (`print_text_result`); these helpers narrow the -`CombinedResult` and `RulesetMap` to the focused subset of files so the -rendered output has the same shape with fewer rows. -""" - -from __future__ import annotations - -from collections import Counter -from pathlib import Path -from typing import Any - - -def filter_result_to_focus(result: Any, focus_paths: set[Path], project_root: Path) -> Any: - """Return a CombinedResult restricted to the rows in `focus_paths`. - - Filters findings, cross-file pairs, and per-file analysis so the - standard renderer's surface-health / file-card / scorecard blocks - all reflect the same subset. 
- """ - from dataclasses import replace as _replace - - from reporails_cli.core.platform.runtime.merger import CombinedStats - - rel_keys = {str(_to_rel(p, project_root)) for p in focus_paths} - filtered_findings = tuple(f for f in result.findings if f.file in rel_keys) - filtered_cross = tuple(cf for cf in result.cross_file if cf.file_1 in rel_keys or cf.file_2 in rel_keys) - filtered_per_file = tuple(fa for fa in result.per_file_analysis if fa.file in rel_keys) - severity_counts = Counter(f.severity for f in filtered_findings) - stats = CombinedStats( - total_findings=len(filtered_findings), - errors=severity_counts.get("error", 0), - warnings=severity_counts.get("warning", 0), - infos=severity_counts.get("info", 0), - cross_file_conflicts=sum(1 for c in filtered_cross if c.finding_type == "conflict"), - cross_file_repetitions=sum(1 for c in filtered_cross if c.finding_type == "repetition"), - m_probe_count=result.stats.m_probe_count, - client_check_count=result.stats.client_check_count, - server_diagnostic_count=result.stats.server_diagnostic_count, - ) - return _replace( - result, - findings=filtered_findings, - cross_file=filtered_cross, - stats=stats, - per_file_analysis=filtered_per_file, - ) - - -def filter_ruleset_map_to_paths(ruleset_map: Any, focus_paths: set[Path], project_root: Path) -> Any: - """Return a RulesetMap restricted to `focus_paths` (matching files + their atoms).""" - from dataclasses import replace as _replace - - if ruleset_map is None or not focus_paths: - return ruleset_map - rel_keys = {str(_to_rel(p, project_root)) for p in focus_paths} - abs_keys = {str(p) for p in focus_paths} - keep = rel_keys | abs_keys - filtered_files = tuple(fr for fr in ruleset_map.files if str(fr.path) in keep) - filtered_atoms = tuple(a for a in ruleset_map.atoms if a.file_path in keep) - return _replace(ruleset_map, files=filtered_files, atoms=filtered_atoms) - - -def _to_rel(path: Path, project_root: Path) -> Path: - """Return path relative to project_root 
WITHOUT resolving symlinks. - - Symlinks may point outside the project (e.g. hub-symlinked skills); - resolving would push the path outside `project_root` and force the - fallback. Use textual prefix stripping instead. - """ - try: - return path.relative_to(project_root) - except ValueError: - pass - try: - return Path(path).resolve().relative_to(project_root.resolve()) - except ValueError: - return path diff --git a/src/reporails_cli/interfaces/cli/main.py b/src/reporails_cli/interfaces/cli/main.py index 5637bd3..ccaeef1 100644 --- a/src/reporails_cli/interfaces/cli/main.py +++ b/src/reporails_cli/interfaces/cli/main.py @@ -99,9 +99,10 @@ def check( # noqa: C901 # pylint: disable=too-many-locals ) -> None: """Validate AI instruction files against reporails rules. - Per-capability targeting: `ails check skill ` focuses output on a - single skill; `ails check skill` (no name) lists skills with per-target - scores. Capability vocabulary comes from the detected agent's + Capability args narrow which files the standard display shows. + `ails check ` covers every declared target for that + capability; `ails check ` covers the single named + target. Capability vocabulary comes from the detected agent's `framework/rules//config.yml` `file_types:` keys. """ from contextlib import nullcontext @@ -113,11 +114,10 @@ def check( # noqa: C901 # pylint: disable=too-many-locals from reporails_cli.core.platform.adapters.api_client import AilsClient from reporails_cli.core.platform.config.config import get_project_config from reporails_cli.core.platform.runtime.merger import merge_results - from reporails_cli.formatters.text.focus import filter_result_to_focus # Capability-vs-path sniffing: if arg1 matches a capability keyword for - # the detected agent, route to focus / listing mode. Otherwise treat - # arg1 as a path (existing behavior). + # the detected agent, capture it as a path filter for the display. + # Otherwise treat arg1 as a path (existing behavior). 
project_root = Path.cwd().resolve() capability_mode = False capability = "" @@ -262,32 +262,41 @@ def check( # noqa: C901 # pylint: disable=too-many-locals ) elapsed_ms = (time.perf_counter() - start_time) * 1000 - # 7. Compute focus paths for per-capability mode (single-target OR full capability listing) - focus_paths = _resolve_focus_paths( + # 7. Resolve capability args (if any) to a path filter for the display. + capability_paths = _resolve_capability_paths( capability_mode, capability, capability_name, effective_agent, project_root, excl ) - # 8. Display dispatch — filter result + ruleset_map to focus_paths so the - # standard whole-repo renderer shows the same shape with fewer rows. - from reporails_cli.formatters.text.focus import filter_ruleset_map_to_paths + # 8. Filter result + ruleset_map to capability_paths so every rendered + # block (file cards, surface-health, scorecard) sees the same set. + from reporails_cli.formatters.text.display import filter_result_to_paths, filter_ruleset_map_to_paths - if focus_paths: - display_result = filter_result_to_focus(result, focus_paths, project_root) - display_map = filter_ruleset_map_to_paths(ruleset_map, focus_paths, project_root) + if capability_paths: + display_result = filter_result_to_paths(result, capability_paths, project_root) + display_map = filter_ruleset_map_to_paths(ruleset_map, capability_paths, project_root) else: display_result = result display_map = ruleset_map + _dispatch_output( - output_format, display_result, display_map, elapsed_ms, focus_paths, project_root, ascii, verbose, funnel_error + output_format, + display_result, + display_map, + elapsed_ms, + capability_paths, + project_root, + ascii, + verbose, + funnel_error, ) _show_agent_auto_detect_hint(effective_agent, output_format, assumed, mixed, detected) - if _should_exit_strict(strict, capability_mode, focus_paths, project_root, result): + if _should_exit_strict(strict, capability_paths, project_root, result): raise typer.Exit(1) -def 
_resolve_focus_paths( +def _resolve_capability_paths( capability_mode: bool, capability: str, capability_name: str, @@ -295,12 +304,12 @@ def _resolve_focus_paths( project_root: Path, exclude_dirs: list[str] | tuple[str, ...] | None = None, ) -> set[Path]: - """Return the set of files to display. + """Resolve capability args to the set of files the display should cover. - Whole-repo run (no capability arg): empty set. Capability listing - (`ails check skill`): every declared target for that capability. - Capability focus (`ails check skill backlog`): the single resolved - target (plus skill expansion for agents). + No capability arg → empty set (whole project). `ails check ` + → every declared target for that capability. `ails check + ` → the single resolved target (plus subagent skill expansion + for `agents`). """ from reporails_cli.core.classify.capability_paths import ( available_capabilities, @@ -330,10 +339,10 @@ def _resolve_focus_paths( if available: console.print(f"[dim]Found {len(available)} {capability}(s) — run `ails check {capability}` to list.[/dim]") raise typer.Exit(2) - focus_paths = {resolved} + paths = {resolved} if capability == "agents": - focus_paths = expand_focus(focus_paths, effective_agent, project_root) - return focus_paths + paths = expand_focus(paths, effective_agent, project_root) + return paths def _capability_declared(capability: str, effective_agent: str, project_root: Path) -> bool: @@ -350,8 +359,8 @@ def _capability_declared(capability: str, effective_agent: str, project_root: Pa return bool(fold and any(f in decls for f in fold)) -def _focus_paths_to_strings(focus_paths: set[Path], project_root: Path) -> set[str]: - return {str(p.relative_to(project_root)) if p.is_relative_to(project_root) else str(p) for p in focus_paths} +def _relativize_paths(paths: set[Path], project_root: Path) -> set[str]: + return {str(p.relative_to(project_root)) if p.is_relative_to(project_root) else str(p) for p in paths} def _dispatch_output( @@ 
-359,20 +368,20 @@ def _dispatch_output( display_result: Any, ruleset_map: Any, elapsed_ms: float, - focus_paths: set[Path], + capability_paths: set[Path], project_root: Path, ascii_mode: bool, verbose: bool, funnel_error: Any, ) -> None: - """Route formatted output to JSON / GitHub / text. Capability mode uses the same text shape.""" + """Route formatted output to JSON / GitHub / text.""" from reporails_cli.formatters import json as json_formatter if output_format == "json": data = json_formatter.format_combined_result(display_result, ruleset_map=ruleset_map) data["elapsed_ms"] = round(elapsed_ms, 1) - if focus_paths: - data["focus_paths"] = sorted(_focus_paths_to_strings(focus_paths, project_root)) + if capability_paths: + data["capability_paths"] = sorted(_relativize_paths(capability_paths, project_root)) print(json.dumps(data, indent=2)) return if output_format == "github": @@ -387,15 +396,14 @@ def _dispatch_output( def _should_exit_strict( strict: bool, - capability_mode: bool, - focus_paths: set[Path], + capability_paths: set[Path], project_root: Path, result: Any, ) -> bool: if not strict: return False - if capability_mode and focus_paths: - rel_keys = _focus_paths_to_strings(focus_paths, project_root) + if capability_paths: + rel_keys = _relativize_paths(capability_paths, project_root) return any(f.file in rel_keys for f in result.findings) return bool(result.findings) From 9cab118f36ed47364942a2ccbb35125cede3db40 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Tue, 19 May 2026 00:44:44 +0200 Subject: [PATCH 20/30] check: filter quality.compliance_band to subset + suppress duplicate single-surface health bar Refs #24 --- UNRELEASED.md | 2 ++ src/reporails_cli/formatters/text/display.py | 29 +++++++++++++++++-- .../formatters/text/scorecard.py | 14 ++++++--- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index 4a0129c..33eb6a5 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -21,6 +21,8 @@ - agents: Added 
`CORE:S:0024 import-targets-resolve` to `codex` and `copilot` agent `excludes:` lists — neither agent's instruction files support `@` import syntax per their official documentation, so the rule has no antipattern to detect in those agents. - rules: `CORE:S:0024 import-targets-resolve`, `CORE:S:0033 import-depth-within-limit`, and `CORE:S:0056 broken-markdown-link` severity raised from `medium` to `high` — broken includes, links, and over-depth chains are functional context gaps (referenced content silently fails to load), not stylistic warnings. `CLAUDE:S:0010` and `CURSOR:S:0002` per-agent supersedes updated to match. - check: There is one display. Capability args (`ails check `, `ails check `) narrow the input to that subset; the standard whole-repo renderer prints the same shape with fewer rows. The `formatters/text/focus.py` module was dropped; filters live in `display.py` next to the renderer that uses them. +- check: Filter the result's aggregate `quality.compliance_band` to the subset majority when capability args narrow the display. Previously the band leaked from the whole project, so the top `Score:` used the project-wide base while the per-surface health row used the filtered base — the two scores disagreed. +- check: Surface-health row is suppressed when only one surface has data (single capability target / single-surface listing). The top `Score:` already represents that surface; a second bar would just restate it. ### Fixed - gemini: `memory` block replaced the retired `## Gemini Added Memories` in-section locator with the current upstream model — private project memory at `~/.gemini/tmp/*/memory/` (`MEMORY.md` + sibling `*.md` notes), mirroring Claude's directory-glob shape. The legacy section header has 0 occurrences in `google-gemini/gemini-cli` source; the locator was targeting a surface that no longer exists. `memory_locator` enumerates entries through the same directory-glob dispatch Claude uses. 
diff --git a/src/reporails_cli/formatters/text/display.py b/src/reporails_cli/formatters/text/display.py index 0557a6e..69d591a 100644 --- a/src/reporails_cli/formatters/text/display.py +++ b/src/reporails_cli/formatters/text/display.py @@ -522,8 +522,10 @@ def _render_funnel_cta(funnel_error: object) -> None: def filter_result_to_paths(result: Any, paths: set[Path], project_root: Path) -> Any: """Return a CombinedResult containing only rows for `paths`. - Filters findings, cross-file pairs, and per-file analysis so the - surface-health / file-card / scorecard blocks all see the same set. + Filters findings, cross-file pairs, per-file analysis, AND the + aggregate `quality.compliance_band` — without filtering the band, + the top score uses whole-project base while surface-health uses the + filtered base and the two scores disagree. """ from dataclasses import replace as _replace @@ -545,7 +547,28 @@ def filter_result_to_paths(result: Any, paths: set[Path], project_root: Path) -> client_check_count=result.stats.client_check_count, server_diagnostic_count=result.stats.server_diagnostic_count, ) - return _replace(result, findings=findings, cross_file=cross, stats=stats, per_file_analysis=per_file) + quality = _filter_quality(result.quality, per_file) + return _replace( + result, + findings=findings, + cross_file=cross, + stats=stats, + per_file_analysis=per_file, + quality=quality, + ) + + +def _filter_quality(quality: Any, per_file: tuple[Any, ...]) -> Any: + """Rewrite the aggregate `compliance_band` from the filtered per-file bands.""" + if quality is None: + return None + from dataclasses import replace as _replace + + bands = [fa.compliance_band for fa in per_file if fa.compliance_band] + if not bands: + return None + majority = Counter(bands).most_common(1)[0][0] + return _replace(quality, compliance_band=majority) def filter_ruleset_map_to_paths(ruleset_map: Any, paths: set[Path], project_root: Path) -> Any: diff --git 
a/src/reporails_cli/formatters/text/scorecard.py b/src/reporails_cli/formatters/text/scorecard.py index 528c902..589d958 100644 --- a/src/reporails_cli/formatters/text/scorecard.py +++ b/src/reporails_cli/formatters/text/scorecard.py @@ -207,8 +207,13 @@ def _surface_cell(s: SurfaceHealth, bar_width: int = 15) -> str: def _render_surface_health(surfaces: list[SurfaceHealth]) -> None: - """Render compact 2-column per-surface health bars.""" - if not surfaces: + """Render compact 2-column per-surface health bars. + + Single-surface case is suppressed: the top `Score:` already + represents that surface, so a second bar would just restate the + same number. + """ + if len(surfaces) <= 1: return console.print() for i in range(0, len(surfaces), 2): @@ -436,10 +441,11 @@ def print_scorecard( agent_name = agent.title() if agent else "auto" console.print(f" Agent: {agent_name}") + multi_surface = bool(surface_health) and len(surface_health) > 1 if scope is not None: - _render_scope(scope, has_surface_health=bool(surface_health)) + _render_scope(scope, has_surface_health=multi_surface) - if surface_health: + if multi_surface: _render_surface_health(surface_health) _render_top_rules(result) From 7772dbfbfb9373cbe8147bbe16fdb70668341162 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Tue, 19 May 2026 01:11:41 +0200 Subject: [PATCH 21/30] =?UTF-8?q?check:=20per-item=20health=20bars=20for?= =?UTF-8?q?=20capability=20listings=20=E2=80=94=20name=20+=20bar=20per=20i?= =?UTF-8?q?tem,=20sorted=20worst-first?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refs #25 --- UNRELEASED.md | 1 + src/reporails_cli/formatters/text/display.py | 18 ++- .../formatters/text/scorecard.py | 131 +++++++++++++++++- 3 files changed, 145 insertions(+), 5 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index 33eb6a5..53cc2b1 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -23,6 +23,7 @@ - check: There is one display. 
Capability args (`ails check `, `ails check `) narrow the input to that subset; the standard whole-repo renderer prints the same shape with fewer rows. The `formatters/text/focus.py` module was dropped; filters live in `display.py` next to the renderer that uses them. - check: Filter the result's aggregate `quality.compliance_band` to the subset majority when capability args narrow the display. Previously the band leaked from the whole project, so the top `Score:` used the project-wide base while the per-surface health row used the filtered base — the two scores disagreed. - check: Surface-health row is suppressed when only one surface has data (single capability target / single-surface listing). The top `Score:` already represents that surface; a second bar would just restate it. +- check: Per-item health bars in capability listings — `ails check skills` / `ails check rules` / `ails check agents` etc. now render one bar per item (sorted worst-first) where the whole-repo view would render per-surface bars. Operator can see at a glance which item is the worst. 2-column layout collapses to 1-column when names exceed terminal width. ### Fixed - gemini: `memory` block replaced the retired `## Gemini Added Memories` in-section locator with the current upstream model — private project memory at `~/.gemini/tmp/*/memory/` (`MEMORY.md` + sibling `*.md` notes), mirroring Claude's directory-glob shape. The legacy section header has 0 occurrences in `google-gemini/gemini-cli` source; the locator was targeting a surface that no longer exists. `memory_locator` enumerates entries through the same directory-glob dispatch Claude uses. 
diff --git a/src/reporails_cli/formatters/text/display.py b/src/reporails_cli/formatters/text/display.py index 69d591a..1f8075e 100644 --- a/src/reporails_cli/formatters/text/display.py +++ b/src/reporails_cli/formatters/text/display.py @@ -480,8 +480,14 @@ def _render_findings_and_scorecard( tier: str, elapsed_ms: float, ) -> None: - """Render file groups, cross-file coordinates, and the bottom scorecard.""" - from reporails_cli.formatters.text.scorecard import compute_surface_scores + """Render file groups, cross-file coordinates, and the bottom scorecard. + + Scorecard health-bars: multi-surface runs show per-surface; a + single-surface run with multiple files shows per-item bars (so + `ails check skills` lists each skill with its own score); + single-file runs show neither — the top `Score:` covers it. + """ + from reporails_cli.formatters.text.scorecard import compute_item_scores, compute_surface_scores sev_icons = get_sev_icons(ascii_mode) hints_idx = _build_hints_by_file(result.hints, Path.cwd()) @@ -489,6 +495,11 @@ def _render_findings_and_scorecard( _render_file_groups(_build_file_groups(result), sev_icons, verbose, ruleset_map, hints_idx, aliases_idx) _render_cross_file_coordinates(result, sev_icons) + surfaces = compute_surface_scores(result, ruleset_map=ruleset_map, project_root=Path.cwd()) + item_health = None + if len(surfaces) == 1 and surfaces[0].file_count > 1: + item_health = compute_item_scores(result, ruleset_map=ruleset_map, project_root=Path.cwd()) + print_scorecard( result, has_quality, @@ -497,7 +508,8 @@ def _render_findings_and_scorecard( elapsed_ms=elapsed_ms, agent=_detect_agent_name(ruleset_map), scope=scope, - surface_health=compute_surface_scores(result, ruleset_map=ruleset_map, project_root=Path.cwd()), + surface_health=surfaces, + item_health=item_health, ) diff --git a/src/reporails_cli/formatters/text/scorecard.py b/src/reporails_cli/formatters/text/scorecard.py index 589d958..d3804ad 100644 --- 
a/src/reporails_cli/formatters/text/scorecard.py +++ b/src/reporails_cli/formatters/text/scorecard.py @@ -193,6 +193,87 @@ def compute_surface_scores( return surfaces +def compute_item_scores( + result: Any, + ruleset_map: Any, + project_root: Any = None, +) -> list[SurfaceHealth]: + """Per-file health scores — name + bar per scanned file. + + Used by capability-listing mode (`ails check `) so the + operator sees which item is the worst at a glance. Score uses the + same formula as `compute_surface_scores` but at file granularity: + per-file compliance band, per-file errors/warnings/infos, per-file + atom count from `per_file_analysis`. + """ + from pathlib import Path + + from reporails_cli.core.platform.runtime.merger import normalize_finding_path + + if ruleset_map is None: + return [] + root = Path(project_root) if project_root is not None else Path.cwd() + + findings_by_file: dict[str, list[Any]] = {} + for f in result.findings: + findings_by_file.setdefault(f.file, []).append(f) + analysis_by_file: dict[str, Any] = {fa.file: fa for fa in result.per_file_analysis} + + items: list[SurfaceHealth] = [] + try: + files = list(ruleset_map.files) + except (AttributeError, TypeError): + return [] + for fr in files: + rel = normalize_finding_path(str(fr.path), root) + findings = findings_by_file.get(rel, []) + analysis = analysis_by_file.get(rel) + n_errors = sum(1 for f in findings if f.severity == "error") + n_warnings = sum(1 for f in findings if f.severity == "warning") + n_infos = sum(1 for f in findings if f.severity == "info") + n_atoms = (analysis.stats.get("atoms", 0) if analysis else 0) or 0 + band = analysis.compliance_band if analysis else "" + + if n_errors + n_warnings + n_infos == 0: + score = 10.0 + else: + base = 6.0 + if band: + base = 8.5 if band == "HIGH" else 5.5 if band == "MODERATE" else 3.0 + denom = max(n_atoms, n_errors + n_warnings + n_infos, 1) + penalty = min(4.0, (n_errors / denom) * 30) + min(2.0, (n_warnings / denom) * 2) + score = 
round(max(0.0, min(10.0, base - penalty)), 1) + + items.append( + SurfaceHealth( + name=_display_name_for_path(rel), + score=score, + file_count=1, + finding_count=len(findings), + errors=n_errors, + warnings=n_warnings, + infos=n_infos, + ) + ) + items.sort(key=lambda it: (it.score, it.name)) # worst first, alphabetical tiebreak + return items + + +def _display_name_for_path(rel: str) -> str: + """Return the per-item display label for a path. + + Skills (`.claude/skills//SKILL.md`) → `` (parent dir). + Everything else → file stem so `git.md` → `git`, + `agent-config-staleness.md` → `agent-config-staleness`. + """ + from pathlib import Path + + p = Path(rel) + if p.name == "SKILL.md": + return p.parent.name + return p.stem + + def _surface_cell(s: SurfaceHealth, bar_width: int = 15) -> str: """Format one surface as a Rich-markup cell: 'Name (N): ▓▓▓▓▓▓▓▓▓▓▓░░░░ 7.2'. @@ -223,6 +304,43 @@ def _render_surface_health(surfaces: list[SurfaceHealth]) -> None: console.print(f" {left}{sep}{right}") +def _item_cell(s: SurfaceHealth, label_w: int, bar_width: int = 15) -> str: + """Format one item as a Rich-markup cell: ': ▓▓▓▓░░░░░░░░░░░ 4.2'.""" + label = f"{s.name}:" + filled = round(bar_width * s.score / 10) + bar = "▓" * filled + "░" * (bar_width - filled) + color = "green" if s.score >= 7.0 else "yellow" if s.score >= 4.0 else "red" + return f"{label:<{label_w}} [{color}]{bar}[/{color}] [{color} bold]{s.score:>4.1f}[/{color} bold]" + + +def _render_item_health(items: list[SurfaceHealth]) -> None: + """Render per-item health bars in 1- or 2-column layout (capability-listing mode). + + Cell width is `label + bar + score ≈ label_w + bar_width + 8`. Two + cells plus a 4-space separator must fit the terminal; otherwise + fall back to one column so the bar and score never wrap onto a new + line behind the label. 
+ """ + if not items: + return + label_w = max(len(s.name) for s in items) + 2 # name + ": " + bar_width = 15 + cell_w = label_w + bar_width + 8 + tw = get_term_width() + use_two_columns = (cell_w * 2 + 4) <= tw + + console.print() + if use_two_columns: + for i in range(0, len(items), 2): + left = _item_cell(items[i], label_w, bar_width) + right = _item_cell(items[i + 1], label_w, bar_width) if i + 1 < len(items) else "" + sep = " " if right else "" + console.print(f" {left}{sep}{right}") + else: + for s in items: + console.print(f" {_item_cell(s, label_w, bar_width)}") + + # ── Category bars ───────────────────────────────────────────────────── @@ -430,8 +548,14 @@ def print_scorecard( agent: str = "", scope: ScopeInfo | None = None, surface_health: list[SurfaceHealth] | None = None, + item_health: list[SurfaceHealth] | None = None, ) -> None: - """Print the bottom scorecard — the payoff users scroll to.""" + """Print the bottom scorecard — the payoff users scroll to. + + Exactly one of {surface_health (multi-surface), item_health + (capability listing)} renders below the scope block. Single-surface + single-file runs render neither — the top `Score:` covers it. 
+ """ hint_errors, hint_warnings = _hint_totals(result) console.print(f" [dim]\u2500\u2500 Summary {HRULE}[/dim]\n") @@ -442,11 +566,14 @@ def print_scorecard( console.print(f" Agent: {agent_name}") multi_surface = bool(surface_health) and len(surface_health) > 1 + has_items = bool(item_health) and len(item_health) > 1 if scope is not None: - _render_scope(scope, has_surface_health=multi_surface) + _render_scope(scope, has_surface_health=multi_surface or has_items) if multi_surface: _render_surface_health(surface_health) + elif has_items: + _render_item_health(item_health) _render_top_rules(result) From d7ef46dc8f1f26c6e924f97509dc25e0696a9fbe Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Tue, 19 May 2026 01:38:24 +0200 Subject: [PATCH 22/30] check: finding-count breakdown (N: Xe/Yw/Zi) after each item health score Refs #25 --- UNRELEASED.md | 1 + .../formatters/text/scorecard.py | 63 +++++++++++++++---- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index 53cc2b1..6970fb1 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -24,6 +24,7 @@ - check: Filter the result's aggregate `quality.compliance_band` to the subset majority when capability args narrow the display. Previously the band leaked from the whole project, so the top `Score:` used the project-wide base while the per-surface health row used the filtered base — the two scores disagreed. - check: Surface-health row is suppressed when only one surface has data (single capability target / single-surface listing). The top `Score:` already represents that surface; a second bar would just restate it. - check: Per-item health bars in capability listings — `ails check skills` / `ails check rules` / `ails check agents` etc. now render one bar per item (sorted worst-first) where the whole-repo view would render per-surface bars. Operator can see at a glance which item is the worst. 2-column layout collapses to 1-column when names exceed terminal width. 
+- check: Each item-health row carries a finding-count breakdown `(N: Xe/Yw/Zi)` after the score — severity-colored, zero counts omitted. Operator sees both severity (the bar) and effort (the count) on one line, so they can distinguish "low score but only 3 findings" from "low score, 54 findings." ### Fixed - gemini: `memory` block replaced the retired `## Gemini Added Memories` in-section locator with the current upstream model — private project memory at `~/.gemini/tmp/*/memory/` (`MEMORY.md` + sibling `*.md` notes), mirroring Claude's directory-glob shape. The legacy section header has 0 occurrences in `google-gemini/gemini-cli` source; the locator was targeting a surface that no longer exists. `memory_locator` enumerates entries through the same directory-glob dispatch Claude uses. diff --git a/src/reporails_cli/formatters/text/scorecard.py b/src/reporails_cli/formatters/text/scorecard.py index d3804ad..e9f455b 100644 --- a/src/reporails_cli/formatters/text/scorecard.py +++ b/src/reporails_cli/formatters/text/scorecard.py @@ -304,41 +304,82 @@ def _render_surface_health(surfaces: list[SurfaceHealth]) -> None: console.print(f" {left}{sep}{right}") -def _item_cell(s: SurfaceHealth, label_w: int, bar_width: int = 15) -> str: - """Format one item as a Rich-markup cell: ': ▓▓▓▓░░░░░░░░░░░ 4.2'.""" +def _item_cell(s: SurfaceHealth, label_w: int, breakdown_w: int, bar_width: int = 15) -> str: + """Format one item: ': ▓▓▓▓░░░░░░░░░░░ 4.2 (N: Xe/Yw/Zi)'. + + `breakdown_w` is the visible-width budget for the trailing + severity breakdown so 2-column layouts align across rows even when + individual items have different counts. 
+ """ label = f"{s.name}:" filled = round(bar_width * s.score / 10) bar = "▓" * filled + "░" * (bar_width - filled) color = "green" if s.score >= 7.0 else "yellow" if s.score >= 4.0 else "red" - return f"{label:<{label_w}} [{color}]{bar}[/{color}] [{color} bold]{s.score:>4.1f}[/{color} bold]" + raw_breakdown = _severity_breakdown_plain(s) + rendered_breakdown = _severity_breakdown_markup(s) + pad = " " * max(0, breakdown_w - len(raw_breakdown)) + return ( + f"{label:<{label_w}} [{color}]{bar}[/{color}] " + f"[{color} bold]{s.score:>4.1f}[/{color} bold] {rendered_breakdown}{pad}" + ) + + +def _severity_breakdown_plain(s: SurfaceHealth) -> str: + """Visible-width representation: '(N: Xe/Yw/Zi)' — skips zero severities.""" + if s.finding_count == 0: + return "" + parts = [] + if s.errors: + parts.append(f"{s.errors}e") + if s.warnings: + parts.append(f"{s.warnings}w") + if s.infos: + parts.append(f"{s.infos}i") + return f"({s.finding_count}: {'/'.join(parts)})" if parts else f"({s.finding_count})" + + +def _severity_breakdown_markup(s: SurfaceHealth) -> str: + """Rich-markup version of the breakdown — same characters, severity colors.""" + if s.finding_count == 0: + return "" + parts = [] + if s.errors: + parts.append(f"[red]{s.errors}e[/red]") + if s.warnings: + parts.append(f"[yellow]{s.warnings}w[/yellow]") + if s.infos: + parts.append(f"[dim]{s.infos}i[/dim]") + inner = "/".join(parts) if parts else "" + return f"[dim]({s.finding_count}: {inner})[/dim]" if inner else f"[dim]({s.finding_count})[/dim]" def _render_item_health(items: list[SurfaceHealth]) -> None: """Render per-item health bars in 1- or 2-column layout (capability-listing mode). - Cell width is `label + bar + score ≈ label_w + bar_width + 8`. Two - cells plus a 4-space separator must fit the terminal; otherwise - fall back to one column so the bar and score never wrap onto a new - line behind the label. 
+ Each cell carries `label + bar + score + (N: Xe/Yw/Zi)` so the operator + sees both severity (the bar / score) and effort (finding count + mix). + 2-column layout collapses to 1-column when names + breakdown widths + exceed the terminal width. """ if not items: return label_w = max(len(s.name) for s in items) + 2 # name + ": " + breakdown_w = max(len(_severity_breakdown_plain(s)) for s in items) bar_width = 15 - cell_w = label_w + bar_width + 8 + cell_w = label_w + 1 + bar_width + 2 + 4 + 2 + breakdown_w tw = get_term_width() use_two_columns = (cell_w * 2 + 4) <= tw console.print() if use_two_columns: for i in range(0, len(items), 2): - left = _item_cell(items[i], label_w, bar_width) - right = _item_cell(items[i + 1], label_w, bar_width) if i + 1 < len(items) else "" + left = _item_cell(items[i], label_w, breakdown_w, bar_width) + right = _item_cell(items[i + 1], label_w, breakdown_w, bar_width) if i + 1 < len(items) else "" sep = " " if right else "" console.print(f" {left}{sep}{right}") else: for s in items: - console.print(f" {_item_cell(s, label_w, bar_width)}") + console.print(f" {_item_cell(s, label_w, breakdown_w, bar_width)}") # ── Category bars ───────────────────────────────────────────────────── From 9c2d7d130a4830abb4ba3a2fc8d6372b6df4427a Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Tue, 19 May 2026 02:09:06 +0200 Subject: [PATCH 23/30] =?UTF-8?q?check:=20item=20health=20bars=20one-per-l?= =?UTF-8?q?ine=20=E2=80=94=20drop=202-column=20layout=20+=20trailing=20pad?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refs #25 --- UNRELEASED.md | 2 +- .../formatters/text/scorecard.py | 57 +++---------------- 2 files changed, 10 insertions(+), 49 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index 6970fb1..54b8d5c 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -23,7 +23,7 @@ - check: There is one display. 
Capability args (`ails check `, `ails check `) narrow the input to that subset; the standard whole-repo renderer prints the same shape with fewer rows. The `formatters/text/focus.py` module was dropped; filters live in `display.py` next to the renderer that uses them. - check: Filter the result's aggregate `quality.compliance_band` to the subset majority when capability args narrow the display. Previously the band leaked from the whole project, so the top `Score:` used the project-wide base while the per-surface health row used the filtered base — the two scores disagreed. - check: Surface-health row is suppressed when only one surface has data (single capability target / single-surface listing). The top `Score:` already represents that surface; a second bar would just restate it. -- check: Per-item health bars in capability listings — `ails check skills` / `ails check rules` / `ails check agents` etc. now render one bar per item (sorted worst-first) where the whole-repo view would render per-surface bars. Operator can see at a glance which item is the worst. 2-column layout collapses to 1-column when names exceed terminal width. +- check: Per-item health bars in capability listings — `ails check skills` / `ails check rules` / `ails check agents` etc. now render one bar per item (sorted worst-first) where the whole-repo view would render per-surface bars. Operator can see at a glance which item is the worst. One item per line — scannable top-down without horizontal eye movement. - check: Each item-health row carries a finding-count breakdown `(N: Xe/Yw/Zi)` after the score — severity-colored, zero counts omitted. Operator sees both severity (the bar) and effort (the count) on one line, so they can distinguish "low score but only 3 findings" from "low score, 54 findings." 
### Fixed diff --git a/src/reporails_cli/formatters/text/scorecard.py b/src/reporails_cli/formatters/text/scorecard.py index e9f455b..84036a1 100644 --- a/src/reporails_cli/formatters/text/scorecard.py +++ b/src/reporails_cli/formatters/text/scorecard.py @@ -304,42 +304,22 @@ def _render_surface_health(surfaces: list[SurfaceHealth]) -> None: console.print(f" {left}{sep}{right}") -def _item_cell(s: SurfaceHealth, label_w: int, breakdown_w: int, bar_width: int = 15) -> str: - """Format one item: ': ▓▓▓▓░░░░░░░░░░░ 4.2 (N: Xe/Yw/Zi)'. - - `breakdown_w` is the visible-width budget for the trailing - severity breakdown so 2-column layouts align across rows even when - individual items have different counts. - """ +def _item_cell(s: SurfaceHealth, label_w: int, bar_width: int = 15) -> str: + """Format one item row: ': ▓▓▓▓░░░░░░░░░░░ 4.2 (N: Xe/Yw/Zi)'.""" label = f"{s.name}:" filled = round(bar_width * s.score / 10) bar = "▓" * filled + "░" * (bar_width - filled) color = "green" if s.score >= 7.0 else "yellow" if s.score >= 4.0 else "red" - raw_breakdown = _severity_breakdown_plain(s) - rendered_breakdown = _severity_breakdown_markup(s) - pad = " " * max(0, breakdown_w - len(raw_breakdown)) + breakdown = _severity_breakdown_markup(s) + suffix = f" {breakdown}" if breakdown else "" return ( f"{label:<{label_w}} [{color}]{bar}[/{color}] " - f"[{color} bold]{s.score:>4.1f}[/{color} bold] {rendered_breakdown}{pad}" + f"[{color} bold]{s.score:>4.1f}[/{color} bold]{suffix}" ) -def _severity_breakdown_plain(s: SurfaceHealth) -> str: - """Visible-width representation: '(N: Xe/Yw/Zi)' — skips zero severities.""" - if s.finding_count == 0: - return "" - parts = [] - if s.errors: - parts.append(f"{s.errors}e") - if s.warnings: - parts.append(f"{s.warnings}w") - if s.infos: - parts.append(f"{s.infos}i") - return f"({s.finding_count}: {'/'.join(parts)})" if parts else f"({s.finding_count})" - - def _severity_breakdown_markup(s: SurfaceHealth) -> str: - """Rich-markup version of the 
breakdown — same characters, severity colors.""" + """Severity-colored breakdown `(N: Xe/Yw/Zi)`; zero severities are omitted.""" if s.finding_count == 0: return "" parts = [] @@ -354,32 +334,13 @@ def _severity_breakdown_markup(s: SurfaceHealth) -> str: def _render_item_health(items: list[SurfaceHealth]) -> None: - """Render per-item health bars in 1- or 2-column layout (capability-listing mode). - - Each cell carries `label + bar + score + (N: Xe/Yw/Zi)` so the operator - sees both severity (the bar / score) and effort (finding count + mix). - 2-column layout collapses to 1-column when names + breakdown widths - exceed the terminal width. - """ + """Render per-item health bars one per line (capability-listing mode).""" if not items: return label_w = max(len(s.name) for s in items) + 2 # name + ": " - breakdown_w = max(len(_severity_breakdown_plain(s)) for s in items) - bar_width = 15 - cell_w = label_w + 1 + bar_width + 2 + 4 + 2 + breakdown_w - tw = get_term_width() - use_two_columns = (cell_w * 2 + 4) <= tw - console.print() - if use_two_columns: - for i in range(0, len(items), 2): - left = _item_cell(items[i], label_w, breakdown_w, bar_width) - right = _item_cell(items[i + 1], label_w, breakdown_w, bar_width) if i + 1 < len(items) else "" - sep = " " if right else "" - console.print(f" {left}{sep}{right}") - else: - for s in items: - console.print(f" {_item_cell(s, label_w, breakdown_w, bar_width)}") + for s in items: + console.print(f" {_item_cell(s, label_w)}") # ── Category bars ───────────────────────────────────────────────────── From e646f2b5f1f17a6cb24fe64b7cfca2a3aa7e3cc1 Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Tue, 19 May 2026 02:25:44 +0200 Subject: [PATCH 24/30] =?UTF-8?q?check:=20split=20score=20bar=20markup=20?= =?UTF-8?q?=E2=80=94=20colored=20fill=20+=20dim=20gray=20empty,=20shared?= =?UTF-8?q?=20=5Fscore=5Fbar=20helper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refs #25 --- UNRELEASED.md 
| 1 + .../formatters/text/scorecard.py | 30 ++++++++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index 54b8d5c..0c6ce7b 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -25,6 +25,7 @@ - check: Surface-health row is suppressed when only one surface has data (single capability target / single-surface listing). The top `Score:` already represents that surface; a second bar would just restate it. - check: Per-item health bars in capability listings — `ails check skills` / `ails check rules` / `ails check agents` etc. now render one bar per item (sorted worst-first) where the whole-repo view would render per-surface bars. Operator can see at a glance which item is the worst. One item per line — scannable top-down without horizontal eye movement. - check: Each item-health row carries a finding-count breakdown `(N: Xe/Yw/Zi)` after the score — severity-colored, zero counts omitted. Operator sees both severity (the bar) and effort (the count) on one line, so they can distinguish "low score but only 3 findings" from "low score, 54 findings." +- check: Score bars (top `Score:`, surface health, item health) split the markup span at the fill boundary — filled `▓` in the score color, empty `░` in dim gray. Previously the entire bar inherited the score color so empty segments looked like muted red/yellow; now every bar shares a consistent gray baseline and only the colored fill varies. ### Fixed - gemini: `memory` block replaced the retired `## Gemini Added Memories` in-section locator with the current upstream model — private project memory at `~/.gemini/tmp/*/memory/` (`MEMORY.md` + sibling `*.md` notes), mirroring Claude's directory-glob shape. The legacy section header has 0 occurrences in `google-gemini/gemini-cli` source; the locator was targeting a surface that no longer exists. `memory_locator` enumerates entries through the same directory-glob dispatch Claude uses. 
diff --git a/src/reporails_cli/formatters/text/scorecard.py b/src/reporails_cli/formatters/text/scorecard.py index 84036a1..5d00127 100644 --- a/src/reporails_cli/formatters/text/scorecard.py +++ b/src/reporails_cli/formatters/text/scorecard.py @@ -281,10 +281,22 @@ def _surface_cell(s: SurfaceHealth, bar_width: int = 15) -> str: under integer rounding — at width 10, scores 6.5-7.4 all map to 7 filled cells. """ label = f"{s.name} ({s.file_count}):" - filled = round(bar_width * s.score / 10) - bar = "\u2593" * filled + "\u2591" * (bar_width - filled) color = "green" if s.score >= 7.0 else "yellow" if s.score >= 4.0 else "red" - return f"{label:13s} [{color}]{bar}[/{color}] [{color} bold]{s.score:>4.1f}[/{color} bold]" + bar = _score_bar(s.score, bar_width, color) + return f"{label:13s} {bar} [{color} bold]{s.score:>4.1f}[/{color} bold]" + + +def _score_bar(score: float, bar_width: int, color: str) -> str: + """Render a score bar with colored fill + dim gray empty. + + Splitting the markup at the fill boundary gives every bar a + consistent gray baseline so the colored fill is the only visual + variable that changes across rows. 
+ """ + filled = round(bar_width * score / 10) + fill = "\u2593" * filled + empty = "\u2591" * (bar_width - filled) + return f"[{color}]{fill}[/{color}][dim]{empty}[/dim]" def _render_surface_health(surfaces: list[SurfaceHealth]) -> None: @@ -307,13 +319,12 @@ def _render_surface_health(surfaces: list[SurfaceHealth]) -> None: def _item_cell(s: SurfaceHealth, label_w: int, bar_width: int = 15) -> str: """Format one item row: ': ▓▓▓▓░░░░░░░░░░░ 4.2 (N: Xe/Yw/Zi)'.""" label = f"{s.name}:" - filled = round(bar_width * s.score / 10) - bar = "▓" * filled + "░" * (bar_width - filled) color = "green" if s.score >= 7.0 else "yellow" if s.score >= 4.0 else "red" + bar = _score_bar(s.score, bar_width, color) breakdown = _severity_breakdown_markup(s) suffix = f" {breakdown}" if breakdown else "" return ( - f"{label:<{label_w}} [{color}]{bar}[/{color}] " + f"{label:<{label_w}} {bar} " f"[{color} bold]{s.score:>4.1f}[/{color} bold]{suffix}" ) @@ -389,15 +400,14 @@ def _render_score_bar( n_atoms: int, elapsed_ms: float, ) -> None: - """Render score line with progress bar.""" + """Render score line with progress bar (colored fill + dim empty).""" tw = get_term_width() score = compute_score(result, has_quality, n_atoms) bar_width = min(30, tw - 40) - filled = round(bar_width * score / 10) - bar = "\u2593" * filled + "\u2591" * (bar_width - filled) color = "green" if score >= 7.0 else "yellow" if score >= 4.0 else "red" + bar = _score_bar(score, bar_width, color) elapsed_s = f" [dim]({elapsed_ms / 1000:.1f}s)[/dim]" if elapsed_ms else "" - console.print(f" Score: [{color} bold]{score:.1f}[/{color} bold] / 10 [dim]{bar}[/dim]{elapsed_s}") + console.print(f" Score: [{color} bold]{score:.1f}[/{color} bold] / 10 {bar}{elapsed_s}") @dataclass From 58aecc6dc5b762aeb9f3765bd7b95ca088799b9d Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Tue, 19 May 2026 02:37:31 +0200 Subject: [PATCH 25/30] check: blank line between item-health severity bands (red/yellow/green chunking) Refs #25 --- 
UNRELEASED.md | 1 + src/reporails_cli/formatters/text/scorecard.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index 0c6ce7b..a85c67d 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -26,6 +26,7 @@ - check: Per-item health bars in capability listings — `ails check skills` / `ails check rules` / `ails check agents` etc. now render one bar per item (sorted worst-first) where the whole-repo view would render per-surface bars. Operator can see at a glance which item is the worst. One item per line — scannable top-down without horizontal eye movement. - check: Each item-health row carries a finding-count breakdown `(N: Xe/Yw/Zi)` after the score — severity-colored, zero counts omitted. Operator sees both severity (the bar) and effort (the count) on one line, so they can distinguish "low score but only 3 findings" from "low score, 54 findings." - check: Score bars (top `Score:`, surface health, item health) split the markup span at the fill boundary — filled `▓` in the score color, empty `░` in dim gray. Previously the entire bar inherited the score color so empty segments looked like muted red/yellow; now every bar shares a consistent gray baseline and only the colored fill varies. +- check: Item-health listing inserts a blank line between severity bands (red → yellow → green) so the eye chunks the list into "needs attention" / "moderate" / "healthy" clusters without adding excessive whitespace. ### Fixed - gemini: `memory` block replaced the retired `## Gemini Added Memories` in-section locator with the current upstream model — private project memory at `~/.gemini/tmp/*/memory/` (`MEMORY.md` + sibling `*.md` notes), mirroring Claude's directory-glob shape. The legacy section header has 0 occurrences in `google-gemini/gemini-cli` source; the locator was targeting a surface that no longer exists. `memory_locator` enumerates entries through the same directory-glob dispatch Claude uses. 
diff --git a/src/reporails_cli/formatters/text/scorecard.py b/src/reporails_cli/formatters/text/scorecard.py index 5d00127..61cd3df 100644 --- a/src/reporails_cli/formatters/text/scorecard.py +++ b/src/reporails_cli/formatters/text/scorecard.py @@ -345,13 +345,23 @@ def _severity_breakdown_markup(s: SurfaceHealth) -> str: def _render_item_health(items: list[SurfaceHealth]) -> None: - """Render per-item health bars one per line (capability-listing mode).""" + """Render per-item health bars one per line with breathing room. + + Adds a blank line between severity bands (red → yellow → green) so + the eye naturally chunks the list into "needs attention", "moderate", + "healthy" clusters. + """ if not items: return label_w = max(len(s.name) for s in items) + 2 # name + ": " console.print() + prev_band: str | None = None for s in items: + band = "red" if s.score < 4.0 else "yellow" if s.score < 7.0 else "green" + if prev_band is not None and band != prev_band: + console.print() console.print(f" {_item_cell(s, label_w)}") + prev_band = band # ── Category bars ───────────────────────────────────────────────────── From c2c9d25bf8635ed55c8d2d2162f57597926d87ee Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Tue, 19 May 2026 05:15:49 +0200 Subject: [PATCH 26/30] Fix lint pipeline scope: code-span strip in extractors, exclude_dirs in glob targets, project-scope attribution Refs #26 --- UNRELEASED.md | 3 ++ .../core/classify/link_walker.py | 21 +++++++++- .../core/lint/mechanical/checks.py | 33 +++++++++++++++- .../core/lint/mechanical/checks_advanced.py | 11 ++++++ .../core/lint/mechanical/runner.py | 39 ++++++++++++++++--- 5 files changed, 98 insertions(+), 9 deletions(-) diff --git a/UNRELEASED.md b/UNRELEASED.md index a85c67d..6990511 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -29,6 +29,9 @@ - check: Item-health listing inserts a blank line between severity bands (red → yellow → green) so the eye chunks the list into "needs attention" / "moderate" / "healthy" 
clusters without adding excessive whitespace. ### Fixed +- check: `CORE:S:0056 broken-markdown-link` and the generic-class link walker now strip fenced code blocks and inline code spans before extracting `[text](path)` references. Previously the rule false-positived on documentation that mentioned link syntax inside backticks — e.g. a `CHANGELOG.md` entry describing `[text](path)` semantics reported a broken link to `path`. Code-span stripping mirrors between `core/lint/mechanical/checks_advanced.py` and `core/classify/link_walker.py` so the broken-target rule and the generic-class classifier agree on what counts as a real link. +- check: Mechanical-check glob targets honor `.ails/config.yml: exclude_dirs`. Previously `_resolve_glob_targets` in `core/lint/mechanical/checks.py` globbed `**/*.md` (and similar patterns declared in `checks.yml` `args.path`) against the project root without applying the project's exclude_dirs filter, so files under excluded directories like `specs/` and `docs/` got scanned by rules that hard-code their own path glob. Project exclude_dirs are now loaded once per root and filtered against every glob result. +- check: Mechanical violation attribution no longer points to `~/.claude/CLAUDE.md` (or any user/managed-scope file) when the project has no project-scope main. `_first_classified_path` and the wildcard-match fallback in `core/lint/mechanical/runner.py` skip user-scope and managed-scope files; `_relativize` now emits `~/` for paths under the home directory instead of bare basename, matching `normalize_finding_path`. Project-wide rules (`CORE:E:0001 total-instruction-size-limit`, `CORE:S:0024 import-targets-resolve`) now attribute to a project-scope file when one exists, and surface honestly when none does. 
- gemini: `memory` block replaced the retired `## Gemini Added Memories` in-section locator with the current upstream model — private project memory at `~/.gemini/tmp/*/memory/` (`MEMORY.md` + sibling `*.md` notes), mirroring Claude's directory-glob shape. The legacy section header has 0 occurrences in `google-gemini/gemini-cli` source; the locator was targeting a surface that no longer exists. `memory_locator` enumerates entries through the same directory-glob dispatch Claude uses. - gemini: All `source:` URLs in the agent config now point to the rendered `geminicli.com` docs site instead of GitHub raw markdown links. 13 file_types updated; no behavior change. - check: Deterministic message text for the broad-scope client check — `client_checks._check_broad_scope` now sorts the matched broad terms before formatting the message, so output is reproducible across runs regardless of `PYTHONHASHSEED`. The set-iteration order previously caused `"Broad terms (any, integrations)"` vs `"Broad terms (integrations, any)"` drift on identical inputs. diff --git a/src/reporails_cli/core/classify/link_walker.py b/src/reporails_cli/core/classify/link_walker.py index e6caaa2..7edbfad 100644 --- a/src/reporails_cli/core/classify/link_walker.py +++ b/src/reporails_cli/core/classify/link_walker.py @@ -21,6 +21,17 @@ # Capture the path without the leading `@`. _IMPORT_RE = re.compile(r"@([\w./-]+)") +# Code-span stripping — `[text](path)` inside backticks is documentation, +# not a real link. Mirror this with `checks_advanced._strip_code_spans`. 
+_CODE_FENCE_RE = re.compile(r"```.*?```", re.DOTALL) +_INLINE_CODE_RE = re.compile(r"`[^`\n]*`") + + +def _strip_code_spans(text: str) -> str: + """Remove fenced code blocks and inline code spans before link extraction.""" + text = _CODE_FENCE_RE.sub("", text) + return _INLINE_CODE_RE.sub("", text) + @dataclass(frozen=True) class LinkEdge: @@ -98,16 +109,22 @@ def _outgoing_links(file_path: Path) -> list[tuple[Path, str]]: logger.debug("link_walker: cannot read %s: %s", file_path, exc) return [] + # Strip code spans so `[text](path)` examples inside backticks don't + # surface as walkable links. `@` imports keep working because the + # import regex runs on the full text (imports inside code spans are + # still imports per Claude's `@import` semantics). + link_text = _strip_code_spans(text) + base_dir = file_path.parent out: list[tuple[Path, str]] = [] - for match in _INLINE_LINK_RE.finditer(text): + for match in _INLINE_LINK_RE.finditer(link_text): target = match.group(1).strip() resolved = _resolve_md_target(base_dir, target) if resolved is not None: out.append((resolved, "read")) - for match in _REF_DEFINITION_RE.finditer(text): + for match in _REF_DEFINITION_RE.finditer(link_text): target = match.group(1).strip() resolved = _resolve_md_target(base_dir, target) if resolved is not None: diff --git a/src/reporails_cli/core/lint/mechanical/checks.py b/src/reporails_cli/core/lint/mechanical/checks.py index c1a6c19..43f7fa4 100644 --- a/src/reporails_cli/core/lint/mechanical/checks.py +++ b/src/reporails_cli/core/lint/mechanical/checks.py @@ -29,20 +29,49 @@ class CheckResult: _glob_cache: dict[tuple[str, str], list[Path]] = {} +_exclude_cache: dict[str, frozenset[str]] = {} def _resolve_glob_targets(pattern: str, root: Path) -> list[Path]: - """Resolve a glob pattern relative to root (cached per session).""" + """Resolve a glob pattern relative to root, filtered by project exclude_dirs.""" key = (pattern, str(root)) cached = _glob_cache.get(key) if cached is not 
None: return cached resolved = str(root / pattern) - result = [Path(p) for p in globmod.glob(resolved, recursive=True)] + matches = [Path(p) for p in globmod.glob(resolved, recursive=True)] + excl = _load_project_excludes(root) + result = [p for p in matches if not _is_under_excluded_dir(p, root, excl)] _glob_cache[key] = result return result +def _load_project_excludes(root: Path) -> frozenset[str]: + """Load exclude_dirs from project config, cached per root.""" + cached = _exclude_cache.get(str(root)) + if cached is not None: + return cached + try: + from reporails_cli.core.platform.config.config import get_project_config + + excl = frozenset(get_project_config(root).exclude_dirs or ()) + except (OSError, ValueError, AttributeError): + excl = frozenset() + _exclude_cache[str(root)] = excl + return excl + + +def _is_under_excluded_dir(path: Path, root: Path, excl: frozenset[str]) -> bool: + """True when any ancestor dir name (relative to root) is in `excl`.""" + if not excl: + return False + try: + rel = path.relative_to(root) + except ValueError: + return False + return any(part in excl for part in rel.parts[:-1]) + + def _get_target_files( args: dict[str, Any], classified_files: list[ClassifiedFile], diff --git a/src/reporails_cli/core/lint/mechanical/checks_advanced.py b/src/reporails_cli/core/lint/mechanical/checks_advanced.py index f96d58e..0d6169c 100644 --- a/src/reporails_cli/core/lint/mechanical/checks_advanced.py +++ b/src/reporails_cli/core/lint/mechanical/checks_advanced.py @@ -356,6 +356,16 @@ def check_import_targets_exist( # classifier agree on what counts as a Markdown link. _INLINE_LINK_RE = re.compile(r"\[(?:[^\]]+)\]\(([^)]+)\)") _REF_DEFINITION_RE = re.compile(r"^\s*\[(?:[^\]]+)\]:\s*(\S+)", re.MULTILINE) +# Code-span stripping — `[text](path)` inside backticks is documentation, +# not a real link. Mirror this with `link_walker._strip_code_spans`. 
+_CODE_FENCE_RE = re.compile(r"```.*?```", re.DOTALL) +_INLINE_CODE_RE = re.compile(r"`[^`\n]*`") + + +def _strip_code_spans(text: str) -> str: + """Remove fenced code blocks and inline code spans before link extraction.""" + text = _CODE_FENCE_RE.sub("", text) + return _INLINE_CODE_RE.sub("", text) def _is_external_link(target: str) -> bool: @@ -395,6 +405,7 @@ def extract_markdown_links( text = match.read_text(encoding="utf-8", errors="replace") except OSError: continue + text = _strip_code_spans(text) rel = match.relative_to(root).as_posix() if match.is_relative_to(root) else str(match) targets: list[str] = [] targets.extend(m.group(1).strip() for m in _INLINE_LINK_RE.finditer(text)) diff --git a/src/reporails_cli/core/lint/mechanical/runner.py b/src/reporails_cli/core/lint/mechanical/runner.py index bc4742c..0e27dc4 100644 --- a/src/reporails_cli/core/lint/mechanical/runner.py +++ b/src/reporails_cli/core/lint/mechanical/runner.py @@ -133,12 +133,17 @@ def run_mechanical_checks( def _relativize(path: Path, root: Path | None) -> str: - """Return path relative to root, or just the name as fallback.""" + """Return path relative to root, or `~/...` for user-scope paths.""" if root is not None: try: return path.relative_to(root).as_posix() except ValueError: pass + if path.is_absolute(): + try: + return "~/" + path.relative_to(Path.home()).as_posix() + except ValueError: + pass return path.name @@ -147,14 +152,31 @@ def _first_classified_path( root: Path | None, *type_names: str, ) -> str | None: - """Return first relative path from classified files matching any type name.""" + """Return first relative path from classified files matching any type name. + + Only return project-scope files (under `root`). User-scope and + managed-scope files are not viable attribution points for project-wide + findings — attributing `Total instruction size exceeds limit` to + `~/.claude/CLAUDE.md` is misleading when the project itself has no + project-scope main file. 
+ """ for type_name in type_names: for cf in classified_files: - if cf.file_type == type_name: + if cf.file_type != type_name: + continue + if root is None or _is_under_root(cf.path, root): return _relativize(cf.path, root) return None +def _is_under_root(path: Path, root: Path) -> bool: + """True when `path` resolves to a location under `root`.""" + try: + return path.resolve().is_relative_to(root.resolve()) + except (OSError, ValueError): + return False + + def resolve_location( rule: Rule, classified_files: list[ClassifiedFile], @@ -195,12 +217,19 @@ def _resolve_location_path( type_names = rule.match.type if isinstance(rule.match.type, list) else [rule.match.type] return _first_classified_path(classified_files, root, *type_names) - # Wildcard match (type is None) — prefer main file, then any file + # Wildcard match (type is None) — prefer main file, then any project-scope file path = _first_classified_path(classified_files, root, "main") if path: return path - if classified_files: + # Fallback: first project-scope file (under root). Skip user-scope and + # managed-scope so project-wide rules don't misattribute findings to + # `~/.claude/CLAUDE.md` when the project has no project-scope main. 
+ if classified_files and root is not None: + for cf in classified_files: + if _is_under_root(cf.path, root): + return _relativize(cf.path, root) + elif classified_files: return _relativize(classified_files[0].path, root) return None From 6a948aae264cbce96569dd42d54e741ce7e3ff6e Mon Sep 17 00:00:00 2001 From: cleverhoods Date: Tue, 19 May 2026 05:16:43 +0200 Subject: [PATCH 27/30] =?UTF-8?q?Add=20capability=20level=20display=20to?= =?UTF-8?q?=20scorecard=20=E2=80=94=20re-align=20engine=20to=20L0=E2=80=93?= =?UTF-8?q?L7=20docs=20ladder;=20drop=20dead=20levels.yml=20+=20schema?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refs #23 --- UNRELEASED.md | 2 + docs/capability-levels.md | 68 +++++++++++++++++++ docs/faq.md | 2 +- docs/score-guide.md | 2 +- framework/registry/levels.yml | 31 --------- framework/schemas/levels.schema.yml | 61 ----------------- hatch_build.py | 1 - src/reporails_cli/core/discovery/features.py | 45 +++++++++++- src/reporails_cli/core/platform/dto/models.py | 15 ++-- .../core/platform/dto/results.py | 8 ++- .../core/platform/policy/levels.py | 65 +++++++++--------- .../core/platform/runtime/merger.py | 5 +- .../formatters/text/scorecard.py | 20 +++--- src/reporails_cli/interfaces/cli/main.py | 10 ++- 14 files changed, 188 insertions(+), 147 deletions(-) create mode 100644 docs/capability-levels.md delete mode 100644 framework/registry/levels.yml delete mode 100644 framework/schemas/levels.schema.yml diff --git a/UNRELEASED.md b/UNRELEASED.md index 6990511..27ad74d 100644 --- a/UNRELEASED.md +++ b/UNRELEASED.md @@ -1,6 +1,7 @@ # Unreleased ### Added +- check: Re-introduced project capability level as a `Level: L#