From 88d448c81d1c20b44a2ca09219f234cfc8c67452 Mon Sep 17 00:00:00 2001 From: metaphorics <152830360+metaphorics@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:08:23 +0900 Subject: [PATCH] feat(coding-agent/tools): added ast_grep empty-result hints and enriched AST descriptions - Added a pure ast-pattern-hints module (getPatternHint + regex-misuse and language-shape detectors + best-effort language inference) and wired it into ast_grep so a zero-match result appends a one-line hint naming the likely mistake and routing text/alternation/cross-language searches to the search tool. - Enriched the ast_grep description with an anti-regex routing block and per-language gotchas/examples (Python no-trailing-colon, Go/Rust/TS need params+body), and the ast_edit description with the dry-run to resolve apply flow and its stale-preview guard. - Added AST shape-routing and parallel-exploration guidance to the system prompt, and a focused regression test for the hint detectors and language inference. 
--- packages/coding-agent/CHANGELOG.md | 8 ++ .../src/prompts/system/system-prompt.md | 3 +- .../src/prompts/tools/ast-edit.md | 3 + .../src/prompts/tools/ast-grep.md | 3 + packages/coding-agent/src/tools/ast-grep.ts | 17 ++- .../src/tools/ast-pattern-hints.test.ts | 67 +++++++++++ .../src/tools/ast-pattern-hints.ts | 109 ++++++++++++++++++ 7 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 packages/coding-agent/src/tools/ast-pattern-hints.test.ts create mode 100644 packages/coding-agent/src/tools/ast-pattern-hints.ts diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 93cfaebf9f..4540f34dc5 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -2,6 +2,14 @@ ## [Unreleased] +### Added + +- Added an advisory hint on zero-match `ast_grep` results that names the likely mistake — regex leaking into an AST pattern (`foo|bar`, `.*`, `\w`, `[a-z]`) or a structurally incomplete declaration (a Python pattern with a trailing colon, a bare `function $NAME`) — and routes text/alternation/cross-language searches to `search`. + +### Changed + +- Enriched the `ast_grep` and `ast_edit` tool descriptions (anti-regex routing, per-language `ast_grep` examples, the `ast_edit` dry-run → `resolve` apply flow with its stale-preview guard) and added AST shape-routing and parallel-exploration guidance to the system prompt. + ## [15.13.3] - 2026-06-15 ### Added diff --git a/packages/coding-agent/src/prompts/system/system-prompt.md b/packages/coding-agent/src/prompts/system/system-prompt.md index 76a1f79423..732566dae9 100644 --- a/packages/coding-agent/src/prompts/system/system-prompt.md +++ b/packages/coding-agent/src/prompts/system/system-prompt.md @@ -57,6 +57,7 @@ You NEVER open a file hoping. Hope is not a strategy. 
{{#has tools "find"}}- Use `{{toolRefs.find}}` to map structure.{{/has}} {{#has tools "read"}}- Use `{{toolRefs.read}}` with offset or limit rather than whole-file reads when practical.{{/has}} {{#has tools "task"}}- Use `{{toolRefs.task}}` to map unknown parts of the codebase instead of reading file after file yourself.{{/has}} +{{#has tools "search"}}- When context is thin, fan `{{toolRefs.search}}`{{#has tools "find"}}/`{{toolRefs.find}}`{{/has}}/`{{toolRefs.read}}` calls in parallel to map the surface before reading any file deeply.{{/has}} {{#has tools "lsp"}} # LSP @@ -74,7 +75,7 @@ You NEVER blindly use search or manual edits for code intelligence when a langua You SHOULD use syntax-aware tools before text hacks: {{#has tools "ast_grep"}}- `{{toolRefs.ast_grep}}` for structural discovery{{/has}} {{#has tools "ast_edit"}}- `{{toolRefs.ast_edit}}` for codemods{{/has}} -- You MUST use `search` only for plain text lookup when structure is irrelevant. +- Route by shape: structure → {{#has tools "ast_grep"}}`{{toolRefs.ast_grep}}`{{/has}}{{#has tools "ast_edit"}} / `{{toolRefs.ast_edit}}`{{/has}}{{#has tools "search"}}; literal text, regex, alternation, or cross-language → `{{toolRefs.search}}`{{/has}}{{#has tools "find"}}; filenames → `{{toolRefs.find}}`{{/has}}. Pattern syntax (metavariables, `$$$` spreads) is in each tool's description. {{/ifAny}} diff --git a/packages/coding-agent/src/prompts/tools/ast-edit.md b/packages/coding-agent/src/prompts/tools/ast-edit.md index 66bc3b0cd4..3ed480a485 100644 --- a/packages/coding-agent/src/prompts/tools/ast-edit.md +++ b/packages/coding-agent/src/prompts/tools/ast-edit.md @@ -11,6 +11,8 @@ Performs structural AST-aware rewrites via native ast-grep. 
- For TS declarations/methods, tolerate unknown annotations: `async function $NAME($$$ARGS): $_ { $$$BODY }` or `class $_ { method($ARG: $_): $_ { $$$BODY } }` - Delete matched code with empty `out`: `{"pat":"console.log($$$)","out":""}` - Each rewrite is a 1:1 structural substitution — cannot split one capture across multiple nodes or merge multiple captures into one +- **Regex does NOT work in the rewrite either** — both `pat` and `out` are AST. Captures reuse matched nodes (`{"pat":"console.log($MSG)","out":"logger.info($MSG)"}` carries `$MSG` through), but `|`, `.*`, `\w`, `[a-z]` are literal. For text-only replacement use `edit` +- **Dry-run, then `resolve`.** The call previews the rewrite and stages a pending action; `resolve` applies (or discards) it. No files change until you apply @@ -21,4 +23,5 @@ Performs structural AST-aware rewrites via native ast-grep. - Parse issues mean the rewrite is malformed or mis-scoped — fix the pattern before assuming a clean no-op - For one-off local text edits, you SHOULD prefer the Edit tool +- The apply re-runs the rewrite against the files at apply time and is rejected as a **stale preview** if the match set drifted since the preview (e.g. the file was edited in between) — re-run `ast_edit` to refresh, then `resolve` diff --git a/packages/coding-agent/src/prompts/tools/ast-grep.md b/packages/coding-agent/src/prompts/tools/ast-grep.md index 506c529928..582d85cd7d 100644 --- a/packages/coding-agent/src/prompts/tools/ast-grep.md +++ b/packages/coding-agent/src/prompts/tools/ast-grep.md @@ -8,9 +8,11 @@ Performs structural code search using AST matching via native ast-grep. - **Patterns match AST structure, not text** — whitespace/formatting is ignored - `$NAME` captures one node; `$_` matches one without binding; `$$$NAME` captures zero-or-more (lazy — stops at next matchable element); `$$$` matches zero-or-more without binding. 
Use `$$$NAME`, NOT `$$NAME` — the two-dollar form is invalid and produces a parse error - Metavariable names are UPPERCASE and must be the whole AST node — partial-text like `prefix$VAR`, `"hello $NAME"`, or `a $OP b` does NOT work; match the whole node instead +- **Regex does NOT work** — these match as literal AST and silently miss: `foo|bar` (alternation → one call per alternative), `.*`/`.+` (wildcards → `$$$` between AST fragments), `\w`/`\d`/`\s`/`\b` (escapes → `$VAR` for any identifier), `[a-z]` (class ranges → no AST equivalent). For text, alternation, or cross-language search use `search`; for filename patterns use `find` - When the same metavariable appears twice, both occurrences MUST match identical code (`$A == $A` matches `x == x`, not `x == y`) - Patterns MUST parse as a single valid AST node for the inferred target language. For method fragments or body snippets that don't parse standalone, wrap in valid context (e.g. `class $_ { … }`) - C++ qualified calls used as expression statements need the statement semicolon in the pattern: use `ns::doThing($ARG);`, `$CALLEE($ARG);`, or wrap a statement snippet. Without `;`, tree-sitter-cpp may parse `ns::doThing($ARG)` as declaration-like syntax and return no matches +- Python declarations take NO trailing colon — `def $FUNC($$$)` / `class $C($$$)`, never `def $FUNC($$$):`. 
Go/Rust/TS function patterns need params and a body — `func $NAME($$$) { $$$ }`, `fn $NAME($$$) { $$$ }`, `function $NAME($$$) { $$$ }`, not a bare `function $NAME` - For TS declarations/methods, tolerate unknown annotations: `async function $NAME($$$ARGS): $_ { $$$BODY }` or `class $_ { method($ARG: $_): $_ { $$$BODY } }` - Declaration forms are structurally distinct — top-level `function foo`, class method `foo()`, and `const foo = () => {}` are different AST shapes; search the right form before concluding absence - Loosest existence check: `pat: "executeBash"` with narrow `paths` @@ -26,4 +28,5 @@ Performs structural code search using AST matching via native ast-grep. - AVOID repo-root scans — narrow `paths` first - Parse issues are query failure, not evidence of absence: repair the pattern or tighten `paths` before concluding "no matches" - For broad/open-ended exploration across subsystems, you SHOULD use the Task tool with the explore subagent first +- On zero matches the tool may append a one-line hint naming the likely mistake; if the pattern is fundamentally text-shaped, stop retrying and use `search` diff --git a/packages/coding-agent/src/tools/ast-grep.ts b/packages/coding-agent/src/tools/ast-grep.ts index 6681b798c2..6eb0746505 100644 --- a/packages/coding-agent/src/tools/ast-grep.ts +++ b/packages/coding-agent/src/tools/ast-grep.ts @@ -14,6 +14,7 @@ import astGrepDescription from "../prompts/tools/ast-grep.md" with { type: "text import { Ellipsis, fileHyperlink, renderStatusLine, renderTreeList, truncateToWidth } from "../tui"; import { resolveFileDisplayMode } from "../utils/file-display-mode"; import type { ToolSession } from "."; +import { getPatternHint, inferAstLanguage } from "./ast-pattern-hints"; import { createFileRecorder, formatResultPath } from "./file-recorder"; import { classifyGroupedLines, formatGroupedFiles, groupLineIndicesByBlank } from "./grouped-file-output"; import { formatMatchLine } from "./match-line-format"; @@ -153,6 +154,18 
@@ export class AstGrepTool implements AgentTool $RET { $$$ }", paths: ["src/**/*.rs"] }, + }, ]; readonly loadMode = "discoverable"; @@ -245,9 +258,11 @@ export class AstGrepTool implements AgentTool { + it("flags alternation and routes to search", () => { + const hint = getPatternHint("foo|bar", undefined); + expect(hint).toContain("alternation"); + expect(hint).toContain("search"); + }); + + it("flags wildcards, escapes, and character classes", () => { + expect(getPatternHint("a.*b", undefined)).toContain("wildcard"); + expect(getPatternHint("foo\\w", undefined)).toContain("regex escapes"); + expect(getPatternHint("name[a-z]", undefined)).toContain("character classes"); + }); + + it("stays silent for a valid AST pattern", () => { + expect(getPatternHint("console.log($$$)", undefined)).toBeUndefined(); + expect(getPatternHint("const $X = $Y", "typescript")).toBeUndefined(); + }); + + it("prefers the regex hint over the language hint", () => { + // `foo|bar` is alternation regardless of language context. 
+ expect(getPatternHint("foo|bar", "python")).toContain("alternation"); + }); +}); + +describe("getPatternHint language-specific shape", () => { + it("flags a Python declaration with a trailing colon and shows the fix", () => { + const hint = getPatternHint("def $F($$$):", "python"); + expect(hint).toContain("trailing colon"); + expect(hint).toContain('"def $F($$$)"'); + }); + + it("flags a bare function pattern lacking params and body", () => { + expect(detectLanguageSpecificMistake("function $NAME", "typescript")).toContain("params and a body"); + expect(detectLanguageSpecificMistake("func $NAME", "go")).toContain("params and a body"); + expect(detectLanguageSpecificMistake("fn $NAME", "rust")).toContain("params and a body"); + }); + + it("stays silent without a known language or for a complete pattern", () => { + expect(detectLanguageSpecificMistake("def $F($$$):", undefined)).toBeUndefined(); + expect(detectLanguageSpecificMistake("def $F($$$)", "python")).toBeUndefined(); + }); +}); + +describe("inferAstLanguage", () => { + it("resolves a single extension from globs and files", () => { + expect(inferAstLanguage(["src/**/*.py"])).toBe("python"); + expect(inferAstLanguage(["src/worker.ts"])).toBe("typescript"); + expect(inferAstLanguage(["cmd/main.go"])).toBe("go"); + }); + + it("returns undefined when extensions disagree or are absent", () => { + expect(inferAstLanguage(["src/a.ts", "src/b.tsx"])).toBeUndefined(); + expect(inferAstLanguage(["src/"])).toBeUndefined(); + expect(inferAstLanguage([])).toBeUndefined(); + }); +}); diff --git a/packages/coding-agent/src/tools/ast-pattern-hints.ts b/packages/coding-agent/src/tools/ast-pattern-hints.ts new file mode 100644 index 0000000000..5a2ae6832a --- /dev/null +++ b/packages/coding-agent/src/tools/ast-pattern-hints.ts @@ -0,0 +1,109 @@ +/** + * Advisory hints for `ast_grep` patterns that returned zero matches. 
/**
 * Why these detectors exist: the single most common `ast_grep` failure is
 * treating the pattern as regex or text — `\w`, `[a-z]`, `.*`, `foo|bar` parse
 * as literal AST and never match. The second is a structurally incomplete
 * declaration (a Python pattern with a trailing `:`, a bare `function $NAME`
 * with no params/body). These detectors turn an empty result into a one-line
 * nudge toward the right tool or shape.
 *
 * Every detector is best-effort and side-effect-free: it returns `undefined`
 * when no rule matches, so the caller appends nothing rather than guessing.
 */

/** Language tokens the language-specific detector understands. */
type AstHintLanguage = "python" | "javascript" | "typescript" | "tsx" | "go" | "rust";

/**
 * File extension (lowercase, no dot) → hint language.
 *
 * A `Map` is used instead of a plain object so that an extension which happens
 * to be an inherited `Object.prototype` key (for example `constructor`) is not
 * mistaken for a known language.
 */
const EXTENSION_LANGUAGES: ReadonlyMap<string, AstHintLanguage> = new Map<string, AstHintLanguage>([
	["py", "python"],
	["ts", "typescript"],
	["tsx", "tsx"],
	["js", "javascript"],
	["jsx", "javascript"],
	["mjs", "javascript"],
	["cjs", "javascript"],
	["go", "go"],
	["rs", "rust"],
]);

/** Trailing `.ext` of a path or glob; capture group 1 is the extension. */
const TRAILING_EXTENSION = /\.([a-z0-9]+)$/i;

/** Regex shorthand escapes (`\w`, `\d`, `\s`, `\b` and their negations). */
const REGEX_ESCAPE = /\\[wWdDsSbB]/;

/**
 * A bracketed character class containing at least one `x-y` range, optionally
 * negated and optionally mixed with word characters or escapes: `[a-z]`,
 * `[^0-9]`, `[a-zA-Z0-9_]`. A `$` inside the brackets never matches, so
 * metavariable subscripts such as `[$I-1]` are left alone.
 */
const REGEX_CHARACTER_CLASS = /\[\^?(?:\w|\\.)*[a-zA-Z0-9]-[a-zA-Z0-9](?:[a-zA-Z0-9]-[a-zA-Z0-9]|\w|\\.)*\]/;

/** `.*` / `.+` at the start of the pattern or directly after a word character. */
const REGEX_WILDCARD = /(?:^|\w)\.[*+]/;

/**
 * Two or more NON-EMPTY alternatives joined by `|` with no whitespace. Requiring
 * non-empty alternatives keeps the logical-or expression `a||b` from being
 * reported as regex alternation.
 */
const REGEX_ALTERNATION = /^[-\w.*]+(?:\|[-\w.*]+)+$/;

/** Bare `function $NAME` (optionally `export` / `async`) with nothing after the name. */
const BARE_JS_FUNCTION = /^(export\s+)?(async\s+)?function\s+\$[A-Z_][A-Z0-9_]*\s*$/i;

/** Bare `func $NAME` with nothing after the name. */
const BARE_GO_FUNC = /^func\s+\$[A-Z_][A-Z0-9_]*\s*$/i;

/** Bare `fn $NAME` with nothing after the name. */
const BARE_RUST_FN = /^fn\s+\$[A-Z_][A-Z0-9_]*\s*$/i;

/**
 * Best-effort language inference from the search paths/globs.
 *
 * @param paths - Paths or globs handed to the search.
 * @returns The language shared by every path that ends in a recognized
 * extension, or `undefined` when recognized extensions disagree or none is
 * present. Paths without a recognized extension are ignored rather than
 * treated as a conflict.
 */
export function inferAstLanguage(paths: readonly string[]): AstHintLanguage | undefined {
	let agreed: AstHintLanguage | undefined;
	for (const entry of paths) {
		const extension = TRAILING_EXTENSION.exec(entry)?.[1]?.toLowerCase();
		if (extension === undefined) continue;
		const language = EXTENSION_LANGUAGES.get(extension);
		if (language === undefined) continue;
		// Two recognized extensions that map to different languages: refuse to guess.
		if (agreed !== undefined && agreed !== language) return undefined;
		agreed = language;
	}
	return agreed;
}

/**
 * Detect regex/text constructs that do not work in ast-grep patterns.
 *
 * Checks run in a fixed order — escapes, character classes, wildcards,
 * alternation — and the first one that matches decides the hint.
 *
 * @param pattern - The raw `ast_grep` pattern; surrounding whitespace is ignored.
 * @returns A one-line hint, or `undefined` when nothing regex-like is found.
 */
export function detectRegexMisuse(pattern: string): string | undefined {
	const src = pattern.trim();

	if (REGEX_ESCAPE.test(src)) {
		return 'Hint: "\\w", "\\d", "\\s", "\\b" are regex escapes. ast_grep matches AST nodes, not text — use $VAR for one identifier, $$$ for a node list, or the `search` tool for text.';
	}

	if (REGEX_CHARACTER_CLASS.test(src)) {
		return 'Hint: "[a-z]" and similar character classes are regex, not AST. Use $VAR to match any identifier, or the `search` tool for text search.';
	}

	// A pattern that already uses metavariables is treated as a deliberate AST
	// pattern, so the wildcard check is skipped for it.
	if (!src.includes("$") && REGEX_WILDCARD.test(src)) {
		return 'Hint: ".*" and ".+" are regex wildcards. In ast_grep use $$$ for multiple AST nodes and $VAR for a single node; for text patterns use the `search` tool.';
	}

	if (REGEX_ALTERNATION.test(src)) {
		return 'Hint: "|" is regex alternation and does NOT work in ast_grep patterns. Either fire one ast_grep call per alternative, or use the `search` tool with a regex like "foo|bar".';
	}

	return undefined;
}

/**
 * Detect structurally incomplete declarations for a known language.
 *
 * @param pattern - The raw `ast_grep` pattern; surrounding whitespace is ignored.
 * @param lang - Language to check against; `undefined` disables every check.
 * @returns A one-line hint with a suggested replacement, or `undefined` when
 * the pattern does not look like one of the known incomplete shapes.
 */
export function detectLanguageSpecificMistake(pattern: string, lang: AstHintLanguage | undefined): string | undefined {
	if (!lang) return undefined;
	const src = pattern.trim();

	switch (lang) {
		case "python": {
			const isDeclaration = src.startsWith("class ") || src.startsWith("def ") || src.startsWith("async def ");
			if (isDeclaration && src.endsWith(":")) {
				// Drop the colon and any whitespace that preceded it so the
				// suggestion can be pasted back verbatim.
				const suggestion = src.slice(0, -1).trimEnd();
				return `Hint: drop the trailing colon — ast_grep patterns are not full statements. Try: "${suggestion}"`;
			}
			return undefined;
		}
		case "javascript":
		case "typescript":
		case "tsx":
			return BARE_JS_FUNCTION.test(src)
				? 'Hint: function patterns need params and a body. Try "function $NAME($$$) { $$$ }".'
				: undefined;
		case "go":
			return BARE_GO_FUNC.test(src)
				? 'Hint: Go function patterns need params and a body. Try "func $NAME($$$) { $$$ }".'
				: undefined;
		case "rust":
			return BARE_RUST_FN.test(src)
				? 'Hint: Rust fn patterns need params and a body. Try "fn $NAME($$$) { $$$ }".'
				: undefined;
		default:
			return undefined;
	}
}

/**
 * Return a one-line advisory hint for a zero-match `ast_grep` pattern, or
 * `undefined` when no rule applies. Regex-misuse detection runs first (it needs
 * no language); the language-specific shape check runs only when `lang` is known.
 *
 * @param pattern - The raw `ast_grep` pattern that produced no matches.
 * @param lang - Language inferred for the search, if any.
 */
export function getPatternHint(pattern: string, lang: AstHintLanguage | undefined): string | undefined {
	return detectRegexMisuse(pattern) ?? detectLanguageSpecificMistake(pattern, lang);
}