Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .claude/skills/erk-exec/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ Quick reference for all `erk exec` subcommands.
| `setup-impl` | Consolidated implementation setup. |
| `setup-impl-from-pr` | Set up .erk/impl-context/ folder from GitHub PR in current worktree. |
| `store-tripwire-candidates` | Store tripwire candidates as a metadata comment on a plan. |
| `summarize-impl-failure` | Summarize an implementation failure using Haiku. |
| `track-learn-evaluation` | Track learn evaluation completion on a plan. |
| `track-learn-result` | Track learn workflow result on a plan. |
| `update-issue-body` | Update an issue's body using REST API (avoids GraphQL rate limits). |
Expand Down Expand Up @@ -1297,6 +1298,20 @@ Store tripwire candidates as a metadata comment on a plan.
| `--pr-number` | INTEGER | Yes | Sentinel.UNSET | PR number |
| `--candidates-file` | TEXT | Yes | Sentinel.UNSET | Path to tripwire-candidates.json |

### summarize-impl-failure

Summarize an implementation failure using Haiku.

**Usage:** `erk exec summarize-impl-failure`

**Options:**

| Flag | Type | Required | Default | Description |
| ---------------- | ------- | -------- | -------------- | -------------------------- |
| `--session-file` | PATH | Yes | Sentinel.UNSET | Path to session JSONL file |
| `--pr-number` | INTEGER | Yes | Sentinel.UNSET | PR number |
| `--exit-code` | INTEGER | No | - | Exit code |

### track-learn-evaluation

Track learn evaluation completion on a plan.
Expand Down
22 changes: 22 additions & 0 deletions .github/prompts/impl-failure-summarize.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Analyze this implementation session log and produce a failure diagnosis.

Focus on:

1. What was the agent doing when it stopped (which file, which task)?
2. Did it encounter an error, or did it just stop mid-task?
3. What specific error messages or failures appeared?
4. Which files or operations were involved?

Rules:

- 3-7 concise bullet points
- Use backticks for file paths, commands, and error messages
- Do NOT suggest fixes
- Do NOT include session IDs, timestamps, or GitHub URLs
- If the session is too short to analyze, say so

## Exit Code: {{ EXIT_CODE }}

## Session tail (last entries):

{{ SESSION_TAIL }}
16 changes: 16 additions & 0 deletions .github/workflows/plan-implement.yml
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,22 @@ jobs:
"last_remote_impl_session_id=$SESSION_ID" \
"branch_name=$BRANCH_NAME"

# Diagnostic step: runs only when the implement step did not report success
# AND the session step produced a session file path.
- name: Summarize implementation failure
if: steps.implement.outputs.implementation_success != 'true' && steps.session.outputs.session_file
# A failure of this step is tolerated so it cannot fail the job by itself.
continue-on-error: true
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GH_TOKEN: ${{ github.token }}
SESSION_FILE: ${{ steps.session.outputs.session_file }}
PR_NUMBER: ${{ inputs.pr_number }}
# NOTE(review): if the implement step does not always set `exit_code`, this
# expands to an empty string and `--exit-code ""` would be rejected by the
# command's INTEGER option, leaving SUMMARY empty — confirm it is always set.
EXIT_CODE: ${{ steps.implement.outputs.exit_code }}
# The command's stdout (the markdown summary) is captured and appended to the
# job summary file.
run: |
SUMMARY=$(erk exec summarize-impl-failure \
--session-file "$SESSION_FILE" \
--pr-number "$PR_NUMBER" \
--exit-code "$EXIT_CODE")
echo "$SUMMARY" >> "$GITHUB_STEP_SUMMARY"

- name: Handle implementation outcome
id: handle_outcome
if: steps.implement.outputs.implementation_success == 'true'
Expand Down
4 changes: 4 additions & 0 deletions src/erk/cli/commands/exec/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@
from erk.cli.commands.exec.scripts.store_tripwire_candidates import (
store_tripwire_candidates,
)
from erk.cli.commands.exec.scripts.summarize_impl_failure import (
summarize_impl_failure,
)
from erk.cli.commands.exec.scripts.track_learn_evaluation import (
track_learn_evaluation,
)
Expand Down Expand Up @@ -270,6 +273,7 @@ def exec_group() -> None:
exec_group.add_command(setup_impl, name="setup-impl")
exec_group.add_command(setup_impl_from_pr, name="setup-impl-from-pr")
exec_group.add_command(store_tripwire_candidates, name="store-tripwire-candidates")
exec_group.add_command(summarize_impl_failure, name="summarize-impl-failure")
exec_group.add_command(track_learn_evaluation, name="track-learn-evaluation")
exec_group.add_command(track_learn_result, name="track-learn-result")
exec_group.add_command(update_issue_body, name="update-issue-body")
Expand Down
243 changes: 243 additions & 0 deletions src/erk/cli/commands/exec/scripts/summarize_impl_failure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
"""Summarize implementation failures using Haiku.

Reads a raw session JSONL file, extracts the tail entries, sends them
to Claude Haiku for failure diagnosis, posts the summary as a PR comment,
and prints the summary to stdout for use as a GitHub Actions job summary.

Usage:
erk exec summarize-impl-failure \
--session-file /path/to/session.jsonl --pr-number 42
erk exec summarize-impl-failure \
--session-file /path/to/session.jsonl --pr-number 42 \
--exit-code 1

Output:
Markdown failure summary to stdout (for GITHUB_STEP_SUMMARY)

Exit Codes:
0: Always (diagnostic tool, never blocks workflow)
"""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path

import click

from erk.artifacts.paths import get_bundled_github_dir
from erk_shared.context.helpers import require_cwd, require_prompt_executor
from erk_shared.learn.extraction.session_preprocessing import (
deduplicate_assistant_messages,
generate_compressed_xml,
process_log_content,
)
from erk_shared.subprocess_utils import run_subprocess_with_context


@dataclass(frozen=True)
class SessionTail:
    """Immutable snapshot of the end of a session log, ready to embed in a prompt."""

    # Total entry count as reported by the preprocessing step for the session.
    total_events: int
    # Compressed XML rendering of the retained tail entries.
    last_entries_xml: str
    # True when at least one retained tail entry has type "result".
    has_result_event: bool


def _extract_session_tail(session_file: Path, *, max_entries: int) -> SessionTail | None:
    """Read a JSONL session file and return its last entries as compressed XML.

    Uses the erk_shared session preprocessing pipeline (Stage 1 mechanical
    reduction) — same as the learn sessions workflow: ``process_log_content``,
    then ``deduplicate_assistant_messages``, then ``generate_compressed_xml``.

    Args:
        session_file: Path to session JSONL file
        max_entries: Maximum number of reduced entries to keep from the tail.
            Must be positive: a value of 0 would select *every* entry because
            ``entries[-0:]`` is the whole list.

    Returns:
        SessionTail with compressed XML, or None if the path is not a regular
        file, the file is blank, or preprocessing yields no entries
    """
    # is_file() is False for missing paths AND for directories, so neither
    # case reaches read_text() (a bare exists() check let directories through
    # and crashed the diagnostic instead of reporting "unable to analyze").
    if not session_file.is_file():
        return None

    # A session interrupted mid-write can end in a truncated multi-byte
    # character. Substitute U+FFFD rather than raising UnicodeDecodeError so
    # the rest of the log can still be diagnosed.
    text = session_file.read_text(encoding="utf-8", errors="replace")
    if not text.strip():
        return None

    # Stage 1: mechanical reduction via erk_shared pipeline
    reduced_entries, total_entries, _ = process_log_content(text)
    if not reduced_entries:
        return None

    # Deduplicate assistant messages (deterministic)
    reduced_entries = deduplicate_assistant_messages(reduced_entries)

    # Take the tail for failure diagnosis
    tail_entries = reduced_entries[-max_entries:]

    # Check if session has a result event (indicates natural completion).
    # Only the retained tail is inspected, not the full session.
    has_result_event = any(entry.get("type") == "result" for entry in tail_entries)

    # Generate XML via erk_shared pipeline
    last_entries_xml = generate_compressed_xml(tail_entries)

    return SessionTail(
        total_events=total_entries,
        last_entries_xml=last_entries_xml,
        has_result_event=has_result_event,
    )


def _build_failure_prompt(
    *,
    session_tail: SessionTail,
    exit_code: int | None,
    prompts_dir: Path,
) -> str:
    """Render the failure-diagnosis prompt for the given session tail.

    The template ``prompts/impl-failure-summarize.md`` under ``prompts_dir``
    is used when it exists; otherwise a short built-in prompt is returned.

    Args:
        session_tail: Extracted session tail; only ``last_entries_xml`` is read
        exit_code: Process exit code, rendered as "unknown" when None
        prompts_dir: Directory that contains the ``prompts/`` subdirectory

    Returns:
        Prompt text with the exit code and session tail filled in
    """
    code_label = "unknown" if exit_code is None else str(exit_code)
    tail_xml = session_tail.last_entries_xml
    template_file = prompts_dir / "prompts" / "impl-failure-summarize.md"

    if not template_file.exists():
        # No bundled template available: fall back to the inline prompt.
        return (
            f"Analyze this implementation session that failed with exit code {code_label}. "
            f"What was the agent doing when it stopped? Did it encounter an error?\n\n"
            f"{tail_xml}"
        )

    # The session tail is substituted last, so placeholder-looking text inside
    # the session content is never expanded.
    return (
        template_file.read_text(encoding="utf-8")
        .replace("{{ EXIT_CODE }}", code_label)
        .replace("{{ SESSION_TAIL }}", tail_xml)
    )


def _build_comment_body(*, summary: str, exit_code: int | None, total_events: int) -> str:
    """Assemble the markdown body used for the PR comment and job summary.

    Args:
        summary: Diagnosis text to place under the header
        exit_code: Process exit code, rendered as "unknown" when None
        total_events: Total session events count shown in the stats line

    Returns:
        Markdown text: heading, blank line, stats line, blank line, summary
    """
    code_label = "unknown" if exit_code is None else str(exit_code)
    heading = "## Implementation Failure Summary"
    stats = f"**Exit code:** {code_label} | **Session events:** {total_events}"
    return f"{heading}\n\n{stats}\n\n{summary}"


def _post_failure_comment(*, pr_number: int, comment_body: str, cwd: Path) -> None:
    """Publish the failure summary on the pull request via the ``gh`` CLI.

    Args:
        pr_number: PR number to comment on
        comment_body: Markdown comment body
        cwd: Directory the ``gh`` command is run from
    """
    # Passed as an argument list (no shell), so the markdown body needs no quoting.
    gh_command = ["gh", "pr", "comment", str(pr_number), "--body", comment_body]
    run_subprocess_with_context(
        cmd=gh_command,
        operation_context=f"post impl failure summary on PR #{pr_number}",
        cwd=cwd,
        # check=False is forwarded to the helper; presumably a non-zero `gh`
        # exit is then tolerated rather than raised — verify against the helper.
        check=False,
    )


@click.command(name="summarize-impl-failure")
@click.option(
    "--session-file",
    required=True,
    type=click.Path(path_type=Path),
    help="Path to session JSONL file",
)
@click.option("--pr-number", required=True, type=int, help="PR number")
@click.option("--exit-code", type=int, help="Exit code")
@click.pass_context
def summarize_impl_failure(
    ctx: click.Context,
    *,
    session_file: Path,
    pr_number: int,
    exit_code: int | None,
) -> None:
    """Summarize an implementation failure using Haiku.

    Extracts the tail of the session JSONL, asks Haiku for a diagnosis,
    posts the result as a PR comment, and echoes the same markdown to
    stdout for use as GITHUB_STEP_SUMMARY. When no session tail can be
    extracted, a fixed "unable to analyze" message is published instead
    and Haiku is not called.

    Intended to always exit 0 — this is a diagnostic tool that should never
    block the workflow. The command has no explicit non-zero exit of its own.
    """
    cwd = require_cwd(ctx)
    executor = require_prompt_executor(ctx)

    tail = _extract_session_tail(session_file, max_entries=50)

    if tail is None:
        # Nothing to analyze: publish a fixed message with a zero event count.
        summary = "Session file is empty or not found — unable to analyze failure."
        event_count = 0
    else:
        prompt = _build_failure_prompt(
            session_tail=tail,
            exit_code=exit_code,
            prompts_dir=get_bundled_github_dir(),
        )
        haiku_result = executor.execute_prompt(
            prompt,
            model="claude-haiku-4-5-20251001",
            tools=None,
            cwd=cwd,
            system_prompt=None,
            dangerous=False,
        )

        # Accept the output only when the call succeeded and the text is
        # non-blank after stripping; otherwise use the placeholder.
        diagnosis = ""
        if haiku_result.success and haiku_result.output:
            diagnosis = haiku_result.output.strip()
        summary = diagnosis or "(Failure summarization was unable to produce a diagnosis.)"
        event_count = tail.total_events

    # Single publish path shared by both outcomes: PR comment first, then stdout.
    comment_body = _build_comment_body(
        summary=summary,
        exit_code=exit_code,
        total_events=event_count,
    )
    _post_failure_comment(pr_number=pr_number, comment_body=comment_body, cwd=cwd)
    click.echo(comment_body)
Loading