|
| 1 | +"""ConversationSummarizer — LLM-free conversation turn summarization. |
| 2 | +
|
| 3 | +Inspired by Claude Code's AutoCompact: when a conversation exceeds a token |
| 4 | +budget, older turns are collapsed into a structured summary block. |
| 5 | +
|
| 6 | +Unlike Claude Code (which calls the LLM for summarization), this module uses |
| 7 | +deterministic extraction to avoid API calls and latency: |
| 8 | + - Extracts key decisions, file paths, function names, and error patterns |
| 9 | + - Preserves user instructions and system messages verbatim |
| 10 | + - Collapses assistant responses to their first sentence + action items |
| 11 | + - Collapses tool results to one-line summaries |
| 12 | +
|
| 13 | +The summarized turns are replaced by a single system message with subtype |
| 14 | +``compact_boundary`` (compatible with Claude Code's format) so downstream |
| 15 | +consumers can detect and handle compacted history. |
| 16 | +
|
| 17 | +Part of claw-compactor v8. License: MIT. |
| 18 | +""" |
| 19 | +from __future__ import annotations |
| 20 | + |
| 21 | +import json |
| 22 | +import re |
| 23 | +from typing import Any |
| 24 | + |
| 25 | +from claw_compactor.tokens import estimate_tokens |
| 26 | + |
| 27 | + |
# Summarization fires when total message tokens exceed this fraction of budget.
DEFAULT_TRIGGER_PCT = 0.80

# After summarization, the summary should be at most this many tokens.
MAX_SUMMARY_TOKENS = 20_000

# Keep the N most recent turns unsummarized (a "turn" = one user + one assistant).
DEFAULT_PRESERVE_RECENT_TURNS = 4

# Extraction patterns. The file/function/error patterns are applied to messages
# of every role; only the decision pattern is restricted to assistant messages.
# Absolute paths with an extension, e.g. /src/app.py, optionally quoted/backticked.
_FILE_PATH_RE = re.compile(r'[`"\']?(/[\w./-]+\.\w{1,10})[`"\']?')
# An identifier following a definition keyword (Python/JS/Rust/Go style).
_FUNCTION_RE = re.compile(r'(?:def|function|class|fn|func)\s+(\w+)')
# Loose match: an error keyword, a separator (:, ., or space), then 10-80 chars
# of detail. NOTE(review): no word boundary, so ordinary prose like "bug in ..."
# also matches — presumably a deliberate recall-over-precision choice; confirm.
_ERROR_RE = re.compile(r'(?:Error|Exception|FAIL|error|failed|bug)[:. ]\s*(.{10,80})')
# Up to 120 chars following a decision-announcing phrase (case-insensitive).
_DECISION_RE = re.compile(
    r'(?:decided|decision|chose|choosing|will use|going with|plan is|approach:)\s+(.{10,120})',
    re.IGNORECASE,
)
| 45 | + |
| 46 | + |
def summarize_conversation(
    messages: list[dict[str, Any]],
    token_budget: int = 200_000,
    trigger_pct: float = DEFAULT_TRIGGER_PCT,
    preserve_recent_turns: int = DEFAULT_PRESERVE_RECENT_TURNS,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Summarize older conversation turns if total tokens exceed budget threshold.

    Parameters
    ----------
    messages:
        OpenAI-format message list.
    token_budget:
        The context window size in tokens.
    trigger_pct:
        Fraction of token_budget at which summarization activates.
    preserve_recent_turns:
        Number of recent user+assistant turn pairs to keep verbatim.

    Returns
    -------
    (new_messages, stats) — stats includes total_tokens_before,
    total_tokens_after, turns_summarized, triggered, and threshold.
    The input list is returned unchanged when summarization does not fire.
    """
    total_tokens = _total_message_tokens(messages)
    threshold = int(token_budget * trigger_pct)

    stats: dict[str, Any] = {
        "total_tokens_before": total_tokens,
        "total_tokens_after": total_tokens,
        "turns_summarized": 0,
        "triggered": False,
        "threshold": threshold,
    }

    if total_tokens < threshold:
        return messages, stats

    stats["triggered"] = True

    # Split messages into: system prefix, conversation body, recent tail.
    system_msgs, body_msgs, recent_msgs = _split_messages(
        messages, preserve_recent_turns
    )

    if len(body_msgs) < 2:
        # Not enough compactable history to be worth summarizing.
        return messages, stats

    # Build a deterministic (LLM-free) summary of the body.
    summary_lines = _extract_summary(body_msgs)
    summary_text = "\n".join(summary_lines)

    # Enforce MAX_SUMMARY_TOKENS by dropping trailing lines, keeping at
    # least 5 lines so the header and leading section survive.
    if estimate_tokens(summary_text) > MAX_SUMMARY_TOKENS:
        trimmed = list(summary_lines)
        while estimate_tokens("\n".join(trimmed)) > MAX_SUMMARY_TOKENS and len(trimmed) > 5:
            trimmed.pop()
        summary_text = "\n".join(trimmed) + "\n[...truncated summary]"

    # Replace the body with a single compact_boundary system message.
    # FIX: original_tokens previously counted non-string content as zero
    # tokens while the trigger computation stringified it; both paths now
    # use the same accounting via _message_tokens.
    boundary_msg = _make_compact_boundary(
        summary_text,
        turns_summarized=len(body_msgs),
        original_tokens=_total_message_tokens(body_msgs),
    )

    # Reassemble: system prefix + boundary + preserved recent tail.
    new_messages = system_msgs + [boundary_msg] + recent_msgs

    stats["total_tokens_after"] = _total_message_tokens(new_messages)
    stats["turns_summarized"] = len(body_msgs)

    return new_messages, stats


def _message_tokens(msg: dict[str, Any]) -> int:
    """Token estimate for one message; non-string content is stringified first."""
    content = msg.get("content", "")
    if not isinstance(content, str):
        content = str(content)
    return estimate_tokens(content)


def _total_message_tokens(messages: list[dict[str, Any]]) -> int:
    """Sum of per-message token estimates over *messages*."""
    return sum(_message_tokens(m) for m in messages)
| 129 | + |
| 130 | + |
| 131 | +def _split_messages( |
| 132 | + messages: list[dict[str, Any]], |
| 133 | + preserve_recent_turns: int, |
| 134 | +) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]]]: |
| 135 | + """Split messages into (system_prefix, compactable_body, recent_tail).""" |
| 136 | + # System messages at the start. |
| 137 | + system_msgs: list[dict[str, Any]] = [] |
| 138 | + i = 0 |
| 139 | + while i < len(messages) and messages[i].get("role") == "system": |
| 140 | + system_msgs.append(messages[i]) |
| 141 | + i += 1 |
| 142 | + |
| 143 | + remaining = messages[i:] |
| 144 | + |
| 145 | + # Count turns from the end (a turn = user msg followed by any non-user msgs). |
| 146 | + if preserve_recent_turns <= 0: |
| 147 | + return system_msgs, remaining, [] |
| 148 | + |
| 149 | + # Walk backwards counting user messages as turn boundaries. |
| 150 | + turns_found = 0 |
| 151 | + split_idx = len(remaining) |
| 152 | + for j in range(len(remaining) - 1, -1, -1): |
| 153 | + if remaining[j].get("role") == "user": |
| 154 | + turns_found += 1 |
| 155 | + if turns_found >= preserve_recent_turns: |
| 156 | + split_idx = j |
| 157 | + break |
| 158 | + |
| 159 | + body = remaining[:split_idx] |
| 160 | + recent = remaining[split_idx:] |
| 161 | + return system_msgs, body, recent |
| 162 | + |
| 163 | + |
def _extract_summary(messages: list[dict[str, Any]]) -> list[str]:
    """Extract a structured summary from a list of conversation messages.

    Returns markdown-style lines: a fixed header plus capped sections for
    user instructions, key decisions, actions taken, files, functions, and
    errors. Extraction is purely regex/heuristic — no LLM calls.
    """
    decisions: list[str] = []
    files_mentioned: set[str] = set()
    functions_mentioned: set[str] = set()
    errors: list[str] = []
    user_instructions: list[str] = []
    actions_taken: list[str] = []

    for msg in messages:
        role = msg.get("role", "")
        content = msg.get("content", "")
        if not isinstance(content, str):
            content = str(content)

        if role == "user":
            # Preserve user instructions (first 200 chars each).
            trimmed = content.strip()[:200]
            if trimmed:
                user_instructions.append(trimmed)

        elif role == "assistant":
            # Decisions the assistant announced.
            for m in _DECISION_RE.finditer(content):
                decisions.append(m.group(1).strip())
            # First line (capped at 150 chars) stands in for the response.
            first_sentence = content.split("\n")[0][:150].strip()
            if first_sentence:
                actions_taken.append(first_sentence)

        elif role == "tool":
            # Tool results collapse to a one-line size note.
            tool_name = msg.get("name", "tool")
            token_count = estimate_tokens(content)
            actions_taken.append(f"[{tool_name}: {token_count} tokens]")

        # File paths, function names, and errors are harvested from any role.
        files_mentioned.update(_FILE_PATH_RE.findall(content))
        functions_mentioned.update(_FUNCTION_RE.findall(content))
        for m in _ERROR_RE.finditer(content):
            errors.append(m.group(1).strip()[:100])

    lines: list[str] = ["## Conversation Summary (auto-compacted)", ""]

    def _section(header: str, items: list[str]) -> None:
        # Append one markdown section; skipped entirely when empty.
        if items:
            lines.append(header)
            lines.extend(items)
            lines.append("")

    _section("### User Instructions", [f"- {s}" for s in user_instructions[-10:]])
    _section("### Key Decisions", [f"- {d}" for d in decisions[-10:]])
    _section("### Actions Taken", [f"- {a}" for a in actions_taken[-15:]])
    _section("### Files Referenced", [f"- `{f}`" for f in sorted(files_mentioned)[:20]])
    # BUG FIX: function names were collected but never rendered, contrary to
    # the module docstring ("Extracts ... function names"). Emit them like
    # files: sorted, backticked, capped at 20.
    _section("### Functions Referenced", [f"- `{fn}`" for fn in sorted(functions_mentioned)[:20]])
    _section("### Errors Encountered", [f"- {e}" for e in errors[-5:]])

    return lines
| 241 | + |
| 242 | + |
def _make_compact_boundary(
    summary: str,
    turns_summarized: int,
    original_tokens: int,
) -> dict[str, Any]:
    """Wrap *summary* in a compact_boundary system message.

    The content is a JSON payload using Claude Code's ``compact_boundary``
    subtype so downstream consumers can detect compacted history.
    """
    # Metadata about what was compacted and how small it got.
    metadata = {
        "turnsSummarized": turns_summarized,
        "originalTokens": original_tokens,
        "compressedTokens": estimate_tokens(summary),
        "preservedSegment": True,
    }
    payload = {
        "type": "system",
        "subtype": "compact_boundary",
        "summary": summary,
        "compactMetadata": metadata,
    }
    return {"role": "system", "content": json.dumps(payload)}