Skip to content

Commit 8d26315

Browse files
Bot777claude
and committed
feat: tiered conversation compaction inspired by Claude Code architecture
Add three-level conversation compaction system (MicroCompact/AutoCompact/Full) modeled after Claude Code's leaked context management architecture: - ToolResultBudget: age-based tool result truncation with keepRecent=5, exempt tools (MCP/agent/memory), and oversized result trimming - ConversationSummarizer: deterministic conversation summarization that extracts decisions, file paths, errors, and user instructions into compact_boundary system messages (Claude Code compatible format) - TieredCompaction: three-level strategy (micro/auto/full) that selects compaction aggressiveness based on context window pressure (60/80/95%) - CircuitBreaker: prevents infinite compaction loops after 3 consecutive failures (same bug Claude Code discovered wasting 250K API calls/day) - FileAccessTracker: tracks recently accessed files for re-injection after full compaction (5K tokens/file, 30K total budget) New API: engine.compact_messages(messages, token_budget) for conversation- level compaction. Original compress_messages() unchanged for per-message text compression. 44 new tests, 1697 total passed. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 004b294 commit 8d26315

File tree

8 files changed

+1264
-0
lines changed

8 files changed

+1264
-0
lines changed

scripts/lib/fusion/__init__.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,24 @@
3535
"FusionResult",
3636
"FusionPipelineResult",
3737
]
38+
39+
# v8: Conversation-level compaction (inspired by Claude Code architecture)
# Re-exported here so callers can import the compaction API from the
# package root instead of reaching into individual submodules.
from claw_compactor.fusion.tool_result_budget import budget_tool_results
from claw_compactor.fusion.conversation_summarizer import summarize_conversation
from claw_compactor.fusion.tiered_compaction import (
    CompactionLevel,
    CircuitBreaker,
    FileAccessTracker,
    compact,
    determine_level,
)

# Extend the package's public API (declared above) with the v8 names.
__all__ += [
    "budget_tool_results",
    "summarize_conversation",
    "CompactionLevel",
    "CircuitBreaker",
    "FileAccessTracker",
    "compact",
    "determine_level",
]
Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
"""ConversationSummarizer — LLM-free conversation turn summarization.
2+
3+
Inspired by Claude Code's AutoCompact: when a conversation exceeds a token
4+
budget, older turns are collapsed into a structured summary block.
5+
6+
Unlike Claude Code (which calls the LLM for summarization), this module uses
7+
deterministic extraction to avoid API calls and latency:
8+
- Extracts key decisions, file paths, function names, and error patterns
9+
- Preserves user instructions and system messages verbatim
10+
- Collapses assistant responses to their first sentence + action items
11+
- Collapses tool results to one-line summaries
12+
13+
The summarized turns are replaced by a single system message with subtype
14+
``compact_boundary`` (compatible with Claude Code's format) so downstream
15+
consumers can detect and handle compacted history.
16+
17+
Part of claw-compactor v8. License: MIT.
18+
"""
19+
from __future__ import annotations
20+
21+
import json
22+
import re
23+
from typing import Any
24+
25+
from claw_compactor.tokens import estimate_tokens
26+
27+
28+
# Summarization fires when total message tokens exceed this fraction of budget.
DEFAULT_TRIGGER_PCT = 0.80

# After summarization, the summary should be at most this many tokens.
MAX_SUMMARY_TOKENS = 20_000

# Keep the N most recent turns unsummarized (a "turn" = one user + one assistant).
DEFAULT_PRESERVE_RECENT_TURNS = 4

# Patterns to extract from assistant messages.
# Absolute file paths with a short extension, optionally wrapped in
# backticks or quotes (the wrapper chars are not captured).
_FILE_PATH_RE = re.compile(r'[`"\']?(/[\w./-]+\.\w{1,10})[`"\']?')
# Definition keywords across common languages; captures the defined name.
_FUNCTION_RE = re.compile(r'(?:def|function|class|fn|func)\s+(\w+)')
# Error-like marker followed by a short description (captures 10-80 chars).
_ERROR_RE = re.compile(r'(?:Error|Exception|FAIL|error|failed|bug)[:. ]\s*(.{10,80})')
# Decision phrasing, e.g. "decided ...", "will use ..." (captures 10-120 chars).
_DECISION_RE = re.compile(
    r'(?:decided|decision|chose|choosing|will use|going with|plan is|approach:)\s+(.{10,120})',
    re.IGNORECASE,
)
45+
46+
47+
def summarize_conversation(
    messages: list[dict[str, Any]],
    token_budget: int = 200_000,
    trigger_pct: float = DEFAULT_TRIGGER_PCT,
    preserve_recent_turns: int = DEFAULT_PRESERVE_RECENT_TURNS,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Summarize older conversation turns if total tokens exceed budget threshold.

    Parameters
    ----------
    messages:
        OpenAI-format message list.
    token_budget:
        The context window size in tokens.
    trigger_pct:
        Fraction of token_budget at which summarization activates.
    preserve_recent_turns:
        Number of recent user+assistant turn pairs to keep verbatim.

    Returns
    -------
    (new_messages, stats) — stats includes tokens_before, tokens_after, turns_summarized.
    """

    def _content_tokens(msg: dict[str, Any]) -> int:
        # Single token-counting rule for all totals in this function:
        # non-string content (e.g. structured content blocks) is stringified
        # rather than counted as 0. Previously the boundary's original_tokens
        # used "" for non-string content, disagreeing with the before/after
        # totals computed here.
        content = msg.get("content", "")
        return estimate_tokens(content if isinstance(content, str) else str(content))

    total_tokens = sum(_content_tokens(m) for m in messages)
    threshold = int(token_budget * trigger_pct)

    stats: dict[str, Any] = {
        "total_tokens_before": total_tokens,
        "total_tokens_after": total_tokens,
        "turns_summarized": 0,
        "triggered": False,
        "threshold": threshold,
    }

    if total_tokens < threshold:
        return messages, stats

    stats["triggered"] = True

    # Split messages into: system prefix, conversation body, recent tail.
    system_msgs, body_msgs, recent_msgs = _split_messages(
        messages, preserve_recent_turns
    )

    if len(body_msgs) < 2:
        # Not enough to summarize.
        return messages, stats

    # Build a deterministic (LLM-free) summary of the body.
    summary_lines = _extract_summary(body_msgs)
    summary_text = "\n".join(summary_lines)

    # Enforce MAX_SUMMARY_TOKENS: drop trailing lines until it fits, but keep
    # at least 5 lines so the summary header survives.
    if estimate_tokens(summary_text) > MAX_SUMMARY_TOKENS:
        lines = summary_lines
        while estimate_tokens("\n".join(lines)) > MAX_SUMMARY_TOKENS and len(lines) > 5:
            lines = lines[:-1]
        summary_text = "\n".join(lines) + "\n[...truncated summary]"

    # Build the compact_boundary message that replaces the summarized body.
    boundary_msg = _make_compact_boundary(
        summary_text,
        turns_summarized=len(body_msgs),
        original_tokens=sum(_content_tokens(m) for m in body_msgs),
    )

    # Reassemble: system prefix, one boundary marker, then the recent tail.
    new_messages = system_msgs + [boundary_msg] + recent_msgs

    stats["total_tokens_after"] = sum(_content_tokens(m) for m in new_messages)
    stats["turns_summarized"] = len(body_msgs)

    return new_messages, stats
129+
130+
131+
def _split_messages(
132+
messages: list[dict[str, Any]],
133+
preserve_recent_turns: int,
134+
) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]]]:
135+
"""Split messages into (system_prefix, compactable_body, recent_tail)."""
136+
# System messages at the start.
137+
system_msgs: list[dict[str, Any]] = []
138+
i = 0
139+
while i < len(messages) and messages[i].get("role") == "system":
140+
system_msgs.append(messages[i])
141+
i += 1
142+
143+
remaining = messages[i:]
144+
145+
# Count turns from the end (a turn = user msg followed by any non-user msgs).
146+
if preserve_recent_turns <= 0:
147+
return system_msgs, remaining, []
148+
149+
# Walk backwards counting user messages as turn boundaries.
150+
turns_found = 0
151+
split_idx = len(remaining)
152+
for j in range(len(remaining) - 1, -1, -1):
153+
if remaining[j].get("role") == "user":
154+
turns_found += 1
155+
if turns_found >= preserve_recent_turns:
156+
split_idx = j
157+
break
158+
159+
body = remaining[:split_idx]
160+
recent = remaining[split_idx:]
161+
return system_msgs, body, recent
162+
163+
164+
def _extract_summary(messages: list[dict[str, Any]]) -> list[str]:
    """Render a structured markdown summary of a slice of conversation.

    Harvests user instructions, assistant decisions and headlines, tool
    invocation markers, referenced file paths, defined symbols, and error
    snippets, then emits them as capped markdown sections.
    """
    instructions: list[str] = []
    decisions: list[str] = []
    actions: list[str] = []
    errors: list[str] = []
    paths: set[str] = set()
    symbols: set[str] = set()

    for msg in messages:
        role = msg.get("role", "")
        raw = msg.get("content", "")
        text = raw if isinstance(raw, str) else str(raw)

        if role == "user":
            # Keep the opening of each user instruction (first 200 chars).
            snippet = text.strip()[:200]
            if snippet:
                instructions.append(snippet)
        elif role == "assistant":
            # Explicit decision statements.
            decisions.extend(m.group(1).strip() for m in _DECISION_RE.finditer(text))
            # The first line (up to 150 chars) stands in for the response.
            headline = text.split("\n")[0][:150].strip()
            if headline:
                actions.append(headline)
        elif role == "tool":
            # Tool results collapse to a one-line size marker.
            actions.append(f"[{msg.get('name', 'tool')}: {estimate_tokens(text)} tokens]")

        # Regardless of role, harvest paths, symbols, and error snippets.
        paths.update(_FILE_PATH_RE.findall(text))
        symbols.update(_FUNCTION_RE.findall(text))
        errors.extend(m.group(1).strip()[:100] for m in _ERROR_RE.finditer(text))

    out: list[str] = ["## Conversation Summary (auto-compacted)", ""]

    def _section(title: str, items: list[str]) -> None:
        # Markdown header, bullet items, then a blank separator line.
        out.append(title)
        out.extend(items)
        out.append("")

    if instructions:
        _section("### User Instructions", [f"- {s}" for s in instructions[-10:]])
    if decisions:
        _section("### Key Decisions", [f"- {d}" for d in decisions[-10:]])
    if actions:
        _section("### Actions Taken", [f"- {a}" for a in actions[-15:]])
    if paths:
        _section("### Files Referenced", [f"- `{p}`" for p in sorted(paths)[:20]])
    if errors:
        _section("### Errors Encountered", [f"- {e}" for e in errors[-5:]])

    return out
241+
242+
243+
def _make_compact_boundary(
    summary: str,
    turns_summarized: int,
    original_tokens: int,
) -> dict[str, Any]:
    """Wrap *summary* in a compact_boundary system message.

    The JSON payload layout mirrors Claude Code's compact_boundary format so
    downstream consumers can detect and handle compacted history.
    """
    payload = {
        "type": "system",
        "subtype": "compact_boundary",
        "summary": summary,
        "compactMetadata": {
            "turnsSummarized": turns_summarized,
            "originalTokens": original_tokens,
            "compressedTokens": estimate_tokens(summary),
            "preservedSegment": True,
        },
    }
    return {"role": "system", "content": json.dumps(payload)}

scripts/lib/fusion/engine.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,3 +607,61 @@ def _aggregate_stats(
607607
"total_timing_ms": round(timing_ms, 3),
608608
"message_count": message_count,
609609
}
610+
611+
612+
# ---------------------------------------------------------------------------
613+
# v8: Conversation-level compaction API
614+
# ---------------------------------------------------------------------------
615+
616+
from claw_compactor.fusion.tiered_compaction import (
617+
CompactionLevel,
618+
CircuitBreaker,
619+
FileAccessTracker,
620+
compact as _tiered_compact,
621+
)
622+
623+
# Re-export for convenience: expose CompactionLevel through the engine class.
FusionEngine.CompactionLevel = CompactionLevel  # type: ignore[attr-defined]


def _compact_messages_method(
    self,
    messages: list[dict[str, Any]],
    token_budget: int = 200_000,
    level: CompactionLevel | None = None,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Apply tiered compaction to a message list.

    Combines tool result budgeting, conversation summarization, and
    per-message Fusion Pipeline compression based on context pressure.

    Parameters
    ----------
    messages:
        OpenAI-format message list.
    token_budget:
        Context window size in tokens.
    level:
        Force a specific compaction level. If None, auto-detected.

    Returns
    -------
    (compacted_messages, stats)
    """
    # Lazily attach per-engine compaction state on first call (the class
    # definition above is deliberately left untouched).
    for attr, factory in (
        ("_circuit_breaker", CircuitBreaker),
        ("_file_tracker", FileAccessTracker),
    ):
        if not hasattr(self, attr):
            setattr(self, attr, factory())

    return _tiered_compact(
        messages=messages,
        token_budget=token_budget,
        circuit_breaker=self._circuit_breaker,
        file_tracker=self._file_tracker,
        fusion_engine=self,
        level_override=level,
    )


# Monkey-patch onto FusionEngine (avoids modifying the class definition above).
FusionEngine.compact_messages = _compact_messages_method  # type: ignore[attr-defined]

0 commit comments

Comments
 (0)