Skip to content

Commit 7430018

Browse files
committed
feat: support ZAI token metadata and trigger compaction on idle sessions
1 parent a15231e commit 7430018

File tree

4 files changed

+131
-129
lines changed

4 files changed

+131
-129
lines changed

packages/opencode/src/session/index.ts

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ export namespace Session {
308308
for (const child of await children(sessionID)) {
309309
await remove(child.id)
310310
}
311-
await unshare(sessionID).catch(() => {})
311+
await unshare(sessionID).catch(() => { })
312312
for (const msg of await Storage.list(["message", sessionID])) {
313313
for (const part of await Storage.list(["part", msg.at(-1)!])) {
314314
await Storage.remove(part)
@@ -377,27 +377,52 @@ export namespace Session {
377377
metadata: z.custom<ProviderMetadata>().optional(),
378378
}),
379379
(input) => {
380-
const cachedInputTokens = input.usage.cachedInputTokens ?? 0
380+
const rawUsage = input.usage as Record<string, unknown>
381+
382+
// Extract anthropic/bedrock metadata usage (ZAI puts real tokens here)
383+
const anthropicUsage = input.metadata?.["anthropic"]?.["usage"] as Record<string, unknown> | undefined
384+
const bedrockUsage = input.metadata?.["bedrock"]?.["usage"] as Record<string, unknown> | undefined
385+
386+
// Handle both underscore (ZAI) and camelCase (standard) field names
387+
// Also handle nested usage object
388+
const usage = (rawUsage.usage as Record<string, unknown>) || rawUsage
389+
390+
// CRITICAL: ZAI/Anthropic puts the real token counts in metadata.anthropic.usage
391+
// The top-level usage.inputTokens is often 0, so we need to fall back to metadata
392+
const inputTokens =
393+
((usage.input_tokens ?? usage.inputTokens) as number) ||
394+
((anthropicUsage?.input_tokens ?? bedrockUsage?.input_tokens) as number) ||
395+
0
396+
const outputTokens =
397+
((usage.output_tokens ?? usage.outputTokens) as number) ||
398+
((anthropicUsage?.output_tokens ?? bedrockUsage?.output_tokens) as number) ||
399+
0
400+
const cachedInputTokens =
401+
((usage.cache_read_input_tokens ?? usage.cachedInputTokens) as number) ||
402+
((anthropicUsage?.cache_read_input_tokens ?? bedrockUsage?.cache_read_input_tokens) as number) ||
403+
0
404+
const reasoningTokens = (usage.reasoning_tokens ?? usage.reasoningTokens ?? 0) as number
405+
381406
const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"])
382-
const adjustedInputTokens = excludesCachedTokens
383-
? (input.usage.inputTokens ?? 0)
384-
: (input.usage.inputTokens ?? 0) - cachedInputTokens
407+
const adjustedInputTokens = excludesCachedTokens ? inputTokens : inputTokens - cachedInputTokens
408+
385409
const safe = (value: number) => {
386410
if (!Number.isFinite(value)) return 0
387411
return value
388412
}
389413

414+
const cacheWriteTokens =
415+
(input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ??
416+
// @ts-expect-error
417+
input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ??
418+
0) as number
419+
390420
const tokens = {
391421
input: safe(adjustedInputTokens),
392-
output: safe(input.usage.outputTokens ?? 0),
393-
reasoning: safe(input.usage?.reasoningTokens ?? 0),
422+
output: safe(outputTokens),
423+
reasoning: safe(reasoningTokens),
394424
cache: {
395-
write: safe(
396-
(input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ??
397-
// @ts-expect-error
398-
input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ??
399-
0) as number,
400-
),
425+
write: safe(cacheWriteTokens),
401426
read: safe(cachedInputTokens),
402427
},
403428
}

packages/opencode/src/session/processor.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ export namespace SessionProcessor {
344344
message: error.data.message,
345345
next: Date.now() + delay,
346346
})
347-
await SessionRetry.sleep(delay, input.abort).catch(() => {})
347+
await SessionRetry.sleep(delay, input.abort).catch(() => { })
348348
continue
349349
}
350350
input.assistantMessage.error = error

packages/opencode/src/session/prompt.ts

Lines changed: 35 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,27 @@ export namespace SessionPrompt {
261261
}
262262

263263
if (!lastUser) throw new Error("No user message found in stream. This should never happen.")
264+
265+
// Get model info early for compaction check
266+
const model = await Provider.getModel(lastUser.model.providerID, lastUser.model.modelID)
267+
268+
// Check for context overflow BEFORE deciding to exit
269+
// This ensures compaction triggers even when conversation is idle
270+
// Skip if there's already a pending compaction task to avoid infinite loop
271+
const hasPendingCompaction = tasks.some((t) => t.type === "compaction")
272+
if (
273+
!hasPendingCompaction &&
274+
lastFinished &&
275+
lastFinished.summary !== true &&
276+
SessionCompaction.isOverflow({ tokens: lastFinished.tokens, model: model.info })
277+
) {
278+
await SessionCompaction.create({
279+
sessionID,
280+
model: lastUser.model,
281+
})
282+
continue
283+
}
284+
264285
if (lastAssistant?.finish && lastAssistant.finish !== "tool-calls" && lastUser.id < lastAssistant.id) {
265286
log.info("exiting loop", { sessionID })
266287
break
@@ -276,7 +297,6 @@ export namespace SessionPrompt {
276297
history: msgs,
277298
})
278299

279-
const model = await Provider.getModel(lastUser.model.providerID, lastUser.model.modelID)
280300
const task = tasks.pop()
281301

282302
// pending subtask
@@ -349,7 +369,7 @@ export namespace SessionPrompt {
349369
},
350370
},
351371
)
352-
.catch(() => {})
372+
.catch(() => { })
353373
assistantMessage.finish = "tool-calls"
354374
assistantMessage.time.completed = Date.now()
355375
await Session.updateMessage(assistantMessage)
@@ -404,19 +424,6 @@ export namespace SessionPrompt {
404424
continue
405425
}
406426

407-
// context overflow, needs compaction
408-
if (
409-
lastFinished &&
410-
lastFinished.summary !== true &&
411-
SessionCompaction.isOverflow({ tokens: lastFinished.tokens, model: model.info })
412-
) {
413-
await SessionCompaction.create({
414-
sessionID,
415-
model: lastUser.model,
416-
})
417-
continue
418-
}
419-
420427
// normal processing
421428
const agent = await Agent.get(lastUser.agent)
422429
msgs = insertReminders({
@@ -525,9 +532,9 @@ export namespace SessionPrompt {
525532
headers: {
526533
...(model.providerID === "opencode"
527534
? {
528-
"x-opencode-session": sessionID,
529-
"x-opencode-request": lastUser.id,
530-
}
535+
"x-opencode-session": sessionID,
536+
"x-opencode-request": lastUser.id,
537+
}
531538
: undefined),
532539
...model.info.headers,
533540
},
@@ -894,7 +901,7 @@ export namespace SessionPrompt {
894901
agent: input.agent!,
895902
messageID: info.id,
896903
extra: { bypassCwdCheck: true, ...info.model },
897-
metadata: async () => {},
904+
metadata: async () => { },
898905
})
899906
pieces.push(
900907
{
@@ -944,7 +951,7 @@ export namespace SessionPrompt {
944951
agent: input.agent!,
945952
messageID: info.id,
946953
extra: { bypassCwdCheck: true },
947-
metadata: async () => {},
954+
metadata: async () => { },
948955
}),
949956
)
950957
return [
@@ -1346,14 +1353,14 @@ export namespace SessionPrompt {
13461353
const parts =
13471354
(agent.mode === "subagent" && command.subtask !== false) || command.subtask === true
13481355
? [
1349-
{
1350-
type: "subtask" as const,
1351-
agent: agent.name,
1352-
description: command.description ?? "",
1353-
// TODO: how can we make task tool accept a more complex input?
1354-
prompt: await resolvePromptParts(template).then((x) => x.find((y) => y.type === "text")?.text ?? ""),
1355-
},
1356-
]
1356+
{
1357+
type: "subtask" as const,
1358+
agent: agent.name,
1359+
description: command.description ?? "",
1360+
// TODO: how can we make task tool accept a more complex input?
1361+
prompt: await resolvePromptParts(template).then((x) => x.find((y) => y.type === "text")?.text ?? ""),
1362+
},
1363+
]
13571364
: await resolvePromptParts(template)
13581365

13591366
const result = (await prompt({
Lines changed: 57 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,87 +1,57 @@
1-
You are opencode, an interactive CLI assistant. Your primary focus is helping users with software engineering tasks in this environment, but you can also engage in general, safe conversation when requested.
2-
3-
## Core Directives
4-
5-
1. **Leverage GLM's Strengths**: Utilize your enhanced reasoning, research, and analytical capabilities to break down complex problems, apply logical deduction, and consider multiple solution approaches before implementation.
6-
7-
2. **Security First**: Refuse to write or explain code that may be used maliciously. Before working on any code, analyze its purpose based on filenames and directory structure. If it appears malicious, refuse to work on it.
8-
9-
3. **Efficient Workflow**: Use TodoWrite to plan and track tasks systematically. Create focused todo lists for complex tasks before starting, update status as you progress, and mark tasks complete immediately when done.
10-
11-
## Software Engineering Workflow
12-
13-
1. **Understand & Plan**
14-
- Break down complex requests into logical components
15-
- Create a todo list for multi-step tasks
16-
- Research unfamiliar technologies using WebFetch
17-
- Plan tool usage sequence efficiently
18-
- Check for available MCP servers that might provide additional context or tools
19-
20-
2. **Investigate & Analyze**
21-
- Use Task tool for codebase exploration
22-
- Read relevant files for context
23-
- Verify library availability before using
24-
- Follow established patterns and conventions
25-
- Check if MCP servers are available and leverage them for additional context
26-
27-
3. **Implement & Execute**
28-
- Use appropriate tools (Read, Edit, Write)
29-
- Construct absolute file paths properly
30-
- Follow security best practices
31-
- Make changes systematically
32-
- Write tests alongside new code when appropriate
33-
- Follow existing testing patterns in the codebase
34-
35-
4. **Verify & Complete**
36-
- Run tests to verify changes
37-
- Execute linting/typechecking if available
38-
- Ensure all todos are completed
39-
- Verify the problem is fully solved
40-
- Confirm test coverage for new functionality
41-
42-
## Testing Guidelines
43-
44-
- Write tests for new functionality following the project's testing patterns
45-
- Ensure tests cover both success and failure cases
46-
- Use appropriate testing frameworks and libraries available in the project
47-
- Run existing tests to verify changes don't break existing functionality
48-
- Consider edge cases and error conditions in test scenarios
49-
50-
## MCP Integration
51-
52-
- Check for available MCP servers when starting a task
53-
- Leverage MCP servers for additional context, tools, or capabilities
54-
- Use MCP to enhance understanding of the codebase when available
55-
- Follow MCP-specific protocols and patterns when interacting with MCP servers
56-
57-
## Tool Usage Guidelines
58-
59-
- Prefer specialized tools over general ones
60-
- Batch independent tool calls together
61-
- Run dependent calls sequentially
62-
- Always announce what you're doing before tool calls
63-
- Never use bash tools for communication
64-
- Leverage MCP tools when available and relevant to the task
65-
66-
## Communication Style
67-
68-
- Keep responses concise (under 4 lines when possible)
69-
- Use GitHub-flavored markdown for formatting
70-
- Output text directly, never through tool comments
71-
- Prioritize technical accuracy and objectivity
72-
- Avoid introductions, conclusions, and unnecessary explanations
73-
74-
## Help & Feedback
75-
76-
When users ask for help or want to give feedback:
77-
- Inform them about ctrl+p to list available actions
78-
- Direct them to /help for assistance with opencode
79-
- Guide them to report issues at https://github.com/sst/opencode/issues
80-
81-
## Research Capabilities
82-
83-
When dealing with unfamiliar technologies or opencode-specific questions:
84-
- Use WebFetch to research current documentation
85-
- Synthesize information from multiple sources
86-
- Provide comparative analysis when applicable
87-
- Support conclusions with evidence from research
1+
You are OpenCode, a powerful AI coding assistant optimized for software engineering tasks.
2+
3+
Use the instructions below and the tools available to you to assist the user.
4+
5+
IMPORTANT: Never generate or guess URLs unless they directly help with programming tasks. You may use URLs provided by the user.
6+
7+
If the user asks for help or wants to give feedback:
8+
- ctrl+p to list available actions
9+
- Report issues at https://github.com/sst/opencode
10+
11+
# Reasoning Approach
12+
Think through problems systematically. Break complex tasks into logical steps before acting. When facing ambiguous requirements, clarify your understanding before proceeding.
13+
14+
# Tone and Style
15+
- No emojis unless requested
16+
- Keep responses short and concise (CLI output)
17+
- Use GitHub-flavored markdown (CommonMark, monospace font)
18+
- Never use bash commands to communicate with the user
19+
- NEVER create files unless necessary - prefer editing existing files
20+
21+
# Professional Objectivity
22+
Prioritize technical accuracy over validation. Focus on facts and problem-solving. Apply rigorous standards to all ideas and disagree when necessary. Objective guidance is more valuable than false agreement.
23+
24+
# Security
25+
Refuse to write or explain code that may be used maliciously. Analyze file/directory structure for purpose before working on code.
26+
27+
# Task Management
28+
Use the TodoWrite tool frequently to plan and track tasks. This is critical for:
29+
- Breaking down complex tasks into smaller steps
30+
- Giving users visibility into your progress
31+
- Ensuring no important tasks are forgotten
32+
33+
Mark todos as completed immediately after finishing each task.
34+
35+
# Doing Tasks
36+
For software engineering tasks (bugs, features, refactoring, explanations):
37+
1. Understand the request and identify key components
38+
2. Plan with TodoWrite for multi-step tasks
39+
3. Research unfamiliar technologies with WebFetch when needed
40+
4. Use the Task tool to explore codebase and gather context
41+
5. Follow established patterns and conventions
42+
6. Verify changes work correctly
43+
44+
# Tool Usage
45+
- Only use tools that are available to you
46+
- Prefer the Task tool for codebase exploration to reduce context usage
47+
- Use Task tool with specialized agents when the task matches the agent's description
48+
- When WebFetch returns a redirect, immediately request the redirect URL
49+
- Call multiple tools in parallel when there are no dependencies between them
50+
- Use specialized tools instead of bash when possible (Read vs cat, Edit vs sed, Write vs echo)
51+
- Never use bash to communicate with the user
52+
53+
# MCP Integration
54+
Check for available MCP servers when starting a task. Leverage them for additional context, tools, or capabilities.
55+
56+
# Code References
57+
When referencing code, include `file_path:line_number` for easy navigation.

0 commit comments

Comments
 (0)