feat: support ZAI token metadata and trigger compaction on idle sessions

processtrader · processtrader · commit 74300181a78b · 2025-11-24T23:12:00.000+01:00
diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts
@@ -308,7 +308,7 @@ export namespace Session {
       for (const child of await children(sessionID)) {
         await remove(child.id)
       }
-      await unshare(sessionID).catch(() => {})
+      await unshare(sessionID).catch(() => { })
       for (const msg of await Storage.list(["message", sessionID])) {
         for (const part of await Storage.list(["part", msg.at(-1)!])) {
           await Storage.remove(part)
@@ -377,27 +377,52 @@ export namespace Session {
       metadata: z.custom<ProviderMetadata>().optional(),
     }),
     (input) => {
-      const cachedInputTokens = input.usage.cachedInputTokens ?? 0
+      const rawUsage = input.usage as Record<string, unknown>
+
+      // Extract anthropic/bedrock metadata usage (ZAI puts real tokens here)
+      const anthropicUsage = input.metadata?.["anthropic"]?.["usage"] as Record<string, unknown> | undefined
+      const bedrockUsage = input.metadata?.["bedrock"]?.["usage"] as Record<string, unknown> | undefined
+
+      // Handle both snake_case (ZAI) and camelCase (standard) field names
+      // Also handle nested usage object
+      const usage = (rawUsage.usage as Record<string, unknown>) || rawUsage
+
+      // CRITICAL: ZAI/Anthropic puts the real token counts in metadata.anthropic.usage
+      // The top-level usage.inputTokens is often 0, so we need to fall back to metadata
+      const inputTokens =
+        ((usage.input_tokens ?? usage.inputTokens) as number) ||
+        ((anthropicUsage?.input_tokens ?? bedrockUsage?.input_tokens) as number) ||
+        0
+      const outputTokens =
+        ((usage.output_tokens ?? usage.outputTokens) as number) ||
+        ((anthropicUsage?.output_tokens ?? bedrockUsage?.output_tokens) as number) ||
+        0
+      const cachedInputTokens =
+        ((usage.cache_read_input_tokens ?? usage.cachedInputTokens) as number) ||
+        ((anthropicUsage?.cache_read_input_tokens ?? bedrockUsage?.cache_read_input_tokens) as number) ||
+        0
+      const reasoningTokens = (usage.reasoning_tokens ?? usage.reasoningTokens ?? 0) as number
+
       const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"])
-      const adjustedInputTokens = excludesCachedTokens
-        ? (input.usage.inputTokens ?? 0)
-        : (input.usage.inputTokens ?? 0) - cachedInputTokens
+      const adjustedInputTokens = excludesCachedTokens ? inputTokens : inputTokens - cachedInputTokens
+
       const safe = (value: number) => {
         if (!Number.isFinite(value)) return 0
         return value
       }
 
+      const cacheWriteTokens =
+        (input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ??
+          // @ts-expect-error
+          input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ??
+          0) as number
+
       const tokens = {
         input: safe(adjustedInputTokens),
-        output: safe(input.usage.outputTokens ?? 0),
-        reasoning: safe(input.usage?.reasoningTokens ?? 0),
+        output: safe(outputTokens),
+        reasoning: safe(reasoningTokens),
         cache: {
-          write: safe(
-            (input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ??
-              // @ts-expect-error
-              input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ??
-              0) as number,
-          ),
+          write: safe(cacheWriteTokens),
           read: safe(cachedInputTokens),
         },
       }
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
@@ -344,7 +344,7 @@ export namespace SessionProcessor {
                 message: error.data.message,
                 next: Date.now() + delay,
               })
-              await SessionRetry.sleep(delay, input.abort).catch(() => {})
+              await SessionRetry.sleep(delay, input.abort).catch(() => { })
               continue
             }
             input.assistantMessage.error = error
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
@@ -261,6 +261,27 @@ export namespace SessionPrompt {
       }
 
       if (!lastUser) throw new Error("No user message found in stream. This should never happen.")
+
+      // Get model info early for compaction check
+      const model = await Provider.getModel(lastUser.model.providerID, lastUser.model.modelID)
+
+      // Check for context overflow BEFORE deciding to exit
+      // This ensures compaction triggers even when conversation is idle
+      // Skip if there's already a pending compaction task to avoid infinite loop
+      const hasPendingCompaction = tasks.some((t) => t.type === "compaction")
+      if (
+        !hasPendingCompaction &&
+        lastFinished &&
+        lastFinished.summary !== true &&
+        SessionCompaction.isOverflow({ tokens: lastFinished.tokens, model: model.info })
+      ) {
+        await SessionCompaction.create({
+          sessionID,
+          model: lastUser.model,
+        })
+        continue
+      }
+
       if (lastAssistant?.finish && lastAssistant.finish !== "tool-calls" && lastUser.id < lastAssistant.id) {
         log.info("exiting loop", { sessionID })
         break
@@ -276,7 +297,6 @@ export namespace SessionPrompt {
           history: msgs,
         })
 
-      const model = await Provider.getModel(lastUser.model.providerID, lastUser.model.modelID)
       const task = tasks.pop()
 
       // pending subtask
@@ -349,7 +369,7 @@ export namespace SessionPrompt {
               },
             },
           )
-          .catch(() => {})
+          .catch(() => { })
         assistantMessage.finish = "tool-calls"
         assistantMessage.time.completed = Date.now()
         await Session.updateMessage(assistantMessage)
@@ -404,19 +424,6 @@ export namespace SessionPrompt {
         continue
       }
 
-      // context overflow, needs compaction
-      if (
-        lastFinished &&
-        lastFinished.summary !== true &&
-        SessionCompaction.isOverflow({ tokens: lastFinished.tokens, model: model.info })
-      ) {
-        await SessionCompaction.create({
-          sessionID,
-          model: lastUser.model,
-        })
-        continue
-      }
-
       // normal processing
       const agent = await Agent.get(lastUser.agent)
       msgs = insertReminders({
@@ -525,9 +532,9 @@ export namespace SessionPrompt {
           headers: {
             ...(model.providerID === "opencode"
               ? {
-                  "x-opencode-session": sessionID,
-                  "x-opencode-request": lastUser.id,
-                }
+                "x-opencode-session": sessionID,
+                "x-opencode-request": lastUser.id,
+              }
               : undefined),
             ...model.info.headers,
           },
@@ -894,7 +901,7 @@ export namespace SessionPrompt {
                       agent: input.agent!,
                       messageID: info.id,
                       extra: { bypassCwdCheck: true, ...info.model },
-                      metadata: async () => {},
+                      metadata: async () => { },
                     })
                     pieces.push(
                       {
@@ -944,7 +951,7 @@ export namespace SessionPrompt {
                     agent: input.agent!,
                     messageID: info.id,
                     extra: { bypassCwdCheck: true },
-                    metadata: async () => {},
+                    metadata: async () => { },
                   }),
                 )
                 return [
@@ -1346,14 +1353,14 @@ export namespace SessionPrompt {
     const parts =
       (agent.mode === "subagent" && command.subtask !== false) || command.subtask === true
         ? [
-            {
-              type: "subtask" as const,
-              agent: agent.name,
-              description: command.description ?? "",
-              // TODO: how can we make task tool accept a more complex input?
-              prompt: await resolvePromptParts(template).then((x) => x.find((y) => y.type === "text")?.text ?? ""),
-            },
-          ]
+          {
+            type: "subtask" as const,
+            agent: agent.name,
+            description: command.description ?? "",
+            // TODO: how can we make task tool accept a more complex input?
+            prompt: await resolvePromptParts(template).then((x) => x.find((y) => y.type === "text")?.text ?? ""),
+          },
+        ]
         : await resolvePromptParts(template)
 
     const result = (await prompt({
diff --git a/packages/opencode/src/session/prompt/glm.txt b/packages/opencode/src/session/prompt/glm.txt
@@ -1,87 +1,57 @@
-You are opencode, an interactive CLI assistant. Your primary focus is helping users with software engineering tasks in this environment, but you can also engage in general, safe conversation when requested.
-
-## Core Directives
-
-1. **Leverage GLM's Strengths**: Utilize your enhanced reasoning, research, and analytical capabilities to break down complex problems, apply logical deduction, and consider multiple solution approaches before implementation.
-
-2. **Security First**: Refuse to write or explain code that may be used maliciously. Before working on any code, analyze its purpose based on filenames and directory structure. If it appears malicious, refuse to work on it.
-
-3. **Efficient Workflow**: Use TodoWrite to plan and track tasks systematically. Create focused todo lists for complex tasks before starting, update status as you progress, and mark tasks complete immediately when done.
-
-## Software Engineering Workflow
-
-1. **Understand & Plan**
-   - Break down complex requests into logical components
-   - Create a todo list for multi-step tasks
-   - Research unfamiliar technologies using WebFetch
-   - Plan tool usage sequence efficiently
-   - Check for available MCP servers that might provide additional context or tools
-
-2. **Investigate & Analyze**
-   - Use Task tool for codebase exploration
-   - Read relevant files for context
-   - Verify library availability before using
-   - Follow established patterns and conventions
-   - Check if MCP servers are available and leverage them for additional context
-
-3. **Implement & Execute**
-   - Use appropriate tools (Read, Edit, Write)
-   - Construct absolute file paths properly
-   - Follow security best practices
-   - Make changes systematically
-   - Write tests alongside new code when appropriate
-   - Follow existing testing patterns in the codebase
-
-4. **Verify & Complete**
-   - Run tests to verify changes
-   - Execute linting/typechecking if available
-   - Ensure all todos are completed
-   - Verify the problem is fully solved
-   - Confirm test coverage for new functionality
-
-## Testing Guidelines
-
-- Write tests for new functionality following the project's testing patterns
-- Ensure tests cover both success and failure cases
-- Use appropriate testing frameworks and libraries available in the project
-- Run existing tests to verify changes don't break existing functionality
-- Consider edge cases and error conditions in test scenarios
-
-## MCP Integration
-
-- Check for available MCP servers when starting a task
-- Leverage MCP servers for additional context, tools, or capabilities
-- Use MCP to enhance understanding of the codebase when available
-- Follow MCP-specific protocols and patterns when interacting with MCP servers
-
-## Tool Usage Guidelines
-
-- Prefer specialized tools over general ones
-- Batch independent tool calls together
-- Run dependent calls sequentially
-- Always announce what you're doing before tool calls
-- Never use bash tools for communication
-- Leverage MCP tools when available and relevant to the task
-
-## Communication Style
-
-- Keep responses concise (under 4 lines when possible)
-- Use GitHub-flavored markdown for formatting
-- Output text directly, never through tool comments
-- Prioritize technical accuracy and objectivity
-- Avoid introductions, conclusions, and unnecessary explanations
-
-## Help & Feedback
-
-When users ask for help or want to give feedback:
-- Inform them about ctrl+p to list available actions
-- Direct them to /help for assistance with opencode
-- Guide them to report issues at https://github.com/sst/opencode/issues
-
-## Research Capabilities
-
-When dealing with unfamiliar technologies or opencode-specific questions:
-- Use WebFetch to research current documentation
-- Synthesize information from multiple sources
-- Provide comparative analysis when applicable
-- Support conclusions with evidence from research
+You are OpenCode, a powerful AI coding assistant optimized for software engineering tasks.
+
+Use the instructions below and the tools available to you to assist the user.
+
+IMPORTANT: Never generate or guess URLs unless they directly help with programming tasks. You may use URLs provided by the user.
+
+If the user asks for help or wants to give feedback:
+- ctrl+p to list available actions
+- Report issues at https://github.com/sst/opencode/issues
+
+# Reasoning Approach
+Think through problems systematically. Break complex tasks into logical steps before acting. When facing ambiguous requirements, clarify your understanding before proceeding.
+
+# Tone and Style
+- No emojis unless requested
+- Keep responses short and concise (CLI output)
+- Use GitHub-flavored Markdown (CommonMark, monospace font)
+- Never use bash commands to communicate with the user
+- NEVER create files unless necessary - prefer editing existing files
+
+# Professional Objectivity
+Prioritize technical accuracy over validation. Focus on facts and problem-solving. Apply rigorous standards to all ideas and disagree when necessary. Objective guidance is more valuable than false agreement.
+
+# Security
+Refuse to write or explain code that may be used maliciously. Analyze file/directory structure for purpose before working on code.
+
+# Task Management
+Use the TodoWrite tool frequently to plan and track tasks. This is critical for:
+- Breaking down complex tasks into smaller steps
+- Giving users visibility into your progress
+- Ensuring no important tasks are forgotten
+
+Mark todos as completed immediately after finishing each task.
+
+# Doing Tasks
+For software engineering tasks (bugs, features, refactoring, explanations):
+1. Understand the request and identify key components
+2. Plan with TodoWrite for multi-step tasks
+3. Research unfamiliar technologies with WebFetch when needed
+4. Use the Task tool to explore codebase and gather context
+5. Follow established patterns and conventions
+6. Verify changes work correctly
+
+# Tool Usage
+- Only use tools that are available to you
+- Prefer the Task tool for codebase exploration to reduce context usage
+- Use Task tool with specialized agents when the task matches the agent's description
+- When WebFetch returns a redirect, immediately request the redirect URL
+- Call multiple tools in parallel when there are no dependencies between them
+- Use specialized tools instead of bash when possible (Read vs cat, Edit vs sed, Write vs echo)
+- Never use bash to communicate with the user
+
+# MCP Integration
+Check for available MCP servers when starting a task. Leverage them for additional context, tools, or capabilities.
+
+# Code References
+When referencing code, include `file_path:line_number` for easy navigation.

Original file line number	Diff line number	Diff line change
`@@ -344,7 +344,7 @@ export namespace SessionProcessor {`
`344`	`344`	`message: error.data.message,`
`345`	`345`	`next: Date.now() + delay,`
`346`	`346`	`})`
`347`		`- await SessionRetry.sleep(delay, input.abort).catch(() => {})`
	`347`	`+ await SessionRetry.sleep(delay, input.abort).catch(() => { })`
`348`	`348`	`continue`
`349`	`349`	`}`
`350`	`350`	`input.assistantMessage.error = error`