diff --git a/src/__tests__/errors.test.ts b/src/__tests__/errors.test.ts index 3c8d0080..dee79611 100644 --- a/src/__tests__/errors.test.ts +++ b/src/__tests__/errors.test.ts @@ -193,6 +193,12 @@ describe("classifyError", () => { expect(isExtraUsageRequiredError("extra usage is required for 1m context")).toBe(true) }) + it("detects 'out of extra usage' variant", () => { + expect(isExtraUsageRequiredError( + "Claude Code returned an error result: API Error: 400 You're out of extra usage." + )).toBe(true) + }) + it("returns false for unrelated errors", () => { expect(isExtraUsageRequiredError("rate limit exceeded")).toBe(false) expect(isExtraUsageRequiredError("authentication failed")).toBe(false) diff --git a/src/__tests__/proxy-env-stripping.test.ts b/src/__tests__/proxy-env-stripping.test.ts index 20ef2105..23a9207d 100644 --- a/src/__tests__/proxy-env-stripping.test.ts +++ b/src/__tests__/proxy-env-stripping.test.ts @@ -220,6 +220,19 @@ describe("SDK model pin injection (fixes #419)", () => { expect(capturedQueryOptions.env.ANTHROPIC_DEFAULT_HAIKU_MODEL).toBe("claude-haiku-4-5") }) + it("explicit claude-opus-4-6 requests pin the SDK env to 4.6", async () => { + const app = createTestApp() + await post(app, { ...BASIC_REQUEST, model: "claude-opus-4-6" }) + expect(capturedQueryOptions.env.ANTHROPIC_DEFAULT_OPUS_MODEL).toBe("claude-opus-4-6") + }) + + it("explicit claude-opus-4-7 requests beat inherited env pins", async () => { + process.env.ANTHROPIC_DEFAULT_OPUS_MODEL = "claude-opus-4-6" + const app = createTestApp() + await post(app, { ...BASIC_REQUEST, model: "claude-opus-4-7" }) + expect(capturedQueryOptions.env.ANTHROPIC_DEFAULT_OPUS_MODEL).toBe("claude-opus-4-7") + }) + it("shell ANTHROPIC_DEFAULT_* values win over Meridian's pins", async () => { process.env.ANTHROPIC_DEFAULT_OPUS_MODEL = "claude-opus-4-1-20250805" process.env.ANTHROPIC_DEFAULT_SONNET_MODEL = "claude-sonnet-4-20250514" diff --git a/src/__tests__/proxy-extra-usage-fallback.test.ts 
b/src/__tests__/proxy-extra-usage-fallback.test.ts index 0a1f0934..f1f39f8b 100644 --- a/src/__tests__/proxy-extra-usage-fallback.test.ts +++ b/src/__tests__/proxy-extra-usage-fallback.test.ts @@ -19,13 +19,14 @@ import { } from "./helpers" // Track query calls to verify retry behavior -let queryCalls: Array<{ model: string; callIndex: number }> = [] +let queryCalls: Array<{ model: string; callIndex: number; resume?: string }> = [] let queryCallCount = 0 // Control what the mock does -let mockBehavior: "extra_usage_then_succeed" | "always_extra_usage" | "succeed" | "error_assistant_then_ratelimit" = "succeed" +let mockBehavior: "extra_usage_then_succeed" | "always_extra_usage" | "out_of_extra_usage_then_succeed" | "resume_extra_usage_then_succeed" | "succeed" | "error_assistant_then_ratelimit" = "succeed" const EXTRA_USAGE_ERROR = "Claude Code returned an error result: API Error: Extra usage is required for 1M context · enable extra usage at claude.ai/settings/usage, or use --model to switch" +const OUT_OF_EXTRA_USAGE_ERROR = "Claude Code returned an error result: API Error: 400 You're out of extra usage." // Pass through the real resolveSdkModelDefaults — mock.module is process-global // in Bun, and stubbing it as () => ({}) leaks to proxy-env-stripping.test.ts. 
@@ -48,10 +49,10 @@ mock.module("../proxy/models", () => ({ mock.module("@anthropic-ai/claude-agent-sdk", () => ({ query: (opts: any) => { queryCallCount++ - const callIndex = queryCallCount - const model = opts.options?.model || "sonnet" - queryCalls.push({ model, callIndex }) - const isStreaming = opts.options?.includePartialMessages === true + const callIndex = queryCallCount + const model = opts.options?.model || "sonnet" + queryCalls.push({ model, callIndex, resume: opts.options?.resume }) + const isStreaming = opts.options?.includePartialMessages === true return (async function* () { if (mockBehavior === "always_extra_usage") { @@ -62,6 +63,18 @@ mock.module("@anthropic-ai/claude-agent-sdk", () => ({ throw new Error(EXTRA_USAGE_ERROR) } + if (mockBehavior === "out_of_extra_usage_then_succeed" && callIndex === 1) { + throw new Error(OUT_OF_EXTRA_USAGE_ERROR) + } + + if ( + mockBehavior === "resume_extra_usage_then_succeed" && + opts.options?.resume === "sdk-session-1" && + (model === "sonnet[1m]" || model === "sonnet") + ) { + throw new Error(OUT_OF_EXTRA_USAGE_ERROR) + } + // Simulates real SDK behaviour: emits an error assistant event first, // then throws a rate_limit error (which is what the SDK does when the // rate_limit_event with status:"rejected" is received). 
@@ -179,6 +192,22 @@ describe("Extra usage required fallback", () => { // should eventually propagate since the base model also fails expect(response.status).toBe(500) }) + + it("falls back on the short 'out of extra usage' error", async () => { + mockBehavior = "out_of_extra_usage_then_succeed" + const app = createTestApp() + + const response = await post(app, { + model: "sonnet", + stream: false, + messages: [{ role: "user", content: "hello" }], + }) + + expect(response.status).toBe(200) + expect(queryCalls.length).toBe(2) + expect(queryCalls[0]!.model).toBe("sonnet[1m]") + expect(queryCalls[1]!.model).toBe("sonnet") + }) }) describe("Streaming", () => { @@ -214,6 +243,41 @@ describe("Extra usage required fallback", () => { const errorEvent = events.find((e) => e.event === "error") expect(errorEvent).toBeDefined() }) + + it("retries resumed base model as a fresh session after extra usage", async () => { + mockBehavior = "resume_extra_usage_then_succeed" + const app = createTestApp() + + await post(app, { + model: "sonnet", + stream: false, + messages: [{ role: "user", content: "hello" }], + }, { "x-opencode-session": "sess-1" }) + + const response = await post(app, { + model: "sonnet", + stream: true, + messages: [ + { role: "user", content: "hello" }, + { role: "assistant", content: [ + { type: "text", text: "Running task." 
}, + { type: "tool_use", id: "toolu_1", name: "write", input: { path: "a.txt" } }, + ] }, + { role: "user", content: [ + { type: "tool_result", tool_use_id: "toolu_1", content: "done" }, + ] }, + ], + }, { "x-opencode-session": "sess-1" }) + + expect(response.status).toBe(200) + const text = await response.text() + expect(text).toContain("event: message_start") + expect(queryCalls.slice(-3)).toEqual([ + { model: "sonnet[1m]", callIndex: 2, resume: "sdk-session-1" }, + { model: "sonnet", callIndex: 3, resume: "sdk-session-1" }, + { model: "sonnet", callIndex: 4, resume: undefined }, + ]) + }) }) describe("No backoff needed", () => { diff --git a/src/__tests__/query.test.ts b/src/__tests__/query.test.ts index c29993b6..a9c1b84d 100644 --- a/src/__tests__/query.test.ts +++ b/src/__tests__/query.test.ts @@ -16,6 +16,7 @@ function makeContext(overrides: Partial = {}): QueryContext { stream: false, sdkAgents: {}, cleanEnv: {}, + envOverrides: undefined, hasDeferredTools: false, isUndo: false, blockedTools: BLOCKED_BUILTIN_TOOLS, @@ -45,6 +46,14 @@ describe("buildQueryOptions", () => { expect((result.options as any).includePartialMessages).toBeUndefined() }) + it("applies envOverrides after inherited env", () => { + const result = buildQueryOptions(makeContext({ + cleanEnv: { ANTHROPIC_DEFAULT_OPUS_MODEL: "claude-opus-4-6" }, + envOverrides: { ANTHROPIC_DEFAULT_OPUS_MODEL: "claude-opus-4-7" }, + })) + expect(result.options.env?.ANTHROPIC_DEFAULT_OPUS_MODEL).toBe("claude-opus-4-7") + }) + it("sets includePartialMessages for streaming", () => { const result = buildQueryOptions(makeContext({ stream: true })) expect((result.options as any).includePartialMessages).toBe(true) diff --git a/src/proxy/errors.ts b/src/proxy/errors.ts index eaf36818..8393090a 100644 --- a/src/proxy/errors.ts +++ b/src/proxy/errors.ts @@ -182,7 +182,7 @@ export function isRateLimitError(errMsg: string): boolean { */ export function isExtraUsageRequiredError(errMsg: string): boolean { const lower = 
errMsg.toLowerCase() - return lower.includes("extra usage") && lower.includes("1m") + return (lower.includes("extra usage") && lower.includes("1m")) || lower.includes("out of extra usage") } /** diff --git a/src/proxy/query.ts b/src/proxy/query.ts index 80b23fe8..449f1b6d 100644 --- a/src/proxy/query.ts +++ b/src/proxy/query.ts @@ -58,6 +58,8 @@ export interface QueryContext { passthroughMcp?: ReturnType /** Cleaned environment variables (API keys stripped) */ cleanEnv: Record + /** Per-request env overrides that must win over inherited env */ + envOverrides?: Record /** Whether any passthrough tools use deferred loading */ hasDeferredTools: boolean /** SDK session ID for resume (if continuing a session) */ @@ -299,6 +301,7 @@ export function buildQueryOptions(ctx: QueryContext): BuildQueryResult { // "--dangerously-skip-permissions cannot be used with root/sudo" // See: https://github.com/rynfar/meridian/issues/256 ...(process.getuid?.() === 0 ? { IS_SANDBOX: "1" } : {}), + ...ctx.envOverrides, }, ...(Object.keys(sdkAgents).length > 0 ? { agents: sdkAgents } : {}), ...(resumeSessionId ? { resume: resumeSessionId } : {}), diff --git a/src/proxy/server.ts b/src/proxy/server.ts index 049c8059..e95c9f3c 100644 --- a/src/proxy/server.ts +++ b/src/proxy/server.ts @@ -441,7 +441,11 @@ export function createProxyServer(config: Partial = {}): ProxyServe // Logged for observability; fork-*/subagent-* values also skip fingerprint cache (see below). // Examples: "main", "fork-memory-extract", "subagent-scout". const requestSource = c.req.header("x-meridian-source")?.slice(0, 64) || undefined - let model = mapModelToClaudeModel(body.model || "sonnet", authStatus?.subscriptionType, agentMode) + const requestedModel = typeof body.model === "string" && body.model ? body.model : "sonnet" /* empty or non-string model falls back to "sonnet", as the previous `body.model || "sonnet"` did */ + let model = mapModelToClaudeModel(requestedModel, authStatus?.subscriptionType, agentMode) + const envOverrides = requestedModel.startsWith("claude-opus-") + ? 
{ ANTHROPIC_DEFAULT_OPUS_MODEL: requestedModel } + : undefined // workingDirectory = SDK subprocess cwd (must exist on the proxy host). // clientWorkingDirectory = the client's local path (may not exist here); // used for per-project fingerprint bucketing and a system-prompt hint @@ -983,6 +987,7 @@ export function createProxyServer(config: Partial = {}): ProxyServe await ensureFreshToken().catch(() => { /* reactive path handles */ }) let tokenRefreshed = false + let didFreshBaseRetry = false while (true) { // Track whether response content was yielded. // The SDK emits metadata (session_id etc.) before the API call; @@ -991,7 +996,7 @@ export function createProxyServer(config: Partial = {}): ProxyServe try { for await (const event of query(buildQueryOptions({ prompt: makePrompt(), model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable, - passthrough, stream: false, sdkAgents, passthroughMcp, cleanEnv: profileEnv, hasDeferredTools, + passthrough, stream: false, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools, resumeSessionId, isUndo, undoRollbackUuid, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr, effort, thinking, taskBudget, betas, settingSources, codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? 
false : undefined, @@ -1038,7 +1043,7 @@ export function createProxyServer(config: Partial = {}): ProxyServe yield* query(buildQueryOptions({ prompt: buildFreshPrompt(allMessages, sanitizeOpts), model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable, - passthrough, stream: false, sdkAgents, passthroughMcp, cleanEnv: profileEnv, hasDeferredTools, + passthrough, stream: false, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools, resumeSessionId: undefined, isUndo: false, undoRollbackUuid: undefined, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr, effort, thinking, taskBudget, betas, settingSources, codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? false : undefined, @@ -1072,6 +1077,35 @@ export function createProxyServer(config: Partial = {}): ProxyServe continue } + if (isExtraUsageRequiredError(errMsg) && resumeSessionId && !didFreshBaseRetry) { + didFreshBaseRetry = true + claudeLog("upstream.session_fallback", { + mode: "non_stream", + model, + reason: "extra_usage_required_resume", + }) + console.error(`[PROXY] ${requestMeta.requestId} extra usage persisted on resumed ${model}, retrying as fresh session`) + evictSession(profileSessionId, profileScopedCwd, allMessages) + sdkUuidMap.length = 0 + for (let i = 0; i < allMessages.length; i++) sdkUuidMap.push(null) + yield* query(buildQueryOptions({ + prompt: buildFreshPrompt(allMessages, sanitizeOpts), + model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable, + passthrough, stream: false, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools, + resumeSessionId: undefined, isUndo: false, undoRollbackUuid: undefined, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: 
pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr, + effort, thinking, taskBudget, betas, settingSources, + codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? false : undefined, + memory: sdkFeatures.memory, dreaming: sdkFeatures.dreaming, sharedMemory: sdkFeatures.sharedMemory, + maxBudgetUsd: sdkFeatures.maxBudgetUsd, fallbackModel: sdkFeatures.fallbackModel, + sdkDebug: sdkFeatures.sdkDebug, + additionalDirectories: sdkFeatures.additionalDirectories + ? sdkFeatures.additionalDirectories.split(",").map(d => d.trim()).filter(Boolean) + : undefined, + advisorModel, + })) + return + } + // Expired OAuth token: refresh once and retry if (isExpiredTokenError(errMsg) && !tokenRefreshed) { tokenRefreshed = true @@ -1452,6 +1486,7 @@ export function createProxyServer(config: Partial = {}): ProxyServe await ensureFreshToken().catch(() => { /* reactive path handles */ }) let tokenRefreshed = false + let didFreshBaseRetry = false while (true) { // Track whether client-visible SSE events were yielded. 
@@ -1462,7 +1497,7 @@ export function createProxyServer(config: Partial = {}): ProxyServe try { for await (const event of query(buildQueryOptions({ prompt: makePrompt(), model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable, - passthrough, stream: true, sdkAgents, passthroughMcp, cleanEnv: profileEnv, hasDeferredTools, + passthrough, stream: true, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools, resumeSessionId, isUndo, undoRollbackUuid, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr, effort, thinking, taskBudget, betas, settingSources, codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? false : undefined, @@ -1504,7 +1539,7 @@ export function createProxyServer(config: Partial = {}): ProxyServe yield* query(buildQueryOptions({ prompt: buildFreshPrompt(allMessages, sanitizeOpts), model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable, - passthrough, stream: true, sdkAgents, passthroughMcp, cleanEnv: profileEnv, hasDeferredTools, + passthrough, stream: true, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools, resumeSessionId: undefined, isUndo: false, undoRollbackUuid: undefined, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr, effort, thinking, taskBudget, betas, settingSources, codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? 
false : undefined, @@ -1534,6 +1569,35 @@ export function createProxyServer(config: Partial = {}): ProxyServe continue } + if (isExtraUsageRequiredError(errMsg) && resumeSessionId && !didFreshBaseRetry) { + didFreshBaseRetry = true + claudeLog("upstream.session_fallback", { + mode: "stream", + model, + reason: "extra_usage_required_resume", + }) + console.error(`[PROXY] ${requestMeta.requestId} extra usage persisted on resumed ${model}, retrying as fresh session`) + evictSession(profileSessionId, profileScopedCwd, allMessages) + sdkUuidMap.length = 0 + for (let i = 0; i < allMessages.length; i++) sdkUuidMap.push(null) + yield* query(buildQueryOptions({ + prompt: buildFreshPrompt(allMessages, sanitizeOpts), + model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable, + passthrough, stream: true, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools, + resumeSessionId: undefined, isUndo: false, undoRollbackUuid: undefined, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr, + effort, thinking, taskBudget, betas, settingSources, + codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? false : undefined, + memory: sdkFeatures.memory, dreaming: sdkFeatures.dreaming, sharedMemory: sdkFeatures.sharedMemory, + maxBudgetUsd: sdkFeatures.maxBudgetUsd, fallbackModel: sdkFeatures.fallbackModel, + sdkDebug: sdkFeatures.sdkDebug, + additionalDirectories: sdkFeatures.additionalDirectories + ? sdkFeatures.additionalDirectories.split(",").map(d => d.trim()).filter(Boolean) + : undefined, + advisorModel, + })) + return + } + // Expired OAuth token: refresh once and retry if (isExpiredTokenError(errMsg) && !tokenRefreshed) { tokenRefreshed = true