Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/__tests__/errors.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,12 @@ describe("classifyError", () => {
expect(isExtraUsageRequiredError("extra usage is required for 1m context")).toBe(true)
})

// The short error form carries no "1m" marker; the classifier is still
// expected to report it as an extra-usage error.
it("detects 'out of extra usage' variant", () => {
  const shortVariant = "Claude Code returned an error result: API Error: 400 You're out of extra usage."
  const detected = isExtraUsageRequiredError(shortVariant)
  expect(detected).toBe(true)
})

it("returns false for unrelated errors", () => {
expect(isExtraUsageRequiredError("rate limit exceeded")).toBe(false)
expect(isExtraUsageRequiredError("authentication failed")).toBe(false)
Expand Down
13 changes: 13 additions & 0 deletions src/__tests__/proxy-env-stripping.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,19 @@ describe("SDK model pin injection (fixes #419)", () => {
expect(capturedQueryOptions.env.ANTHROPIC_DEFAULT_HAIKU_MODEL).toBe("claude-haiku-4-5")
})

// A request that names an exact Opus version must surface that same version
// in the env captured from the SDK query options.
it("explicit claude-opus-4-6 requests pin the SDK env to 4.6", async () => {
  const requestedModel = "claude-opus-4-6"
  await post(createTestApp(), { ...BASIC_REQUEST, model: requestedModel })
  const pinnedOpus = capturedQueryOptions.env.ANTHROPIC_DEFAULT_OPUS_MODEL
  expect(pinnedOpus).toBe(requestedModel)
})

// The process-level pin (4.6) is set before the app is created; the explicit
// 4.7 request must still be what the SDK env ends up with.
// NOTE(review): process.env is mutated here without a local restore —
// presumably a suite-level hook resets it; confirm.
it("explicit claude-opus-4-7 requests beat inherited env pins", async () => {
  process.env.ANTHROPIC_DEFAULT_OPUS_MODEL = "claude-opus-4-6"
  const requestedModel = "claude-opus-4-7"
  await post(createTestApp(), { ...BASIC_REQUEST, model: requestedModel })
  expect(capturedQueryOptions.env.ANTHROPIC_DEFAULT_OPUS_MODEL).toBe(requestedModel)
})

it("shell ANTHROPIC_DEFAULT_* values win over Meridian's pins", async () => {
process.env.ANTHROPIC_DEFAULT_OPUS_MODEL = "claude-opus-4-1-20250805"
process.env.ANTHROPIC_DEFAULT_SONNET_MODEL = "claude-sonnet-4-20250514"
Expand Down
76 changes: 70 additions & 6 deletions src/__tests__/proxy-extra-usage-fallback.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ import {
} from "./helpers"

// Track query calls to verify retry behavior.
// `resume` records the `options.resume` value passed to each mocked query
// (undefined when none was passed) so tests can tell resumed retries from
// fresh-session retries.
let queryCalls: Array<{ model: string; callIndex: number; resume?: string }> = []
let queryCallCount = 0

// Control what the mock does
let mockBehavior:
  | "extra_usage_then_succeed"
  | "always_extra_usage"
  | "out_of_extra_usage_then_succeed"
  | "resume_extra_usage_then_succeed"
  | "succeed"
  | "error_assistant_then_ratelimit" = "succeed"

// Long-form error: mentions both "extra usage" and "1M context".
const EXTRA_USAGE_ERROR = "Claude Code returned an error result: API Error: Extra usage is required for 1M context · enable extra usage at claude.ai/settings/usage, or use --model to switch"
// Short-form error: no "1M" marker, only "out of extra usage".
const OUT_OF_EXTRA_USAGE_ERROR = "Claude Code returned an error result: API Error: 400 You're out of extra usage."

// Pass through the real resolveSdkModelDefaults — mock.module is process-global
// in Bun, and stubbing it as () => ({}) leaks to proxy-env-stripping.test.ts.
Expand All @@ -48,10 +49,10 @@ mock.module("../proxy/models", () => ({
mock.module("@anthropic-ai/claude-agent-sdk", () => ({
query: (opts: any) => {
queryCallCount++
const callIndex = queryCallCount
const model = opts.options?.model || "sonnet"
queryCalls.push({ model, callIndex })
const isStreaming = opts.options?.includePartialMessages === true
const callIndex = queryCallCount
const model = opts.options?.model || "sonnet"
queryCalls.push({ model, callIndex, resume: opts.options?.resume })
const isStreaming = opts.options?.includePartialMessages === true

return (async function* () {
if (mockBehavior === "always_extra_usage") {
Expand All @@ -62,6 +63,18 @@ mock.module("@anthropic-ai/claude-agent-sdk", () => ({
throw new Error(EXTRA_USAGE_ERROR)
}

if (mockBehavior === "out_of_extra_usage_then_succeed" && callIndex === 1) {
throw new Error(OUT_OF_EXTRA_USAGE_ERROR)
}

if (
mockBehavior === "resume_extra_usage_then_succeed" &&
opts.options?.resume === "sdk-session-1" &&
(model === "sonnet[1m]" || model === "sonnet")
) {
throw new Error(OUT_OF_EXTRA_USAGE_ERROR)
}

// Simulates real SDK behaviour: emits an error assistant event first,
// then throws a rate_limit error (which is what the SDK does when the
// rate_limit_event with status:"rejected" is received).
Expand Down Expand Up @@ -179,6 +192,22 @@ describe("Extra usage required fallback", () => {
// should eventually propagate since the base model also fails
expect(response.status).toBe(500)
})

// With the mock failing only the first call with the short error, the proxy
// is expected to answer 200 after exactly two SDK calls.
it("falls back on the short 'out of extra usage' error", async () => {
  mockBehavior = "out_of_extra_usage_then_succeed"

  const requestBody = {
    model: "sonnet",
    stream: false,
    messages: [{ role: "user", content: "hello" }],
  }
  const response = await post(createTestApp(), requestBody)

  expect(response.status).toBe(200)
  // First attempt uses the [1m] variant; the retry uses the base model.
  const attemptedModels = queryCalls.map((call) => call.model)
  expect(attemptedModels.length).toBe(2)
  expect(attemptedModels[0]!).toBe("sonnet[1m]")
  expect(attemptedModels[1]!).toBe("sonnet")
})
})

describe("Streaming", () => {
Expand Down Expand Up @@ -214,6 +243,41 @@ describe("Extra usage required fallback", () => {
const errorEvent = events.find((e) => e.event === "error")
expect(errorEvent).toBeDefined()
})

// Two-request scenario under the same "x-opencode-session" header. The first
// request seeds the session; the second (streaming) request continues it and,
// per the assertions at the end, is retried until it runs without a resume id.
it("retries resumed base model as a fresh session after extra usage", async () => {
mockBehavior = "resume_extra_usage_then_succeed"
const app = createTestApp()

// Request 1: plain non-streaming turn; its response is not inspected.
await post(app, {
model: "sonnet",
stream: false,
messages: [{ role: "user", content: "hello" }],
}, { "x-opencode-session": "sess-1" })

// Request 2: streaming follow-up that extends the first conversation with an
// assistant tool_use and the matching tool_result.
const response = await post(app, {
model: "sonnet",
stream: true,
messages: [
{ role: "user", content: "hello" },
{ role: "assistant", content: [
{ type: "text", text: "Running task." },
{ type: "tool_use", id: "toolu_1", name: "write", input: { path: "a.txt" } },
] },
{ role: "user", content: [
{ type: "tool_result", tool_use_id: "toolu_1", content: "done" },
] },
],
}, { "x-opencode-session": "sess-1" })

expect(response.status).toBe(200)
const text = await response.text()
// The stream must contain a message_start event.
expect(text).toContain("event: message_start")
// Only the last three SDK calls belong to request 2 (call 1 was request 1):
//   call 2 — [1m] variant, resumed session
//   call 3 — base model, still resumed
//   call 4 — base model, no resume id (fresh session)
expect(queryCalls.slice(-3)).toEqual([
{ model: "sonnet[1m]", callIndex: 2, resume: "sdk-session-1" },
{ model: "sonnet", callIndex: 3, resume: "sdk-session-1" },
{ model: "sonnet", callIndex: 4, resume: undefined },
])
})
})

describe("No backoff needed", () => {
Expand Down
9 changes: 9 additions & 0 deletions src/__tests__/query.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ function makeContext(overrides: Partial<QueryContext> = {}): QueryContext {
stream: false,
sdkAgents: {},
cleanEnv: {},
envOverrides: undefined,
hasDeferredTools: false,
isUndo: false,
blockedTools: BLOCKED_BUILTIN_TOOLS,
Expand Down Expand Up @@ -45,6 +46,14 @@ describe("buildQueryOptions", () => {
expect((result.options as any).includePartialMessages).toBeUndefined()
})

// When cleanEnv and envOverrides disagree on the same key, the override value
// must be the one present in the built options.
it("applies envOverrides after inherited env", () => {
  const inherited = { ANTHROPIC_DEFAULT_OPUS_MODEL: "claude-opus-4-6" }
  const overrides = { ANTHROPIC_DEFAULT_OPUS_MODEL: "claude-opus-4-7" }
  const { options } = buildQueryOptions(makeContext({ cleanEnv: inherited, envOverrides: overrides }))
  expect(options.env?.ANTHROPIC_DEFAULT_OPUS_MODEL).toBe("claude-opus-4-7")
})

it("sets includePartialMessages for streaming", () => {
const result = buildQueryOptions(makeContext({ stream: true }))
expect((result.options as any).includePartialMessages).toBe(true)
Expand Down
2 changes: 1 addition & 1 deletion src/proxy/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ export function isRateLimitError(errMsg: string): boolean {
*/
export function isExtraUsageRequiredError(errMsg: string): boolean {
  // Matching is case-insensitive: "1M" and "1m" are treated alike.
  const lower = errMsg.toLowerCase()
  // Long form, e.g. "Extra usage is required for 1M context …": both markers
  // are required so that other "extra usage" messages are not matched.
  const requiresExtraUsageFor1m = lower.includes("extra usage") && lower.includes("1m")
  // Short form, e.g. "… 400 You're out of extra usage.": has no "1m" marker.
  const outOfExtraUsage = lower.includes("out of extra usage")
  return requiresExtraUsageFor1m || outOfExtraUsage
}

/**
Expand Down
3 changes: 3 additions & 0 deletions src/proxy/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ export interface QueryContext {
passthroughMcp?: ReturnType<typeof createPassthroughMcpServer>
/** Cleaned environment variables (API keys stripped) */
cleanEnv: Record<string, string | undefined>
/** Per-request env overrides that must win over inherited env */
envOverrides?: Record<string, string | undefined>
/** Whether any passthrough tools use deferred loading */
hasDeferredTools: boolean
/** SDK session ID for resume (if continuing a session) */
Expand Down Expand Up @@ -299,6 +301,7 @@ export function buildQueryOptions(ctx: QueryContext): BuildQueryResult {
// "--dangerously-skip-permissions cannot be used with root/sudo"
// See: https://github.com/rynfar/meridian/issues/256
...(process.getuid?.() === 0 ? { IS_SANDBOX: "1" } : {}),
...ctx.envOverrides,
},
...(Object.keys(sdkAgents).length > 0 ? { agents: sdkAgents } : {}),
...(resumeSessionId ? { resume: resumeSessionId } : {}),
Expand Down
74 changes: 69 additions & 5 deletions src/proxy/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,11 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}): ProxyServe
// Logged for observability; fork-*/subagent-* values also skip fingerprint cache (see below).
// Examples: "main", "fork-memory-extract", "subagent-scout".
const requestSource = c.req.header("x-meridian-source")?.slice(0, 64) || undefined
// Resolve the requested model once. Non-string and empty values fall back to
// "sonnet"; the empty-string case keeps the behavior of the earlier
// `body.model || "sonnet"` default.
const requestedModel = typeof body.model === "string" && body.model !== "" ? body.model : "sonnet"
let model = mapModelToClaudeModel(requestedModel, authStatus?.subscriptionType, agentMode)
// An explicit claude-opus-* id is carried as a per-request env override for
// ANTHROPIC_DEFAULT_OPUS_MODEL; every other request sends no override.
const envOverrides = requestedModel.startsWith("claude-opus-")
  ? { ANTHROPIC_DEFAULT_OPUS_MODEL: requestedModel }
  : undefined
// workingDirectory = SDK subprocess cwd (must exist on the proxy host).
// clientWorkingDirectory = the client's local path (may not exist here);
// used for per-project fingerprint bucketing and a system-prompt hint
Expand Down Expand Up @@ -983,6 +987,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}): ProxyServe
await ensureFreshToken().catch(() => { /* reactive path handles */ })

let tokenRefreshed = false
let didFreshBaseRetry = false
while (true) {
// Track whether response content was yielded.
// The SDK emits metadata (session_id etc.) before the API call;
Expand All @@ -991,7 +996,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}): ProxyServe
try {
for await (const event of query(buildQueryOptions({
prompt: makePrompt(), model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable,
passthrough, stream: false, sdkAgents, passthroughMcp, cleanEnv: profileEnv, hasDeferredTools,
passthrough, stream: false, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools,
resumeSessionId, isUndo, undoRollbackUuid, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr,
effort, thinking, taskBudget, betas, settingSources,
codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? false : undefined,
Expand Down Expand Up @@ -1038,7 +1043,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}): ProxyServe
yield* query(buildQueryOptions({
prompt: buildFreshPrompt(allMessages, sanitizeOpts),
model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable,
passthrough, stream: false, sdkAgents, passthroughMcp, cleanEnv: profileEnv, hasDeferredTools,
passthrough, stream: false, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools,
resumeSessionId: undefined, isUndo: false, undoRollbackUuid: undefined, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr,
effort, thinking, taskBudget, betas, settingSources,
codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? false : undefined,
Expand Down Expand Up @@ -1072,6 +1077,35 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}): ProxyServe
continue
}

// Extra-usage error while a session was being resumed: retry a single time as a
// fresh (non-resumed) session. `didFreshBaseRetry` guards against repeating it.
if (isExtraUsageRequiredError(errMsg) && resumeSessionId && !didFreshBaseRetry) {
didFreshBaseRetry = true
claudeLog("upstream.session_fallback", {
mode: "non_stream",
model,
reason: "extra_usage_required_resume",
})
console.error(`[PROXY] ${requestMeta.requestId} extra usage persisted on resumed ${model}, retrying as fresh session`)
// Drop the stored session and reset the UUID map to one null per message.
evictSession(profileSessionId, profileScopedCwd, allMessages)
sdkUuidMap.length = 0
for (let i = 0; i < allMessages.length; i++) sdkUuidMap.push(null)
// Re-issue the query with a prompt rebuilt from all messages and with
// resume/undo state cleared (resumeSessionId, isUndo, undoRollbackUuid).
yield* query(buildQueryOptions({
prompt: buildFreshPrompt(allMessages, sanitizeOpts),
model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable,
passthrough, stream: false, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools,
resumeSessionId: undefined, isUndo: false, undoRollbackUuid: undefined, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr,
effort, thinking, taskBudget, betas, settingSources,
codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? false : undefined,
memory: sdkFeatures.memory, dreaming: sdkFeatures.dreaming, sharedMemory: sdkFeatures.sharedMemory,
maxBudgetUsd: sdkFeatures.maxBudgetUsd, fallbackModel: sdkFeatures.fallbackModel,
sdkDebug: sdkFeatures.sdkDebug,
// Comma-separated setting -> trimmed, non-empty directory list.
additionalDirectories: sdkFeatures.additionalDirectories
? sdkFeatures.additionalDirectories.split(",").map(d => d.trim()).filter(Boolean)
: undefined,
advisorModel,
}))
// The delegated query is the final attempt: return instead of looping again.
// NOTE(review): an error thrown by this retry is not handled inside this branch — confirm the caller deals with it.
return
}

// Expired OAuth token: refresh once and retry
if (isExpiredTokenError(errMsg) && !tokenRefreshed) {
tokenRefreshed = true
Expand Down Expand Up @@ -1452,6 +1486,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}): ProxyServe
await ensureFreshToken().catch(() => { /* reactive path handles */ })

let tokenRefreshed = false
let didFreshBaseRetry = false

while (true) {
// Track whether client-visible SSE events were yielded.
Expand All @@ -1462,7 +1497,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}): ProxyServe
try {
for await (const event of query(buildQueryOptions({
prompt: makePrompt(), model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable,
passthrough, stream: true, sdkAgents, passthroughMcp, cleanEnv: profileEnv, hasDeferredTools,
passthrough, stream: true, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools,
resumeSessionId, isUndo, undoRollbackUuid, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr,
effort, thinking, taskBudget, betas, settingSources,
codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? false : undefined,
Expand Down Expand Up @@ -1504,7 +1539,7 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}): ProxyServe
yield* query(buildQueryOptions({
prompt: buildFreshPrompt(allMessages, sanitizeOpts),
model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable,
passthrough, stream: true, sdkAgents, passthroughMcp, cleanEnv: profileEnv, hasDeferredTools,
passthrough, stream: true, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools,
resumeSessionId: undefined, isUndo: false, undoRollbackUuid: undefined, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr,
effort, thinking, taskBudget, betas, settingSources,
codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? false : undefined,
Expand Down Expand Up @@ -1534,6 +1569,35 @@ export function createProxyServer(config: Partial<ProxyConfig> = {}): ProxyServe
continue
}

// Extra-usage error while a session was being resumed: retry a single time as a
// fresh (non-resumed) session. `didFreshBaseRetry` guards against repeating it.
if (isExtraUsageRequiredError(errMsg) && resumeSessionId && !didFreshBaseRetry) {
didFreshBaseRetry = true
claudeLog("upstream.session_fallback", {
mode: "stream",
model,
reason: "extra_usage_required_resume",
})
console.error(`[PROXY] ${requestMeta.requestId} extra usage persisted on resumed ${model}, retrying as fresh session`)
// Drop the stored session and reset the UUID map to one null per message.
evictSession(profileSessionId, profileScopedCwd, allMessages)
sdkUuidMap.length = 0
for (let i = 0; i < allMessages.length; i++) sdkUuidMap.push(null)
// Re-issue the streaming query with a prompt rebuilt from all messages and
// with resume/undo state cleared (resumeSessionId, isUndo, undoRollbackUuid).
yield* query(buildQueryOptions({
prompt: buildFreshPrompt(allMessages, sanitizeOpts),
model, workingDirectory, clientWorkingDirectory, systemContext, claudeExecutable,
passthrough, stream: true, sdkAgents, passthroughMcp, cleanEnv: profileEnv, envOverrides, hasDeferredTools,
resumeSessionId: undefined, isUndo: false, undoRollbackUuid: undefined, sdkHooks, blockedTools: pipelineCtx.blockedTools, incompatibleTools: pipelineCtx.incompatibleTools, mcpServerName: adapter.getMcpServerName(), allowedMcpTools: pipelineCtx.allowedMcpTools, onStderr,
effort, thinking, taskBudget, betas, settingSources,
codeSystemPrompt: sdkFeatures.codeSystemPrompt, clientSystemPrompt: sdkFeatures.clientSystemPrompt === false ? false : undefined,
memory: sdkFeatures.memory, dreaming: sdkFeatures.dreaming, sharedMemory: sdkFeatures.sharedMemory,
maxBudgetUsd: sdkFeatures.maxBudgetUsd, fallbackModel: sdkFeatures.fallbackModel,
sdkDebug: sdkFeatures.sdkDebug,
// Comma-separated setting -> trimmed, non-empty directory list.
additionalDirectories: sdkFeatures.additionalDirectories
? sdkFeatures.additionalDirectories.split(",").map(d => d.trim()).filter(Boolean)
: undefined,
advisorModel,
}))
// The delegated query is the final attempt: return instead of looping again.
// NOTE(review): an error thrown by this retry is not handled inside this branch — confirm the caller deals with it.
return
}

// Expired OAuth token: refresh once and retry
if (isExpiredTokenError(errMsg) && !tokenRefreshed) {
tokenRefreshed = true
Expand Down
Loading