From 986f7024c247fc329a61a98f7daec2a5e1280d4b Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Mon, 25 May 2026 18:02:13 -0700 Subject: [PATCH 01/12] feat(agents): support OpenAI Agents SDK --- cmd/gomodel/docs/docs.go | 330 ++++++++++++++ docs/dev/agents-sdk-support.md | 260 +++++++++++ docs/dev/claude-agent-sdk-support.md | 293 +++++++++++++ docs/docs.json | 1 + docs/examples/openai-agents-sdk/README.md | 31 ++ .../openai-agents-sdk/javascript_basic.mjs | 26 ++ .../openai-agents-sdk/python_basic.py | 31 ++ .../python_streaming_tool.py | 49 +++ docs/guides/openai-agents-sdk.mdx | 176 ++++++++ docs/openapi.json | 404 ++++++++++++++++++ internal/core/chat_json.go | 15 + internal/core/responses.go | 43 +- internal/core/responses_json.go | 175 ++++++-- internal/core/responses_json_test.go | 94 +++- internal/core/types.go | 3 + internal/providers/responses_adapter.go | 48 +++ internal/providers/responses_adapter_test.go | 79 ++++ internal/providers/responses_input.go | 7 +- internal/server/handlers_test.go | 37 +- .../server/translated_inference_service.go | 3 + 20 files changed, 2041 insertions(+), 64 deletions(-) create mode 100644 docs/dev/agents-sdk-support.md create mode 100644 docs/dev/claude-agent-sdk-support.md create mode 100644 docs/examples/openai-agents-sdk/README.md create mode 100644 docs/examples/openai-agents-sdk/javascript_basic.mjs create mode 100644 docs/examples/openai-agents-sdk/python_basic.py create mode 100644 docs/examples/openai-agents-sdk/python_streaming_tool.py create mode 100644 docs/guides/openai-agents-sdk.mdx diff --git a/cmd/gomodel/docs/docs.go b/cmd/gomodel/docs/docs.go index 10036700..68616f60 100644 --- a/cmd/gomodel/docs/docs.go +++ b/cmd/gomodel/docs/docs.go @@ -2537,6 +2537,223 @@ const docTemplate = `{ ] } }, + "/v1/conversations": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "conversations" + ], + "summary": "Create a conversation", + "parameters": [ + { + 
"description": "Conversation create request", + "name": "request", + "in": "body", + "schema": { + "$ref": "#/definitions/core.ConversationCreateRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/core.Conversation" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + } + }, + "security": [ + { + "BearerAuth": [] + } + ] + } + }, + "/v1/conversations/{id}": { + "get": { + "produces": [ + "application/json" + ], + "tags": [ + "conversations" + ], + "summary": "Get a conversation", + "parameters": [ + { + "type": "string", + "description": "Conversation ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/core.Conversation" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + } + }, + "security": [ + { + "BearerAuth": [] + } + ] + }, + "post": { + "description": "Replaces the conversation metadata in full.", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "conversations" + ], + "summary": "Update a conversation", + "parameters": [ + { + "type": "string", + "description": "Conversation ID", + "name": "id", + "in": "path", + "required": true + }, + { + "description": "Conversation update request", + "name": "request", + "in": "body", + "required": true, + 
"schema": { + "$ref": "#/definitions/core.ConversationUpdateRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/core.Conversation" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + } + }, + "security": [ + { + "BearerAuth": [] + } + ] + }, + "delete": { + "produces": [ + "application/json" + ], + "tags": [ + "conversations" + ], + "summary": "Delete a conversation", + "parameters": [ + { + "type": "string", + "description": "Conversation ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/core.ConversationDeleteResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/core.OpenAIErrorEnvelope" + } + } + }, + "security": [ + { + "BearerAuth": [] + } + ] + } + }, "/v1/embeddings": { "post": { "consumes": [ @@ -4734,6 +4951,9 @@ const docTemplate = `{ "reasoning": { "$ref": "#/definitions/core.Reasoning" }, + "service_tier": { + "type": "string" + }, "stream": { "type": "boolean" }, @@ -4752,6 +4972,12 @@ const docTemplate = `{ "type": "object", "additionalProperties": {} } + }, + "top_p": { + "type": "number" + }, + "user": { + "type": "string" } } }, @@ -4838,6 +5064,73 @@ const docTemplate = `{ } } }, + "core.Conversation": { + "type": "object", + "properties": { + "created_at": { + "type": "integer" + }, + "id": { + "type": "string" 
+ }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "object": { + "description": "\"conversation\"", + "type": "string" + } + } + }, + "core.ConversationCreateRequest": { + "type": "object", + "properties": { + "items": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + }, + "core.ConversationDeleteResponse": { + "type": "object", + "properties": { + "deleted": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "object": { + "description": "\"conversation.deleted\"", + "type": "string" + } + } + }, + "core.ConversationUpdateRequest": { + "type": "object", + "properties": { + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + }, "core.EmbeddingData": { "type": "object", "properties": { @@ -5576,6 +5869,14 @@ const docTemplate = `{ "core.ResponsesRequest": { "type": "object", "properties": { + "context_management": {}, + "conversation": {}, + "include": { + "type": "array", + "items": { + "type": "string" + } + }, "input": { "description": "string or []ResponsesInputElement — see docs for array form" }, @@ -5597,6 +5898,13 @@ const docTemplate = `{ "parallel_tool_calls": { "type": "boolean" }, + "previous_response_id": { + "type": "string" + }, + "prompt": {}, + "prompt_cache_retention": { + "type": "string" + }, "provider": { "description": "Gateway routing hint; stripped before upstream execution.", "type": "string" @@ -5604,6 +5912,15 @@ const docTemplate = `{ "reasoning": { "$ref": "#/definitions/core.Reasoning" }, + "safety_identifier": { + "type": "string" + }, + "service_tier": { + "type": "string" + }, + "store": { + "type": "boolean" + }, "stream": { "type": "boolean" }, @@ -5613,6 +5930,7 @@ const docTemplate = `{ "temperature": { "type": "number" }, + "text": {}, "tool_choice": { "description": "string or object" 
}, @@ -5622,6 +5940,18 @@ const docTemplate = `{ "type": "object", "additionalProperties": {} } + }, + "top_logprobs": { + "type": "integer" + }, + "top_p": { + "type": "number" + }, + "truncation": { + "type": "string" + }, + "user": { + "type": "string" } } }, diff --git a/docs/dev/agents-sdk-support.md b/docs/dev/agents-sdk-support.md new file mode 100644 index 00000000..335ffa20 --- /dev/null +++ b/docs/dev/agents-sdk-support.md @@ -0,0 +1,260 @@ +# OpenAI Agents SDK Support + +Status checked: 2026-05-22 + +## Short answer + +GoModel is close to supporting the OpenAI Agents SDK for normal HTTP-based +model calls. + +Basic Agents SDK runs should work when the SDK is pointed at GoModel as an +OpenAI-compatible endpoint and tracing is disabled or configured separately. +GoModel already exposes: + +- `POST /v1/responses` +- `POST /v1/chat/completions` +- Responses streaming over SSE +- Responses lifecycle endpoints: + - `GET /v1/responses/{id}` + - `GET /v1/responses/{id}/input_items` + - `POST /v1/responses/{id}/cancel` + - `DELETE /v1/responses/{id}` + - `POST /v1/responses/input_tokens` + - `POST /v1/responses/compact` + +That is enough for Codex-style Responses clients and likely enough for a simple +Agents SDK `Runner.run(...)` with text and function tools. + +It is not yet safe to market as full Agents SDK support. The SDK uses newer +Responses fields, state-management modes, built-in tools, streaming events, and +optional websocket transport. Some of those are only pass-through today, and +some are not validated against the SDK. + +## What the SDK expects + +The OpenAI Agents SDK uses the Responses API by default for OpenAI models. 
Its +Responses request path can send fields such as: + +- `previous_response_id` +- `conversation` +- `instructions` +- `model` +- `input` +- `include` +- `tools` +- `prompt` +- `temperature` +- `top_p` +- `truncation` +- `max_output_tokens` +- `tool_choice` +- `parallel_tool_calls` +- `stream` +- `text` +- `store` +- `prompt_cache_retention` +- `reasoning` +- `metadata` +- `context_management` +- SDK `extra_args` / `extra_body` fields + +GoModel preserves unknown top-level Responses fields, so native OpenAI-compatible +providers receive many of these without code changes. The weak spot is the +translated-provider path, where Responses-only fields can leak into Chat +Completions requests or newer Responses input/output item types can lose their +exact shape. + +## Current support assessment + +### Supported now + +- Basic non-streaming Responses calls. +- Basic streaming Responses calls over HTTP/SSE. +- Function tool calls and `function_call_output` items in the + Responses-to-Chat adapter. +- `tools`, `tool_choice`, `parallel_tool_calls`, `temperature`, + `max_output_tokens`, `reasoning`, and `metadata`. +- Native OpenAI-compatible passthrough for extra top-level request fields. +- Stored non-streaming response snapshots for local response retrieval and + `input_items`. +- `responses.input_tokens` and `responses.compact` when the selected provider + exposes native support. +- `/v1/conversations` lifecycle endpoints. +- `store: false` skips GoModel's local response snapshot. +- Unknown Responses input item types round-trip unchanged for native Responses + providers; chat-translated providers now return a clear compatibility error. +- First OpenAI Agents SDK guide and runnable smoke examples. + +### Needs validation + +- Python Agents SDK with `OpenAIResponsesModel`. +- JavaScript Agents SDK with the default Responses provider. +- `Runner.run_streamed(...)` against GoModel SSE streams. +- Function tool loops across multiple SDK turns. 
+- Handoffs and agents-as-tools, which become tool definitions at the model + boundary. +- Structured outputs through the Responses `text` format field. +- Sessions that replay `result.to_input_list()` and SDK-managed local session + history. +- `OpenAIResponsesCompactionSession` with `responses.compact`. + +### Known or likely gaps + +- No SDK contract test suite in CI. +- `previous_response_id` is only safe when the upstream provider handles it + natively. Chat-translated providers now return a clear compatibility error; + local expansion from GoModel's stored Responses state is still not + implemented. +- The Chat-to-Responses stream converter emits the core events needed for text + and function calls, but the event sequence has not been validated against the + current Agents SDK parsers. +- Websocket Responses transport is unsupported. The SDK can use HTTP/SSE, but + `use_responses_websocket=True` needs a websocket-compatible `/responses` + endpoint. +- Built-in Responses tools such as web search, file search, computer use, and + tool search are only safe when the selected upstream provider natively + supports those tool payloads. +- Prompt-managed flows and deferred tool loading need validation, especially + when the SDK omits `model` because the prompt owns model selection. +- Tracing uploads go to OpenAI by default in the SDK. Users without an OpenAI + Platform key need docs to disable tracing or configure a separate tracing + processor/key. + +## Implementation checklist + +### P0: Prove basic SDK compatibility + +- Done: add `docs/guides/openai-agents-sdk.mdx`. + - Python example using `AsyncOpenAI(base_url="http://localhost:8080/v1", + api_key="$GOMODEL_MASTER_KEY")`. + - Python example using `OpenAIProvider` / `RunConfig`. + - JavaScript example using an OpenAI provider pointed at GoModel. + - Mention that tracing must be disabled or configured with a real OpenAI + Platform key. 
+ - Mention that HTTP/SSE Responses is the supported path; websocket transport + is not supported yet. +- Done: add a small runnable smoke test example under + `docs/examples/openai-agents-sdk/`. + - Text-only `Runner.run`. + - Streaming `Runner.run_streamed`. + - One local function tool. +- Still needed: add CI or manual contract tests that boot GoModel against the existing mock + provider and run the smoke examples. + +### P0: Preserve Responses items exactly + +- Done: change Responses input decoding so unknown item types keep their original raw + JSON shape. +- Keep typed conversion for known item types: + - `message` + - `function_call` + - `function_call_output` +- Partially done: add tests for raw round-trip preservation of newer item types: + - `reasoning` + - `web_search_call` + - `file_search_call` + - `computer_call` + - `mcp_call` + - any item with `provider_data` +- Done: ensure native OpenAI-compatible providers receive those items unchanged. +- Done: ensure Chat-translated providers return a clear error or intentionally strip + unsupported item types instead of sending malformed messages upstream. + +### P0: Respect `store: false` + +- Done: add a typed `Store *bool` field to `core.ResponsesRequest`. +- Done: when `store == false`, do not persist GoModel's local response snapshot by + default. +- Add a config option only if operators need to override this for audit or + debugging. +- Document the behavior in the Responses API guide and Agents SDK guide. 
+ +### P1: Add typed SDK request fields + +Done: add typed fields to `core.ResponsesRequest` for fields the Agents SDK sends +regularly, while still preserving unknown fields: + +- `PreviousResponseID string` +- `Conversation any` +- `Include []string` +- `Prompt any` +- `TopP *float64` +- `Truncation string` +- `Text any` +- `Store *bool` +- `PromptCacheRetention string` +- `ContextManagement any` +- `TopLogprobs *int` +- `User string` +- `ServiceTier string` +- `SafetyIdentifier string` + +Use these fields for cache keys, audit summaries, compatibility decisions, and +provider-specific adaptation where relevant. + +### P1: Make stateful Responses modes explicit + +- Done: implement `/v1/conversations` lifecycle support. +- Done: reject `previous_response_id` and `conversation` on chat-translated + providers with a clear compatibility error. +- Still needed: optionally expand previous stored responses into full input for + chat-translated providers. +- Add tests for: + - `result.to_input_list()` / local session replay + - `previous_response_id` with native OpenAI provider + - `previous_response_id` with a chat-translated provider + - `conversation` with a chat-translated provider (expect the compatibility error) + +### P1: Validate streaming against the SDK + +- Run Python and JavaScript Agents SDK streaming clients against GoModel. +- Compare GoModel's chat-translated stream with native OpenAI Responses SSE + ordering. +- Add missing stream events if the SDK requires them: + - `response.content_part.added` + - `response.output_text.done` + - `response.content_part.done` + - terminal `response.failed` / `response.incomplete` propagation +- Verify usage appears on the final SDK result for both native and + chat-translated streams. 
+ +### P2: Feature capability gating + +- Add model/provider capability metadata for Responses features: + - function tools + - structured outputs through `text.format` + - multimodal input + - web search + - file search + - computer use + - tool search / deferred tool loading + - response compaction + - response lifecycle retrieval + - conversations + - websocket Responses transport +- Use the metadata to reject unsupported SDK requests early with clear + OpenAI-compatible errors. +- Surface capability notes in `/v1/models` metadata and docs. + +### P2: Subscription and harness compatibility + +- Keep this separate from OpenAI Agents SDK support. +- Document that GoModel's normal path uses gateway credentials plus upstream + provider API keys, not ChatGPT, Copilot, or Claude subscription credentials. +- Treat subscription-backed harness support as a separate compliance and product + investigation before implementation. + +## Suggested public claim + +Until the P0 work is done: + +> GoModel supports the OpenAI-compatible Responses API used by the OpenAI Agents +> SDK for basic HTTP flows, and full Agents SDK compatibility is being +> validated. + +After P0: + +> GoModel supports the OpenAI Agents SDK over HTTP Responses for text, +> streaming, function tools, and SDK-managed local sessions. Provider-native +> features such as hosted tools, conversations, and websocket transport depend +> on the selected upstream provider. diff --git a/docs/dev/claude-agent-sdk-support.md b/docs/dev/claude-agent-sdk-support.md new file mode 100644 index 00000000..98186ad3 --- /dev/null +++ b/docs/dev/claude-agent-sdk-support.md @@ -0,0 +1,293 @@ +# Claude Agent SDK Support + +Status checked: 2026-05-22 + +## Short answer + +GoModel is probably already close to supporting Anthropic's Agent SDK through +Anthropic passthrough. 
+ +The supported path should be: + +```bash +export ANTHROPIC_BASE_URL=http://localhost:8080/p/anthropic +export ANTHROPIC_AUTH_TOKEN=$GOMODEL_MASTER_KEY +``` + +With GoModel configured with its own upstream `ANTHROPIC_API_KEY`, the SDK's +Anthropic Messages calls should flow as: + +```text +Claude Agent SDK -> /p/anthropic/v1/messages -> GoModel -> Anthropic /v1/messages +``` + +This should work because the Agent SDK is built on Claude Code, and Anthropic's +gateway requirements for Claude Code are exactly the native Messages endpoints +GoModel can expose through passthrough: + +- `POST /v1/messages` +- `POST /v1/messages/count_tokens` +- forwarding `anthropic-beta` and `anthropic-version` + +It is not yet safe to market this as full Claude Agent SDK support. GoModel has +not been validated against the current Python and TypeScript SDKs, and the +managed `/v1/messages` route is only a portable subset of Anthropic's native +Messages API. For full SDK compatibility, passthrough should remain the primary +path. + +## What the SDK expects + +The current Claude Agent SDK packages are: + +- Python: `claude-agent-sdk` +- TypeScript: `@anthropic-ai/claude-agent-sdk` + +The SDK runs the same agent loop and tool runtime used by Claude Code. It can +read and edit files, run shell commands, search the web, call MCP tools, use +subagents, apply hooks, and maintain sessions. At the model boundary that means +GoModel should expect normal Claude Code-style Anthropic traffic rather than a +small single-turn client request. + +The gateway-facing requirements are: + +- Anthropic-compatible base URL configured with `ANTHROPIC_BASE_URL`. +- Gateway auth through `ANTHROPIC_AUTH_TOKEN` or equivalent SDK environment. +- Native Messages request and response shapes. +- Native Messages SSE event streams. +- Native `count_tokens` behavior for context budgeting. +- Forwarded `anthropic-beta` and `anthropic-version` headers. 
+- Preserved Claude Code attribution headers: + - `X-Claude-Code-Session-Id` + - `X-Claude-Code-Agent-Id` + - `X-Claude-Code-Parent-Agent-Id` +- Long-lived requests. The Agent SDK defaults allow long API calls and retries, + and tool loops can run for much longer than a normal chat completion. + +Subscription-backed usage is a separate topic. Anthropic's docs say Agent SDK +and `claude -p` usage on subscription plans will draw from a separate monthly +Agent SDK credit starting 2026-06-15. They also state that third-party products +should use the API-key authentication methods unless previously approved. +GoModel's normal gateway path should therefore stay API-key backed unless there +is a separate compliance and product decision to support subscription-backed +harnesses. + +## Current support assessment + +### Supported now + +- GoModel already has a Claude Code guide using Anthropic passthrough: + `ANTHROPIC_BASE_URL=http://localhost:8080/p/anthropic`. +- Anthropic passthrough is enabled by default. +- `/p/anthropic/v1/...` is normalized to the Anthropic provider's native path. + This should cover `/v1/messages`, `/v1/messages/count_tokens`, and + `/v1/models`. +- Passthrough strips client `Authorization` and `X-Api-Key`, then applies the + server-side upstream Anthropic credential. +- Passthrough forwards normal request headers that the SDK needs, including + `anthropic-beta`, `anthropic-version`, and `X-Claude-Code-*`. +- Passthrough SSE responses are streamed without body translation. +- GoModel classifies `/p/...` as a model interaction route and clears the + per-request write deadline, so long streams are not constrained by the + server-wide 30 second write timeout. +- The managed Anthropic Messages ingress exists at: + - `POST /v1/messages` + - `POST /v1/messages/count_tokens` +- The managed route supports text, images, custom tools, `tool_choice`, basic + thinking output, Anthropic-style non-streaming responses, and Anthropic-style + SSE conversion. 
+ +### Needs validation + +- Python SDK `query(...)` pointed at GoModel passthrough. +- Python SDK `ClaudeSDKClient` pointed at GoModel passthrough. +- TypeScript SDK `query(...)` pointed at GoModel passthrough. +- Text-only agent runs. +- Streaming agent runs. +- Built-in file tools: + - `Read` + - `Write` + - `Edit` + - `Glob` + - `Grep` +- `Bash` tool calls and command-heavy sessions. +- `WebSearch` and `WebFetch`. +- SDK MCP servers and SDK-created MCP tools. +- Subagents, including the `X-Claude-Code-Agent-Id` and + `X-Claude-Code-Parent-Agent-Id` headers. +- Hooks and permission callbacks. +- Session resume and continuation. +- Structured output. +- Large contexts and request bodies against GoModel's default body-size limit. +- Long-running streams against GoModel, proxies, and load balancers. +- Gateway model discovery with `CLAUDE_CODE_ENABLE_GATEWAY_MODEL_DISCOVERY=1`. +- Usage and cost extraction from passthrough streams. +- Native Anthropic error bodies as seen by the SDK. + +### Known or likely gaps + +- No first-class Claude Agent SDK guide. +- No SDK smoke examples in the repository. +- No contract tests against `claude-agent-sdk` or + `@anthropic-ai/claude-agent-sdk`. +- The existing Claude Code guide recommends + `CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`. Full Agent SDK support should + validate whether this workaround is still needed with Anthropic passthrough. +- Managed `/v1/messages/count_tokens` is heuristic, not tokenizer-exact. This + is risky for SDK context budgeting; native passthrough should be used when the + SDK needs exact Anthropic behavior. 
+- Managed `/v1/messages` drops or rejects several native Anthropic features: + - `cache_control` prompt-cache breakpoints + - input `thinking` and extended-thinking signatures + - server/built-in tools + - `top_k` + - `document` and other non-text/image blocks +- Managed `/v1/messages` can route to non-Anthropic providers, so it cannot + guarantee Anthropic-native behavior unless capabilities are explicitly gated. +- Passthrough error handling currently normalizes provider errors through + GoModel's error path. Verify that the body and status are compatible with the + SDK's Anthropic error parser. +- Passthrough audit and usage observers see SDK traffic, but subagent/session + attribution from `X-Claude-Code-*` headers is not yet surfaced as a first-class + reporting dimension. + +## Implementation checklist + +### P0: Prove passthrough SDK compatibility + +- Add `docs/guides/claude-agent-sdk.mdx`. + - Show `ANTHROPIC_BASE_URL=http://localhost:8080/p/anthropic`. + - Show `ANTHROPIC_AUTH_TOKEN=$GOMODEL_MASTER_KEY`. + - Explain that GoModel still needs an upstream `ANTHROPIC_API_KEY`. + - Explain API-key-backed gateway usage separately from Claude plan + subscription-backed usage. + - Recommend passthrough as the SDK compatibility path. + - Document that managed `/v1/messages` is a portable subset, not full SDK + compatibility. +- Add runnable examples under `examples/claude-agent-sdk/`. + - Python `query(...)` text-only example. + - Python `ClaudeSDKClient` streaming example. + - TypeScript `query(...)` text-only example. + - A low-risk tool example using `Read`, `Glob`, and `Grep`. +- Add manual or CI smoke tests that boot GoModel and run both SDKs against the + passthrough base URL. 
+- Verify these endpoints with the SDK: + - `POST /p/anthropic/v1/messages` + - `POST /p/anthropic/v1/messages/count_tokens` + - `GET /p/anthropic/v1/models` +- Test with and without `CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`, then update + the Claude Code guide with the current recommendation. + +### P0: Make passthrough fidelity explicit + +- Add tests that Anthropic passthrough forwards: + - `anthropic-beta` + - `anthropic-version` + - `X-Claude-Code-Session-Id` + - `X-Claude-Code-Agent-Id` + - `X-Claude-Code-Parent-Agent-Id` +- Add tests that passthrough strips client auth headers and replaces them with + GoModel's configured upstream Anthropic credential. +- Add SSE passthrough tests with Anthropic event names: + - `message_start` + - `content_block_start` + - `content_block_delta` + - `content_block_stop` + - `message_delta` + - `message_stop` + - `ping` + - `error` +- Verify passthrough error responses stay compatible with Anthropic SDK parsing. +- Verify streamed usage is captured for passthrough `/messages` responses. + +### P1: Improve SDK observability + +- Capture Claude Code session and agent headers into audit and usage metadata: + - `X-Claude-Code-Session-Id` + - `X-Claude-Code-Agent-Id` + - `X-Claude-Code-Parent-Agent-Id` +- Add dashboard filters for SDK session ID and agent ID if the fields prove + useful in real traffic. +- Decide whether User-Path can be derived from one of those headers by + configuration, or whether users should keep sending an explicit + `X-GoModel-User-Path` / managed-key user path. +- Document privacy implications: SDK traffic can contain source files, command + output, tool results, and MCP data. + +### P1: Validate advanced SDK features + +- Run SDK examples that exercise MCP servers. +- Run SDK examples that exercise subagents and parent/child agent attribution. +- Run SDK examples that exercise session resume. +- Run SDK examples that exercise structured output. 
+- Run SDK examples that exercise permission callbacks and hooks. +- Confirm these features do not require endpoints beyond Anthropic Messages, + `count_tokens`, and optional gateway model discovery. + +### P1: Tighten managed `/v1/messages` + +- Keep documenting passthrough as the full-fidelity path. +- If the selected provider is Anthropic, optionally support native + `/v1/messages/count_tokens` instead of the heuristic estimate. +- Preserve or explicitly reject more Anthropic-native fields with clear errors: + - `cache_control` + - `thinking` signatures + - `document` + - server/built-in tool definitions + - beta-specific fields +- Add capability metadata so non-Anthropic providers fail early for + Anthropic-native SDK features instead of receiving malformed translated + requests. + +### P1: Validate long-running behavior + +- Run a multi-turn SDK session that includes file reads, tool calls, and + streaming output for at least 10 minutes. +- Verify request cancellation propagates cleanly to the upstream Anthropic + request. +- Verify SDK retry behavior does not double-count usage in GoModel. +- Verify large file/context requests against `BODY_SIZE_LIMIT`. +- Document recommended proxy and load-balancer timeouts for SDK traffic. + +### P2: Subscription-backed harness investigation + +- Treat this separately from Agent SDK API support. +- Review Anthropic's current terms and gateway docs before implementation. +- Decide whether GoModel should support only API-key-backed Agent SDK traffic, + or whether subscription-backed Claude Code / Agent SDK use is in scope. +- If it is in scope, design a separate auth flow rather than mixing Claude plan + credentials into the existing `ANTHROPIC_API_KEY` provider configuration. + +## Suggested public claim + +Until the P0 work is done: + +> GoModel supports Claude Code today and should work with the Claude Agent SDK +> through Anthropic passthrough. Full SDK compatibility is being validated. 
+ +After P0: + +> GoModel supports the Claude Agent SDK through Anthropic Messages passthrough +> for text, streaming, basic built-in tool loops, and gateway model discovery. +> The managed `/v1/messages` endpoint supports a portable Anthropic Messages +> subset for cross-provider routing. + +After P1 advanced validation: + +> GoModel supports the Claude Agent SDK through Anthropic Messages passthrough +> for MCP, subagents, sessions, hooks, structured output, and long-running +> agent workflows. + +## References + +- Anthropic Claude Agent SDK overview: + https://code.claude.com/docs/en/agent-sdk/overview +- Anthropic Claude Agent SDK quickstart: + https://code.claude.com/docs/en/agent-sdk/quickstart +- Anthropic Claude Code LLM gateway requirements: + https://code.claude.com/docs/en/llm-gateway +- GoModel Claude Code guide: + `docs/guides/claude-code.mdx` +- GoModel Anthropic Messages API guide: + `docs/advanced/anthropic-messages-api.mdx` +- GoModel passthrough guide: + `docs/features/passthrough-api.mdx` diff --git a/docs/docs.json b/docs/docs.json index a32d09b6..b43b8b7d 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -86,6 +86,7 @@ "tab": "Guides", "icon": "compass", "pages": [ + "guides/openai-agents-sdk", "guides/openclaw", "guides/claude-code", "guides/codex", diff --git a/docs/examples/openai-agents-sdk/README.md b/docs/examples/openai-agents-sdk/README.md new file mode 100644 index 00000000..b613889d --- /dev/null +++ b/docs/examples/openai-agents-sdk/README.md @@ -0,0 +1,31 @@ +# OpenAI Agents SDK examples + +These examples point the OpenAI Agents SDK at a local GoModel instance. + +Start GoModel first: + +```bash +docker run --rm -p 8080:8080 \ + -e GOMODEL_MASTER_KEY="change-me" \ + -e OPENAI_API_KEY="sk-..." 
\ + enterpilot/gomodel +``` + +Then run one of the examples: + +```bash +export OPENAI_BASE_URL=http://localhost:8080/v1 +export GOMODEL_MASTER_KEY=change-me +export OPENAI_MODEL=gpt-5-mini + +python3 python_basic.py +python3 python_streaming_tool.py +node javascript_basic.mjs +``` + +Install the SDK dependencies in your own environment: + +```bash +pip install openai-agents openai +npm install @openai/agents openai +``` diff --git a/docs/examples/openai-agents-sdk/javascript_basic.mjs b/docs/examples/openai-agents-sdk/javascript_basic.mjs new file mode 100644 index 00000000..3821c61f --- /dev/null +++ b/docs/examples/openai-agents-sdk/javascript_basic.mjs @@ -0,0 +1,26 @@ +import OpenAI from "openai"; +import { + Agent, + run, + setDefaultOpenAIClient, + setOpenAIAPI, + setTracingDisabled, +} from "@openai/agents"; + +setDefaultOpenAIClient( + new OpenAI({ + baseURL: process.env.OPENAI_BASE_URL ?? "http://localhost:8080/v1", + apiKey: process.env.GOMODEL_MASTER_KEY ?? "change-me", + }), +); +setOpenAIAPI("responses"); +setTracingDisabled(true); + +const agent = new Agent({ + name: "Gateway assistant", + instructions: "Be concise.", + model: process.env.OPENAI_MODEL ?? 
"gpt-5-mini", +}); + +const result = await run(agent, "Reply with exactly ok."); +console.log(result.finalOutput); diff --git a/docs/examples/openai-agents-sdk/python_basic.py b/docs/examples/openai-agents-sdk/python_basic.py new file mode 100644 index 00000000..ace00c62 --- /dev/null +++ b/docs/examples/openai-agents-sdk/python_basic.py @@ -0,0 +1,31 @@ +import asyncio +import os + +from agents import Agent, Runner, set_default_openai_client, set_tracing_disabled +from openai import AsyncOpenAI + + +set_default_openai_client( + AsyncOpenAI( + base_url=os.getenv("OPENAI_BASE_URL", "http://localhost:8080/v1"), + api_key=os.getenv("GOMODEL_MASTER_KEY", "change-me"), + ), + use_for_tracing=False, +) +set_tracing_disabled(True) + + +agent = Agent( + name="Gateway assistant", + instructions="Be concise.", + model=os.getenv("OPENAI_MODEL", "gpt-5-mini"), +) + + +async def main() -> None: + result = await Runner.run(agent, "Reply with exactly ok.") + print(result.final_output) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/examples/openai-agents-sdk/python_streaming_tool.py b/docs/examples/openai-agents-sdk/python_streaming_tool.py new file mode 100644 index 00000000..2ed7d3f2 --- /dev/null +++ b/docs/examples/openai-agents-sdk/python_streaming_tool.py @@ -0,0 +1,49 @@ +import asyncio +import os + +from agents import ( + Agent, + Runner, + function_tool, + set_default_openai_client, + set_tracing_disabled, +) +from openai import AsyncOpenAI + + +set_default_openai_client( + AsyncOpenAI( + base_url=os.getenv("OPENAI_BASE_URL", "http://localhost:8080/v1"), + api_key=os.getenv("GOMODEL_MASTER_KEY", "change-me"), + ), + use_for_tracing=False, +) +set_tracing_disabled(True) + + +@function_tool +def gateway_status() -> str: + """Return the status of the local gateway smoke test.""" + return "GoModel is reachable through the OpenAI-compatible Responses API." 
+ + +agent = Agent( + name="Gateway tool assistant", + instructions="Use gateway_status when it helps. Be concise.", + model=os.getenv("OPENAI_MODEL", "gpt-5-mini"), + tools=[gateway_status], +) + + +async def main() -> None: + result = Runner.run_streamed( + agent, + "Call the status tool, then summarize the result in one sentence.", + ) + async for _event in result.stream_events(): + pass + print(result.final_output) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/guides/openai-agents-sdk.mdx b/docs/guides/openai-agents-sdk.mdx new file mode 100644 index 00000000..743458aa --- /dev/null +++ b/docs/guides/openai-agents-sdk.mdx @@ -0,0 +1,176 @@ +--- +title: "GoModel & OpenAI Agents SDK" +description: "Route OpenAI Agents SDK model calls through GoModel's OpenAI-compatible Responses API." +icon: "bot" +--- + +GoModel can sit in front of the OpenAI Agents SDK when you want gateway keys, +budgets, audit logs, model routing, and usage tracking around agent runs. + +Flow: + +`OpenAI Agents SDK -> GoModel /v1 -> selected provider` + +## Before you start + +- Install GoModel. +- Choose a GoModel master key, for example `change-me`. +- Configure at least one upstream model provider. +- Install the OpenAI Agents SDK for Python or JavaScript. + + + The SDK exports traces to OpenAI by default. If GoModel is your only OpenAI + client endpoint, disable tracing or configure a separate OpenAI tracing key. + + +## 1. Run GoModel + +Start GoModel with a master key and an upstream provider key. This example uses +OpenAI upstream: + +```bash +docker run --rm -p 8080:8080 \ + -e GOMODEL_MASTER_KEY="change-me" \ + -e OPENAI_API_KEY="sk-..." \ + enterpilot/gomodel +``` + +If you use another provider, keep the same GoModel base URL and choose a model +that your provider exposes through GoModel. + +## 2. 
Confirm the Responses endpoint + +Send one small request through GoModel: + +```bash +curl -s http://localhost:8080/v1/responses \ + -H "Authorization: Bearer change-me" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-5-mini", + "input": "Reply with exactly ok." + }' +``` + +If the gateway is wired correctly, the response will contain `ok`. + +## 3. Configure the SDK + +Point the SDK's OpenAI client at GoModel: + + + +```python Python +import asyncio +import os + +from agents import Agent, Runner, set_default_openai_client, set_tracing_disabled +from openai import AsyncOpenAI + +set_default_openai_client( + AsyncOpenAI( + base_url=os.getenv("OPENAI_BASE_URL", "http://localhost:8080/v1"), + api_key=os.getenv("GOMODEL_MASTER_KEY", "change-me"), + ), + use_for_tracing=False, +) +set_tracing_disabled(True) + +agent = Agent( + name="Gateway assistant", + instructions="Be concise.", + model=os.getenv("OPENAI_MODEL", "gpt-5-mini"), +) + + +async def main(): + result = await Runner.run(agent, "Reply with exactly ok.") + print(result.final_output) + + +if __name__ == "__main__": + asyncio.run(main()) +``` + +```javascript JavaScript +import OpenAI from "openai"; +import { + Agent, + run, + setDefaultOpenAIClient, + setOpenAIAPI, + setTracingDisabled, +} from "@openai/agents"; + +setDefaultOpenAIClient( + new OpenAI({ + baseURL: process.env.OPENAI_BASE_URL ?? "http://localhost:8080/v1", + apiKey: process.env.GOMODEL_MASTER_KEY ?? "change-me", + }), +); +setOpenAIAPI("responses"); +setTracingDisabled(true); + +const agent = new Agent({ + name: "Gateway assistant", + instructions: "Be concise.", + model: process.env.OPENAI_MODEL ?? 
"gpt-5-mini", +}); + +const result = await run(agent, "Reply with exactly ok."); +console.log(result.finalOutput); +``` + + + +## Supported SDK paths + +GoModel supports the SDK's normal HTTP Responses path: + +- non-streaming `Runner.run(...)` +- streaming `Runner.run_streamed(...)` over HTTP/SSE +- function tool loops +- handoffs and agents-as-tools when they compile to model tool calls +- SDK-managed local sessions that replay input history +- `/v1/conversations` for server-managed conversation resources +- `/v1/responses/input_tokens` +- `/v1/responses/compact` +- local response retrieval through `/v1/responses/{id}` and + `/v1/responses/{id}/input_items` + +GoModel also preserves newer Responses input items for native Responses +providers. If a request has to be translated to Chat Completions for a provider +that does not implement Responses natively, GoModel returns a clear error for +stateful or provider-native Responses fields such as `previous_response_id`, +`conversation`, hosted tool items, and structured `text` output settings. + +## Storage behavior + +GoModel stores local response snapshots so lifecycle endpoints can work even +when the upstream provider does not support response retrieval. If the SDK sends +`store: false`, GoModel does not persist that local response snapshot. + +## Current limitations + +- Responses websocket transport is not implemented. Use the SDK's HTTP/SSE + transport with GoModel. +- Hosted tools such as web search, file search, and computer use require a + native upstream provider that supports those tool payloads. +- `previous_response_id` and `conversation` are forwarded to native Responses + providers. Chat-translated providers reject them because GoModel cannot safely + reconstruct provider-managed state across that translation boundary yet. +- Tracing is separate from model routing. Disable SDK tracing or configure a + real OpenAI tracing key. 
+ +## More examples + +See the runnable examples in `docs/examples/openai-agents-sdk/`. + +## References + +- OpenAI Agents SDK Python configuration: + [https://openai.github.io/openai-agents-python/config/](https://openai.github.io/openai-agents-python/config/) +- OpenAI Agents SDK JavaScript configuration: + [https://openai.github.io/openai-agents-js/guides/config/](https://openai.github.io/openai-agents-js/guides/config/) +- OpenAI Agents SDK running agents: + [https://openai.github.io/openai-agents-python/running_agents/](https://openai.github.io/openai-agents-python/running_agents/) diff --git a/docs/openapi.json b/docs/openapi.json index 3d541e0f..7f8c871e 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -3974,6 +3974,297 @@ } } }, + "/v1/conversations": { + "post": { + "tags": [ + "conversations" + ], + "summary": "Create a conversation", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.ConversationCreateRequest" + } + } + }, + "description": "Conversation create request" + }, + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.Conversation" + } + } + } + }, + "400": { + "description": "Bad Request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + }, + "500": { + "description": "Internal Server Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + } + }, + "security": [ + { + "BearerAuth": [] + } + ], + "x-mint": { + "metadata": { + "sidebarTitle": "/v1/conversations" + } + } + } + }, + "/v1/conversations/{id}": { + "get": { + "tags": [ + "conversations" + ], + "summary": "Get a 
conversation", + "parameters": [ + { + "description": "Conversation ID", + "name": "id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.Conversation" + } + } + } + }, + "400": { + "description": "Bad Request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + }, + "404": { + "description": "Not Found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + } + }, + "security": [ + { + "BearerAuth": [] + } + ], + "x-mint": { + "metadata": { + "sidebarTitle": "/v1/conversations/{id}" + } + } + }, + "post": { + "description": "Replaces the conversation metadata in full.", + "tags": [ + "conversations" + ], + "summary": "Update a conversation", + "parameters": [ + { + "description": "Conversation ID", + "name": "id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.ConversationUpdateRequest" + } + } + }, + "description": "Conversation update request", + "required": true + }, + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.Conversation" + } + } + } + }, + "400": { + "description": "Bad Request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + }, + "404": { + "description": "Not Found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + } + }, + "security": [ + { + "BearerAuth": [] + } + ], + "x-mint": { + "metadata": { + "sidebarTitle": "/v1/conversations/{id}" + } + } + }, + "delete": { + "tags": [ + "conversations" + ], + "summary": "Delete a conversation", + "parameters": [ + { + "description": "Conversation ID", + "name": "id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.ConversationDeleteResponse" + } + } + } + }, + "400": { + "description": "Bad Request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + }, + "404": { + "description": "Not Found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/core.OpenAIErrorEnvelope" + } + } + } + } + }, + "security": [ + { + "BearerAuth": [] + } + ], + "x-mint": { + "metadata": { + "sidebarTitle": "/v1/conversations/{id}" + } + } + } + }, "/v1/embeddings": { "post": { "tags": [ @@ -6668,6 +6959,9 @@ "reasoning": { "$ref": "#/components/schemas/core.Reasoning" }, + "service_tier": { + "type": "string" + }, "stream": { "type": "boolean" }, @@ -6686,6 +6980,12 @@ "type": "object", "additionalProperties": {} } + }, + "top_p": { + "type": "number" + }, + "user": { + "type": "string" } } }, @@ -6772,6 +7072,73 @@ } } }, + "core.Conversation": { + "type": "object", + "properties": { + "created_at": { + "type": "integer" + }, + "id": { + "type": "string" + }, + "metadata": { + 
"type": "object", + "additionalProperties": { + "type": "string" + } + }, + "object": { + "description": "\"conversation\"", + "type": "string" + } + } + }, + "core.ConversationCreateRequest": { + "type": "object", + "properties": { + "items": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + }, + "core.ConversationDeleteResponse": { + "type": "object", + "properties": { + "deleted": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "object": { + "description": "\"conversation.deleted\"", + "type": "string" + } + } + }, + "core.ConversationUpdateRequest": { + "type": "object", + "properties": { + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + }, "core.EmbeddingData": { "type": "object", "properties": { @@ -7533,6 +7900,14 @@ "core.ResponsesRequest": { "type": "object", "properties": { + "context_management": {}, + "conversation": {}, + "include": { + "type": "array", + "items": { + "type": "string" + } + }, "input": { "description": "string or []ResponsesInputElement — see docs for array form", "oneOf": [ @@ -7565,6 +7940,13 @@ "parallel_tool_calls": { "type": "boolean" }, + "previous_response_id": { + "type": "string" + }, + "prompt": {}, + "prompt_cache_retention": { + "type": "string" + }, "provider": { "description": "Gateway routing hint; stripped before upstream execution.", "type": "string" @@ -7572,6 +7954,15 @@ "reasoning": { "$ref": "#/components/schemas/core.Reasoning" }, + "safety_identifier": { + "type": "string" + }, + "service_tier": { + "type": "string" + }, + "store": { + "type": "boolean" + }, "stream": { "type": "boolean" }, @@ -7581,6 +7972,7 @@ "temperature": { "type": "number" }, + "text": {}, "tool_choice": { "description": "string or object" }, @@ -7590,6 +7982,18 @@ "type": "object", "additionalProperties": {} } + }, + "top_logprobs": { 
+ "type": "integer" + }, + "top_p": { + "type": "number" + }, + "truncation": { + "type": "string" + }, + "user": { + "type": "string" } } }, diff --git a/internal/core/chat_json.go b/internal/core/chat_json.go index c9dcea46..66901a25 100644 --- a/internal/core/chat_json.go +++ b/internal/core/chat_json.go @@ -5,6 +5,7 @@ import "encoding/json" func (r *ChatRequest) UnmarshalJSON(data []byte) error { var raw struct { Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` MaxTokens *int `json:"max_tokens,omitempty"` Model string `json:"model"` Provider string `json:"provider,omitempty"` @@ -15,6 +16,8 @@ func (r *ChatRequest) UnmarshalJSON(data []byte) error { Stream bool `json:"stream,omitempty"` StreamOptions *StreamOptions `json:"stream_options,omitempty"` Reasoning *Reasoning `json:"reasoning,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` } if err := json.Unmarshal(data, &raw); err != nil { return err @@ -22,6 +25,7 @@ func (r *ChatRequest) UnmarshalJSON(data []byte) error { extraFields, err := extractUnknownJSONFields(data, "temperature", + "top_p", "max_tokens", "model", "provider", @@ -32,12 +36,15 @@ func (r *ChatRequest) UnmarshalJSON(data []byte) error { "stream", "stream_options", "reasoning", + "user", + "service_tier", ) if err != nil { return err } r.Temperature = raw.Temperature + r.TopP = raw.TopP r.MaxTokens = raw.MaxTokens r.Model = raw.Model r.Provider = raw.Provider @@ -48,6 +55,8 @@ func (r *ChatRequest) UnmarshalJSON(data []byte) error { r.Stream = raw.Stream r.StreamOptions = raw.StreamOptions r.Reasoning = raw.Reasoning + r.User = raw.User + r.ServiceTier = raw.ServiceTier r.ExtraFields = extraFields return nil } @@ -55,6 +64,7 @@ func (r *ChatRequest) UnmarshalJSON(data []byte) error { func (r ChatRequest) MarshalJSON() ([]byte, error) { type chatRequestAlias struct { Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 
`json:"top_p,omitempty"` MaxTokens *int `json:"max_tokens,omitempty"` Model string `json:"model"` Provider string `json:"provider,omitempty"` @@ -65,10 +75,13 @@ func (r ChatRequest) MarshalJSON() ([]byte, error) { Stream bool `json:"stream,omitempty"` StreamOptions *StreamOptions `json:"stream_options,omitempty"` Reasoning *Reasoning `json:"reasoning,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` } return marshalWithUnknownJSONFields(chatRequestAlias{ Temperature: r.Temperature, + TopP: r.TopP, MaxTokens: r.MaxTokens, Model: r.Model, Provider: r.Provider, @@ -79,5 +92,7 @@ func (r ChatRequest) MarshalJSON() ([]byte, error) { Stream: r.Stream, StreamOptions: r.StreamOptions, Reasoning: r.Reasoning, + User: r.User, + ServiceTier: r.ServiceTier, }, r.ExtraFields) } diff --git a/internal/core/responses.go b/internal/core/responses.go index 92731985..8f73ddaa 100644 --- a/internal/core/responses.go +++ b/internal/core/responses.go @@ -9,20 +9,34 @@ import "encoding/json" // can round-trip extensions; Swagger ignores ExtraFields, and typed fields // should be preferred when available. type ResponsesRequest struct { - Model string `json:"model"` - Provider string `json:"provider,omitempty"` // Gateway routing hint; stripped before upstream execution. 
- Input any `json:"input"` // string or []ResponsesInputElement — see docs for array form - Instructions string `json:"instructions,omitempty"` - Tools []map[string]any `json:"tools,omitempty"` - ToolChoice any `json:"tool_choice,omitempty"` // string or object - ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - MaxOutputTokens *int `json:"max_output_tokens,omitempty"` - Stream bool `json:"stream,omitempty"` - StreamOptions *StreamOptions `json:"stream_options,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - Reasoning *Reasoning `json:"reasoning,omitempty"` - ExtraFields UnknownJSONFields `json:"-" swaggerignore:"true"` + Model string `json:"model"` + Provider string `json:"provider,omitempty"` // Gateway routing hint; stripped before upstream execution. + Input any `json:"input"` // string or []ResponsesInputElement — see docs for array form + Instructions string `json:"instructions,omitempty"` + Tools []map[string]any `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` // string or object + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Stream bool `json:"stream,omitempty"` + StreamOptions *StreamOptions `json:"stream_options,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + Reasoning *Reasoning `json:"reasoning,omitempty"` + Text any `json:"text,omitempty"` + Include []string `json:"include,omitempty"` + Truncation string `json:"truncation,omitempty"` + Store *bool `json:"store,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` + Conversation any `json:"conversation,omitempty"` + Prompt any `json:"prompt,omitempty"` + PromptCacheRetention string 
`json:"prompt_cache_retention,omitempty"` + ContextManagement any `json:"context_management,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SafetyIdentifier string `json:"safety_identifier,omitempty"` + ExtraFields UnknownJSONFields `json:"-" swaggerignore:"true"` } // ResponseInputTokensRequest documents the request body accepted by @@ -89,6 +103,7 @@ type ResponsesInputElement struct { // Function call output fields (type="function_call_output") — CallID shared above Output string `json:"output,omitempty"` + Raw json.RawMessage `json:"-" swaggerignore:"true"` ExtraFields UnknownJSONFields `json:"-" swaggerignore:"true"` } diff --git a/internal/core/responses_json.go b/internal/core/responses_json.go index e19cd284..fd08cc7e 100644 --- a/internal/core/responses_json.go +++ b/internal/core/responses_json.go @@ -9,19 +9,33 @@ import ( // Array inputs are deserialized as []ResponsesInputElement for type-safe downstream handling. func (r *ResponsesRequest) UnmarshalJSON(data []byte) error { var raw struct { - Model string `json:"model"` - Provider string `json:"provider,omitempty"` - Input json.RawMessage `json:"input"` - Instructions string `json:"instructions,omitempty"` - Tools []map[string]any `json:"tools,omitempty"` - ToolChoice any `json:"tool_choice,omitempty"` - ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - MaxOutputTokens *int `json:"max_output_tokens,omitempty"` - Stream bool `json:"stream,omitempty"` - StreamOptions *StreamOptions `json:"stream_options,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - Reasoning *Reasoning `json:"reasoning,omitempty"` + Model string `json:"model"` + Provider string `json:"provider,omitempty"` + Input json.RawMessage `json:"input"` + Instructions string `json:"instructions,omitempty"` + Tools []map[string]any `json:"tools,omitempty"` + ToolChoice any 
`json:"tool_choice,omitempty"` + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Stream bool `json:"stream,omitempty"` + StreamOptions *StreamOptions `json:"stream_options,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + Reasoning *Reasoning `json:"reasoning,omitempty"` + Text any `json:"text,omitempty"` + Include []string `json:"include,omitempty"` + Truncation string `json:"truncation,omitempty"` + Store *bool `json:"store,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` + Conversation any `json:"conversation,omitempty"` + Prompt any `json:"prompt,omitempty"` + PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` + ContextManagement any `json:"context_management,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SafetyIdentifier string `json:"safety_identifier,omitempty"` } if err := json.Unmarshal(data, &raw); err != nil { return err @@ -36,11 +50,25 @@ func (r *ResponsesRequest) UnmarshalJSON(data []byte) error { "tool_choice", "parallel_tool_calls", "temperature", + "top_p", + "top_logprobs", "max_output_tokens", "stream", "stream_options", "metadata", "reasoning", + "text", + "include", + "truncation", + "store", + "previous_response_id", + "conversation", + "prompt", + "prompt_cache_retention", + "context_management", + "user", + "service_tier", + "safety_identifier", ) if err != nil { return err @@ -59,11 +87,25 @@ func (r *ResponsesRequest) UnmarshalJSON(data []byte) error { r.ToolChoice = raw.ToolChoice r.ParallelToolCalls = raw.ParallelToolCalls r.Temperature = raw.Temperature + r.TopP = raw.TopP + r.TopLogprobs = raw.TopLogprobs r.MaxOutputTokens = raw.MaxOutputTokens r.Stream = raw.Stream r.StreamOptions = 
raw.StreamOptions r.Metadata = raw.Metadata r.Reasoning = raw.Reasoning + r.Text = raw.Text + r.Include = raw.Include + r.Truncation = raw.Truncation + r.Store = raw.Store + r.PreviousResponseID = raw.PreviousResponseID + r.Conversation = raw.Conversation + r.Prompt = raw.Prompt + r.PromptCacheRetention = raw.PromptCacheRetention + r.ContextManagement = raw.ContextManagement + r.User = raw.User + r.ServiceTier = raw.ServiceTier + r.SafetyIdentifier = raw.SafetyIdentifier r.ExtraFields = extraFields return nil } @@ -91,33 +133,61 @@ func decodeResponsesInput(raw json.RawMessage) (any, error) { // MarshalJSON preserves dynamic input payloads while supporting Swagger-only schema fields. func (r ResponsesRequest) MarshalJSON() ([]byte, error) { return marshalWithUnknownJSONFields(struct { - Model string `json:"model"` - Provider string `json:"provider,omitempty"` - Input any `json:"input"` - Instructions string `json:"instructions,omitempty"` - Tools []map[string]any `json:"tools,omitempty"` - ToolChoice any `json:"tool_choice,omitempty"` - ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - MaxOutputTokens *int `json:"max_output_tokens,omitempty"` - Stream bool `json:"stream,omitempty"` - StreamOptions *StreamOptions `json:"stream_options,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - Reasoning *Reasoning `json:"reasoning,omitempty"` + Model string `json:"model"` + Provider string `json:"provider,omitempty"` + Input any `json:"input"` + Instructions string `json:"instructions,omitempty"` + Tools []map[string]any `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Stream bool 
`json:"stream,omitempty"` + StreamOptions *StreamOptions `json:"stream_options,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + Reasoning *Reasoning `json:"reasoning,omitempty"` + Text any `json:"text,omitempty"` + Include []string `json:"include,omitempty"` + Truncation string `json:"truncation,omitempty"` + Store *bool `json:"store,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` + Conversation any `json:"conversation,omitempty"` + Prompt any `json:"prompt,omitempty"` + PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` + ContextManagement any `json:"context_management,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SafetyIdentifier string `json:"safety_identifier,omitempty"` }{ - Model: r.Model, - Provider: r.Provider, - Input: r.Input, - Instructions: r.Instructions, - Tools: r.Tools, - ToolChoice: r.ToolChoice, - ParallelToolCalls: r.ParallelToolCalls, - Temperature: r.Temperature, - MaxOutputTokens: r.MaxOutputTokens, - Stream: r.Stream, - StreamOptions: r.StreamOptions, - Metadata: r.Metadata, - Reasoning: r.Reasoning, + Model: r.Model, + Provider: r.Provider, + Input: r.Input, + Instructions: r.Instructions, + Tools: r.Tools, + ToolChoice: r.ToolChoice, + ParallelToolCalls: r.ParallelToolCalls, + Temperature: r.Temperature, + TopP: r.TopP, + TopLogprobs: r.TopLogprobs, + MaxOutputTokens: r.MaxOutputTokens, + Stream: r.Stream, + StreamOptions: r.StreamOptions, + Metadata: r.Metadata, + Reasoning: r.Reasoning, + Text: r.Text, + Include: r.Include, + Truncation: r.Truncation, + Store: r.Store, + PreviousResponseID: r.PreviousResponseID, + Conversation: r.Conversation, + Prompt: r.Prompt, + PromptCacheRetention: r.PromptCacheRetention, + ContextManagement: r.ContextManagement, + User: r.User, + ServiceTier: r.ServiceTier, + SafetyIdentifier: r.SafetyIdentifier, }, r.ExtraFields) } @@ -263,7 +333,7 @@ func (e *ResponsesInputElement) 
UnmarshalJSON(data []byte) error { if v, ok := raw["output"]; ok { e.Output = stringifyRawValue(v) } - default: // message (type="" or "message") + case "", "message": if v, ok := raw["role"]; ok { _ = json.Unmarshal(v, &e.Role) } @@ -278,6 +348,8 @@ func (e *ResponsesInputElement) UnmarshalJSON(data []byte) error { e.Content = content } } + default: + e.Raw = cloneRawMessage(data) } knownFields := []string{"type"} @@ -286,7 +358,7 @@ func (e *ResponsesInputElement) UnmarshalJSON(data []byte) error { knownFields = append(knownFields, "call_id", "id", "name", "arguments", "status") case "function_call_output": knownFields = append(knownFields, "call_id", "status", "output") - default: + case "", "message": knownFields = append(knownFields, "role", "status", "content") } @@ -328,7 +400,7 @@ func (e ResponsesInputElement) MarshalJSON() ([]byte, error) { Output: e.Output, Status: e.Status, }, e.ExtraFields) - default: // message + case "", "message": type msg struct { Type string `json:"type,omitempty"` Role string `json:"role"` @@ -341,7 +413,26 @@ func (e ResponsesInputElement) MarshalJSON() ([]byte, error) { Content: e.Content, Status: e.Status, }, e.ExtraFields) + default: + if len(bytes.TrimSpace(e.Raw)) > 0 { + return e.Raw, nil + } + return marshalWithUnknownJSONFields(struct { + Type string `json:"type"` + }{ + Type: e.Type, + }, e.ExtraFields) + } +} + +func cloneRawMessage(data []byte) json.RawMessage { + trimmed := bytes.TrimSpace(data) + if len(trimmed) == 0 { + return nil } + cloned := make([]byte, len(trimmed)) + copy(cloned, trimmed) + return cloned } // stringifyRawValue converts a json.RawMessage to a string. 
diff --git a/internal/core/responses_json_test.go b/internal/core/responses_json_test.go index b1e4057f..8e4c7e1b 100644 --- a/internal/core/responses_json_test.go +++ b/internal/core/responses_json_test.go @@ -362,6 +362,57 @@ func TestResponsesRequestJSON_PreservesUnknownNestedFields(t *testing.T) { } } +func TestResponsesRequestJSON_PreservesUnknownInputItems(t *testing.T) { + var req ResponsesRequest + if err := json.Unmarshal([]byte(`{ + "model":"gpt-5-mini", + "input":[ + { + "type":"reasoning", + "id":"rs_123", + "summary":[{"type":"summary_text","text":"Checked the facts."}] + } + ] + }`), &req); err != nil { + t.Fatalf("json.Unmarshal() error = %v", err) + } + + input, ok := req.Input.([]ResponsesInputElement) + if !ok || len(input) != 1 { + t.Fatalf("Input = %#v, want []ResponsesInputElement len=1", req.Input) + } + if input[0].Type != "reasoning" { + t.Fatalf("Input[0].Type = %q, want reasoning", input[0].Type) + } + if len(input[0].Raw) == 0 { + t.Fatal("Input[0].Raw missing for unknown input item") + } + + body, err := json.Marshal(req) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + + var decoded map[string]any + if err := json.Unmarshal(body, &decoded); err != nil { + t.Fatalf("json.Unmarshal(roundTrip) error = %v", err) + } + items := decoded["input"].([]any) + item := items[0].(map[string]any) + if item["type"] != "reasoning" || item["id"] != "rs_123" { + t.Fatalf("round-tripped item = %#v, want reasoning item", item) + } + if _, ok := item["summary"].([]any); !ok { + t.Fatalf("round-tripped summary = %#v, want array", item["summary"]) + } + if _, ok := item["role"]; ok { + t.Fatalf("unknown item gained role field: %#v", item) + } + if _, ok := item["content"]; ok { + t.Fatalf("unknown item gained content field: %#v", item) + } +} + func TestResponsesRequestJSON_PreservesVariantSpecificUnknownFields(t *testing.T) { var req ResponsesRequest if err := json.Unmarshal([]byte(`{ @@ -415,11 +466,24 @@ func 
TestResponsesRequestJSON_PreservesVariantSpecificUnknownFields(t *testing.T } } -func TestResponsesRequestJSON_PreservesUnknownFields(t *testing.T) { +func TestResponsesRequestJSON_PreservesAgentsSDKFields(t *testing.T) { var req ResponsesRequest if err := json.Unmarshal([]byte(`{ "model":"gpt-5-mini", "input":"hello", + "previous_response_id":"resp_previous", + "conversation":"conv_123", + "include":["reasoning.encrypted_content"], + "top_p":0.8, + "top_logprobs":3, + "truncation":"auto", + "store":false, + "prompt":{"id":"pmpt_123"}, + "prompt_cache_retention":"24h", + "context_management":{"truncation":"auto"}, + "user":"tenant-123", + "service_tier":"flex", + "safety_identifier":"safe_123", "text":{ "format":{ "type":"json_schema", @@ -430,8 +494,20 @@ func TestResponsesRequestJSON_PreservesUnknownFields(t *testing.T) { t.Fatalf("json.Unmarshal() error = %v", err) } - if req.ExtraFields.Lookup("text") == nil { - t.Fatal("text missing from ExtraFields") + if req.PreviousResponseID != "resp_previous" { + t.Fatalf("PreviousResponseID = %q, want resp_previous", req.PreviousResponseID) + } + if req.Store == nil || *req.Store { + t.Fatalf("Store = %#v, want false", req.Store) + } + if req.TopP == nil || *req.TopP != 0.8 { + t.Fatalf("TopP = %#v, want 0.8", req.TopP) + } + if req.TopLogprobs == nil || *req.TopLogprobs != 3 { + t.Fatalf("TopLogprobs = %#v, want 3", req.TopLogprobs) + } + if req.Text == nil { + t.Fatal("Text missing") } body, err := json.Marshal(req) @@ -455,6 +531,18 @@ func TestResponsesRequestJSON_PreservesUnknownFields(t *testing.T) { if formatField["type"] != "json_schema" { t.Fatalf("decoded text.format.type = %#v, want json_schema", formatField["type"]) } + if decoded["store"] != false { + t.Fatalf("decoded store = %#v, want false", decoded["store"]) + } + if decoded["previous_response_id"] != "resp_previous" { + t.Fatalf("decoded previous_response_id = %#v, want resp_previous", decoded["previous_response_id"]) + } + if decoded["conversation"] != 
"conv_123" { + t.Fatalf("decoded conversation = %#v, want conv_123", decoded["conversation"]) + } + if decoded["service_tier"] != "flex" { + t.Fatalf("decoded service_tier = %#v, want flex", decoded["service_tier"]) + } } func TestResponsesResponseJSON_AcceptsStructuredAnnotations(t *testing.T) { diff --git a/internal/core/types.go b/internal/core/types.go index 9174f67b..e264f9f0 100644 --- a/internal/core/types.go +++ b/internal/core/types.go @@ -21,6 +21,7 @@ type Reasoning struct { // ChatRequest represents the incoming chat completion request type ChatRequest struct { Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` MaxTokens *int `json:"max_tokens,omitempty"` Model string `json:"model"` Provider string `json:"provider,omitempty"` // Gateway routing hint; stripped before upstream execution. @@ -31,6 +32,8 @@ type ChatRequest struct { Stream bool `json:"stream,omitempty"` StreamOptions *StreamOptions `json:"stream_options,omitempty"` Reasoning *Reasoning `json:"reasoning,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` ExtraFields UnknownJSONFields `json:"-" swaggerignore:"true"` } diff --git a/internal/providers/responses_adapter.go b/internal/providers/responses_adapter.go index c50faf53..1748749c 100644 --- a/internal/providers/responses_adapter.go +++ b/internal/providers/responses_adapter.go @@ -2,6 +2,7 @@ package providers import ( "context" + "fmt" "io" "maps" "strings" @@ -24,6 +25,9 @@ func ConvertResponsesRequestToChat(req *core.ResponsesRequest) (*core.ChatReques if req == nil { return nil, core.NewInvalidRequestError("responses request is required", nil) } + if err := validateResponsesRequestForChatTranslation(req); err != nil { + return nil, err + } chatReq := &core.ChatRequest{ Model: req.Model, @@ -33,9 +37,12 @@ func ConvertResponsesRequestToChat(req *core.ResponsesRequest) (*core.ChatReques ToolChoice: 
normalizeResponsesToolChoiceForChat(req.ToolChoice), ParallelToolCalls: req.ParallelToolCalls, Temperature: req.Temperature, + TopP: req.TopP, Stream: req.Stream, StreamOptions: cloneStreamOptions(req.StreamOptions), Reasoning: req.Reasoning, + User: req.User, + ServiceTier: req.ServiceTier, ExtraFields: core.CloneUnknownJSONFields(req.ExtraFields), } @@ -59,6 +66,47 @@ func ConvertResponsesRequestToChat(req *core.ResponsesRequest) (*core.ChatReques return chatReq, nil } +func validateResponsesRequestForChatTranslation(req *core.ResponsesRequest) error { + if strings.TrimSpace(req.PreviousResponseID) != "" { + return unsupportedResponsesChatTranslationField("previous_response_id") + } + if req.Conversation != nil { + return unsupportedResponsesChatTranslationField("conversation") + } + if len(req.Include) > 0 { + return unsupportedResponsesChatTranslationField("include") + } + if req.Prompt != nil { + return unsupportedResponsesChatTranslationField("prompt") + } + if strings.TrimSpace(req.Truncation) != "" { + return unsupportedResponsesChatTranslationField("truncation") + } + if req.Text != nil { + return unsupportedResponsesChatTranslationField("text") + } + if strings.TrimSpace(req.PromptCacheRetention) != "" { + return unsupportedResponsesChatTranslationField("prompt_cache_retention") + } + if req.ContextManagement != nil { + return unsupportedResponsesChatTranslationField("context_management") + } + if req.TopLogprobs != nil { + return unsupportedResponsesChatTranslationField("top_logprobs") + } + if strings.TrimSpace(req.SafetyIdentifier) != "" { + return unsupportedResponsesChatTranslationField("safety_identifier") + } + return nil +} + +func unsupportedResponsesChatTranslationField(field string) error { + return core.NewInvalidRequestError( + fmt.Sprintf("responses field %q is only supported by native Responses providers; use an OpenAI-compatible provider or passthrough for this request", field), + nil, + ) +} + func cloneStreamOptions(src 
*core.StreamOptions) *core.StreamOptions { if src == nil { return nil diff --git a/internal/providers/responses_adapter_test.go b/internal/providers/responses_adapter_test.go index a0b5cd7c..3b41ae7a 100644 --- a/internal/providers/responses_adapter_test.go +++ b/internal/providers/responses_adapter_test.go @@ -388,6 +388,85 @@ func TestConvertResponsesRequestToChat(t *testing.T) { } } +func TestConvertResponsesRequestToChat_MapsPortableAgentsSDKFields(t *testing.T) { + topP := 0.8 + req := &core.ResponsesRequest{ + Model: "test-model", + Input: "Hello", + TopP: &topP, + User: "tenant-123", + ServiceTier: "flex", + } + + chatReq, err := ConvertResponsesRequestToChat(req) + if err != nil { + t.Fatalf("ConvertResponsesRequestToChat() error = %v", err) + } + if chatReq.TopP == nil || *chatReq.TopP != 0.8 { + t.Fatalf("TopP = %#v, want 0.8", chatReq.TopP) + } + if chatReq.User != "tenant-123" { + t.Fatalf("User = %q, want tenant-123", chatReq.User) + } + if chatReq.ServiceTier != "flex" { + t.Fatalf("ServiceTier = %q, want flex", chatReq.ServiceTier) + } +} + +func TestConvertResponsesRequestToChat_RejectsStatefulAgentsSDKFields(t *testing.T) { + tests := []struct { + name string + req *core.ResponsesRequest + want string + }{ + { + name: "previous response id", + req: &core.ResponsesRequest{Model: "test-model", Input: "Hello", PreviousResponseID: "resp_123"}, + want: "previous_response_id", + }, + { + name: "conversation", + req: &core.ResponsesRequest{Model: "test-model", Input: "Hello", Conversation: "conv_123"}, + want: "conversation", + }, + { + name: "structured output text", + req: &core.ResponsesRequest{Model: "test-model", Input: "Hello", Text: map[string]any{"format": map[string]any{"type": "json_schema"}}}, + want: "text", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := ConvertResponsesRequestToChat(tt.req) + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), tt.want) { + 
t.Fatalf("error = %v, want mention %q", err, tt.want) + } + }) + } +} + +func TestConvertResponsesRequestToChat_RejectsUnknownInputItemTypes(t *testing.T) { + var req core.ResponsesRequest + if err := json.Unmarshal([]byte(`{ + "model":"test-model", + "input":[{"type":"reasoning","id":"rs_123","summary":[]}] + }`), &req); err != nil { + t.Fatalf("json.Unmarshal() error = %v", err) + } + + _, err := ConvertResponsesRequestToChat(&req) + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), `unsupported input item type "reasoning"`) { + t.Fatalf("error = %v, want unsupported reasoning item", err) + } +} + func TestConvertResponsesRequestToChat_DoesNotMergeAssistantMessagesWithExtraFields(t *testing.T) { req := &core.ResponsesRequest{ Model: "test-model", diff --git a/internal/providers/responses_input.go b/internal/providers/responses_input.go index cc5abd2d..129773b1 100644 --- a/internal/providers/responses_input.go +++ b/internal/providers/responses_input.go @@ -130,7 +130,7 @@ func convertResponsesInputElement(item core.ResponsesInputElement, index int) (c Content: content, ExtraFields: core.CloneUnknownJSONFields(item.ExtraFields), }, "function_call_output", nil - default: // message (type="" or "message") + case "", "message": role := strings.TrimSpace(item.Role) if role == "" { return core.Message{}, "", core.NewInvalidRequestError(fmt.Sprintf("invalid responses input item at index %d: role is required", index), nil) @@ -144,6 +144,8 @@ func convertResponsesInputElement(item core.ResponsesInputElement, index int) (c Content: content, ExtraFields: core.CloneUnknownJSONFields(item.ExtraFields), }, "message", nil + default: + return core.Message{}, "", core.NewInvalidRequestError(fmt.Sprintf("invalid responses input item at index %d: unsupported input item type %q for chat-translated providers", index, item.Type), nil) } } @@ -191,6 +193,9 @@ func convertResponsesInputMap(item map[string]any, index int) (core.Message, str 
Content: content, ExtraFields: core.UnknownJSONFieldsFromMap(rawJSONMapFromUnknownKeys(item, "type", "call_id", "status", "output")), }, "function_call_output", nil + case "", "message": + default: + return core.Message{}, "", core.NewInvalidRequestError(fmt.Sprintf("invalid responses input item at index %d: unsupported input item type %q for chat-translated providers", index, itemType), nil) } role, _ := item["role"].(string) diff --git a/internal/server/handlers_test.go b/internal/server/handlers_test.go index 68ab2c71..471d5428 100644 --- a/internal/server/handlers_test.go +++ b/internal/server/handlers_test.go @@ -2747,10 +2747,10 @@ func TestRecordStreamingError_ClassifiesClientDisconnect(t *testing.T) { cancel() tests := []struct { - name string - ctx context.Context - err error - wantType string + name string + ctx context.Context + err error + wantType string }{ { name: "explicit context.Canceled", @@ -5158,6 +5158,35 @@ func TestResponsesLifecycle_StoresConcreteProviderName(t *testing.T) { } } +func TestResponsesLifecycle_StoreFalseSkipsLocalSnapshot(t *testing.T) { + store := responsestore.NewMemoryStore(responsestore.WithUnboundedRetention()) + provider := &mockProvider{ + supportedModels: []string{"gpt-5-mini"}, + providerTypes: map[string]string{ + "gpt-5-mini": "mock", + }, + responsesResponse: &core.ResponsesResponse{ + ID: "resp_store_false_1", + Object: "response", + Model: "gpt-5-mini", + Status: "completed", + }, + } + srv := New(provider, &Config{ResponseStore: store}) + + createReq := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(`{"model":"gpt-5-mini","input":"hello","store":false}`)) + createReq.Header.Set("Content-Type", "application/json") + createRec := httptest.NewRecorder() + srv.ServeHTTP(createRec, createReq) + if createRec.Code != http.StatusOK { + t.Fatalf("create status = %d, want 200 (%s)", createRec.Code, createRec.Body.String()) + } + + if _, err := store.Get(context.Background(), "resp_store_false_1"); 
!errors.Is(err, responsestore.ErrNotFound) { + t.Fatalf("store.Get() error = %v, want ErrNotFound", err) + } +} + func TestResponsesLifecycle_ReturnsSuccessWhenSnapshotStoreFails(t *testing.T) { observability.ResetMetrics() provider := &mockProvider{ diff --git a/internal/server/translated_inference_service.go b/internal/server/translated_inference_service.go index 6c49af10..7380707a 100644 --- a/internal/server/translated_inference_service.go +++ b/internal/server/translated_inference_service.go @@ -283,6 +283,9 @@ func (s *translatedInferenceService) storeResponseSnapshot(ctx context.Context, if store == nil || resp == nil || resp.ID == "" { return nil } + if req != nil && req.Store != nil && !*req.Store { + return nil + } stored := &responsestore.StoredResponse{ Response: resp, From a964078e80e1b62091a8f0320440725c79e39a9e Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Wed, 27 May 2026 22:27:34 -0700 Subject: [PATCH 02/12] fix(agents): address SDK review feedback --- cmd/gomodel/docs/docs.go | 151 ++++++- docs/dev/agents-sdk-support.md | 2 +- docs/openapi.json | 151 ++++++- internal/core/conversations.go | 4 +- internal/core/responses.go | 131 ++++-- internal/core/responses_json.go | 382 ++++++++++++------ internal/core/responses_json_test.go | 198 +++++++++ .../providers/openai/compatible_provider.go | 47 ++- internal/providers/openai/openai_test.go | 79 +++- internal/providers/responses_adapter.go | 44 +- internal/providers/responses_adapter_test.go | 3 +- 11 files changed, 988 insertions(+), 204 deletions(-) diff --git a/cmd/gomodel/docs/docs.go b/cmd/gomodel/docs/docs.go index 68616f60..95c9eb3b 100644 --- a/cmd/gomodel/docs/docs.go +++ b/cmd/gomodel/docs/docs.go @@ -5091,10 +5091,7 @@ const docTemplate = `{ "items": { "type": "array", "items": { - "type": "array", - "items": { - "type": "integer" - } + "type": "object" } }, "metadata": { @@ -5122,6 +5119,9 @@ const docTemplate = `{ }, "core.ConversationUpdateRequest": { "type": "object", + "required": [ 
+ "metadata" + ], "properties": { "metadata": { "type": "object", @@ -5642,12 +5642,30 @@ const docTemplate = `{ "core.ResponseCompactRequest": { "type": "object", "properties": { + "context_management": {}, + "conversation": { + "description": "Conversation accepts either a conversation ID string or an object with id.", + "allOf": [ + { + "$ref": "#/definitions/core.ResponsesConversationRef" + } + ] + }, + "include": { + "type": "array", + "items": { + "type": "string" + } + }, "input": { "description": "string or []ResponsesInputElement — see docs for array form" }, "instructions": { "type": "string" }, + "max_output_tokens": { + "type": "integer" + }, "metadata": { "type": "object", "additionalProperties": { @@ -5657,12 +5675,57 @@ const docTemplate = `{ "model": { "type": "string" }, + "parallel_tool_calls": { + "type": "boolean" + }, + "previous_response_id": { + "type": "string" + }, + "prompt": {}, + "prompt_cache_retention": { + "type": "string" + }, "provider": { "description": "Gateway routing hint; stripped before upstream execution.", "type": "string" }, "reasoning": { "$ref": "#/definitions/core.Reasoning" + }, + "safety_identifier": { + "type": "string" + }, + "service_tier": { + "type": "string" + }, + "store": { + "type": "boolean" + }, + "temperature": { + "type": "number" + }, + "text": {}, + "tool_choice": { + "description": "string or object" + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": {} + } + }, + "top_logprobs": { + "type": "integer" + }, + "top_p": { + "type": "number" + }, + "truncation": { + "type": "string" + }, + "user": { + "type": "string" } } }, @@ -5741,12 +5804,30 @@ const docTemplate = `{ "core.ResponseInputTokensRequest": { "type": "object", "properties": { + "context_management": {}, + "conversation": { + "description": "Conversation accepts either a conversation ID string or an object with id.", + "allOf": [ + { + "$ref": "#/definitions/core.ResponsesConversationRef" + } + ] + 
}, + "include": { + "type": "array", + "items": { + "type": "string" + } + }, "input": { "description": "string or []ResponsesInputElement — see docs for array form" }, "instructions": { "type": "string" }, + "max_output_tokens": { + "type": "integer" + }, "metadata": { "type": "object", "additionalProperties": { @@ -5756,12 +5837,57 @@ const docTemplate = `{ "model": { "type": "string" }, + "parallel_tool_calls": { + "type": "boolean" + }, + "previous_response_id": { + "type": "string" + }, + "prompt": {}, + "prompt_cache_retention": { + "type": "string" + }, "provider": { "description": "Gateway routing hint; stripped before upstream execution.", "type": "string" }, "reasoning": { "$ref": "#/definitions/core.Reasoning" + }, + "safety_identifier": { + "type": "string" + }, + "service_tier": { + "type": "string" + }, + "store": { + "type": "boolean" + }, + "temperature": { + "type": "number" + }, + "text": {}, + "tool_choice": { + "description": "string or object" + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": {} + } + }, + "top_logprobs": { + "type": "integer" + }, + "top_p": { + "type": "number" + }, + "truncation": { + "type": "string" + }, + "user": { + "type": "string" } } }, @@ -5822,6 +5948,14 @@ const docTemplate = `{ } } }, + "core.ResponsesConversationRef": { + "type": "object", + "properties": { + "id": { + "type": "string" + } + } + }, "core.ResponsesError": { "type": "object", "properties": { @@ -5870,7 +6004,14 @@ const docTemplate = `{ "type": "object", "properties": { "context_management": {}, - "conversation": {}, + "conversation": { + "description": "Conversation accepts either a conversation ID string or an object with id.", + "allOf": [ + { + "$ref": "#/definitions/core.ResponsesConversationRef" + } + ] + }, "include": { "type": "array", "items": { diff --git a/docs/dev/agents-sdk-support.md b/docs/dev/agents-sdk-support.md index 335ffa20..be88de1b 100644 --- a/docs/dev/agents-sdk-support.md +++ 
b/docs/dev/agents-sdk-support.md @@ -175,7 +175,7 @@ Done: add typed fields to `core.ResponsesRequest` for fields the Agents SDK send regularly, while still preserving unknown fields: - `PreviousResponseID string` -- `Conversation any` +- `Conversation *ResponsesConversationRef` - `Include []string` - `Prompt any` - `TopP *float64` diff --git a/docs/openapi.json b/docs/openapi.json index 7f8c871e..90629f96 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -7099,10 +7099,7 @@ "items": { "type": "array", "items": { - "type": "array", - "items": { - "type": "integer" - } + "type": "object" } }, "metadata": { @@ -7130,6 +7127,9 @@ }, "core.ConversationUpdateRequest": { "type": "object", + "required": [ + "metadata" + ], "properties": { "metadata": { "type": "object", @@ -7651,6 +7651,21 @@ "core.ResponseCompactRequest": { "type": "object", "properties": { + "context_management": {}, + "conversation": { + "description": "Conversation accepts either a conversation ID string or an object with id.", + "allOf": [ + { + "$ref": "#/components/schemas/core.ResponsesConversationRef" + } + ] + }, + "include": { + "type": "array", + "items": { + "type": "string" + } + }, "input": { "description": "string or []ResponsesInputElement — see docs for array form", "oneOf": [ @@ -7668,6 +7683,9 @@ "instructions": { "type": "string" }, + "max_output_tokens": { + "type": "integer" + }, "metadata": { "type": "object", "additionalProperties": { @@ -7677,12 +7695,57 @@ "model": { "type": "string" }, + "parallel_tool_calls": { + "type": "boolean" + }, + "previous_response_id": { + "type": "string" + }, + "prompt": {}, + "prompt_cache_retention": { + "type": "string" + }, "provider": { "description": "Gateway routing hint; stripped before upstream execution.", "type": "string" }, "reasoning": { "$ref": "#/components/schemas/core.Reasoning" + }, + "safety_identifier": { + "type": "string" + }, + "service_tier": { + "type": "string" + }, + "store": { + "type": "boolean" + }, + 
"temperature": { + "type": "number" + }, + "text": {}, + "tool_choice": { + "description": "string or object" + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": {} + } + }, + "top_logprobs": { + "type": "integer" + }, + "top_p": { + "type": "number" + }, + "truncation": { + "type": "string" + }, + "user": { + "type": "string" } } }, @@ -7761,6 +7824,21 @@ "core.ResponseInputTokensRequest": { "type": "object", "properties": { + "context_management": {}, + "conversation": { + "description": "Conversation accepts either a conversation ID string or an object with id.", + "allOf": [ + { + "$ref": "#/components/schemas/core.ResponsesConversationRef" + } + ] + }, + "include": { + "type": "array", + "items": { + "type": "string" + } + }, "input": { "description": "string or []ResponsesInputElement — see docs for array form", "oneOf": [ @@ -7778,6 +7856,9 @@ "instructions": { "type": "string" }, + "max_output_tokens": { + "type": "integer" + }, "metadata": { "type": "object", "additionalProperties": { @@ -7787,12 +7868,57 @@ "model": { "type": "string" }, + "parallel_tool_calls": { + "type": "boolean" + }, + "previous_response_id": { + "type": "string" + }, + "prompt": {}, + "prompt_cache_retention": { + "type": "string" + }, "provider": { "description": "Gateway routing hint; stripped before upstream execution.", "type": "string" }, "reasoning": { "$ref": "#/components/schemas/core.Reasoning" + }, + "safety_identifier": { + "type": "string" + }, + "service_tier": { + "type": "string" + }, + "store": { + "type": "boolean" + }, + "temperature": { + "type": "number" + }, + "text": {}, + "tool_choice": { + "description": "string or object" + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": {} + } + }, + "top_logprobs": { + "type": "integer" + }, + "top_p": { + "type": "number" + }, + "truncation": { + "type": "string" + }, + "user": { + "type": "string" } } }, @@ -7853,6 +7979,14 @@ } } 
}, + "core.ResponsesConversationRef": { + "type": "object", + "properties": { + "id": { + "type": "string" + } + } + }, "core.ResponsesError": { "type": "object", "properties": { @@ -7901,7 +8035,14 @@ "type": "object", "properties": { "context_management": {}, - "conversation": {}, + "conversation": { + "description": "Conversation accepts either a conversation ID string or an object with id.", + "allOf": [ + { + "$ref": "#/components/schemas/core.ResponsesConversationRef" + } + ] + }, "include": { "type": "array", "items": { diff --git a/internal/core/conversations.go b/internal/core/conversations.go index c2c13c11..f87f80df 100644 --- a/internal/core/conversations.go +++ b/internal/core/conversations.go @@ -46,7 +46,7 @@ type ConversationDeleteResponse struct { // Items are stored as opaque JSON so the gateway accepts any item shape the // client sends without constraining future item-list support. type ConversationCreateRequest struct { - Items []json.RawMessage `json:"items,omitempty"` + Items []json.RawMessage `json:"items,omitempty" swaggertype:"array,object"` Metadata map[string]string `json:"metadata,omitempty"` } @@ -54,7 +54,7 @@ type ConversationCreateRequest struct { // Metadata is a pointer so the handler can tell an absent field apart from an // explicit empty object: OpenAI requires metadata on update. type ConversationUpdateRequest struct { - Metadata *map[string]string `json:"metadata"` + Metadata *map[string]string `json:"metadata" binding:"required"` } // DecodeConversationCreateRequest parses a conversation create body. An empty diff --git a/internal/core/responses.go b/internal/core/responses.go index 8f73ddaa..4f0a1cd8 100644 --- a/internal/core/responses.go +++ b/internal/core/responses.go @@ -9,56 +9,107 @@ import "encoding/json" // can round-trip extensions; Swagger ignores ExtraFields, and typed fields // should be preferred when available. 
type ResponsesRequest struct { - Model string `json:"model"` - Provider string `json:"provider,omitempty"` // Gateway routing hint; stripped before upstream execution. - Input any `json:"input"` // string or []ResponsesInputElement — see docs for array form - Instructions string `json:"instructions,omitempty"` - Tools []map[string]any `json:"tools,omitempty"` - ToolChoice any `json:"tool_choice,omitempty"` // string or object - ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP *float64 `json:"top_p,omitempty"` - TopLogprobs *int `json:"top_logprobs,omitempty"` - MaxOutputTokens *int `json:"max_output_tokens,omitempty"` - Stream bool `json:"stream,omitempty"` - StreamOptions *StreamOptions `json:"stream_options,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - Reasoning *Reasoning `json:"reasoning,omitempty"` - Text any `json:"text,omitempty"` - Include []string `json:"include,omitempty"` - Truncation string `json:"truncation,omitempty"` - Store *bool `json:"store,omitempty"` - PreviousResponseID string `json:"previous_response_id,omitempty"` - Conversation any `json:"conversation,omitempty"` - Prompt any `json:"prompt,omitempty"` - PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` - ContextManagement any `json:"context_management,omitempty"` - User string `json:"user,omitempty"` - ServiceTier string `json:"service_tier,omitempty"` - SafetyIdentifier string `json:"safety_identifier,omitempty"` - ExtraFields UnknownJSONFields `json:"-" swaggerignore:"true"` + Model string `json:"model"` + Provider string `json:"provider,omitempty"` // Gateway routing hint; stripped before upstream execution. 
+ Input any `json:"input"` // string or []ResponsesInputElement — see docs for array form + Instructions string `json:"instructions,omitempty"` + Tools []map[string]any `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` // string or object + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Stream bool `json:"stream,omitempty"` + StreamOptions *StreamOptions `json:"stream_options,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + Reasoning *Reasoning `json:"reasoning,omitempty"` + Text any `json:"text,omitempty"` + Include []string `json:"include,omitempty"` + Truncation string `json:"truncation,omitempty"` + Store *bool `json:"store,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` + // Conversation accepts either a conversation ID string or an object with id. + Conversation *ResponsesConversationRef `json:"conversation,omitempty"` + Prompt any `json:"prompt,omitempty"` + PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` + ContextManagement any `json:"context_management,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SafetyIdentifier string `json:"safety_identifier,omitempty"` + ExtraFields UnknownJSONFields `json:"-" swaggerignore:"true"` +} + +// ResponsesConversationRef represents the Responses API conversation request +// field. OpenAI accepts either a conversation ID string or an object with id. +// Raw preserves the original string/object shape across JSON round trips. 
+type ResponsesConversationRef struct { + ID string `json:"id,omitempty"` + Raw json.RawMessage `json:"-" swaggerignore:"true"` } // ResponseInputTokensRequest documents the request body accepted by // POST /v1/responses/input_tokens. type ResponseInputTokensRequest struct { - Model string `json:"model,omitempty"` - Provider string `json:"provider,omitempty"` // Gateway routing hint; stripped before upstream execution. - Input any `json:"input,omitempty"` // string or []ResponsesInputElement — see docs for array form - Instructions string `json:"instructions,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - Reasoning *Reasoning `json:"reasoning,omitempty"` + Model string `json:"model,omitempty"` + Provider string `json:"provider,omitempty"` // Gateway routing hint; stripped before upstream execution. + Input any `json:"input,omitempty"` // string or []ResponsesInputElement — see docs for array form + Instructions string `json:"instructions,omitempty"` + Tools []map[string]any `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` // string or object + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + Reasoning *Reasoning `json:"reasoning,omitempty"` + Text any `json:"text,omitempty"` + Include []string `json:"include,omitempty"` + Truncation string `json:"truncation,omitempty"` + Store *bool `json:"store,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` + // Conversation accepts either a conversation ID string or an object with id. 
+ Conversation *ResponsesConversationRef `json:"conversation,omitempty"` + Prompt any `json:"prompt,omitempty"` + PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` + ContextManagement any `json:"context_management,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SafetyIdentifier string `json:"safety_identifier,omitempty"` + ExtraFields UnknownJSONFields `json:"-" swaggerignore:"true"` } // ResponseCompactRequest documents the request body accepted by // POST /v1/responses/compact. type ResponseCompactRequest struct { - Model string `json:"model,omitempty"` - Provider string `json:"provider,omitempty"` // Gateway routing hint; stripped before upstream execution. - Input any `json:"input,omitempty"` // string or []ResponsesInputElement — see docs for array form - Instructions string `json:"instructions,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - Reasoning *Reasoning `json:"reasoning,omitempty"` + Model string `json:"model,omitempty"` + Provider string `json:"provider,omitempty"` // Gateway routing hint; stripped before upstream execution. 
+ Input any `json:"input,omitempty"` // string or []ResponsesInputElement — see docs for array form + Instructions string `json:"instructions,omitempty"` + Tools []map[string]any `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` // string or object + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + Reasoning *Reasoning `json:"reasoning,omitempty"` + Text any `json:"text,omitempty"` + Include []string `json:"include,omitempty"` + Truncation string `json:"truncation,omitempty"` + Store *bool `json:"store,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` + // Conversation accepts either a conversation ID string or an object with id. + Conversation *ResponsesConversationRef `json:"conversation,omitempty"` + Prompt any `json:"prompt,omitempty"` + PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` + ContextManagement any `json:"context_management,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SafetyIdentifier string `json:"safety_identifier,omitempty"` + ExtraFields UnknownJSONFields `json:"-" swaggerignore:"true"` } func (r *ResponsesRequest) semanticSelector() (string, string) { diff --git a/internal/core/responses_json.go b/internal/core/responses_json.go index fd08cc7e..18e29bb2 100644 --- a/internal/core/responses_json.go +++ b/internal/core/responses_json.go @@ -3,39 +3,40 @@ package core import ( "bytes" "encoding/json" + "fmt" ) // UnmarshalJSON preserves dynamic input payloads while supporting Swagger-only schema fields. // Array inputs are deserialized as []ResponsesInputElement for type-safe downstream handling. 
func (r *ResponsesRequest) UnmarshalJSON(data []byte) error { var raw struct { - Model string `json:"model"` - Provider string `json:"provider,omitempty"` - Input json.RawMessage `json:"input"` - Instructions string `json:"instructions,omitempty"` - Tools []map[string]any `json:"tools,omitempty"` - ToolChoice any `json:"tool_choice,omitempty"` - ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP *float64 `json:"top_p,omitempty"` - TopLogprobs *int `json:"top_logprobs,omitempty"` - MaxOutputTokens *int `json:"max_output_tokens,omitempty"` - Stream bool `json:"stream,omitempty"` - StreamOptions *StreamOptions `json:"stream_options,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - Reasoning *Reasoning `json:"reasoning,omitempty"` - Text any `json:"text,omitempty"` - Include []string `json:"include,omitempty"` - Truncation string `json:"truncation,omitempty"` - Store *bool `json:"store,omitempty"` - PreviousResponseID string `json:"previous_response_id,omitempty"` - Conversation any `json:"conversation,omitempty"` - Prompt any `json:"prompt,omitempty"` - PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` - ContextManagement any `json:"context_management,omitempty"` - User string `json:"user,omitempty"` - ServiceTier string `json:"service_tier,omitempty"` - SafetyIdentifier string `json:"safety_identifier,omitempty"` + Model string `json:"model"` + Provider string `json:"provider,omitempty"` + Input json.RawMessage `json:"input"` + Instructions string `json:"instructions,omitempty"` + Tools []map[string]any `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Stream bool 
`json:"stream,omitempty"` + StreamOptions *StreamOptions `json:"stream_options,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + Reasoning *Reasoning `json:"reasoning,omitempty"` + Text any `json:"text,omitempty"` + Include []string `json:"include,omitempty"` + Truncation string `json:"truncation,omitempty"` + Store *bool `json:"store,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` + Conversation *ResponsesConversationRef `json:"conversation,omitempty"` + Prompt any `json:"prompt,omitempty"` + PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` + ContextManagement any `json:"context_management,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SafetyIdentifier string `json:"safety_identifier,omitempty"` } if err := json.Unmarshal(data, &raw); err != nil { return err @@ -130,36 +131,75 @@ func decodeResponsesInput(raw json.RawMessage) (any, error) { return input, nil } +// UnmarshalJSON accepts the documented Responses conversation union: a string +// ID or an object with an id field. +func (c *ResponsesConversationRef) UnmarshalJSON(data []byte) error { + trimmed := bytes.TrimSpace(data) + if len(trimmed) == 0 || bytes.Equal(trimmed, []byte("null")) { + *c = ResponsesConversationRef{} + return nil + } + + c.Raw = cloneRawMessage(trimmed) + switch trimmed[0] { + case '"': + return json.Unmarshal(trimmed, &c.ID) + case '{': + var ref struct { + ID string `json:"id"` + } + if err := json.Unmarshal(trimmed, &ref); err != nil { + return err + } + c.ID = ref.ID + return nil + default: + return fmt.Errorf("conversation must be a string or object") + } +} + +// MarshalJSON preserves whether the conversation was originally supplied as a +// string or object. Programmatic values default to the compact string ID form. 
+func (c ResponsesConversationRef) MarshalJSON() ([]byte, error) { + if len(bytes.TrimSpace(c.Raw)) > 0 { + return cloneRawMessage(c.Raw), nil + } + if c.ID != "" { + return json.Marshal(c.ID) + } + return []byte("null"), nil +} + // MarshalJSON preserves dynamic input payloads while supporting Swagger-only schema fields. func (r ResponsesRequest) MarshalJSON() ([]byte, error) { return marshalWithUnknownJSONFields(struct { - Model string `json:"model"` - Provider string `json:"provider,omitempty"` - Input any `json:"input"` - Instructions string `json:"instructions,omitempty"` - Tools []map[string]any `json:"tools,omitempty"` - ToolChoice any `json:"tool_choice,omitempty"` - ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP *float64 `json:"top_p,omitempty"` - TopLogprobs *int `json:"top_logprobs,omitempty"` - MaxOutputTokens *int `json:"max_output_tokens,omitempty"` - Stream bool `json:"stream,omitempty"` - StreamOptions *StreamOptions `json:"stream_options,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - Reasoning *Reasoning `json:"reasoning,omitempty"` - Text any `json:"text,omitempty"` - Include []string `json:"include,omitempty"` - Truncation string `json:"truncation,omitempty"` - Store *bool `json:"store,omitempty"` - PreviousResponseID string `json:"previous_response_id,omitempty"` - Conversation any `json:"conversation,omitempty"` - Prompt any `json:"prompt,omitempty"` - PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` - ContextManagement any `json:"context_management,omitempty"` - User string `json:"user,omitempty"` - ServiceTier string `json:"service_tier,omitempty"` - SafetyIdentifier string `json:"safety_identifier,omitempty"` + Model string `json:"model"` + Provider string `json:"provider,omitempty"` + Input any `json:"input"` + Instructions string `json:"instructions,omitempty"` + Tools []map[string]any `json:"tools,omitempty"` + 
ToolChoice any `json:"tool_choice,omitempty"` + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Stream bool `json:"stream,omitempty"` + StreamOptions *StreamOptions `json:"stream_options,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + Reasoning *Reasoning `json:"reasoning,omitempty"` + Text any `json:"text,omitempty"` + Include []string `json:"include,omitempty"` + Truncation string `json:"truncation,omitempty"` + Store *bool `json:"store,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` + Conversation *ResponsesConversationRef `json:"conversation,omitempty"` + Prompt any `json:"prompt,omitempty"` + PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` + ContextManagement any `json:"context_management,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SafetyIdentifier string `json:"safety_identifier,omitempty"` }{ Model: r.Model, Provider: r.Provider, @@ -192,72 +232,200 @@ func (r ResponsesRequest) MarshalJSON() ([]byte, error) { } type responseUtilityRequestJSON struct { - Model string - Provider string - Input any - Instructions string - Metadata map[string]string - Reasoning *Reasoning + Model string + Provider string + Input any + Instructions string + Tools []map[string]any + ToolChoice any + ParallelToolCalls *bool + Temperature *float64 + TopP *float64 + TopLogprobs *int + MaxOutputTokens *int + Metadata map[string]string + Reasoning *Reasoning + Text any + Include []string + Truncation string + Store *bool + PreviousResponseID string + Conversation *ResponsesConversationRef + Prompt any + PromptCacheRetention string + ContextManagement any + User string + ServiceTier string + SafetyIdentifier string + ExtraFields 
UnknownJSONFields } func decodeResponseUtilityRequestJSON(data []byte) (responseUtilityRequestJSON, error) { var raw struct { - Model string `json:"model,omitempty"` - Provider string `json:"provider,omitempty"` - Input json.RawMessage `json:"input,omitempty"` - Instructions string `json:"instructions,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - Reasoning *Reasoning `json:"reasoning,omitempty"` + Model string `json:"model,omitempty"` + Provider string `json:"provider,omitempty"` + Input json.RawMessage `json:"input,omitempty"` + Instructions string `json:"instructions,omitempty"` + Tools []map[string]any `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + Reasoning *Reasoning `json:"reasoning,omitempty"` + Text any `json:"text,omitempty"` + Include []string `json:"include,omitempty"` + Truncation string `json:"truncation,omitempty"` + Store *bool `json:"store,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` + Conversation *ResponsesConversationRef `json:"conversation,omitempty"` + Prompt any `json:"prompt,omitempty"` + PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` + ContextManagement any `json:"context_management,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SafetyIdentifier string `json:"safety_identifier,omitempty"` } if err := json.Unmarshal(data, &raw); err != nil { return responseUtilityRequestJSON{}, err } + + extraFields, err := extractUnknownJSONFields(data, + "model", + "provider", + "input", + "instructions", + "tools", + "tool_choice", + "parallel_tool_calls", + 
"temperature", + "top_p", + "top_logprobs", + "max_output_tokens", + "metadata", + "reasoning", + "text", + "include", + "truncation", + "store", + "previous_response_id", + "conversation", + "prompt", + "prompt_cache_retention", + "context_management", + "user", + "service_tier", + "safety_identifier", + ) + if err != nil { + return responseUtilityRequestJSON{}, err + } + input, err := decodeResponsesInput(raw.Input) if err != nil { return responseUtilityRequestJSON{}, err } return responseUtilityRequestJSON{ - Model: raw.Model, - Provider: raw.Provider, - Input: input, - Instructions: raw.Instructions, - Metadata: raw.Metadata, - Reasoning: raw.Reasoning, + Model: raw.Model, + Provider: raw.Provider, + Input: input, + Instructions: raw.Instructions, + Tools: raw.Tools, + ToolChoice: raw.ToolChoice, + ParallelToolCalls: raw.ParallelToolCalls, + Temperature: raw.Temperature, + TopP: raw.TopP, + TopLogprobs: raw.TopLogprobs, + MaxOutputTokens: raw.MaxOutputTokens, + Metadata: raw.Metadata, + Reasoning: raw.Reasoning, + Text: raw.Text, + Include: raw.Include, + Truncation: raw.Truncation, + Store: raw.Store, + PreviousResponseID: raw.PreviousResponseID, + Conversation: raw.Conversation, + Prompt: raw.Prompt, + PromptCacheRetention: raw.PromptCacheRetention, + ContextManagement: raw.ContextManagement, + User: raw.User, + ServiceTier: raw.ServiceTier, + SafetyIdentifier: raw.SafetyIdentifier, + ExtraFields: extraFields, }, nil } +func marshalResponseUtilityRequestJSON(raw responseUtilityRequestJSON) ([]byte, error) { + return marshalWithUnknownJSONFields(struct { + Model string `json:"model,omitempty"` + Provider string `json:"provider,omitempty"` + Input any `json:"input,omitempty"` + Instructions string `json:"instructions,omitempty"` + Tools []map[string]any `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 
`json:"top_p,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + Reasoning *Reasoning `json:"reasoning,omitempty"` + Text any `json:"text,omitempty"` + Include []string `json:"include,omitempty"` + Truncation string `json:"truncation,omitempty"` + Store *bool `json:"store,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` + Conversation *ResponsesConversationRef `json:"conversation,omitempty"` + Prompt any `json:"prompt,omitempty"` + PromptCacheRetention string `json:"prompt_cache_retention,omitempty"` + ContextManagement any `json:"context_management,omitempty"` + User string `json:"user,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SafetyIdentifier string `json:"safety_identifier,omitempty"` + }{ + Model: raw.Model, + Provider: raw.Provider, + Input: raw.Input, + Instructions: raw.Instructions, + Tools: raw.Tools, + ToolChoice: raw.ToolChoice, + ParallelToolCalls: raw.ParallelToolCalls, + Temperature: raw.Temperature, + TopP: raw.TopP, + TopLogprobs: raw.TopLogprobs, + MaxOutputTokens: raw.MaxOutputTokens, + Metadata: raw.Metadata, + Reasoning: raw.Reasoning, + Text: raw.Text, + Include: raw.Include, + Truncation: raw.Truncation, + Store: raw.Store, + PreviousResponseID: raw.PreviousResponseID, + Conversation: raw.Conversation, + Prompt: raw.Prompt, + PromptCacheRetention: raw.PromptCacheRetention, + ContextManagement: raw.ContextManagement, + User: raw.User, + ServiceTier: raw.ServiceTier, + SafetyIdentifier: raw.SafetyIdentifier, + }, raw.ExtraFields) +} + // UnmarshalJSON preserves the dynamic input payload for gateway utility requests. 
func (r *ResponseInputTokensRequest) UnmarshalJSON(data []byte) error { raw, err := decodeResponseUtilityRequestJSON(data) if err != nil { return err } - r.Model = raw.Model - r.Provider = raw.Provider - r.Input = raw.Input - r.Instructions = raw.Instructions - r.Metadata = raw.Metadata - r.Reasoning = raw.Reasoning + *r = ResponseInputTokensRequest(raw) return nil } // MarshalJSON preserves the dynamic input payload while omitting Swagger-only schema fields. func (r ResponseInputTokensRequest) MarshalJSON() ([]byte, error) { - return json.Marshal(struct { - Model string `json:"model,omitempty"` - Provider string `json:"provider,omitempty"` - Input any `json:"input,omitempty"` - Instructions string `json:"instructions,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - Reasoning *Reasoning `json:"reasoning,omitempty"` - }{ - Model: r.Model, - Provider: r.Provider, - Input: r.Input, - Instructions: r.Instructions, - Metadata: r.Metadata, - Reasoning: r.Reasoning, - }) + return marshalResponseUtilityRequestJSON(responseUtilityRequestJSON(r)) } // UnmarshalJSON preserves the dynamic input payload for gateway utility requests. @@ -266,32 +434,13 @@ func (r *ResponseCompactRequest) UnmarshalJSON(data []byte) error { if err != nil { return err } - r.Model = raw.Model - r.Provider = raw.Provider - r.Input = raw.Input - r.Instructions = raw.Instructions - r.Metadata = raw.Metadata - r.Reasoning = raw.Reasoning + *r = ResponseCompactRequest(raw) return nil } // MarshalJSON preserves the dynamic input payload while omitting Swagger-only schema fields. 
func (r ResponseCompactRequest) MarshalJSON() ([]byte, error) { - return json.Marshal(struct { - Model string `json:"model,omitempty"` - Provider string `json:"provider,omitempty"` - Input any `json:"input,omitempty"` - Instructions string `json:"instructions,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - Reasoning *Reasoning `json:"reasoning,omitempty"` - }{ - Model: r.Model, - Provider: r.Provider, - Input: r.Input, - Instructions: r.Instructions, - Metadata: r.Metadata, - Reasoning: r.Reasoning, - }) + return marshalResponseUtilityRequestJSON(responseUtilityRequestJSON(r)) } // UnmarshalJSON deserializes a ResponsesInputElement, switching on the "type" @@ -415,7 +564,10 @@ func (e ResponsesInputElement) MarshalJSON() ([]byte, error) { }, e.ExtraFields) default: if len(bytes.TrimSpace(e.Raw)) > 0 { - return e.Raw, nil + if e.ExtraFields.IsEmpty() { + return cloneRawMessage(e.Raw), nil + } + return mergeUnknownJSONObject(e.Raw, e.ExtraFields.raw) } return marshalWithUnknownJSONFields(struct { Type string `json:"type"` diff --git a/internal/core/responses_json_test.go b/internal/core/responses_json_test.go index 8e4c7e1b..36b30a72 100644 --- a/internal/core/responses_json_test.go +++ b/internal/core/responses_json_test.go @@ -228,6 +228,145 @@ func TestResponseUtilityRequestMarshalJSON_PreservesProvider(t *testing.T) { } } +func TestResponseUtilityRequestJSON_PreservesResponsesContextFields(t *testing.T) { + store := false + parallelToolCalls := true + temperature := 0.2 + topP := 0.8 + topLogprobs := 3 + maxOutputTokens := 256 + utilityRequests := []struct { + name string + req any + }{ + { + name: "input tokens", + req: ResponseInputTokensRequest{ + Model: "gpt-5-mini", + Input: "hello", + Instructions: "be brief", + Tools: []map[string]any{{"type": "function", "name": "lookup"}}, + ToolChoice: "auto", + ParallelToolCalls: &parallelToolCalls, + Temperature: &temperature, + TopP: &topP, + TopLogprobs: &topLogprobs, + MaxOutputTokens:
&maxOutputTokens, + Metadata: map[string]string{"team": "alpha"}, + Reasoning: &Reasoning{Effort: "low"}, + Text: map[string]any{"format": map[string]any{"type": "text"}}, + Include: []string{"reasoning.encrypted_content"}, + Truncation: "auto", + Store: &store, + PreviousResponseID: "resp_previous", + Conversation: &ResponsesConversationRef{ID: "conv_123"}, + Prompt: map[string]any{"id": "pmpt_123"}, + PromptCacheRetention: "24h", + ContextManagement: map[string]any{"truncation": "auto"}, + User: "tenant-123", + ServiceTier: "flex", + SafetyIdentifier: "safe_123", + ExtraFields: UnknownJSONFieldsFromMap(map[string]json.RawMessage{ + "future_field": json.RawMessage(`{"enabled":true}`), + }), + }, + }, + { + name: "compact", + req: ResponseCompactRequest{ + Model: "gpt-5-mini", + Input: "hello", + Instructions: "be brief", + Tools: []map[string]any{{"type": "function", "name": "lookup"}}, + ToolChoice: "auto", + ParallelToolCalls: &parallelToolCalls, + Temperature: &temperature, + TopP: &topP, + TopLogprobs: &topLogprobs, + MaxOutputTokens: &maxOutputTokens, + Metadata: map[string]string{"team": "alpha"}, + Reasoning: &Reasoning{Effort: "low"}, + Text: map[string]any{"format": map[string]any{"type": "text"}}, + Include: []string{"reasoning.encrypted_content"}, + Truncation: "auto", + Store: &store, + PreviousResponseID: "resp_previous", + Conversation: &ResponsesConversationRef{ID: "conv_123"}, + Prompt: map[string]any{"id": "pmpt_123"}, + PromptCacheRetention: "24h", + ContextManagement: map[string]any{"truncation": "auto"}, + User: "tenant-123", + ServiceTier: "flex", + SafetyIdentifier: "safe_123", + ExtraFields: UnknownJSONFieldsFromMap(map[string]json.RawMessage{ + "future_field": json.RawMessage(`{"enabled":true}`), + }), + }, + }, + } + + for _, tt := range utilityRequests { + t.Run(tt.name, func(t *testing.T) { + body, err := json.Marshal(tt.req) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + + var decoded map[string]any + if err :=
json.Unmarshal(body, &decoded); err != nil { + t.Fatalf("json.Unmarshal() error = %v", err) + } + for _, field := range []string{ + "tools", + "tool_choice", + "parallel_tool_calls", + "temperature", + "top_p", + "top_logprobs", + "max_output_tokens", + "metadata", + "reasoning", + "text", + "include", + "truncation", + "store", + "previous_response_id", + "conversation", + "prompt", + "prompt_cache_retention", + "context_management", + "user", + "service_tier", + "safety_identifier", + "future_field", + } { + if _, ok := decoded[field]; !ok { + t.Fatalf("decoded utility request missing %q: %s", field, string(body)) + } + } + + switch tt.req.(type) { + case ResponseInputTokensRequest: + var roundTripped ResponseInputTokensRequest + if err := json.Unmarshal(body, &roundTripped); err != nil { + t.Fatalf("json.Unmarshal(ResponseInputTokensRequest) error = %v", err) + } + if roundTripped.PreviousResponseID != "resp_previous" || roundTripped.ExtraFields.Lookup("future_field") == nil { + t.Fatalf("round-tripped input token request lost context fields: %+v", roundTripped) + } + case ResponseCompactRequest: + var roundTripped ResponseCompactRequest + if err := json.Unmarshal(body, &roundTripped); err != nil { + t.Fatalf("json.Unmarshal(ResponseCompactRequest) error = %v", err) + } + if roundTripped.PreviousResponseID != "resp_previous" || roundTripped.ExtraFields.Lookup("future_field") == nil { + t.Fatalf("round-tripped compact request lost context fields: %+v", roundTripped) + } + } + }) + } +} + func TestResponsesRequestMarshalJSON_PreservesToolCallingControls(t *testing.T) { parallelToolCalls := false body, err := json.Marshal(ResponsesRequest{ @@ -413,6 +552,33 @@ func TestResponsesRequestJSON_PreservesUnknownInputItems(t *testing.T) { } } +func TestResponsesInputElementMarshalJSON_MergesRawUnknownItemExtras(t *testing.T) { + elem := ResponsesInputElement{ + Type: "reasoning", + Raw: json.RawMessage(`{"type":"reasoning","id":"rs_123","summary":[]}`), + ExtraFields: 
UnknownJSONFieldsFromMap(map[string]json.RawMessage{ + "provider_data": json.RawMessage(`{"trace_id":"trace-1"}`), + }), + } + + body, err := json.Marshal(elem) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + + var decoded map[string]any + if err := json.Unmarshal(body, &decoded); err != nil { + t.Fatalf("json.Unmarshal() error = %v", err) + } + if decoded["type"] != "reasoning" || decoded["id"] != "rs_123" { + t.Fatalf("decoded item = %#v, want original raw reasoning item", decoded) + } + providerData, ok := decoded["provider_data"].(map[string]any) + if !ok || providerData["trace_id"] != "trace-1" { + t.Fatalf("provider_data = %#v, want merged trace id", decoded["provider_data"]) + } +} + func TestResponsesRequestJSON_PreservesVariantSpecificUnknownFields(t *testing.T) { var req ResponsesRequest if err := json.Unmarshal([]byte(`{ @@ -545,6 +711,38 @@ func TestResponsesRequestJSON_PreservesAgentsSDKFields(t *testing.T) { } } +func TestResponsesRequestJSON_PreservesConversationObjectShape(t *testing.T) { + var req ResponsesRequest + if err := json.Unmarshal([]byte(`{ + "model":"gpt-5-mini", + "input":"hello", + "conversation":{"id":"conv_123","metadata":{"team":"alpha"}} + }`), &req); err != nil { + t.Fatalf("json.Unmarshal() error = %v", err) + } + if req.Conversation == nil || req.Conversation.ID != "conv_123" { + t.Fatalf("Conversation = %+v, want id conv_123", req.Conversation) + } + + body, err := json.Marshal(req) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + + var decoded map[string]any + if err := json.Unmarshal(body, &decoded); err != nil { + t.Fatalf("json.Unmarshal(roundTrip) error = %v", err) + } + conversation, ok := decoded["conversation"].(map[string]any) + if !ok { + t.Fatalf("decoded conversation = %#v, want object", decoded["conversation"]) + } + metadata, ok := conversation["metadata"].(map[string]any) + if !ok || metadata["team"] != "alpha" { + t.Fatalf("decoded conversation metadata = %#v, want team 
alpha", conversation["metadata"]) + } +} + func TestResponsesResponseJSON_AcceptsStructuredAnnotations(t *testing.T) { var resp ResponsesResponse if err := json.Unmarshal([]byte(`{ diff --git a/internal/providers/openai/compatible_provider.go b/internal/providers/openai/compatible_provider.go index e8e3da9a..51a41ee4 100644 --- a/internal/providers/openai/compatible_provider.go +++ b/internal/providers/openai/compatible_provider.go @@ -268,25 +268,46 @@ func responseInputTokensRequestFromResponses(req *core.ResponsesRequest) *core.R if req == nil { return nil } - return &core.ResponseInputTokensRequest{ - Model: req.Model, - Input: req.Input, - Instructions: req.Instructions, - Metadata: req.Metadata, - Reasoning: req.Reasoning, - } + utility := responseUtilityRequestFromResponses(req) + return &utility } func responseCompactRequestFromResponses(req *core.ResponsesRequest) *core.ResponseCompactRequest { if req == nil { return nil } - return &core.ResponseCompactRequest{ - Model: req.Model, - Input: req.Input, - Instructions: req.Instructions, - Metadata: req.Metadata, - Reasoning: req.Reasoning, + utility := responseUtilityRequestFromResponses(req) + compact := core.ResponseCompactRequest(utility) + return &compact +} + +func responseUtilityRequestFromResponses(req *core.ResponsesRequest) core.ResponseInputTokensRequest { + return core.ResponseInputTokensRequest{ + Model: req.Model, + Input: req.Input, + Instructions: req.Instructions, + Tools: req.Tools, + ToolChoice: req.ToolChoice, + ParallelToolCalls: req.ParallelToolCalls, + Temperature: req.Temperature, + TopP: req.TopP, + TopLogprobs: req.TopLogprobs, + MaxOutputTokens: req.MaxOutputTokens, + Metadata: req.Metadata, + Reasoning: req.Reasoning, + Text: req.Text, + Include: req.Include, + Truncation: req.Truncation, + Store: req.Store, + PreviousResponseID: req.PreviousResponseID, + Conversation: req.Conversation, + Prompt: req.Prompt, + PromptCacheRetention: req.PromptCacheRetention, + ContextManagement: 
req.ContextManagement, + User: req.User, + ServiceTier: req.ServiceTier, + SafetyIdentifier: req.SafetyIdentifier, + ExtraFields: core.CloneUnknownJSONFields(req.ExtraFields), } } diff --git a/internal/providers/openai/openai_test.go b/internal/providers/openai/openai_test.go index ae8ab8d3..13e1d318 100644 --- a/internal/providers/openai/openai_test.go +++ b/internal/providers/openai/openai_test.go @@ -1155,7 +1155,7 @@ func TestResponses(t *testing.T) { } } -func TestResponsesUtilitiesForwardNarrowRequests(t *testing.T) { +func TestResponsesUtilitiesForwardResponseContext(t *testing.T) { var inputTokensBody map[string]any var compactBody map[string]any server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -1183,20 +1183,37 @@ func TestResponsesUtilitiesForwardNarrowRequests(t *testing.T) { maxOutputTokens := 128 parallelToolCalls := true temperature := 0.2 + topP := 0.8 + topLogprobs := 3 + store := false req := &core.ResponsesRequest{ - Model: "gpt-4o", - Provider: "openai_primary", - Input: "hello", - Instructions: "be brief", - Tools: []map[string]any{{"type": "function"}}, - ToolChoice: "auto", - ParallelToolCalls: &parallelToolCalls, - Temperature: &temperature, - MaxOutputTokens: &maxOutputTokens, - Stream: true, - StreamOptions: &core.StreamOptions{IncludeUsage: true}, - Metadata: map[string]string{"team": "alpha"}, - Reasoning: &core.Reasoning{Effort: "low"}, + Model: "gpt-4o", + Provider: "openai_primary", + Input: "hello", + Instructions: "be brief", + Tools: []map[string]any{{"type": "function", "name": "lookup"}}, + ToolChoice: "auto", + ParallelToolCalls: &parallelToolCalls, + Temperature: &temperature, + TopP: &topP, + TopLogprobs: &topLogprobs, + MaxOutputTokens: &maxOutputTokens, + Stream: true, + StreamOptions: &core.StreamOptions{IncludeUsage: true}, + Metadata: map[string]string{"team": "alpha"}, + Reasoning: &core.Reasoning{Effort: "low"}, + Text: map[string]any{"format": map[string]any{"type": "text"}}, +
Include: []string{"reasoning.encrypted_content"}, + Truncation: "auto", + Store: &store, + PreviousResponseID: "resp_previous", + Conversation: &core.ResponsesConversationRef{ID: "conv_123"}, + Prompt: map[string]any{"id": "pmpt_123"}, + PromptCacheRetention: "24h", + ContextManagement: map[string]any{"truncation": "auto"}, + User: "tenant-123", + ServiceTier: "flex", + SafetyIdentifier: "safe_123", ExtraFields: core.UnknownJSONFieldsFromMap(map[string]json.RawMessage{ "custom": json.RawMessage(`"value"`), }), @@ -1215,13 +1232,35 @@ func TestResponsesUtilitiesForwardNarrowRequests(t *testing.T) { if body["model"] != "gpt-4o" || body["input"] != "hello" || body["instructions"] != "be brief" { t.Fatalf("%s body kept fields = %+v, want model/input/instructions", name, body) } - if _, ok := body["metadata"]; !ok { - t.Fatalf("%s body missing metadata: %+v", name, body) - } - if _, ok := body["reasoning"]; !ok { - t.Fatalf("%s body missing reasoning: %+v", name, body) + for _, field := range []string{ + "tools", + "tool_choice", + "parallel_tool_calls", + "temperature", + "top_p", + "top_logprobs", + "max_output_tokens", + "metadata", + "reasoning", + "text", + "include", + "truncation", + "store", + "previous_response_id", + "conversation", + "prompt", + "prompt_cache_retention", + "context_management", + "user", + "service_tier", + "safety_identifier", + "custom", + } { + if _, ok := body[field]; !ok { + t.Fatalf("%s body missing %q: %+v", name, field, body) + } } - for _, field := range []string{"provider", "tools", "tool_choice", "parallel_tool_calls", "temperature", "max_output_tokens", "stream", "stream_options", "custom"} { + for _, field := range []string{"provider", "stream", "stream_options"} { if _, ok := body[field]; ok { t.Fatalf("%s body includes filtered field %q: %+v", name, field, body) } diff --git a/internal/providers/responses_adapter.go b/internal/providers/responses_adapter.go index 1748749c..bc8e4413 100644 --- 
a/internal/providers/responses_adapter.go +++ b/internal/providers/responses_adapter.go @@ -82,8 +82,8 @@ func validateResponsesRequestForChatTranslation(req *core.ResponsesRequest) erro if strings.TrimSpace(req.Truncation) != "" { return unsupportedResponsesChatTranslationField("truncation") } - if req.Text != nil { - return unsupportedResponsesChatTranslationField("text") + if err := validateResponsesTextForChatTranslation(req.Text); err != nil { + return err } if strings.TrimSpace(req.PromptCacheRetention) != "" { return unsupportedResponsesChatTranslationField("prompt_cache_retention") @@ -100,6 +100,46 @@ func validateResponsesRequestForChatTranslation(req *core.ResponsesRequest) erro return nil } +func validateResponsesTextForChatTranslation(text any) error { + if text == nil { + return nil + } + + textMap, ok := text.(map[string]any) + if !ok { + return unsupportedResponsesChatTranslationField("text") + } + for key, value := range textMap { + switch key { + case "format": + if !isPlainResponsesTextFormat(value) { + return unsupportedResponsesChatTranslationField("text") + } + default: + return unsupportedResponsesChatTranslationField("text") + } + } + return nil +} + +func isPlainResponsesTextFormat(format any) bool { + if format == nil { + return true + } + formatMap, ok := format.(map[string]any) + if !ok { + return false + } + for key := range formatMap { + if key != "type" { + return false + } + } + formatType, _ := formatMap["type"].(string) + formatType = strings.TrimSpace(formatType) + return formatType == "" || formatType == "text" +} + func unsupportedResponsesChatTranslationField(field string) error { return core.NewInvalidRequestError( fmt.Sprintf("responses field %q is only supported by native Responses providers; use an OpenAI-compatible provider or passthrough for this request", field), diff --git a/internal/providers/responses_adapter_test.go b/internal/providers/responses_adapter_test.go index 3b41ae7a..49aaa08b 100644 --- 
a/internal/providers/responses_adapter_test.go +++ b/internal/providers/responses_adapter_test.go @@ -394,6 +394,7 @@ func TestConvertResponsesRequestToChat_MapsPortableAgentsSDKFields(t *testing.T) Model: "test-model", Input: "Hello", TopP: &topP, + Text: map[string]any{"format": map[string]any{"type": "text"}}, User: "tenant-123", ServiceTier: "flex", } @@ -426,7 +427,7 @@ func TestConvertResponsesRequestToChat_RejectsStatefulAgentsSDKFields(t *testing }, { name: "conversation", - req: &core.ResponsesRequest{Model: "test-model", Input: "Hello", Conversation: "conv_123"}, + req: &core.ResponsesRequest{Model: "test-model", Input: "Hello", Conversation: &core.ResponsesConversationRef{ID: "conv_123"}}, want: "conversation", }, { From bf44afe47698760afc7578b99fdc3aae01b4dab4 Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Sat, 30 May 2026 19:53:37 -0400 Subject: [PATCH 03/12] refactor(core): reuse CloneRawJSON in cloneRawMessage Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/core/responses_json.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/internal/core/responses_json.go b/internal/core/responses_json.go index 18e29bb2..8d7536e7 100644 --- a/internal/core/responses_json.go +++ b/internal/core/responses_json.go @@ -577,14 +577,10 @@ func (e ResponsesInputElement) MarshalJSON() ([]byte, error) { } } +// cloneRawMessage returns a detached, whitespace-trimmed copy of a raw JSON +// value so stored Raw fields stay independent of the decoder's backing buffer. func cloneRawMessage(data []byte) json.RawMessage { - trimmed := bytes.TrimSpace(data) - if len(trimmed) == 0 { - return nil - } - cloned := make([]byte, len(trimmed)) - copy(cloned, trimmed) - return cloned + return CloneRawJSON(bytes.TrimSpace(data)) } // stringifyRawValue converts a json.RawMessage to a string. From 50bd4965be9d68945d8637abfb0185e861563765 Mon Sep 17 00:00:00 2001 From: "Jakub A. 
W" Date: Mon, 1 Jun 2026 00:04:03 -0400 Subject: [PATCH 04/12] fix(responses): avoid duplicate keys when re-marshaling unknown input items Unknown Responses input item types stored the full payload in Raw and also extracted every non-type field into ExtraFields. Marshaling then merged both, emitting duplicate JSON keys (e.g. id, summary) to native Responses providers. Preserve the verbatim Raw on decode and reserve ExtraFields for metadata added after decoding, so a round trip emits each field once. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/core/responses_json.go | 4 ++++ internal/core/responses_json_test.go | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/internal/core/responses_json.go b/internal/core/responses_json.go index 8d7536e7..7d06ddd4 100644 --- a/internal/core/responses_json.go +++ b/internal/core/responses_json.go @@ -498,7 +498,11 @@ func (e *ResponsesInputElement) UnmarshalJSON(data []byte) error { } } default: + // Unknown item types are preserved verbatim in Raw, which already holds + // every field. Skip ExtraFields extraction here so a round trip emits Raw + // once; ExtraFields stays reserved for metadata added after decoding. 
e.Raw = cloneRawMessage(data) + return nil } knownFields := []string{"type"} diff --git a/internal/core/responses_json_test.go b/internal/core/responses_json_test.go index 36b30a72..fe4f58a5 100644 --- a/internal/core/responses_json_test.go +++ b/internal/core/responses_json_test.go @@ -1,6 +1,7 @@ package core import ( + "bytes" "encoding/json" "testing" ) @@ -552,6 +553,25 @@ func TestResponsesRequestJSON_PreservesUnknownInputItems(t *testing.T) { } } +func TestResponsesInputElementJSON_UnknownItemRoundTripHasNoDuplicateKeys(t *testing.T) { + var elem ResponsesInputElement + if err := json.Unmarshal([]byte(`{"type":"reasoning","id":"rs_123","summary":[]}`), &elem); err != nil { + t.Fatalf("json.Unmarshal() error = %v", err) + } + + body, err := json.Marshal(elem) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + + // A decode→encode round trip must not duplicate the fields preserved in Raw. + for _, key := range []string{`"type"`, `"id"`, `"summary"`} { + if got := bytes.Count(body, []byte(key)); got != 1 { + t.Fatalf("key %s appears %d times in %s, want 1", key, got, body) + } + } +} + func TestResponsesInputElementMarshalJSON_MergesRawUnknownItemExtras(t *testing.T) { elem := ResponsesInputElement{ Type: "reasoning", From 1ca99df37a774a6bcd860c3c260d5d340982ee65 Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Mon, 1 Jun 2026 00:04:52 -0400 Subject: [PATCH 05/12] feat(agents): translate Responses text.format to chat response_format Chat-translated providers previously rejected any Responses request carrying structured text output settings. Map text.format onto the Chat Completions response_format (json_schema fields are nested under a json_schema member; json_object passes through) and forward text.verbosity, so the Agents SDK can request structured outputs on non-native providers. Unknown text formats still return a clear error. 
Adds core.MergeUnknownJSONFields to inject the derived passthrough fields, reusing the same extras path providers already read for response_format. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/dev/agents-sdk-support.md | 3 + docs/guides/openai-agents-sdk.mdx | 8 +- internal/core/json_fields.go | 20 +++++ internal/core/json_fields_test.go | 37 +++++++++ internal/providers/responses_adapter.go | 85 ++++++++++++++----- internal/providers/responses_adapter_test.go | 87 +++++++++++++++++++- 6 files changed, 215 insertions(+), 25 deletions(-) diff --git a/docs/dev/agents-sdk-support.md b/docs/dev/agents-sdk-support.md index be88de1b..f795e66c 100644 --- a/docs/dev/agents-sdk-support.md +++ b/docs/dev/agents-sdk-support.md @@ -197,6 +197,9 @@ provider-specific adaptation where relevant. - Done: implement `/v1/conversations` lifecycle support. - Done: reject `previous_response_id` and `conversation` on chat-translated providers with a clear compatibility error. +- Done: translate `text.format` to the Chat Completions `response_format` + (`json_schema` / `json_object`) and pass `text.verbosity` through on + chat-translated providers; unknown text formats still return a clear error. - Still needed: optionally expand previous stored responses into full input for chat-translated providers. - Add tests for: diff --git a/docs/guides/openai-agents-sdk.mdx b/docs/guides/openai-agents-sdk.mdx index 743458aa..744354a3 100644 --- a/docs/guides/openai-agents-sdk.mdx +++ b/docs/guides/openai-agents-sdk.mdx @@ -140,9 +140,11 @@ GoModel supports the SDK's normal HTTP Responses path: GoModel also preserves newer Responses input items for native Responses providers. If a request has to be translated to Chat Completions for a provider -that does not implement Responses natively, GoModel returns a clear error for -stateful or provider-native Responses fields such as `previous_response_id`, -`conversation`, hosted tool items, and structured `text` output settings. 
+that does not implement Responses natively, GoModel translates structured +`text` output settings into the Chat Completions `response_format` (and passes +`text.verbosity` through), while returning a clear error for stateful or +provider-native Responses fields such as `previous_response_id`, `conversation`, +and hosted tool items. ## Storage behavior diff --git a/internal/core/json_fields.go b/internal/core/json_fields.go index 28be0c3b..4ffb8059 100644 --- a/internal/core/json_fields.go +++ b/internal/core/json_fields.go @@ -72,6 +72,26 @@ func unknownJSONFieldsFromMap(fields map[string]json.RawMessage, cloneValues boo return UnknownJSONFields{raw: buf.Bytes()} } +// MergeUnknownJSONFields returns base with the given raw members added; additions +// override existing members on key conflict. It lets translation layers inject +// derived fields (such as a chat response_format mapped from a Responses text +// format) into a request's passthrough object without a dedicated typed field. +func MergeUnknownJSONFields(base UnknownJSONFields, additions map[string]json.RawMessage) (UnknownJSONFields, error) { + if len(additions) == 0 { + return base, nil + } + merged := make(map[string]json.RawMessage, len(additions)) + if !base.IsEmpty() { + if err := json.Unmarshal(base.raw, &merged); err != nil { + return UnknownJSONFields{}, err + } + } + for key, value := range additions { + merged[key] = value + } + return UnknownJSONFieldsFromMap(merged), nil +} + // Lookup returns the raw JSON value for key or nil when absent. // It scans the stored object on demand so single-lookups stay allocation-light, // but repeated lookups on the same value are linear in the raw JSON size. 
diff --git a/internal/core/json_fields_test.go b/internal/core/json_fields_test.go index 93eee4f0..96445ea0 100644 --- a/internal/core/json_fields_test.go +++ b/internal/core/json_fields_test.go @@ -85,6 +85,43 @@ func TestUnknownJSONFieldsFromMap_EmptyRawValueEncodesAsNull(t *testing.T) { } } +func TestMergeUnknownJSONFields_AddsAndOverrides(t *testing.T) { + base := UnknownJSONFieldsFromMap(map[string]json.RawMessage{ + "keep": json.RawMessage(`1`), + "override": json.RawMessage(`"old"`), + }) + + merged, err := MergeUnknownJSONFields(base, map[string]json.RawMessage{ + "override": json.RawMessage(`"new"`), + "added": json.RawMessage(`true`), + }) + if err != nil { + t.Fatalf("MergeUnknownJSONFields() error = %v", err) + } + + if got := merged.Lookup("keep"); !bytes.Equal(got, []byte(`1`)) { + t.Fatalf("keep = %q, want 1", got) + } + if got := merged.Lookup("override"); !bytes.Equal(got, []byte(`"new"`)) { + t.Fatalf("override = %q, want \"new\"", got) + } + if got := merged.Lookup("added"); !bytes.Equal(got, []byte(`true`)) { + t.Fatalf("added = %q, want true", got) + } +} + +func TestMergeUnknownJSONFields_NoAdditionsReturnsBase(t *testing.T) { + base := UnknownJSONFieldsFromMap(map[string]json.RawMessage{"a": json.RawMessage(`1`)}) + + merged, err := MergeUnknownJSONFields(base, nil) + if err != nil { + t.Fatalf("MergeUnknownJSONFields() error = %v", err) + } + if !bytes.Equal(merged.Lookup("a"), []byte(`1`)) { + t.Fatalf("a = %q, want 1", merged.Lookup("a")) + } +} + func TestExtractUnknownJSONFields_RejectsInvalidJSONSyntax(t *testing.T) { tests := []struct { name string diff --git a/internal/providers/responses_adapter.go b/internal/providers/responses_adapter.go index bc8e4413..7f852915 100644 --- a/internal/providers/responses_adapter.go +++ b/internal/providers/responses_adapter.go @@ -2,6 +2,7 @@ package providers import ( "context" + "encoding/json" "fmt" "io" "maps" @@ -50,6 +51,14 @@ func ConvertResponsesRequestToChat(req *core.ResponsesRequest) 
(*core.ChatReques chatReq.MaxTokens = req.MaxOutputTokens } + textFields, err := responsesTextToChatExtraFields(req.Text) + if err != nil { + return nil, err + } + if chatReq.ExtraFields, err = core.MergeUnknownJSONFields(chatReq.ExtraFields, textFields); err != nil { + return nil, err + } + if req.Instructions != "" { chatReq.Messages = append(chatReq.Messages, core.Message{ Role: "system", @@ -82,9 +91,6 @@ func validateResponsesRequestForChatTranslation(req *core.ResponsesRequest) erro if strings.TrimSpace(req.Truncation) != "" { return unsupportedResponsesChatTranslationField("truncation") } - if err := validateResponsesTextForChatTranslation(req.Text); err != nil { - return err - } if strings.TrimSpace(req.PromptCacheRetention) != "" { return unsupportedResponsesChatTranslationField("prompt_cache_retention") } @@ -100,44 +106,83 @@ func validateResponsesRequestForChatTranslation(req *core.ResponsesRequest) erro return nil } -func validateResponsesTextForChatTranslation(text any) error { +// responsesTextToChatExtraFields maps the Responses "text" settings onto the +// equivalent Chat Completions fields. text.format becomes response_format and +// text.verbosity passes through unchanged; both are emitted as passthrough +// members so existing provider handling (e.g. Gemini response_format) applies. +// Plain text output produces no fields. Anything that cannot be translated +// faithfully (an unknown format type or text option) returns an error rather +// than silently dropping the caller's intent. 
+func responsesTextToChatExtraFields(text any) (map[string]json.RawMessage, error) { if text == nil { - return nil + return nil, nil } - textMap, ok := text.(map[string]any) if !ok { - return unsupportedResponsesChatTranslationField("text") + return nil, unsupportedResponsesChatTranslationField("text") } + + additions := make(map[string]json.RawMessage) for key, value := range textMap { switch key { case "format": - if !isPlainResponsesTextFormat(value) { - return unsupportedResponsesChatTranslationField("text") + responseFormat, err := responsesTextFormatToChatResponseFormat(value) + if err != nil { + return nil, err } + if responseFormat != nil { + additions["response_format"] = responseFormat + } + case "verbosity": + raw, err := json.Marshal(value) + if err != nil { + return nil, err + } + additions["verbosity"] = raw default: - return unsupportedResponsesChatTranslationField("text") + return nil, unsupportedResponsesChatTranslationField("text") } } - return nil + if len(additions) == 0 { + return nil, nil + } + return additions, nil } -func isPlainResponsesTextFormat(format any) bool { +// responsesTextFormatToChatResponseFormat converts a Responses text.format into +// a Chat Completions response_format. Plain text yields nil (chat default). The +// Responses API places json_schema fields directly on the format object, while +// Chat nests them under a json_schema member. 
+func responsesTextFormatToChatResponseFormat(format any) (json.RawMessage, error) { if format == nil { - return true + return nil, nil } formatMap, ok := format.(map[string]any) if !ok { - return false + return nil, unsupportedResponsesChatTranslationField("text") } - for key := range formatMap { - if key != "type" { - return false + + formatType, _ := formatMap["type"].(string) + switch strings.TrimSpace(formatType) { + case "", "text": + return nil, nil + case "json_object": + return json.Marshal(map[string]any{"type": "json_object"}) + case "json_schema": + jsonSchema := make(map[string]any, len(formatMap)) + for k, v := range formatMap { + if k == "type" { + continue + } + jsonSchema[k] = v } + return json.Marshal(map[string]any{ + "type": "json_schema", + "json_schema": jsonSchema, + }) + default: + return nil, unsupportedResponsesChatTranslationField("text") } - formatType, _ := formatMap["type"].(string) - formatType = strings.TrimSpace(formatType) - return formatType == "" || formatType == "text" } func unsupportedResponsesChatTranslationField(field string) error { diff --git a/internal/providers/responses_adapter_test.go b/internal/providers/responses_adapter_test.go index 49aaa08b..dd4e2936 100644 --- a/internal/providers/responses_adapter_test.go +++ b/internal/providers/responses_adapter_test.go @@ -431,8 +431,8 @@ func TestConvertResponsesRequestToChat_RejectsStatefulAgentsSDKFields(t *testing want: "conversation", }, { - name: "structured output text", - req: &core.ResponsesRequest{Model: "test-model", Input: "Hello", Text: map[string]any{"format": map[string]any{"type": "json_schema"}}}, + name: "unknown text format type", + req: &core.ResponsesRequest{Model: "test-model", Input: "Hello", Text: map[string]any{"format": map[string]any{"type": "grammar"}}}, want: "text", }, } @@ -450,6 +450,89 @@ func TestConvertResponsesRequestToChat_RejectsStatefulAgentsSDKFields(t *testing } } +func TestConvertResponsesRequestToChat_MapsTextFormatToResponseFormat(t 
*testing.T) { + t.Run("json_schema nests schema fields", func(t *testing.T) { + req := &core.ResponsesRequest{ + Model: "test-model", + Input: "Hello", + Text: map[string]any{ + "format": map[string]any{ + "type": "json_schema", + "name": "weather", + "strict": true, + "schema": map[string]any{"type": "object"}, + }, + "verbosity": "low", + }, + } + + chatReq, err := ConvertResponsesRequestToChat(req) + if err != nil { + t.Fatalf("ConvertResponsesRequestToChat() error = %v", err) + } + + raw := chatReq.ExtraFields.Lookup("response_format") + if raw == nil { + t.Fatal("response_format missing from chat request extras") + } + var responseFormat struct { + Type string `json:"type"` + JSONSchema struct { + Name string `json:"name"` + Strict bool `json:"strict"` + Schema map[string]any `json:"schema"` + } `json:"json_schema"` + } + if err := json.Unmarshal(raw, &responseFormat); err != nil { + t.Fatalf("json.Unmarshal(response_format) error = %v", err) + } + if responseFormat.Type != "json_schema" { + t.Fatalf("response_format.type = %q, want json_schema", responseFormat.Type) + } + if responseFormat.JSONSchema.Name != "weather" || !responseFormat.JSONSchema.Strict { + t.Fatalf("response_format.json_schema = %#v, want nested name/strict", responseFormat.JSONSchema) + } + if responseFormat.JSONSchema.Schema["type"] != "object" { + t.Fatalf("response_format.json_schema.schema = %#v, want nested schema", responseFormat.JSONSchema.Schema) + } + if verbosity := chatReq.ExtraFields.Lookup("verbosity"); string(verbosity) != `"low"` { + t.Fatalf("verbosity = %s, want \"low\"", verbosity) + } + }) + + t.Run("json_object passes through", func(t *testing.T) { + req := &core.ResponsesRequest{ + Model: "test-model", + Input: "Hello", + Text: map[string]any{"format": map[string]any{"type": "json_object"}}, + } + + chatReq, err := ConvertResponsesRequestToChat(req) + if err != nil { + t.Fatalf("ConvertResponsesRequestToChat() error = %v", err) + } + if got := 
string(chatReq.ExtraFields.Lookup("response_format")); got != `{"type":"json_object"}` { + t.Fatalf("response_format = %s, want json_object", got) + } + }) + + t.Run("plain text produces no response_format", func(t *testing.T) { + req := &core.ResponsesRequest{ + Model: "test-model", + Input: "Hello", + Text: map[string]any{"format": map[string]any{"type": "text"}}, + } + + chatReq, err := ConvertResponsesRequestToChat(req) + if err != nil { + t.Fatalf("ConvertResponsesRequestToChat() error = %v", err) + } + if raw := chatReq.ExtraFields.Lookup("response_format"); raw != nil { + t.Fatalf("response_format = %s, want none for plain text", raw) + } + }) +} + func TestConvertResponsesRequestToChat_RejectsUnknownInputItemTypes(t *testing.T) { var req core.ResponsesRequest if err := json.Unmarshal([]byte(`{ From dfd68e9cc3374e9c46ac6a33c4334447ee152400 Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Thu, 4 Jun 2026 00:44:46 -0400 Subject: [PATCH 06/12] feat(agents): gate unsupported Responses tool features --- docs/advanced/responses-api.mdx | 3 + docs/advanced/responses-compatibility.mdx | 142 ++++++++++++ docs/dev/agents-sdk-support.md | 22 +- docs/docs.json | 1 + docs/examples/openai-agents-sdk/README.md | 15 ++ .../anthropic_agents_probe.py | 150 +++++++++++++ .../anthropic_responses_probe.py | 203 ++++++++++++++++++ docs/guides/openai-agents-sdk.mdx | 49 ++++- .../providers/anthropic/anthropic_test.go | 37 ++++ .../anthropic/request_translation.go | 12 ++ internal/providers/gemini/native.go | 9 +- .../providers/gemini/native_schema_test.go | 36 ++++ internal/providers/responses_adapter.go | 24 +++ internal/providers/responses_adapter_test.go | 33 +++ 14 files changed, 724 insertions(+), 12 deletions(-) create mode 100644 docs/advanced/responses-compatibility.mdx create mode 100644 docs/examples/openai-agents-sdk/anthropic_agents_probe.py create mode 100644 docs/examples/openai-agents-sdk/anthropic_responses_probe.py diff --git a/docs/advanced/responses-api.mdx 
b/docs/advanced/responses-api.mdx index c6651ef4..65c08413 100644 --- a/docs/advanced/responses-api.mdx +++ b/docs/advanced/responses-api.mdx @@ -15,6 +15,9 @@ apply. Lifecycle and utility endpoints use native provider capabilities when available, and return explicit compatibility errors when the selected provider does not support the requested operation. +For feature-level behavior, including hosted tools and chat-translated +providers, see [Responses compatibility](/advanced/responses-compatibility). + ## Supported endpoints | Endpoint | Behavior | diff --git a/docs/advanced/responses-compatibility.mdx b/docs/advanced/responses-compatibility.mdx new file mode 100644 index 00000000..b936ce09 --- /dev/null +++ b/docs/advanced/responses-compatibility.mdx @@ -0,0 +1,142 @@ +--- +title: "Responses compatibility" +description: "Understand which Responses API features GoModel can translate to chat providers, which ones require native provider support, and how this affects agent SDKs." +icon: "route" +tag: "Beta" +--- + +GoModel accepts OpenAI-compatible `/v1/responses` requests and routes them to +the selected model provider. Some providers expose a native Responses-compatible +surface. Others expose chat completions or provider-native chat APIs, so +GoModel translates the request. + +The compatibility rule is conservative: GoModel translates portable model +features and rejects provider-hosted features when it cannot preserve their +meaning. + +## Routing modes + +| Mode | What happens | +| --- | --- | +| Native Responses provider | GoModel forwards the Responses payload to the provider's Responses-compatible endpoint. Provider-native features may work if that provider supports them. | +| Chat-translated provider | GoModel converts `/v1/responses` to chat semantics, calls the provider's chat API, then converts the result back to a Responses shape. | + +Chat-translated providers include Anthropic and Gemini native routing. 
They work +well for text, streaming, multimodal inputs supported by the target adapter, and +function tool loops. They cannot safely execute OpenAI-hosted tools. + +## Feature behavior + +| Feature | Native Responses provider | Chat-translated provider | +| --- | --- | --- | +| Text input and `instructions` | Forwarded | Converted to chat messages | +| Streaming over HTTP/SSE | Forwarded | Converted from chat streaming events | +| Function tools | Forwarded | Converted to provider function/tool declarations | +| Function call output items | Forwarded | Converted to chat tool-result messages | +| `text.format` structured output | Forwarded | Converted to `response_format` when the provider supports it | +| OpenAI-hosted web search | Provider decides | Rejected | +| OpenAI-hosted file search | Provider decides | Rejected | +| OpenAI-hosted computer use | Provider decides | Rejected | +| `previous_response_id` and `conversation` | Forwarded | Rejected | +| Unknown Responses input item types | Preserved | Rejected | +| Responses websocket transport | Not implemented by GoModel | Not implemented by GoModel | + + + Anthropic does not currently accept translated `response_format` or + `text.verbosity` settings through GoModel's chat translation path. GoModel + rejects those fields instead of dropping them. + + +## Hosted tools + +Hosted tools are executed by the upstream provider, not by the model text +completion alone. Their payloads often reference provider-owned resources and +runtime state: + +- `web_search_preview` depends on the provider's search implementation and + event schema. +- `file_search` references provider vector stores such as `vector_store_ids`. +- `computer_use_preview` depends on a provider-managed computer session, + display environment, and safety model. + +GoModel does not translate these into Anthropic or Gemini tool calls. 
A fake +translation would make the request appear supported while changing where the +tool runs, how state is stored, and which security controls apply. + +When a chat-translated provider receives a hosted tool request, GoModel returns +an OpenAI-compatible invalid request error: + +```json +{ + "error": { + "type": "invalid_request_error", + "message": "responses tool type \"web_search_preview\" is only supported by native Responses providers; chat-translated providers only support function tools", + "param": null, + "code": null + } +} +``` + +## Agent SDKs + +OpenAI Agents SDK clients can talk to Anthropic and Gemini models through +GoModel for portable flows: + +- plain `Runner.run(...)` +- `Runner.run_streamed(...)` over HTTP/SSE +- local function tools +- SDK-managed local history replay + +Provider-hosted tools, server-managed conversation state, and websocket +Responses transport still depend on provider-specific support. + +For Python Agents SDK clients, namespaced GoModel model IDs such as +`anthropic/claude-sonnet-4-20250514` and `gemini/gemini-2.0-flash` need model ID +pass-through mode: + +```python +from agents import MultiProvider, RunConfig +from openai import AsyncOpenAI + +client = AsyncOpenAI( + base_url="http://localhost:8080/v1", + api_key="change-me", +) + +run_config = RunConfig( + model_provider=MultiProvider( + openai_client=client, + unknown_prefix_mode="model_id", + openai_prefix_mode="model_id", + ) +) +``` + +Use that `run_config` when calling `Runner.run(...)` or +`Runner.run_streamed(...)`. 
+ +## Test the compatibility boundary + +The OpenAI Agents SDK examples include probes for Anthropic routing through +GoModel: + +```bash +export OPENAI_BASE_URL=http://localhost:8080/v1 +export GOMODEL_MASTER_KEY=change-me +export OPENAI_MODEL=anthropic/claude-sonnet-4-20250514 + +python3 docs/examples/openai-agents-sdk/anthropic_responses_probe.py +python3 docs/examples/openai-agents-sdk/anthropic_agents_probe.py +``` + +The Responses probe verifies both supported and unsupported paths: plain +Responses calls, function tools, structured-output rejection, stateful-field +rejection, unknown input-item rejection, and hosted-tool rejection. The Agents +probe verifies basic runs, function tool loops, and streamed function tool +loops. + +## Roadmap + +Future support for hosted tools should use explicit provider capability mapping. +That means GoModel should know which provider, model, API mode, and request +shape can safely handle each feature before accepting the request. diff --git a/docs/dev/agents-sdk-support.md b/docs/dev/agents-sdk-support.md index f795e66c..11fabf2d 100644 --- a/docs/dev/agents-sdk-support.md +++ b/docs/dev/agents-sdk-support.md @@ -1,6 +1,6 @@ # OpenAI Agents SDK Support -Status checked: 2026-05-22 +Status checked: 2026-06-02 ## Short answer @@ -84,6 +84,9 @@ exact shape. - Unknown Responses input item types round-trip unchanged for native Responses providers; chat-translated providers now return a clear compatibility error. - First OpenAI Agents SDK guide and runnable smoke examples. +- Manual Anthropic probes passed for direct OpenAI Responses calls, Python + Agents SDK `Runner.run(...)`, function tool loops, and + `Runner.run_streamed(...)` on 2026-06-02. ### Needs validation @@ -97,6 +100,7 @@ exact shape. - Sessions that replay `result.to_input_list()` and SDK-managed local session history. - `OpenAIResponsesCompactionSession` with `responses.compact`. +- Full Gemini Agents SDK probes against live upstream models. 
### Known or likely gaps @@ -113,12 +117,19 @@ exact shape. endpoint. - Built-in Responses tools such as web search, file search, computer use, and tool search are only safe when the selected upstream provider natively - supports those tool payloads. + supports those tool payloads. Chat-translated providers now reject hosted tool + payloads instead of assuming provider compatibility. +- Anthropic rejects translated `response_format` and `verbosity` fields because + there is no safe native mapping today. - Prompt-managed flows and deferred tool loading need validation, especially when the SDK omits `model` because the prompt owns model selection. - Tracing uploads go to OpenAI by default in the SDK. Users without an OpenAI Platform key need docs to disable tracing or configure a separate tracing processor/key. +- Python Agents SDK users must enable model ID pass-through on `MultiProvider` + when sending GoModel namespaced model IDs such as `anthropic/...` or + `gemini/...`; otherwise the SDK rejects unknown provider prefixes before + calling GoModel. ## Implementation checklist @@ -199,7 +210,8 @@ provider-specific adaptation where relevant. providers with a clear compatibility error. - Done: translate `text.format` to the Chat Completions `response_format` (`json_schema` / `json_object`) and pass `text.verbosity` through on - chat-translated providers; unknown text formats still return a clear error. + chat-translated providers that support those fields; unknown text formats + still return a clear error. Anthropic rejects these fields explicitly. - Still needed: optionally expand previous stored responses into full input for chat-translated providers. - Add tests for: @@ -223,7 +235,9 @@ provider-specific adaptation where relevant. ### P2: Feature capability gating -- Add model/provider capability metadata for Responses features: +- Partially done: chat-translated providers reject hosted OpenAI Responses tools + until a provider-specific capability mapping exists. 
+- Still needed: add model/provider capability metadata for Responses features: - function tools - structured outputs through `text.format` - multimodal input diff --git a/docs/docs.json b/docs/docs.json index 7a69f5e2..8d5423df 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -61,6 +61,7 @@ "advanced/config-yaml", "advanced/resilience", "advanced/responses-api", + "advanced/responses-compatibility", "advanced/conversations-api", "advanced/anthropic-messages-api", "advanced/audio-api", diff --git a/docs/examples/openai-agents-sdk/README.md b/docs/examples/openai-agents-sdk/README.md index b613889d..d61f39dd 100644 --- a/docs/examples/openai-agents-sdk/README.md +++ b/docs/examples/openai-agents-sdk/README.md @@ -23,6 +23,21 @@ python3 python_streaming_tool.py node javascript_basic.mjs ``` +To probe an Anthropic model through GoModel's OpenAI-compatible Responses API: + +```bash +export OPENAI_BASE_URL=http://localhost:8080/v1 +export GOMODEL_MASTER_KEY=change-me +export OPENAI_MODEL=anthropic/claude-sonnet-4-20250514 + +python3 anthropic_responses_probe.py +python3 anthropic_agents_probe.py +``` + +`anthropic_agents_probe.py` configures the Python SDK's `MultiProvider` with +model ID pass-through so namespaced GoModel IDs such as `anthropic/...` reach +the gateway unchanged. 
+ Install the SDK dependencies in your own environment: ```bash diff --git a/docs/examples/openai-agents-sdk/anthropic_agents_probe.py b/docs/examples/openai-agents-sdk/anthropic_agents_probe.py new file mode 100644 index 00000000..54d18d67 --- /dev/null +++ b/docs/examples/openai-agents-sdk/anthropic_agents_probe.py @@ -0,0 +1,150 @@ +import asyncio +import json +import os +from collections.abc import Awaitable, Callable + +from openai import AsyncOpenAI + +try: + from agents import ( + Agent, + MultiProvider, + RunConfig, + Runner, + function_tool, + set_default_openai_client, + set_tracing_disabled, + ) +except ImportError as exc: + raise SystemExit( + "Missing dependency: install with `pip install openai-agents openai`." + ) from exc + + +BASE_URL = os.getenv("OPENAI_BASE_URL", "http://localhost:8080/v1") +API_KEY = os.getenv("GOMODEL_MASTER_KEY") or os.getenv("OPENAI_API_KEY", "change-me") +CLIENT = AsyncOpenAI( + base_url=BASE_URL, + api_key=API_KEY, +) +MODEL = ( + os.getenv("ANTHROPIC_MODEL") + or os.getenv("OPENAI_MODEL") + or "anthropic/claude-sonnet-4-20250514" +) + + +set_default_openai_client( + CLIENT, + use_for_tracing=False, +) +set_tracing_disabled(True) + +RUN_CONFIG = RunConfig( + model_provider=MultiProvider( + openai_client=CLIENT, + unknown_prefix_mode="model_id", + openai_prefix_mode="model_id", + ) +) + + +@function_tool +def lookup_inventory(sku: str) -> str: + """Look up inventory availability for a SKU.""" + if sku == "WIDGET-42": + return json.dumps({"sku": sku, "status": "in_stock", "quantity": 17}) + return json.dumps({"sku": sku, "status": "unknown", "quantity": 0}) + + +async def run_case( + name: str, + call: Callable[[], Awaitable[str]], + *, + must_contain: str, +) -> bool: + try: + output = await call() + except Exception as exc: + print(f"FAIL {name}: unexpected error: {exc}") + return False + + normalized = output.lower() + if must_contain.lower() not in normalized: + print(f"FAIL {name}: output did not contain 
{must_contain!r}: {output[:240]}") + return False + + print(f"PASS {name}: {output[:160].replace(chr(10), ' ')}") + return True + + +async def run_basic() -> str: + agent = Agent( + name="Anthropic gateway probe", + instructions="Be exact and concise.", + model=MODEL, + ) + result = await Runner.run( + agent, + "Reply with exactly: gateway-ok", + run_config=RUN_CONFIG, + ) + return str(result.final_output) + + +async def run_tool_loop() -> str: + agent = Agent( + name="Anthropic gateway tool probe", + instructions=( + "You must call lookup_inventory before answering. " + "Include the SKU and status in your final answer." + ), + model=MODEL, + tools=[lookup_inventory], + ) + result = await Runner.run( + agent, + "Check inventory for SKU WIDGET-42.", + run_config=RUN_CONFIG, + ) + return str(result.final_output) + + +async def run_streamed_tool_loop() -> str: + agent = Agent( + name="Anthropic gateway streaming tool probe", + instructions=( + "You must call lookup_inventory before answering. " + "Include the quantity in your final answer." 
+ ), + model=MODEL, + tools=[lookup_inventory], + ) + result = Runner.run_streamed( + agent, + "Check inventory for SKU WIDGET-42.", + run_config=RUN_CONFIG, + ) + async for _event in result.stream_events(): + pass + return str(result.final_output) + + +async def main() -> int: + cases: list[tuple[str, Callable[[], Awaitable[str]], str]] = [ + ("basic agents run", run_basic, "gateway-ok"), + ("function tool loop", run_tool_loop, "17"), + ("streamed function tool loop", run_streamed_tool_loop, "17"), + ] + + results = [] + for name, call, must_contain in cases: + results.append(await run_case(name, call, must_contain=must_contain)) + + passed = sum(1 for result in results if result) + print(json.dumps({"passed": passed, "total": len(results), "model": MODEL}, indent=2)) + return 0 if all(results) else 1 + + +if __name__ == "__main__": + raise SystemExit(asyncio.run(main())) diff --git a/docs/examples/openai-agents-sdk/anthropic_responses_probe.py b/docs/examples/openai-agents-sdk/anthropic_responses_probe.py new file mode 100644 index 00000000..7e395715 --- /dev/null +++ b/docs/examples/openai-agents-sdk/anthropic_responses_probe.py @@ -0,0 +1,203 @@ +import asyncio +import json +import os +from collections.abc import Awaitable, Callable + +from openai import AsyncOpenAI + + +BASE_URL = os.getenv("OPENAI_BASE_URL", "http://localhost:8080/v1") +API_KEY = os.getenv("GOMODEL_MASTER_KEY") or os.getenv("OPENAI_API_KEY", "change-me") +MODEL = ( + os.getenv("ANTHROPIC_MODEL") + or os.getenv("OPENAI_MODEL") + or "anthropic/claude-sonnet-4-20250514" +) + + +async def run_case( + name: str, + call: Callable[[], Awaitable[object]], + *, + expect_error_contains: str | None = None, +) -> bool: + try: + result = await call() + except Exception as exc: + message = str(exc) + if expect_error_contains and expect_error_contains in message: + print(f"PASS {name}: expected unsupported path ({expect_error_contains})") + return True + print(f"FAIL {name}: unexpected error: {message}") + 
return False + + if expect_error_contains: + print(f"FAIL {name}: expected error containing {expect_error_contains!r}") + return False + + output_text = response_summary(result) + print(f"PASS {name}: {output_text[:160].replace(chr(10), ' ')}") + return True + + +def response_summary(result: object) -> str: + try: + output_text = getattr(result, "output_text", "") or "" + except TypeError: + output_text = "" + if output_text: + return output_text + + output = getattr(result, "output", None) + if output: + parts = [] + for item in output: + item_type = getattr(item, "type", None) or "unknown" + name = getattr(item, "name", None) + parts.append(f"{item_type}:{name}" if name else str(item_type)) + return ", ".join(parts) + + return type(result).__name__ + + +async def main() -> int: + client = AsyncOpenAI(base_url=BASE_URL, api_key=API_KEY) + cases: list[tuple[str, Callable[[], Awaitable[object]], str | None]] = [ + ( + "plain responses call", + lambda: client.responses.create( + model=MODEL, + instructions="Be concise. Do not mention implementation details.", + input=( + "Give three short bullets for what an AI gateway should verify " + "before routing an agent request." 
+ ), + ), + None, + ), + ( + "forced function tool call", + lambda: client.responses.create( + model=MODEL, + input="Use the tool to inspect order A123 and do not answer directly.", + tools=[ + { + "type": "function", + "name": "lookup_order", + "description": "Look up an order by id.", + "parameters": { + "type": "object", + "properties": { + "order_id": { + "type": "string", + "description": "The customer order id.", + } + }, + "required": ["order_id"], + "additionalProperties": False, + }, + } + ], + tool_choice={"type": "function", "name": "lookup_order"}, + ), + None, + ), + ( + "json schema structured output gap", + lambda: client.responses.create( + model=MODEL, + input="Return a JSON object with ok=true and one detected_gap string.", + text={ + "format": { + "type": "json_schema", + "name": "probe_result", + "strict": True, + "schema": { + "type": "object", + "properties": { + "ok": {"type": "boolean"}, + "detected_gap": {"type": "string"}, + }, + "required": ["ok", "detected_gap"], + "additionalProperties": False, + }, + } + }, + ), + "response_format", + ), + ( + "previous_response_id state gap", + lambda: client.responses.create( + model=MODEL, + input="Continue from previous state.", + previous_response_id="resp_probe_previous", + ), + "previous_response_id", + ), + ( + "unknown input item gap", + lambda: client.responses.create( + model=MODEL, + input=[ + { + "type": "reasoning", + "id": "rs_probe", + "summary": [ + {"type": "summary_text", "text": "Synthetic prior reasoning."} + ], + }, + {"role": "user", "content": "Continue."}, + ], + ), + "unsupported input item type", + ), + ( + "hosted web search gap", + lambda: client.responses.create( + model=MODEL, + input="Search the web for the latest Go release.", + tools=[{"type": "web_search_preview"}], + ), + "web_search_preview", + ), + ( + "hosted file search gap", + lambda: client.responses.create( + model=MODEL, + input="Search the attached vector store.", + tools=[{"type": "file_search", 
"vector_store_ids": ["vs_probe"]}], + ), + "file_search", + ), + ( + "hosted computer use gap", + lambda: client.responses.create( + model=MODEL, + input="Use the computer to inspect the page.", + tools=[ + { + "type": "computer_use_preview", + "display_width": 1024, + "display_height": 768, + "environment": "browser", + } + ], + ), + "computer_use_preview", + ), + ] + + results = [] + for name, call, expected_error in cases: + results.append( + await run_case(name, call, expect_error_contains=expected_error) + ) + + passed = sum(1 for result in results if result) + print(json.dumps({"passed": passed, "total": len(results), "model": MODEL}, indent=2)) + return 0 if all(results) else 1 + + +if __name__ == "__main__": + raise SystemExit(asyncio.run(main())) diff --git a/docs/guides/openai-agents-sdk.mdx b/docs/guides/openai-agents-sdk.mdx index 744354a3..4740b978 100644 --- a/docs/guides/openai-agents-sdk.mdx +++ b/docs/guides/openai-agents-sdk.mdx @@ -123,6 +123,39 @@ console.log(result.finalOutput); +### Namespaced model IDs + +The Python SDK treats strings like `anthropic/claude-sonnet-4-20250514` and +`gemini/gemini-2.0-flash` as provider-prefixed model names by default. 
When you +want those namespaced GoModel IDs to reach the gateway unchanged, configure the +SDK model provider in model ID pass-through mode: + +```python +import os + +from agents import MultiProvider, RunConfig, Runner +from openai import AsyncOpenAI + +client = AsyncOpenAI( + base_url=os.getenv("OPENAI_BASE_URL", "http://localhost:8080/v1"), + api_key=os.getenv("GOMODEL_MASTER_KEY", "change-me"), +) + +run_config = RunConfig( + model_provider=MultiProvider( + openai_client=client, + unknown_prefix_mode="model_id", + openai_prefix_mode="model_id", + ) +) + +result = await Runner.run( + agent, + "Reply with exactly ok.", + run_config=run_config, +) +``` + ## Supported SDK paths GoModel supports the SDK's normal HTTP Responses path: @@ -142,9 +175,12 @@ GoModel also preserves newer Responses input items for native Responses providers. If a request has to be translated to Chat Completions for a provider that does not implement Responses natively, GoModel translates structured `text` output settings into the Chat Completions `response_format` (and passes -`text.verbosity` through), while returning a clear error for stateful or -provider-native Responses fields such as `previous_response_id`, `conversation`, -and hosted tool items. +`text.verbosity` through) only for chat providers that support those fields, +while returning a clear error for stateful or provider-native Responses fields +such as `previous_response_id`, `conversation`, and hosted tool items. + +See [Responses compatibility](/advanced/responses-compatibility) for the full +feature matrix. ## Storage behavior @@ -157,7 +193,12 @@ when the upstream provider does not support response retrieval. If the SDK sends - Responses websocket transport is not implemented. Use the SDK's HTTP/SSE transport with GoModel. - Hosted tools such as web search, file search, and computer use require a - native upstream provider that supports those tool payloads. + native upstream provider that supports those tool payloads. 
Chat-translated + providers such as Anthropic and Gemini reject those tools unless GoModel adds + explicit provider capability mapping for them. +- Anthropic does not currently accept translated structured-output + `response_format` or `text.verbosity` settings. GoModel rejects those fields + instead of silently ignoring them. - `previous_response_id` and `conversation` are forwarded to native Responses providers. Chat-translated providers reject them because GoModel cannot safely reconstruct provider-managed state across that translation boundary yet. diff --git a/internal/providers/anthropic/anthropic_test.go b/internal/providers/anthropic/anthropic_test.go index 4932185d..67f83481 100644 --- a/internal/providers/anthropic/anthropic_test.go +++ b/internal/providers/anthropic/anthropic_test.go @@ -1342,6 +1342,43 @@ func TestConvertToAnthropicRequest_MapsStopSequences(t *testing.T) { } } +func TestConvertToAnthropicRequest_RejectsUnsupportedChatExtras(t *testing.T) { + tests := []struct { + name string + field string + value json.RawMessage + }{ + { + name: "response format", + field: "response_format", + value: json.RawMessage(`{"type":"json_schema","json_schema":{"name":"answer"}}`), + }, + { + name: "verbosity", + field: "verbosity", + value: json.RawMessage(`"low"`), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := convertToAnthropicRequest(&core.ChatRequest{ + Model: "claude-sonnet-4-5-20250929", + Messages: []core.Message{{Role: "user", Content: "hi"}}, + ExtraFields: core.UnknownJSONFieldsFromMap(map[string]json.RawMessage{ + tt.field: tt.value, + }), + }) + if err == nil { + t.Fatal("expected invalid request error, got nil") + } + if !strings.Contains(err.Error(), tt.field) { + t.Fatalf("error = %v, want mention %q", err, tt.field) + } + }) + } +} + func TestConvertToAnthropicRequest_InvalidToolArguments(t *testing.T) { _, err := convertToAnthropicRequest(&core.ChatRequest{ Model: "claude-sonnet-4-5-20250929", diff 
--git a/internal/providers/anthropic/request_translation.go b/internal/providers/anthropic/request_translation.go index a900a3a5..6b101e38 100644 --- a/internal/providers/anthropic/request_translation.go +++ b/internal/providers/anthropic/request_translation.go @@ -288,6 +288,9 @@ func convertToAnthropicRequest(req *core.ChatRequest) (*anthropicRequest, error) if req == nil { return nil, core.NewInvalidRequestError("anthropic chat request is required", nil) } + if err := validateAnthropicUnsupportedChatExtras(req.ExtraFields); err != nil { + return nil, err + } anthropicReq := &anthropicRequest{ Model: req.Model, @@ -353,6 +356,15 @@ func convertToAnthropicRequest(req *core.ChatRequest) (*anthropicRequest, error) return anthropicReq, nil } +func validateAnthropicUnsupportedChatExtras(extra core.UnknownJSONFields) error { + for _, field := range []string{"response_format", "verbosity"} { + if extra.Lookup(field) != nil { + return core.NewInvalidRequestError("chat field "+field+" is not supported by Anthropic translation", nil) + } + } + return nil +} + // convertResponsesRequestToAnthropic converts a canonical Responses request by // first mapping it onto shared chat semantics and then translating that semantic // request into Anthropic's native message payload. 
diff --git a/internal/providers/gemini/native.go b/internal/providers/gemini/native.go index 7fa167ee..5cb46d9c 100644 --- a/internal/providers/gemini/native.go +++ b/internal/providers/gemini/native.go @@ -347,16 +347,17 @@ func geminiToolsFromOpenAI(tools []map[string]any) ([]geminiTool, error) { } declarations := make([]geminiFunctionDeclaration, 0, len(tools)) for _, tool := range tools { - if strings.TrimSpace(fmt.Sprint(tool["type"])) != "function" { - continue + toolType := strings.TrimSpace(fmt.Sprint(tool["type"])) + if toolType != "function" { + return nil, core.NewInvalidRequestError("unsupported tool type: "+toolType, nil) } fn, ok := tool["function"].(map[string]any) if !ok { - continue + return nil, core.NewInvalidRequestError("tool.function must be an object", nil) } name, _ := fn["name"].(string) if strings.TrimSpace(name) == "" { - continue + return nil, core.NewInvalidRequestError("tool.function.name is required", nil) } description, _ := fn["description"].(string) var parametersJSONSchema json.RawMessage diff --git a/internal/providers/gemini/native_schema_test.go b/internal/providers/gemini/native_schema_test.go index c048aad9..ad6ad364 100644 --- a/internal/providers/gemini/native_schema_test.go +++ b/internal/providers/gemini/native_schema_test.go @@ -155,3 +155,39 @@ func TestGeminiToolsFromOpenAIRejectsInvalidParameterSchemas(t *testing.T) { }) } } + +func TestGeminiToolsFromOpenAIRejectsUnsupportedToolShapes(t *testing.T) { + tests := []struct { + name string + tool map[string]any + wantError string + }{ + { + name: "hosted tool type", + tool: map[string]any{"type": "web_search_preview"}, + wantError: "unsupported tool type: web_search_preview", + }, + { + name: "missing function object", + tool: map[string]any{"type": "function"}, + wantError: "tool.function must be an object", + }, + { + name: "empty function name", + tool: map[string]any{"type": "function", "function": map[string]any{"name": " "}}, + wantError: "tool.function.name is 
required", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := geminiToolsFromOpenAI([]map[string]any{tt.tool}) + if err == nil { + t.Fatal("geminiToolsFromOpenAI() error = nil, want error") + } + if !strings.Contains(err.Error(), tt.wantError) { + t.Fatalf("error = %q, want to contain %q", err.Error(), tt.wantError) + } + }) + } +} diff --git a/internal/providers/responses_adapter.go b/internal/providers/responses_adapter.go index 7f852915..35520a23 100644 --- a/internal/providers/responses_adapter.go +++ b/internal/providers/responses_adapter.go @@ -103,6 +103,19 @@ func validateResponsesRequestForChatTranslation(req *core.ResponsesRequest) erro if strings.TrimSpace(req.SafetyIdentifier) != "" { return unsupportedResponsesChatTranslationField("safety_identifier") } + if err := validateResponsesToolsForChatTranslation(req.Tools); err != nil { + return err + } + return nil +} + +func validateResponsesToolsForChatTranslation(tools []map[string]any) error { + for _, tool := range tools { + toolType, _ := tool["type"].(string) + if strings.TrimSpace(toolType) != "function" { + return unsupportedResponsesChatTranslationTool(toolType) + } + } return nil } @@ -192,6 +205,17 @@ func unsupportedResponsesChatTranslationField(field string) error { ) } +func unsupportedResponsesChatTranslationTool(toolType string) error { + toolType = strings.TrimSpace(toolType) + if toolType == "" { + toolType = "unknown" + } + return core.NewInvalidRequestError( + fmt.Sprintf("responses tool type %q is only supported by native Responses providers; chat-translated providers only support function tools", toolType), + nil, + ) +} + func cloneStreamOptions(src *core.StreamOptions) *core.StreamOptions { if src == nil { return nil diff --git a/internal/providers/responses_adapter_test.go b/internal/providers/responses_adapter_test.go index dd4e2936..197f7b58 100644 --- a/internal/providers/responses_adapter_test.go +++ 
b/internal/providers/responses_adapter_test.go @@ -435,6 +435,39 @@ func TestConvertResponsesRequestToChat_RejectsStatefulAgentsSDKFields(t *testing req: &core.ResponsesRequest{Model: "test-model", Input: "Hello", Text: map[string]any{"format": map[string]any{"type": "grammar"}}}, want: "text", }, + { + name: "hosted web search tool", + req: &core.ResponsesRequest{ + Model: "test-model", + Input: "Hello", + Tools: []map[string]any{ + {"type": "web_search_preview"}, + }, + }, + want: "web_search_preview", + }, + { + name: "hosted file search tool", + req: &core.ResponsesRequest{ + Model: "test-model", + Input: "Hello", + Tools: []map[string]any{ + {"type": "file_search", "vector_store_ids": []string{"vs_123"}}, + }, + }, + want: "file_search", + }, + { + name: "hosted computer use tool", + req: &core.ResponsesRequest{ + Model: "test-model", + Input: "Hello", + Tools: []map[string]any{ + {"type": "computer_use_preview", "display_width": 1024, "display_height": 768}, + }, + }, + want: "computer_use_preview", + }, } for _, tt := range tests { From e2529b985e9554a2d970398d323d2aad02cc3c63 Mon Sep 17 00:00:00 2001 From: "Jakub A. 
W" Date: Thu, 4 Jun 2026 00:48:46 -0400 Subject: [PATCH 07/12] fix(providers): preserve typed top_p in adapters --- internal/providers/bedrock/bedrock_test.go | 25 ++++++++++++++-- internal/providers/bedrock/chat.go | 8 +++-- internal/providers/gemini/gemini_test.go | 34 ++++++++++++++++++++++ internal/providers/gemini/native.go | 6 +++- 4 files changed, 66 insertions(+), 7 deletions(-) diff --git a/internal/providers/bedrock/bedrock_test.go b/internal/providers/bedrock/bedrock_test.go index 0297b295..d138147e 100644 --- a/internal/providers/bedrock/bedrock_test.go +++ b/internal/providers/bedrock/bedrock_test.go @@ -270,6 +270,25 @@ func TestBuildConverseParts_TopPFromExtraFields(t *testing.T) { } } +func TestBuildConverseParts_TopPFromTypedField(t *testing.T) { + topP := 0.8 + req := &core.ChatRequest{ + Model: "anthropic.claude-3-5-haiku-20241022-v1:0", + Messages: []core.Message{{Role: "user", Content: "hi"}}, + TopP: &topP, + } + parts, err := buildConverseParts(req) + if err != nil { + t.Fatalf("buildConverseParts: %v", err) + } + if parts.infCfg == nil || parts.infCfg.TopP == nil { + t.Fatal("typed top_p was not forwarded to InferenceConfiguration.TopP") + } + if got := awssdk.ToFloat32(parts.infCfg.TopP); got != 0.8 { + t.Errorf("top_p = %v, want 0.8", got) + } +} + func TestBuildConverseParts_RejectsMaxTokensOverflow(t *testing.T) { overflow := int(int64(1) << 33) // 2^33, fits in int64 but not int32 req := &core.ChatRequest{ @@ -468,9 +487,9 @@ func TestConvertTools_ToolChoiceNormalization(t *testing.T) { }} cases := []struct { - name string - choice any - wantNil bool + name string + choice any + wantNil bool wantChoice string // type name suffix for assertion when cfg is non-nil }{ {"auto string", "auto", false, "Auto"}, diff --git a/internal/providers/bedrock/chat.go b/internal/providers/bedrock/chat.go index afbb9ecf..c0f148b9 100644 --- a/internal/providers/bedrock/chat.go +++ b/internal/providers/bedrock/chat.go @@ -351,10 +351,12 @@ func 
resolveMaxTokens(req *core.ChatRequest) int { return 0 } -// resolveTopP extracts top_p from req.ExtraFields. core.ChatRequest does not -// surface top_p as a typed field, so we look it up in the catch-all map the -// JSON decoder populates for unknown OpenAI parameters. +// resolveTopP extracts top_p from the typed request field, falling back to the +// catch-all map for older internal callers that still carry it as an extra. func resolveTopP(req *core.ChatRequest) (float64, bool) { + if req.TopP != nil { + return *req.TopP, true + } raw := req.ExtraFields.Lookup("top_p") if len(raw) == 0 { return 0, false diff --git a/internal/providers/gemini/gemini_test.go b/internal/providers/gemini/gemini_test.go index 2c6c02e0..93c96778 100644 --- a/internal/providers/gemini/gemini_test.go +++ b/internal/providers/gemini/gemini_test.go @@ -994,6 +994,40 @@ func TestChatCompletion_UsesNativeGenerateContentByDefault(t *testing.T) { } } +func TestGeminiGenerationConfig_UsesTypedTopP(t *testing.T) { + topP := 0.8 + cfg := geminiGenerationConfig(&core.ChatRequest{ + Model: "gemini-2.5-flash", + Messages: []core.Message{{Role: "user", Content: "hi"}}, + TopP: &topP, + }) + + if got := cfg["topP"]; got != 0.8 { + t.Fatalf("topP = %#v, want 0.8", got) + } +} + +func TestConvertResponsesRequestToGeminiPreservesTopP(t *testing.T) { + topP := 0.7 + chatReq, err := providers.ConvertResponsesRequestToChat(&core.ResponsesRequest{ + Model: "gemini-2.5-flash", + Input: "hi", + TopP: &topP, + }) + if err != nil { + t.Fatalf("ConvertResponsesRequestToChat() error = %v", err) + } + + geminiReq, err := convertChatRequestToGemini(chatReq) + if err != nil { + t.Fatalf("convertChatRequestToGemini() error = %v", err) + } + + if got := geminiReq.GenerationConfig["topP"]; got != 0.7 { + t.Fatalf("topP = %#v, want 0.7", got) + } +} + func TestChatCompletion_NativeUsageMetadata(t *testing.T) { t.Setenv(useNativeAPIEnvVar, "true") diff --git a/internal/providers/gemini/native.go 
b/internal/providers/gemini/native.go index 5cb46d9c..db5bb701 100644 --- a/internal/providers/gemini/native.go +++ b/internal/providers/gemini/native.go @@ -481,7 +481,11 @@ func geminiGenerationConfig(req *core.ChatRequest) map[string]any { if req.Temperature != nil { cfg["temperature"] = *req.Temperature } - copyJSONNumber(req.ExtraFields.Lookup("top_p"), cfg, "topP") + if req.TopP != nil { + cfg["topP"] = *req.TopP + } else { + copyJSONNumber(req.ExtraFields.Lookup("top_p"), cfg, "topP") + } copyJSONNumber(req.ExtraFields.Lookup("top_k"), cfg, "topK") copyJSONNumber(req.ExtraFields.Lookup("candidate_count"), cfg, "candidateCount") copyJSONNumber(req.ExtraFields.Lookup("presence_penalty"), cfg, "presencePenalty") From 9332cfb63f1129e40e89dc03203fbf54cb8d1df8 Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Thu, 4 Jun 2026 01:42:03 -0400 Subject: [PATCH 08/12] fix(agents): address Responses review gaps --- docs/guides/openai-agents-sdk.mdx | 24 ++++-- internal/core/json_fields.go | 77 +++++++++++++++++-- internal/core/json_fields_test.go | 30 ++++++++ internal/core/responses_json.go | 1 + internal/core/responses_json_test.go | 23 ++++++ .../providers/anthropic/anthropic_test.go | 68 +++++++++++++++- .../anthropic/request_translation.go | 17 ++++ internal/providers/anthropic/types.go | 1 + internal/providers/bedrock/bedrock_test.go | 22 ++++++ internal/providers/responses_adapter.go | 16 ++++ internal/providers/responses_adapter_test.go | 18 +++++ 11 files changed, 282 insertions(+), 15 deletions(-) diff --git a/docs/guides/openai-agents-sdk.mdx b/docs/guides/openai-agents-sdk.mdx index 4740b978..69bce0f7 100644 --- a/docs/guides/openai-agents-sdk.mdx +++ b/docs/guides/openai-agents-sdk.mdx @@ -131,9 +131,10 @@ want those namespaced GoModel IDs to reach the gateway unchanged, configure the SDK model provider in model ID pass-through mode: ```python +import asyncio import os -from agents import MultiProvider, RunConfig, Runner +from agents import Agent, 
MultiProvider, RunConfig, Runner from openai import AsyncOpenAI client = AsyncOpenAI( @@ -149,11 +150,24 @@ run_config = RunConfig( ) ) -result = await Runner.run( - agent, - "Reply with exactly ok.", - run_config=run_config, +agent = Agent( + name="Gateway assistant", + instructions="Be concise.", + model=os.getenv("OPENAI_MODEL", "anthropic/claude-sonnet-4-20250514"), ) + + +async def main(): + result = await Runner.run( + agent, + "Reply with exactly ok.", + run_config=run_config, + ) + print(result.final_output) + + +if __name__ == "__main__": + asyncio.run(main()) ``` ## Supported SDK paths diff --git a/internal/core/json_fields.go b/internal/core/json_fields.go index 4ffb8059..c1867616 100644 --- a/internal/core/json_fields.go +++ b/internal/core/json_fields.go @@ -80,16 +80,77 @@ func MergeUnknownJSONFields(base UnknownJSONFields, additions map[string]json.Ra if len(additions) == 0 { return base, nil } - merged := make(map[string]json.RawMessage, len(additions)) - if !base.IsEmpty() { - if err := json.Unmarshal(base.raw, &merged); err != nil { - return UnknownJSONFields{}, err - } + additionFields := UnknownJSONFieldsFromMap(additions) + if base.IsEmpty() { + return additionFields, nil + } + + overrideKeys := make(map[string]struct{}, len(additions)) + for key := range additions { + overrideKeys[key] = struct{}{} + } + + merged, err := mergeUnknownJSONFieldsRaw(base.raw, additionFields.raw, overrideKeys) + if err != nil { + return UnknownJSONFields{}, err + } + return UnknownJSONFields{raw: merged}, nil +} + +func mergeUnknownJSONFieldsRaw(baseBody, additionBody []byte, overrideKeys map[string]struct{}) ([]byte, error) { + baseBody = bytes.TrimSpace(baseBody) + additionBody = bytes.TrimSpace(additionBody) + if len(additionBody) == 0 || bytes.Equal(additionBody, []byte("{}")) { + return CloneRawJSON(baseBody), nil + } + if len(baseBody) == 0 || bytes.Equal(baseBody, []byte("{}")) { + return CloneRawJSON(additionBody), nil + } + + totalCap, err := 
mergedJSONObjectCap(len(baseBody), len(additionBody)) + if err != nil { + return nil, err + } + + buf := bytes.NewBuffer(make([]byte, 0, totalCap)) + buf.WriteByte('{') + wrote := false + if err := appendUnknownJSONMembers(buf, baseBody, overrideKeys, &wrote); err != nil { + return nil, err } - for key, value := range additions { - merged[key] = value + if err := appendUnknownJSONMembers(buf, additionBody, nil, &wrote); err != nil { + return nil, err + } + buf.WriteByte('}') + return buf.Bytes(), nil +} + +func appendUnknownJSONMembers(buf *bytes.Buffer, body []byte, skip map[string]struct{}, wrote *bool) error { + if len(body) == 0 || bytes.Equal(body, []byte("{}")) { + return nil } - return UnknownJSONFieldsFromMap(merged), nil + if !gjson.ValidBytes(body) { + return fmt.Errorf("invalid JSON object") + } + root := gjson.ParseBytes(body) + if !root.IsObject() { + return fmt.Errorf("expected JSON object") + } + + root.ForEach(func(key, value gjson.Result) bool { + if _, shouldSkip := skip[key.String()]; shouldSkip { + return true + } + if *wrote { + buf.WriteByte(',') + } + buf.WriteString(key.Raw) + buf.WriteByte(':') + buf.WriteString(value.Raw) + *wrote = true + return true + }) + return nil } // Lookup returns the raw JSON value for key or nil when absent. 
diff --git a/internal/core/json_fields_test.go b/internal/core/json_fields_test.go index 96445ea0..69774314 100644 --- a/internal/core/json_fields_test.go +++ b/internal/core/json_fields_test.go @@ -110,6 +110,36 @@ func TestMergeUnknownJSONFields_AddsAndOverrides(t *testing.T) { } } +func TestMergeUnknownJSONFields_PreservesRawBaseMembers(t *testing.T) { + base := UnknownJSONFields{ + raw: json.RawMessage(`{"keep":{"b":2,"a":1},"dup":"first","dup":"second","override":"old"}`), + } + + merged, err := MergeUnknownJSONFields(base, map[string]json.RawMessage{ + "override": json.RawMessage(`"new"`), + "added": json.RawMessage(`true`), + }) + if err != nil { + t.Fatalf("MergeUnknownJSONFields() error = %v", err) + } + + if bytes.Count(merged.raw, []byte(`"dup"`)) != 2 { + t.Fatalf("merged raw = %s, want duplicate dup keys preserved", merged.raw) + } + if bytes.Contains(merged.raw, []byte(`"override":"old"`)) { + t.Fatalf("merged raw = %s, old override value should be removed", merged.raw) + } + if got := merged.Lookup("dup"); !bytes.Equal(got, []byte(`"first"`)) { + t.Fatalf("dup = %s, want first duplicate value", got) + } + if got := merged.Lookup("override"); !bytes.Equal(got, []byte(`"new"`)) { + t.Fatalf("override = %s, want new value", got) + } + if got := merged.Lookup("added"); !bytes.Equal(got, []byte(`true`)) { + t.Fatalf("added = %s, want true", got) + } +} + func TestMergeUnknownJSONFields_NoAdditionsReturnsBase(t *testing.T) { base := UnknownJSONFieldsFromMap(map[string]json.RawMessage{"a": json.RawMessage(`1`)}) diff --git a/internal/core/responses_json.go b/internal/core/responses_json.go index 7d06ddd4..d46ee26e 100644 --- a/internal/core/responses_json.go +++ b/internal/core/responses_json.go @@ -450,6 +450,7 @@ func (e *ResponsesInputElement) UnmarshalJSON(data []byte) error { if err := json.Unmarshal(data, &raw); err != nil { return err } + *e = ResponsesInputElement{} if v, ok := raw["type"]; ok { _ = json.Unmarshal(v, &e.Type) diff --git 
a/internal/core/responses_json_test.go b/internal/core/responses_json_test.go index fe4f58a5..798e190e 100644 --- a/internal/core/responses_json_test.go +++ b/internal/core/responses_json_test.go @@ -572,6 +572,29 @@ func TestResponsesInputElementJSON_UnknownItemRoundTripHasNoDuplicateKeys(t *tes } } +func TestResponsesInputElementUnmarshalJSON_ResetsReceiver(t *testing.T) { + var elem ResponsesInputElement + if err := json.Unmarshal([]byte(`{"type":"message","role":"user","content":"hi","x_trace":"old"}`), &elem); err != nil { + t.Fatalf("json.Unmarshal(message) error = %v", err) + } + if elem.Role != "user" || elem.Content == nil || elem.ExtraFields.Lookup("x_trace") == nil { + t.Fatalf("initial element = %+v, want populated message", elem) + } + + if err := json.Unmarshal([]byte(`{"type":"reasoning","id":"rs_123","summary":[]}`), &elem); err != nil { + t.Fatalf("json.Unmarshal(reasoning) error = %v", err) + } + if elem.Type != "reasoning" { + t.Fatalf("Type = %q, want reasoning", elem.Type) + } + if elem.Role != "" || elem.Content != nil || !elem.ExtraFields.IsEmpty() { + t.Fatalf("stale typed fields remained after unknown item decode: %+v", elem) + } + if len(elem.Raw) == 0 { + t.Fatal("Raw missing for unknown item") + } +} + func TestResponsesInputElementMarshalJSON_MergesRawUnknownItemExtras(t *testing.T) { elem := ResponsesInputElement{ Type: "reasoning", diff --git a/internal/providers/anthropic/anthropic_test.go b/internal/providers/anthropic/anthropic_test.go index 67f83481..824f3c9f 100644 --- a/internal/providers/anthropic/anthropic_test.go +++ b/internal/providers/anthropic/anthropic_test.go @@ -1372,13 +1372,72 @@ func TestConvertToAnthropicRequest_RejectsUnsupportedChatExtras(t *testing.T) { if err == nil { t.Fatal("expected invalid request error, got nil") } - if !strings.Contains(err.Error(), tt.field) { - t.Fatalf("error = %v, want mention %q", err, tt.field) + var gatewayErr *core.GatewayError + if !errors.As(err, &gatewayErr) { + t.Fatalf("error 
= %T, want *core.GatewayError", err) + } + if gatewayErr.Type != core.ErrorTypeInvalidRequest { + t.Fatalf("error type = %q, want %q", gatewayErr.Type, core.ErrorTypeInvalidRequest) + } + if gatewayErr.HTTPStatusCode() != http.StatusBadRequest { + t.Fatalf("HTTPStatusCode() = %d, want %d", gatewayErr.HTTPStatusCode(), http.StatusBadRequest) + } + if !strings.Contains(gatewayErr.Message, tt.field) { + t.Fatalf("error message = %q, want mention %q", gatewayErr.Message, tt.field) } }) } } +func TestConvertToAnthropicRequest_PreservesTopP(t *testing.T) { + topP := 0.2 + result, err := convertToAnthropicRequest(&core.ChatRequest{ + Model: "claude-sonnet-4-5-20250929", + Messages: []core.Message{{Role: "user", Content: "hi"}}, + TopP: &topP, + }) + if err != nil { + t.Fatalf("convertToAnthropicRequest() error = %v", err) + } + if result.TopP == nil || *result.TopP != 0.2 { + t.Fatalf("TopP = %#v, want 0.2", result.TopP) + } +} + +func TestConvertToAnthropicRequest_TopPFromExtraFields(t *testing.T) { + result, err := convertToAnthropicRequest(&core.ChatRequest{ + Model: "claude-sonnet-4-5-20250929", + Messages: []core.Message{{Role: "user", Content: "hi"}}, + ExtraFields: core.UnknownJSONFieldsFromMap(map[string]json.RawMessage{ + "top_p": json.RawMessage("0.3"), + }), + }) + if err != nil { + t.Fatalf("convertToAnthropicRequest() error = %v", err) + } + if result.TopP == nil || *result.TopP != 0.3 { + t.Fatalf("TopP = %#v, want 0.3", result.TopP) + } +} + +func TestConvertToAnthropicRequest_TypedTopPWinsOverExtraFields(t *testing.T) { + topP := 0.2 + result, err := convertToAnthropicRequest(&core.ChatRequest{ + Model: "claude-sonnet-4-5-20250929", + Messages: []core.Message{{Role: "user", Content: "hi"}}, + TopP: &topP, + ExtraFields: core.UnknownJSONFieldsFromMap(map[string]json.RawMessage{ + "top_p": json.RawMessage("0.9"), + }), + }) + if err != nil { + t.Fatalf("convertToAnthropicRequest() error = %v", err) + } + if result.TopP == nil || *result.TopP != 0.2 { + 
t.Fatalf("TopP = %#v, want typed value 0.2", result.TopP) + } +} + func TestConvertToAnthropicRequest_InvalidToolArguments(t *testing.T) { _, err := convertToAnthropicRequest(&core.ChatRequest{ Model: "claude-sonnet-4-5-20250929", @@ -2902,6 +2961,7 @@ func TestResponsesWithContext(t *testing.T) { func TestConvertResponsesRequestToAnthropic(t *testing.T) { temp := 0.7 + topP := 0.2 maxTokens := 1024 tests := []struct { @@ -2949,12 +3009,16 @@ func TestConvertResponsesRequestToAnthropic(t *testing.T) { Model: "claude-sonnet-4-5-20250929", Input: "Hello", Temperature: &temp, + TopP: &topP, MaxOutputTokens: &maxTokens, }, checkFn: func(t *testing.T, req *anthropicRequest) { if req.Temperature == nil || *req.Temperature != 0.7 { t.Errorf("Temperature = %v, want 0.7", req.Temperature) } + if req.TopP == nil || *req.TopP != 0.2 { + t.Errorf("TopP = %v, want 0.2", req.TopP) + } if req.MaxTokens != 1024 { t.Errorf("MaxTokens = %d, want 1024", req.MaxTokens) } diff --git a/internal/providers/anthropic/request_translation.go b/internal/providers/anthropic/request_translation.go index 6b101e38..eb7b62ae 100644 --- a/internal/providers/anthropic/request_translation.go +++ b/internal/providers/anthropic/request_translation.go @@ -296,6 +296,7 @@ func convertToAnthropicRequest(req *core.ChatRequest) (*anthropicRequest, error) Model: req.Model, Messages: make([]anthropicMessage, 0, len(req.Messages)), Temperature: req.Temperature, + TopP: resolveAnthropicTopP(req), Stream: req.Stream, StopSequences: stopSequencesFromExtra(req.ExtraFields), } @@ -546,6 +547,22 @@ func anthropicCacheControlFromExtra(extraFields core.UnknownJSONFields) (json.Ra return core.CloneRawJSON(trimmed), nil } +func resolveAnthropicTopP(req *core.ChatRequest) *float64 { + if req.TopP != nil { + return req.TopP + } + + raw := bytes.TrimSpace(req.ExtraFields.Lookup("top_p")) + if len(raw) == 0 || bytes.Equal(raw, []byte("null")) { + return nil + } + var topP float64 + if err := json.Unmarshal(raw, &topP); err 
!= nil { + return nil + } + return &topP +} + // stopSequencesFromExtra maps the OpenAI-compatible stop field (a string or an // array of strings, carried in the request's extra fields) to Anthropic's // stop_sequences. Empty or malformed values yield no sequences. diff --git a/internal/providers/anthropic/types.go b/internal/providers/anthropic/types.go index 0d5404aa..45746745 100644 --- a/internal/providers/anthropic/types.go +++ b/internal/providers/anthropic/types.go @@ -24,6 +24,7 @@ type anthropicRequest struct { ToolChoice *anthropicToolChoice `json:"tool_choice,omitempty"` MaxTokens int `json:"max_tokens"` Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` System any `json:"system,omitempty"` Stream bool `json:"stream,omitempty"` StopSequences []string `json:"stop_sequences,omitempty"` diff --git a/internal/providers/bedrock/bedrock_test.go b/internal/providers/bedrock/bedrock_test.go index d138147e..bed21004 100644 --- a/internal/providers/bedrock/bedrock_test.go +++ b/internal/providers/bedrock/bedrock_test.go @@ -289,6 +289,28 @@ func TestBuildConverseParts_TopPFromTypedField(t *testing.T) { } } +func TestBuildConverseParts_TypedTopPWinsOverExtraFields(t *testing.T) { + topP := 0.8 + req := &core.ChatRequest{ + Model: "anthropic.claude-3-5-haiku-20241022-v1:0", + Messages: []core.Message{{Role: "user", Content: "hi"}}, + TopP: &topP, + ExtraFields: core.UnknownJSONFieldsFromMap(map[string]json.RawMessage{ + "top_p": json.RawMessage("0.2"), + }), + } + parts, err := buildConverseParts(req) + if err != nil { + t.Fatalf("buildConverseParts: %v", err) + } + if parts.infCfg == nil || parts.infCfg.TopP == nil { + t.Fatal("typed top_p was not forwarded to InferenceConfiguration.TopP") + } + if got := awssdk.ToFloat32(parts.infCfg.TopP); got != 0.8 { + t.Errorf("top_p = %v, want typed value 0.8", got) + } +} + func TestBuildConverseParts_RejectsMaxTokensOverflow(t *testing.T) { overflow := int(int64(1) << 33) // 2^33, 
fits in int64 but not int32 req := &core.ChatRequest{ diff --git a/internal/providers/responses_adapter.go b/internal/providers/responses_adapter.go index 35520a23..0875b8cb 100644 --- a/internal/providers/responses_adapter.go +++ b/internal/providers/responses_adapter.go @@ -106,6 +106,9 @@ func validateResponsesRequestForChatTranslation(req *core.ResponsesRequest) erro if err := validateResponsesToolsForChatTranslation(req.Tools); err != nil { return err } + if err := validateResponsesToolChoiceForChatTranslation(req.ToolChoice); err != nil { + return err + } return nil } @@ -119,6 +122,19 @@ func validateResponsesToolsForChatTranslation(tools []map[string]any) error { return nil } +func validateResponsesToolChoiceForChatTranslation(choice any) error { + choiceMap, ok := choice.(map[string]any) + if !ok { + return nil + } + + choiceType, _ := choiceMap["type"].(string) + if strings.TrimSpace(choiceType) != "function" { + return unsupportedResponsesChatTranslationTool(choiceType) + } + return nil +} + // responsesTextToChatExtraFields maps the Responses "text" settings onto the // equivalent Chat Completions fields. 
text.format becomes response_format and // text.verbosity passes through unchanged; both are emitted as passthrough diff --git a/internal/providers/responses_adapter_test.go b/internal/providers/responses_adapter_test.go index 197f7b58..fb8a8dbe 100644 --- a/internal/providers/responses_adapter_test.go +++ b/internal/providers/responses_adapter_test.go @@ -468,6 +468,24 @@ func TestConvertResponsesRequestToChat_RejectsStatefulAgentsSDKFields(t *testing }, want: "computer_use_preview", }, + { + name: "hosted file search tool choice", + req: &core.ResponsesRequest{ + Model: "test-model", + Input: "Hello", + ToolChoice: map[string]any{"type": "file_search"}, + }, + want: "file_search", + }, + { + name: "hosted web search tool choice", + req: &core.ResponsesRequest{ + Model: "test-model", + Input: "Hello", + ToolChoice: map[string]any{"type": "web_search_preview"}, + }, + want: "web_search_preview", + }, } for _, tt := range tests { From 2e9b9b7512a5e93c3264779e61320bace34357eb Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Thu, 4 Jun 2026 14:31:51 -0400 Subject: [PATCH 09/12] docs(agents): remove SDK planning notes --- docs/dev/agents-sdk-support.md | 277 ------------------------- docs/dev/claude-agent-sdk-support.md | 293 --------------------------- 2 files changed, 570 deletions(-) delete mode 100644 docs/dev/agents-sdk-support.md delete mode 100644 docs/dev/claude-agent-sdk-support.md diff --git a/docs/dev/agents-sdk-support.md b/docs/dev/agents-sdk-support.md deleted file mode 100644 index 11fabf2d..00000000 --- a/docs/dev/agents-sdk-support.md +++ /dev/null @@ -1,277 +0,0 @@ -# OpenAI Agents SDK Support - -Status checked: 2026-06-02 - -## Short answer - -GoModel is close to supporting the OpenAI Agents SDK for normal HTTP-based -model calls. - -Basic Agents SDK runs should work when the SDK is pointed at GoModel as an -OpenAI-compatible endpoint and tracing is disabled or configured separately. 
-GoModel already exposes: - -- `POST /v1/responses` -- `POST /v1/chat/completions` -- Responses streaming over SSE -- Responses lifecycle endpoints: - - `GET /v1/responses/{id}` - - `GET /v1/responses/{id}/input_items` - - `POST /v1/responses/{id}/cancel` - - `DELETE /v1/responses/{id}` - - `POST /v1/responses/input_tokens` - - `POST /v1/responses/compact` - -That is enough for Codex-style Responses clients and likely enough for a simple -Agents SDK `Runner.run(...)` with text and function tools. - -It is not yet safe to market as full Agents SDK support. The SDK uses newer -Responses fields, state-management modes, built-in tools, streaming events, and -optional websocket transport. Some of those are only pass-through today, and -some are not validated against the SDK. - -## What the SDK expects - -The OpenAI Agents SDK uses the Responses API by default for OpenAI models. Its -Responses request path can send fields such as: - -- `previous_response_id` -- `conversation` -- `instructions` -- `model` -- `input` -- `include` -- `tools` -- `prompt` -- `temperature` -- `top_p` -- `truncation` -- `max_output_tokens` -- `tool_choice` -- `parallel_tool_calls` -- `stream` -- `text` -- `store` -- `prompt_cache_retention` -- `reasoning` -- `metadata` -- `context_management` -- SDK `extra_args` / `extra_body` fields - -GoModel preserves unknown top-level Responses fields, so native OpenAI-compatible -providers receive many of these without code changes. The weak spot is the -translated-provider path, where Responses-only fields can leak into Chat -Completions requests or newer Responses input/output item types can lose their -exact shape. - -## Current support assessment - -### Supported now - -- Basic non-streaming Responses calls. -- Basic streaming Responses calls over HTTP/SSE. -- Function tool calls and `function_call_output` items in the - Responses-to-Chat adapter. 
-- `tools`, `tool_choice`, `parallel_tool_calls`, `temperature`, - `max_output_tokens`, `reasoning`, and `metadata`. -- Native OpenAI-compatible passthrough for extra top-level request fields. -- Stored non-streaming response snapshots for local response retrieval and - `input_items`. -- `responses.input_tokens` and `responses.compact` when the selected provider - exposes native support. -- `/v1/conversations` lifecycle endpoints. -- `store: false` skips GoModel's local response snapshot. -- Unknown Responses input item types round-trip unchanged for native Responses - providers; chat-translated providers now return a clear compatibility error. -- First OpenAI Agents SDK guide and runnable smoke examples. -- Manual Anthropic probes passed for direct OpenAI Responses calls, Python - Agents SDK `Runner.run(...)`, function tool loops, and - `Runner.run_streamed(...)` on 2026-06-02. - -### Needs validation - -- Python Agents SDK with `OpenAIResponsesModel`. -- JavaScript Agents SDK with the default Responses provider. -- `Runner.run_streamed(...)` against GoModel SSE streams. -- Function tool loops across multiple SDK turns. -- Handoffs and agents-as-tools, which become tool definitions at the model - boundary. -- Structured outputs through the Responses `text` format field. -- Sessions that replay `result.to_input_list()` and SDK-managed local session - history. -- `OpenAIResponsesCompactionSession` with `responses.compact`. -- Full Gemini Agents SDK probes against live upstream models. - -### Known or likely gaps - -- No SDK contract test suite in CI. -- `previous_response_id` is only safe when the upstream provider handles it - natively. Chat-translated providers now return a clear compatibility error; - local expansion from GoModel's stored Responses state is still not - implemented. 
-- The Chat-to-Responses stream converter emits the core events needed for text - and function calls, but the event sequence has not been validated against the - current Agents SDK parsers. -- Websocket Responses transport is unsupported. The SDK can use HTTP/SSE, but - `use_responses_websocket=True` needs a websocket-compatible `/responses` - endpoint. -- Built-in Responses tools such as web search, file search, computer use, and - tool search are only safe when the selected upstream provider natively - supports those tool payloads. Chat-translated providers now reject hosted tool - payloads instead of assuming provider compatibility. -- Anthropic rejects translated `response_format` and `verbosity` fields because - there is no safe native mapping today. -- Prompt-managed flows and deferred tool loading need validation, especially - when the SDK omits `model` because the prompt owns model selection. -- Tracing uploads go to OpenAI by default in the SDK. Users without an OpenAI - Platform key need docs to disable tracing or configure a separate tracing - processor/key. -- Python Agents SDK users must enable model ID pass-through on `MultiProvider` - when sending GoModel namespaced model IDs such as `anthropic/...` or - `gemini/...`; otherwise the SDK rejects unknown provider prefixes before - calling GoModel. - -## Implementation checklist - -### P0: Prove basic SDK compatibility - -- Done: add `docs/guides/openai-agents-sdk.mdx`. - - Python example using `AsyncOpenAI(base_url="http://localhost:8080/v1", - api_key="$GOMODEL_MASTER_KEY")`. - - Python example using `OpenAIProvider` / `RunConfig`. - - JavaScript example using an OpenAI provider pointed at GoModel. - - Mention that tracing must be disabled or configured with a real OpenAI - Platform key. - - Mention that HTTP/SSE Responses is the supported path; websocket transport - is not supported yet. -- Done: add a small runnable smoke test example under - `docs/examples/openai-agents-sdk/`. 
- - Text-only `Runner.run`. - - Streaming `Runner.run_streamed`. - - One local function tool. -- Still needed: add CI or manual contract tests that boot GoModel against the existing mock - provider and run the smoke examples. - -### P0: Preserve Responses items exactly - -- Done: change Responses input decoding so unknown item types keep their original raw - JSON shape. -- Keep typed conversion for known item types: - - `message` - - `function_call` - - `function_call_output` -- Partially done: add tests for raw round-trip preservation of newer item types: - - `reasoning` - - `web_search_call` - - `file_search_call` - - `computer_call` - - `mcp_call` - - any item with `provider_data` -- Done: ensure native OpenAI-compatible providers receive those items unchanged. -- Done: ensure Chat-translated providers return a clear error or intentionally strip - unsupported item types instead of sending malformed messages upstream. - -### P0: Respect `store: false` - -- Done: add a typed `Store *bool` field to `core.ResponsesRequest`. -- Done: when `store == false`, do not persist GoModel's local response snapshot by - default. -- Add a config option only if operators need to override this for audit or - debugging. -- Document the behavior in the Responses API guide and Agents SDK guide. - -### P1: Add typed SDK request fields - -Done: add typed fields to `core.ResponsesRequest` for fields the Agents SDK sends -regularly, while still preserving unknown fields: - -- `PreviousResponseID string` -- `Conversation *ResponsesConversationRef` -- `Include []string` -- `Prompt any` -- `TopP *float64` -- `Truncation string` -- `Text any` -- `Store *bool` -- `PromptCacheRetention string` -- `ContextManagement any` -- `TopLogprobs *int` -- `User string` -- `ServiceTier string` -- `SafetyIdentifier string` - -Use these fields for cache keys, audit summaries, compatibility decisions, and -provider-specific adaptation where relevant. 
- -### P1: Make stateful Responses modes explicit - -- Done: implement `/v1/conversations` lifecycle support. -- Done: reject `previous_response_id` and `conversation` on chat-translated - providers with a clear compatibility error. -- Done: translate `text.format` to the Chat Completions `response_format` - (`json_schema` / `json_object`) and pass `text.verbosity` through on - chat-translated providers that support those fields; unknown text formats - still return a clear error. Anthropic rejects these fields explicitly. -- Still needed: optionally expand previous stored responses into full input for - chat-translated providers. -- Add tests for: - - `result.to_input_list()` / local session replay - - `previous_response_id` with native OpenAI provider - - `previous_response_id` with a chat-translated provider - - `conversation_id` unsupported behavior until conversations are implemented - -### P1: Validate streaming against the SDK - -- Run Python and JavaScript Agents SDK streaming clients against GoModel. -- Compare GoModel's chat-translated stream with native OpenAI Responses SSE - ordering. -- Add missing stream events if the SDK requires them: - - `response.content_part.added` - - `response.output_text.done` - - `response.content_part.done` - - terminal `response.failed` / `response.incomplete` propagation -- Verify usage appears on the final SDK result for both native and - chat-translated streams. - -### P2: Feature capability gating - -- Partially done: chat-translated providers reject hosted OpenAI Responses tools - until a provider-specific capability mapping exists. 
-- Still needed: add model/provider capability metadata for Responses features: - - function tools - - structured outputs through `text.format` - - multimodal input - - web search - - file search - - computer use - - tool search / deferred tool loading - - response compaction - - response lifecycle retrieval - - conversations - - websocket Responses transport -- Use the metadata to reject unsupported SDK requests early with clear - OpenAI-compatible errors. -- Surface capability notes in `/v1/models` metadata and docs. - -### P2: Subscription and harness compatibility - -- Keep this separate from OpenAI Agents SDK support. -- Document that GoModel's normal path uses gateway credentials plus upstream - provider API keys, not ChatGPT, Copilot, or Claude subscription credentials. -- Treat subscription-backed harness support as a separate compliance and product - investigation before implementation. - -## Suggested public claim - -Until the P0 work is done: - -> GoModel supports the OpenAI-compatible Responses API used by the OpenAI Agents -> SDK for basic HTTP flows, and full Agents SDK compatibility is being -> validated. - -After P0: - -> GoModel supports the OpenAI Agents SDK over HTTP Responses for text, -> streaming, function tools, and SDK-managed local sessions. Provider-native -> features such as hosted tools, conversations, and websocket transport depend -> on the selected upstream provider. diff --git a/docs/dev/claude-agent-sdk-support.md b/docs/dev/claude-agent-sdk-support.md deleted file mode 100644 index 98186ad3..00000000 --- a/docs/dev/claude-agent-sdk-support.md +++ /dev/null @@ -1,293 +0,0 @@ -# Claude Agent SDK Support - -Status checked: 2026-05-22 - -## Short answer - -GoModel is probably already close to supporting Anthropic's Agent SDK through -Anthropic passthrough. 
- -The supported path should be: - -```bash -export ANTHROPIC_BASE_URL=http://localhost:8080/p/anthropic -export ANTHROPIC_AUTH_TOKEN=$GOMODEL_MASTER_KEY -``` - -With GoModel configured with its own upstream `ANTHROPIC_API_KEY`, the SDK's -Anthropic Messages calls should flow as: - -```text -Claude Agent SDK -> /p/anthropic/v1/messages -> GoModel -> Anthropic /v1/messages -``` - -This should work because the Agent SDK is built on Claude Code, and Anthropic's -gateway requirements for Claude Code are exactly the native Messages endpoints -GoModel can expose through passthrough: - -- `POST /v1/messages` -- `POST /v1/messages/count_tokens` -- forwarding `anthropic-beta` and `anthropic-version` - -It is not yet safe to market this as full Claude Agent SDK support. GoModel has -not been validated against the current Python and TypeScript SDKs, and the -managed `/v1/messages` route is only a portable subset of Anthropic's native -Messages API. For full SDK compatibility, passthrough should remain the primary -path. - -## What the SDK expects - -The current Claude Agent SDK packages are: - -- Python: `claude-agent-sdk` -- TypeScript: `@anthropic-ai/claude-agent-sdk` - -The SDK runs the same agent loop and tool runtime used by Claude Code. It can -read and edit files, run shell commands, search the web, call MCP tools, use -subagents, apply hooks, and maintain sessions. At the model boundary that means -GoModel should expect normal Claude Code-style Anthropic traffic rather than a -small single-turn client request. - -The gateway-facing requirements are: - -- Anthropic-compatible base URL configured with `ANTHROPIC_BASE_URL`. -- Gateway auth through `ANTHROPIC_AUTH_TOKEN` or equivalent SDK environment. -- Native Messages request and response shapes. -- Native Messages SSE event streams. -- Native `count_tokens` behavior for context budgeting. -- Forwarded `anthropic-beta` and `anthropic-version` headers. 
-- Preserved Claude Code attribution headers: - - `X-Claude-Code-Session-Id` - - `X-Claude-Code-Agent-Id` - - `X-Claude-Code-Parent-Agent-Id` -- Long-lived requests. The Agent SDK defaults allow long API calls and retries, - and tool loops can run for much longer than a normal chat completion. - -Subscription-backed usage is a separate topic. Anthropic's docs say Agent SDK -and `claude -p` usage on subscription plans will draw from a separate monthly -Agent SDK credit starting 2026-06-15. They also state that third-party products -should use the API-key authentication methods unless previously approved. -GoModel's normal gateway path should therefore stay API-key backed unless there -is a separate compliance and product decision to support subscription-backed -harnesses. - -## Current support assessment - -### Supported now - -- GoModel already has a Claude Code guide using Anthropic passthrough: - `ANTHROPIC_BASE_URL=http://localhost:8080/p/anthropic`. -- Anthropic passthrough is enabled by default. -- `/p/anthropic/v1/...` is normalized to the Anthropic provider's native path. - This should cover `/v1/messages`, `/v1/messages/count_tokens`, and - `/v1/models`. -- Passthrough strips client `Authorization` and `X-Api-Key`, then applies the - server-side upstream Anthropic credential. -- Passthrough forwards normal request headers that the SDK needs, including - `anthropic-beta`, `anthropic-version`, and `X-Claude-Code-*`. -- Passthrough SSE responses are streamed without body translation. -- GoModel classifies `/p/...` as a model interaction route and clears the - per-request write deadline, so long streams are not constrained by the - server-wide 30 second write timeout. -- The managed Anthropic Messages ingress exists at: - - `POST /v1/messages` - - `POST /v1/messages/count_tokens` -- The managed route supports text, images, custom tools, `tool_choice`, basic - thinking output, Anthropic-style non-streaming responses, and Anthropic-style - SSE conversion. 
- -### Needs validation - -- Python SDK `query(...)` pointed at GoModel passthrough. -- Python SDK `ClaudeSDKClient` pointed at GoModel passthrough. -- TypeScript SDK `query(...)` pointed at GoModel passthrough. -- Text-only agent runs. -- Streaming agent runs. -- Built-in file tools: - - `Read` - - `Write` - - `Edit` - - `Glob` - - `Grep` -- `Bash` tool calls and command-heavy sessions. -- `WebSearch` and `WebFetch`. -- SDK MCP servers and SDK-created MCP tools. -- Subagents, including the `X-Claude-Code-Agent-Id` and - `X-Claude-Code-Parent-Agent-Id` headers. -- Hooks and permission callbacks. -- Session resume and continuation. -- Structured output. -- Large contexts and request bodies against GoModel's default body-size limit. -- Long-running streams against GoModel, proxies, and load balancers. -- Gateway model discovery with `CLAUDE_CODE_ENABLE_GATEWAY_MODEL_DISCOVERY=1`. -- Usage and cost extraction from passthrough streams. -- Native Anthropic error bodies as seen by the SDK. - -### Known or likely gaps - -- No first-class Claude Agent SDK guide. -- No SDK smoke examples in the repository. -- No contract tests against `claude-agent-sdk` or - `@anthropic-ai/claude-agent-sdk`. -- The existing Claude Code guide recommends - `CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`. Full Agent SDK support should - validate whether this workaround is still needed with Anthropic passthrough. -- Managed `/v1/messages/count_tokens` is heuristic, not tokenizer-exact. This - is risky for SDK context budgeting; native passthrough should be used when the - SDK needs exact Anthropic behavior. 
-- Managed `/v1/messages` drops or rejects several native Anthropic features: - - `cache_control` prompt-cache breakpoints - - input `thinking` and extended-thinking signatures - - server/built-in tools - - `top_k` - - `document` and other non-text/image blocks -- Managed `/v1/messages` can route to non-Anthropic providers, so it cannot - guarantee Anthropic-native behavior unless capabilities are explicitly gated. -- Passthrough error handling currently normalizes provider errors through - GoModel's error path. Verify that the body and status are compatible with the - SDK's Anthropic error parser. -- Passthrough audit and usage observers see SDK traffic, but subagent/session - attribution from `X-Claude-Code-*` headers is not yet surfaced as a first-class - reporting dimension. - -## Implementation checklist - -### P0: Prove passthrough SDK compatibility - -- Add `docs/guides/claude-agent-sdk.mdx`. - - Show `ANTHROPIC_BASE_URL=http://localhost:8080/p/anthropic`. - - Show `ANTHROPIC_AUTH_TOKEN=$GOMODEL_MASTER_KEY`. - - Explain that GoModel still needs an upstream `ANTHROPIC_API_KEY`. - - Explain API-key-backed gateway usage separately from Claude plan - subscription-backed usage. - - Recommend passthrough as the SDK compatibility path. - - Document that managed `/v1/messages` is a portable subset, not full SDK - compatibility. -- Add runnable examples under `examples/claude-agent-sdk/`. - - Python `query(...)` text-only example. - - Python `ClaudeSDKClient` streaming example. - - TypeScript `query(...)` text-only example. - - A low-risk tool example using `Read`, `Glob`, and `Grep`. -- Add manual or CI smoke tests that boot GoModel and run both SDKs against the - passthrough base URL. 
-- Verify these endpoints with the SDK: - - `POST /p/anthropic/v1/messages` - - `POST /p/anthropic/v1/messages/count_tokens` - - `GET /p/anthropic/v1/models` -- Test with and without `CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`, then update - the Claude Code guide with the current recommendation. - -### P0: Make passthrough fidelity explicit - -- Add tests that Anthropic passthrough forwards: - - `anthropic-beta` - - `anthropic-version` - - `X-Claude-Code-Session-Id` - - `X-Claude-Code-Agent-Id` - - `X-Claude-Code-Parent-Agent-Id` -- Add tests that passthrough strips client auth headers and replaces them with - GoModel's configured upstream Anthropic credential. -- Add SSE passthrough tests with Anthropic event names: - - `message_start` - - `content_block_start` - - `content_block_delta` - - `content_block_stop` - - `message_delta` - - `message_stop` - - `ping` - - `error` -- Verify passthrough error responses stay compatible with Anthropic SDK parsing. -- Verify streamed usage is captured for passthrough `/messages` responses. - -### P1: Improve SDK observability - -- Capture Claude Code session and agent headers into audit and usage metadata: - - `X-Claude-Code-Session-Id` - - `X-Claude-Code-Agent-Id` - - `X-Claude-Code-Parent-Agent-Id` -- Add dashboard filters for SDK session ID and agent ID if the fields prove - useful in real traffic. -- Decide whether User-Path can be derived from one of those headers by - configuration, or whether users should keep sending an explicit - `X-GoModel-User-Path` / managed-key user path. -- Document privacy implications: SDK traffic can contain source files, command - output, tool results, and MCP data. - -### P1: Validate advanced SDK features - -- Run SDK examples that exercise MCP servers. -- Run SDK examples that exercise subagents and parent/child agent attribution. -- Run SDK examples that exercise session resume. -- Run SDK examples that exercise structured output. 
-- Run SDK examples that exercise permission callbacks and hooks. -- Confirm these features do not require endpoints beyond Anthropic Messages, - `count_tokens`, and optional gateway model discovery. - -### P1: Tighten managed `/v1/messages` - -- Keep documenting passthrough as the full-fidelity path. -- If the selected provider is Anthropic, optionally support native - `/v1/messages/count_tokens` instead of the heuristic estimate. -- Preserve or explicitly reject more Anthropic-native fields with clear errors: - - `cache_control` - - `thinking` signatures - - `document` - - server/built-in tool definitions - - beta-specific fields -- Add capability metadata so non-Anthropic providers fail early for - Anthropic-native SDK features instead of receiving malformed translated - requests. - -### P1: Validate long-running behavior - -- Run a multi-turn SDK session that includes file reads, tool calls, and - streaming output for at least 10 minutes. -- Verify request cancellation propagates cleanly to the upstream Anthropic - request. -- Verify SDK retry behavior does not double-count usage in GoModel. -- Verify large file/context requests against `BODY_SIZE_LIMIT`. -- Document recommended proxy and load-balancer timeouts for SDK traffic. - -### P2: Subscription-backed harness investigation - -- Treat this separately from Agent SDK API support. -- Review Anthropic's current terms and gateway docs before implementation. -- Decide whether GoModel should support only API-key-backed Agent SDK traffic, - or whether subscription-backed Claude Code / Agent SDK use is in scope. -- If it is in scope, design a separate auth flow rather than mixing Claude plan - credentials into the existing `ANTHROPIC_API_KEY` provider configuration. - -## Suggested public claim - -Until the P0 work is done: - -> GoModel supports Claude Code today and should work with the Claude Agent SDK -> through Anthropic passthrough. Full SDK compatibility is being validated. 
- -After P0: - -> GoModel supports the Claude Agent SDK through Anthropic Messages passthrough -> for text, streaming, basic built-in tool loops, and gateway model discovery. -> The managed `/v1/messages` endpoint supports a portable Anthropic Messages -> subset for cross-provider routing. - -After P1 advanced validation: - -> GoModel supports the Claude Agent SDK through Anthropic Messages passthrough -> for MCP, subagents, sessions, hooks, structured output, and long-running -> agent workflows. - -## References - -- Anthropic Claude Agent SDK overview: - https://code.claude.com/docs/en/agent-sdk/overview -- Anthropic Claude Agent SDK quickstart: - https://code.claude.com/docs/en/agent-sdk/quickstart -- Anthropic Claude Code LLM gateway requirements: - https://code.claude.com/docs/en/llm-gateway -- GoModel Claude Code guide: - `docs/guides/claude-code.mdx` -- GoModel Anthropic Messages API guide: - `docs/advanced/anthropic-messages-api.mdx` -- GoModel passthrough guide: - `docs/features/passthrough-api.mdx` From a7b87b20008d3661b040a1a83f8adef818489f3e Mon Sep 17 00:00:00 2001 From: "Jakub A. 
W" Date: Thu, 4 Jun 2026 23:23:47 -0400 Subject: [PATCH 10/12] fix(agents): address Responses compatibility review --- internal/core/json_fields.go | 18 +++++- internal/core/json_fields_test.go | 38 ++++++++++++ internal/core/responses_json.go | 20 +++++- internal/core/responses_json_test.go | 61 +++++++++++++++++++ .../providers/anthropic/anthropic_test.go | 39 ++++++++++++ .../anthropic/request_translation.go | 20 +++++- internal/providers/responses_adapter.go | 12 +++- internal/providers/responses_adapter_test.go | 36 +++++++++++ 8 files changed, 236 insertions(+), 8 deletions(-) diff --git a/internal/core/json_fields.go b/internal/core/json_fields.go index c1867616..12d6e744 100644 --- a/internal/core/json_fields.go +++ b/internal/core/json_fields.go @@ -81,6 +81,9 @@ func MergeUnknownJSONFields(base UnknownJSONFields, additions map[string]json.Ra return base, nil } additionFields := UnknownJSONFieldsFromMap(additions) + if err := validateUnknownJSONObject(additionFields.raw); err != nil { + return UnknownJSONFields{}, err + } if base.IsEmpty() { return additionFields, nil } @@ -125,7 +128,8 @@ func mergeUnknownJSONFieldsRaw(baseBody, additionBody []byte, overrideKeys map[s return buf.Bytes(), nil } -func appendUnknownJSONMembers(buf *bytes.Buffer, body []byte, skip map[string]struct{}, wrote *bool) error { +func validateUnknownJSONObject(body []byte) error { + body = bytes.TrimSpace(body) if len(body) == 0 || bytes.Equal(body, []byte("{}")) { return nil } @@ -136,6 +140,18 @@ func appendUnknownJSONMembers(buf *bytes.Buffer, body []byte, skip map[string]st if !root.IsObject() { return fmt.Errorf("expected JSON object") } + return nil +} + +func appendUnknownJSONMembers(buf *bytes.Buffer, body []byte, skip map[string]struct{}, wrote *bool) error { + body = bytes.TrimSpace(body) + if err := validateUnknownJSONObject(body); err != nil { + return err + } + if len(body) == 0 || bytes.Equal(body, []byte("{}")) { + return nil + } + root := gjson.ParseBytes(body) 
root.ForEach(func(key, value gjson.Result) bool { if _, shouldSkip := skip[key.String()]; shouldSkip { diff --git a/internal/core/json_fields_test.go b/internal/core/json_fields_test.go index 69774314..62505580 100644 --- a/internal/core/json_fields_test.go +++ b/internal/core/json_fields_test.go @@ -140,6 +140,44 @@ func TestMergeUnknownJSONFields_PreservesRawBaseMembers(t *testing.T) { } } +func TestMergeUnknownJSONFields_ErrorPaths(t *testing.T) { + tests := []struct { + name string + base UnknownJSONFields + additions map[string]json.RawMessage + }{ + { + name: "malformed base raw", + base: UnknownJSONFields{raw: json.RawMessage(`{"keep":`)}, + additions: map[string]json.RawMessage{ + "added": json.RawMessage(`true`), + }, + }, + { + name: "non object base raw", + base: UnknownJSONFields{raw: json.RawMessage(`[1,2,3]`)}, + additions: map[string]json.RawMessage{ + "added": json.RawMessage(`true`), + }, + }, + { + name: "malformed addition raw", + base: UnknownJSONFields{}, + additions: map[string]json.RawMessage{ + "added": json.RawMessage(`{`), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if _, err := MergeUnknownJSONFields(tt.base, tt.additions); err == nil { + t.Fatal("MergeUnknownJSONFields() error = nil, want error") + } + }) + } +} + func TestMergeUnknownJSONFields_NoAdditionsReturnsBase(t *testing.T) { base := UnknownJSONFieldsFromMap(map[string]json.RawMessage{"a": json.RawMessage(`1`)}) diff --git a/internal/core/responses_json.go b/internal/core/responses_json.go index d46ee26e..1ad8111f 100644 --- a/internal/core/responses_json.go +++ b/internal/core/responses_json.go @@ -161,8 +161,24 @@ func (c *ResponsesConversationRef) UnmarshalJSON(data []byte) error { // MarshalJSON preserves whether the conversation was originally supplied as a // string or object. Programmatic values default to the compact string ID form. 
func (c ResponsesConversationRef) MarshalJSON() ([]byte, error) { - if len(bytes.TrimSpace(c.Raw)) > 0 { - return cloneRawMessage(c.Raw), nil + trimmed := bytes.TrimSpace(c.Raw) + if len(trimmed) > 0 { + if c.ID == "" { + return cloneRawMessage(trimmed), nil + } + switch trimmed[0] { + case '"': + return json.Marshal(c.ID) + case '{': + var obj map[string]any + if err := json.Unmarshal(trimmed, &obj); err != nil { + return cloneRawMessage(trimmed), nil + } + obj["id"] = c.ID + return json.Marshal(obj) + default: + return cloneRawMessage(trimmed), nil + } } if c.ID != "" { return json.Marshal(c.ID) diff --git a/internal/core/responses_json_test.go b/internal/core/responses_json_test.go index 798e190e..ce280589 100644 --- a/internal/core/responses_json_test.go +++ b/internal/core/responses_json_test.go @@ -99,6 +99,67 @@ func TestResponsesRequestUnmarshalJSON_PreservesToolCallingControls(t *testing.T } } +func TestResponsesConversationRefMarshalJSON_UsesUpdatedID(t *testing.T) { + tests := []struct { + name string + raw string + want string + }{ + { + name: "string shape", + raw: `"conv_old"`, + want: `"conv_new"`, + }, + { + name: "object shape", + raw: `{"id":"conv_old","metadata":{"team":"alpha"}}`, + want: `{"id":"conv_new","metadata":{"team":"alpha"}}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var ref ResponsesConversationRef + if err := json.Unmarshal([]byte(tt.raw), &ref); err != nil { + t.Fatalf("json.Unmarshal() error = %v", err) + } + ref.ID = "conv_new" + + body, err := json.Marshal(ref) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + if !jsonEqual(body, []byte(tt.want)) { + t.Fatalf("body = %s, want JSON equivalent to %s", body, tt.want) + } + }) + } +} + +func jsonEqual(a, b []byte) bool { + var av any + if err := json.Unmarshal(a, &av); err != nil { + return false + } + var bv any + if err := json.Unmarshal(b, &bv); err != nil { + return false + } + return jsonValueEqual(av, bv) +} + +func 
jsonValueEqual(a, b any) bool { + ab, err := json.Marshal(a) + if err != nil { + return false + } + bb, err := json.Marshal(b) + if err != nil { + return false + } + return bytes.Equal(ab, bb) +} + func TestResponsesRequestMarshalJSON_PreservesInput(t *testing.T) { body, err := json.Marshal(ResponsesRequest{ Model: "gpt-4o-mini", diff --git a/internal/providers/anthropic/anthropic_test.go b/internal/providers/anthropic/anthropic_test.go index 824f3c9f..adc3e6f4 100644 --- a/internal/providers/anthropic/anthropic_test.go +++ b/internal/providers/anthropic/anthropic_test.go @@ -1389,6 +1389,45 @@ func TestConvertToAnthropicRequest_RejectsUnsupportedChatExtras(t *testing.T) { } } +func TestConvertToAnthropicRequest_IgnoresNoopChatExtras(t *testing.T) { + tests := []struct { + name string + field string + value json.RawMessage + }{ + { + name: "null response format", + field: "response_format", + value: json.RawMessage(`null`), + }, + { + name: "text response format", + field: "response_format", + value: json.RawMessage(`{"type":"text"}`), + }, + { + name: "null verbosity", + field: "verbosity", + value: json.RawMessage(`null`), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := convertToAnthropicRequest(&core.ChatRequest{ + Model: "claude-sonnet-4-5-20250929", + Messages: []core.Message{{Role: "user", Content: "hi"}}, + ExtraFields: core.UnknownJSONFieldsFromMap(map[string]json.RawMessage{ + tt.field: tt.value, + }), + }) + if err != nil { + t.Fatalf("convertToAnthropicRequest() error = %v, want nil", err) + } + }) + } +} + func TestConvertToAnthropicRequest_PreservesTopP(t *testing.T) { topP := 0.2 result, err := convertToAnthropicRequest(&core.ChatRequest{ diff --git a/internal/providers/anthropic/request_translation.go b/internal/providers/anthropic/request_translation.go index eb7b62ae..6cf20e4f 100644 --- a/internal/providers/anthropic/request_translation.go +++ b/internal/providers/anthropic/request_translation.go @@ 
-359,13 +359,29 @@ func convertToAnthropicRequest(req *core.ChatRequest) (*anthropicRequest, error) func validateAnthropicUnsupportedChatExtras(extra core.UnknownJSONFields) error { for _, field := range []string{"response_format", "verbosity"} { - if extra.Lookup(field) != nil { - return core.NewInvalidRequestError("chat field "+field+" is not supported by Anthropic translation", nil) + raw := bytes.TrimSpace(extra.Lookup(field)) + if len(raw) == 0 || bytes.Equal(raw, []byte("null")) { + continue + } + if field == "response_format" && isNoopResponseFormat(raw) { + continue } + return core.NewInvalidRequestError("chat field "+field+" is not supported by Anthropic translation", nil) } return nil } +func isNoopResponseFormat(raw json.RawMessage) bool { + var responseFormat struct { + Type string `json:"type"` + } + if err := json.Unmarshal(raw, &responseFormat); err != nil { + return false + } + responseFormatType := strings.TrimSpace(responseFormat.Type) + return responseFormatType == "" || responseFormatType == "text" +} + // convertResponsesRequestToAnthropic converts a canonical Responses request by // first mapping it onto shared chat semantics and then translating that semantic // request into Anthropic's native message payload. 
diff --git a/internal/providers/responses_adapter.go b/internal/providers/responses_adapter.go index 0875b8cb..2060d7af 100644 --- a/internal/providers/responses_adapter.go +++ b/internal/providers/responses_adapter.go @@ -129,10 +129,12 @@ func validateResponsesToolChoiceForChatTranslation(choice any) error { } choiceType, _ := choiceMap["type"].(string) - if strings.TrimSpace(choiceType) != "function" { + switch strings.TrimSpace(choiceType) { + case "function", "auto", "required", "none": + return nil + default: return unsupportedResponsesChatTranslationTool(choiceType) } - return nil } // responsesTextToChatExtraFields maps the Responses "text" settings onto the @@ -288,7 +290,11 @@ func normalizeResponsesToolChoiceForChat(choice any) any { } choiceType, _ := choiceMap["type"].(string) - if strings.TrimSpace(choiceType) != "function" { + switch choiceType := strings.TrimSpace(choiceType); choiceType { + case "auto", "required", "none": + return choiceType + case "function": + default: return choice } if _, ok := choiceMap["function"].(map[string]any); ok { diff --git a/internal/providers/responses_adapter_test.go b/internal/providers/responses_adapter_test.go index fb8a8dbe..f8856ba2 100644 --- a/internal/providers/responses_adapter_test.go +++ b/internal/providers/responses_adapter_test.go @@ -414,6 +414,42 @@ func TestConvertResponsesRequestToChat_MapsPortableAgentsSDKFields(t *testing.T) } } +func TestConvertResponsesRequestToChat_NormalizesToolChoiceAliases(t *testing.T) { + tests := []struct { + name string + req *core.ResponsesRequest + want string + }{ + { + name: "tool_choice none alias", + req: &core.ResponsesRequest{Model: "test-model", Input: "Hello", ToolChoice: map[string]any{"type": "none"}}, + want: "none", + }, + { + name: "tool_choice auto alias", + req: &core.ResponsesRequest{Model: "test-model", Input: "Hello", ToolChoice: map[string]any{"type": "auto"}}, + want: "auto", + }, + { + name: "tool_choice required alias", + req: 
&core.ResponsesRequest{Model: "test-model", Input: "Hello", ToolChoice: map[string]any{"type": "required"}}, + want: "required", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + chatReq, err := ConvertResponsesRequestToChat(tt.req) + if err != nil { + t.Fatalf("ConvertResponsesRequestToChat() error = %v", err) + } + if chatReq.ToolChoice != tt.want { + t.Fatalf("ToolChoice = %#v, want %q", chatReq.ToolChoice, tt.want) + } + }) + } +} + func TestConvertResponsesRequestToChat_RejectsStatefulAgentsSDKFields(t *testing.T) { tests := []struct { name string From 1377cdb99c975272252c2aee0d932bafba9d5db0 Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Fri, 5 Jun 2026 08:27:03 -0400 Subject: [PATCH 11/12] fix(agents): tighten conversation reference marshaling --- internal/core/responses_json.go | 18 ++++++++--------- internal/core/responses_json_test.go | 26 ++++++++++++++++++++++++- internal/providers/responses_adapter.go | 1 + 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/internal/core/responses_json.go b/internal/core/responses_json.go index 1ad8111f..72f62b2f 100644 --- a/internal/core/responses_json.go +++ b/internal/core/responses_json.go @@ -159,31 +159,29 @@ func (c *ResponsesConversationRef) UnmarshalJSON(data []byte) error { } // MarshalJSON preserves whether the conversation was originally supplied as a -// string or object. Programmatic values default to the compact string ID form. +// string or object. The ID field is authoritative so callers can update or +// clear a decoded reference without leaking the original raw value. 
func (c ResponsesConversationRef) MarshalJSON() ([]byte, error) { trimmed := bytes.TrimSpace(c.Raw) + if c.ID == "" { + return []byte("null"), nil + } if len(trimmed) > 0 { - if c.ID == "" { - return cloneRawMessage(trimmed), nil - } switch trimmed[0] { case '"': return json.Marshal(c.ID) case '{': var obj map[string]any if err := json.Unmarshal(trimmed, &obj); err != nil { - return cloneRawMessage(trimmed), nil + return nil, err } obj["id"] = c.ID return json.Marshal(obj) default: - return cloneRawMessage(trimmed), nil + return nil, fmt.Errorf("conversation raw must be a string or object") } } - if c.ID != "" { - return json.Marshal(c.ID) - } - return []byte("null"), nil + return json.Marshal(c.ID) } // MarshalJSON preserves dynamic input payloads while supporting Swagger-only schema fields. diff --git a/internal/core/responses_json_test.go b/internal/core/responses_json_test.go index ce280589..457347ea 100644 --- a/internal/core/responses_json_test.go +++ b/internal/core/responses_json_test.go @@ -102,19 +102,32 @@ func TestResponsesRequestUnmarshalJSON_PreservesToolCallingControls(t *testing.T func TestResponsesConversationRefMarshalJSON_UsesUpdatedID(t *testing.T) { tests := []struct { name string + id string raw string want string }{ { name: "string shape", + id: "conv_new", raw: `"conv_old"`, want: `"conv_new"`, }, { name: "object shape", + id: "conv_new", raw: `{"id":"conv_old","metadata":{"team":"alpha"}}`, want: `{"id":"conv_new","metadata":{"team":"alpha"}}`, }, + { + name: "clear string shape", + raw: `"conv_old"`, + want: `null`, + }, + { + name: "clear object shape", + raw: `{"id":"conv_old","metadata":{"team":"alpha"}}`, + want: `null`, + }, } for _, tt := range tests { @@ -123,7 +136,7 @@ func TestResponsesConversationRefMarshalJSON_UsesUpdatedID(t *testing.T) { if err := json.Unmarshal([]byte(tt.raw), &ref); err != nil { t.Fatalf("json.Unmarshal() error = %v", err) } - ref.ID = "conv_new" + ref.ID = tt.id body, err := json.Marshal(ref) if err != nil 
{ @@ -136,6 +149,17 @@ func TestResponsesConversationRefMarshalJSON_UsesUpdatedID(t *testing.T) { } } +func TestResponsesConversationRefMarshalJSON_InvalidRaw(t *testing.T) { + ref := ResponsesConversationRef{ + ID: "conv_new", + Raw: json.RawMessage(`{"id":`), + } + + if _, err := json.Marshal(ref); err == nil { + t.Fatal("json.Marshal() error = nil, want invalid raw conversation error") + } +} + func jsonEqual(a, b []byte) bool { var av any if err := json.Unmarshal(a, &av); err != nil { diff --git a/internal/providers/responses_adapter.go b/internal/providers/responses_adapter.go index 2060d7af..90d1f85a 100644 --- a/internal/providers/responses_adapter.go +++ b/internal/providers/responses_adapter.go @@ -294,6 +294,7 @@ func normalizeResponsesToolChoiceForChat(choice any) any { case "auto", "required", "none": return choiceType case "function": + // Function choices stay object-shaped, with legacy name-form normalized below. default: return choice } From d0da78ea10ffd3da92d29b592bdd00d3dd05b1a3 Mon Sep 17 00:00:00 2001 From: "Jakub A. 
W" Date: Sat, 6 Jun 2026 00:57:16 -0400 Subject: [PATCH 12/12] docs(agents): fix conversation schema --- cmd/gomodel/docs/docs.go | 20 ++++++++++++++---- docs/openapi.json | 20 ++++++++++++++---- tools/openapi-postprocess.mjs | 19 +++++++++++++++++ tools/swagger-postprocess.mjs | 39 +++++++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 8 deletions(-) diff --git a/cmd/gomodel/docs/docs.go b/cmd/gomodel/docs/docs.go index 8eebea74..48e957d5 100644 --- a/cmd/gomodel/docs/docs.go +++ b/cmd/gomodel/docs/docs.go @@ -5837,7 +5837,10 @@ const docTemplate = `{ "context_management": {}, "conversation": { "description": "Conversation accepts either a conversation ID string or an object with id.", - "allOf": [ + "oneOf": [ + { + "type": "string" + }, { "$ref": "#/definitions/core.ResponsesConversationRef" } @@ -5999,7 +6002,10 @@ const docTemplate = `{ "context_management": {}, "conversation": { "description": "Conversation accepts either a conversation ID string or an object with id.", - "allOf": [ + "oneOf": [ + { + "type": "string" + }, { "$ref": "#/definitions/core.ResponsesConversationRef" } @@ -6146,7 +6152,10 @@ const docTemplate = `{ "id": { "type": "string" } - } + }, + "required": [ + "id" + ] }, "core.ResponsesError": { "type": "object", @@ -6198,7 +6207,10 @@ const docTemplate = `{ "context_management": {}, "conversation": { "description": "Conversation accepts either a conversation ID string or an object with id.", - "allOf": [ + "oneOf": [ + { + "type": "string" + }, { "$ref": "#/definitions/core.ResponsesConversationRef" } diff --git a/docs/openapi.json b/docs/openapi.json index 4d962060..9ae9a063 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -7912,7 +7912,10 @@ "context_management": {}, "conversation": { "description": "Conversation accepts either a conversation ID string or an object with id.", - "allOf": [ + "oneOf": [ + { + "type": "string" + }, { "$ref": "#/components/schemas/core.ResponsesConversationRef" } @@ -8085,7 +8088,10 
@@ "context_management": {}, "conversation": { "description": "Conversation accepts either a conversation ID string or an object with id.", - "allOf": [ + "oneOf": [ + { + "type": "string" + }, { "$ref": "#/components/schemas/core.ResponsesConversationRef" } @@ -8243,7 +8249,10 @@ "id": { "type": "string" } - } + }, + "required": [ + "id" + ] }, "core.ResponsesError": { "type": "object", @@ -8295,7 +8304,10 @@ "context_management": {}, "conversation": { "description": "Conversation accepts either a conversation ID string or an object with id.", - "allOf": [ + "oneOf": [ + { + "type": "string" + }, { "$ref": "#/components/schemas/core.ResponsesConversationRef" } diff --git a/tools/openapi-postprocess.mjs b/tools/openapi-postprocess.mjs index 4570f6ab..5804e584 100644 --- a/tools/openapi-postprocess.mjs +++ b/tools/openapi-postprocess.mjs @@ -67,6 +67,23 @@ function applyResponseInputOneOf(name) { properties.input = input; } +function applyResponseConversationOneOf(name) { + const properties = schema(name).properties; + if (!properties?.conversation) { + throw new Error(`missing conversation property on schema: ${name}`); + } + + const conversation = {}; + if (properties.conversation.description) { + conversation.description = properties.conversation.description; + } + conversation.oneOf = clone([ + { type: "string" }, + { $ref: "#/components/schemas/core.ResponsesConversationRef" }, + ]); + properties.conversation = conversation; +} + function ensureResponsesInputElementSchema() { const schemas = spec.components?.schemas; if (!schemas) { @@ -420,6 +437,7 @@ ensureRequiredProperty("admin.deleteModelOverrideRequest", "selector"); ensureRequiredProperty("admin.upsertModelPricingOverrideRequest", "selector"); ensureRequiredProperty("admin.upsertModelPricingOverrideRequest", "pricing"); ensureRequiredProperty("admin.deleteModelPricingOverrideRequest", "selector"); +ensureRequiredProperty("core.ResponsesConversationRef", "id"); applyBudgetKeySchemaConstraints(); 
applyStringArrayPropertyBounds("admin.upsertModelOverrideRequest", "user_paths", 100, 1024); applyPricingSchemaConstraints(); @@ -450,6 +468,7 @@ for (const name of [ "core.ResponseCompactRequest", ]) { applyResponseInputOneOf(name); + applyResponseConversationOneOf(name); } const inputItemList = schema("core.ResponseInputItemListResponse"); diff --git a/tools/swagger-postprocess.mjs b/tools/swagger-postprocess.mjs index 6f75e2e4..d4da8485 100644 --- a/tools/swagger-postprocess.mjs +++ b/tools/swagger-postprocess.mjs @@ -33,6 +33,10 @@ function schema(name) { return result; } +function clone(value) { + return JSON.parse(JSON.stringify(value)); +} + function anthropicContentSchema() { return { oneOf: [ @@ -61,6 +65,33 @@ function stringOrFreeFormObjectSchema() { }; } +function applyResponseConversationOneOf(name) { + const properties = schema(name).properties; + if (!properties?.conversation) { + throw new Error(`missing conversation property on definition: ${name}`); + } + + const conversation = {}; + if (properties.conversation.description) { + conversation.description = properties.conversation.description; + } + conversation.oneOf = clone([ + { type: "string" }, + { $ref: "#/definitions/core.ResponsesConversationRef" }, + ]); + properties.conversation = conversation; +} + +function ensureRequiredProperty(schemaName, propertyName) { + const target = schema(schemaName); + if (!target.properties?.[propertyName]) { + throw new Error(`missing ${propertyName} property on definition: ${schemaName}`); + } + const required = new Set(target.required || []); + required.add(propertyName); + target.required = Array.from(required).sort(); +} + function ensureAnthropicContentBlockSchema() { if (!spec.definitions) { throw new Error("missing Swagger definitions"); @@ -91,6 +122,14 @@ function applyAnthropicMessageSchemas() { } applyAnthropicMessageSchemas(); +ensureRequiredProperty("core.ResponsesConversationRef", "id"); +for (const name of [ + "core.ResponsesRequest", + 
"core.ResponseInputTokensRequest", + "core.ResponseCompactRequest", +]) { + applyResponseConversationOneOf(name); +} let rendered = JSON.stringify(spec, null, 4); rendered = rendered.replace(