diff --git a/apps/docs/content/docs/llm-sdk/generate-text.mdx b/apps/docs/content/docs/llm-sdk/generate-text.mdx index ea6deb0..89c417f 100644 --- a/apps/docs/content/docs/llm-sdk/generate-text.mdx +++ b/apps/docs/content/docs/llm-sdk/generate-text.mdx @@ -42,9 +42,23 @@ const result = await generateText({ temperature: 0.7, maxTokens: 4096, signal: abortController.signal, + + // Optional: Structured JSON output (provider-translated) + responseFormat: { + type: 'json_schema', + json_schema: { + name: 'response', + schema: { type: 'object', properties: { ... }, required: [...] }, + strict: true, + }, + }, }); ``` + + `responseFormat` works across OpenAI, Anthropic, Google, Azure, xAI, Together, Fireworks, OpenRouter, and Ollama — each adapter translates to the provider's native field. See [Structured Output](/docs/llm-sdk/structured-output) for per-provider details and gotchas. + + --- ## Response Object diff --git a/apps/docs/content/docs/llm-sdk/meta.json b/apps/docs/content/docs/llm-sdk/meta.json index 16bae7a..b892436 100644 --- a/apps/docs/content/docs/llm-sdk/meta.json +++ b/apps/docs/content/docs/llm-sdk/meta.json @@ -1,5 +1,5 @@ { "title": "LLM SDK", "icon": "AiChip1", - "pages": ["generate-text", "stream-text", "tools"] + "pages": ["generate-text", "stream-text", "structured-output", "tools"] } diff --git a/apps/docs/content/docs/llm-sdk/stream-text.mdx b/apps/docs/content/docs/llm-sdk/stream-text.mdx index 17ef71e..49567dd 100644 --- a/apps/docs/content/docs/llm-sdk/stream-text.mdx +++ b/apps/docs/content/docs/llm-sdk/stream-text.mdx @@ -54,9 +54,23 @@ const result = await streamText({ // Optional: Generation settings temperature: 0.7, maxTokens: 4096, + + // Optional: Structured JSON output (provider-translated) + responseFormat: { + type: 'json_schema', + json_schema: { + name: 'response', + schema: { type: 'object', properties: { ... }, required: [...] }, + strict: true, + }, + }, }); ``` + + `responseFormat` works across all supported providers — each adapter translates to the provider's native field. See [Structured Output](/docs/llm-sdk/structured-output) for per-provider details and gotchas. + + --- ## Response Object diff --git a/apps/docs/content/docs/llm-sdk/structured-output.mdx b/apps/docs/content/docs/llm-sdk/structured-output.mdx new file mode 100644 index 0000000..7164e77 --- /dev/null +++ b/apps/docs/content/docs/llm-sdk/structured-output.mdx @@ -0,0 +1,219 @@ +--- +title: Structured Output +description: Get JSON-schema-validated responses from any provider +--- + +import { Callout } from 'fumadocs-ui/components/callout'; +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +Pass `responseFormat` to `generateText()` or `streamText()` to get +JSON-schema-validated responses. The SDK translates the unified shape to +each provider's native API — works the same whether you're on OpenAI, +Anthropic, Google, Azure, xAI, Together, Fireworks, OpenRouter, or Ollama. + +```ts +import { generateText } from '@yourgpt/llm-sdk'; +import { openai } from '@yourgpt/llm-sdk/openai'; + +const result = await generateText({ + model: openai('gpt-4o'), + prompt: 'List the top 3 fastest land animals.', + responseFormat: { + type: 'json_schema', + json_schema: { + name: 'animals_response', + schema: { + type: 'object', + properties: { + animals: { + type: 'array', + items: { + type: 'object', + properties: { + name: { type: 'string' }, + top_speed_kmh: { type: 'number' }, + }, + required: ['name', 'top_speed_kmh'], + }, + }, + }, + required: ['animals'], + }, + strict: true, + }, + }, +}); + +const data = JSON.parse(result.text); +// → { animals: [{ name: 'Cheetah', top_speed_kmh: 120 }, ...] } +``` + +--- + +## ResponseFormat shape + +The unified type uses OpenAI's `response_format` shape — callers who already +write `response_format` for OpenAI can pass it through unchanged. + +```ts +type ResponseFormat = + | { type: 'json_object' } + | { + type: 'json_schema'; + json_schema: { + name: string; + schema: Record; // JSON Schema + strict?: boolean; // default: true + }; + }; +``` + +- `type: 'json_object'` — free-form JSON, no schema enforcement. Adapters + that don't have a native "JSON mode without schema" (Anthropic) inject a + system-prompt suffix asking for JSON instead. +- `type: 'json_schema'` — schema-validated output. Recommended. + +--- + +## Per-provider translation + +Each adapter translates `responseFormat` to its provider's native field: + +| Provider | Native field | +|---|---| +| OpenAI Chat / Azure / xAI / Together / Fireworks / OpenRouter | `response_format` | +| OpenAI Responses API | `text.format` (different shape) | +| Anthropic Claude 3.5+ | `output_config.format` | +| Google Gemini | `responseJsonSchema` | +| Ollama 0.5+ | `format` | + +You don't need to think about this — the SDK handles it. The notes below +matter only if you hit edge cases. + +--- + +## Provider gotchas + +### Anthropic — schema sanitization + +Anthropic's structured-output schema subset is narrower than OpenAI's. The +adapter automatically strips keys Anthropic rejects so your call doesn't 400: + +- **Stripped:** `minimum`, `maximum`, `exclusiveMinimum`, `exclusiveMaximum`, + `multipleOf`, `minLength`, `maxLength`, `minItems`, `maxItems`, + `minProperties`, `maxProperties`, `pattern`, `$schema` +- **Converted:** `oneOf` → `anyOf` (Anthropic accepts the latter, not the former) +- **Forced:** `additionalProperties: false` on every object + +If you rely on numeric or length constraints for validation, do that +client-side after `JSON.parse()` rather than encoding it in the schema. + + + Anthropic's `output_config.format` is GA on Claude API and AWS Bedrock for + Claude 3.5 / 3.7 / 4 series. It is NOT available on Google Vertex AI. Older + Claude 3 base models (`claude-3-opus-20240229` etc.) are not supported either. + Tracking: [issue #96](https://github.com/YourGPT/copilot-sdk/issues/96). + + +### Google Gemini — OpenAPI subset + +Gemini's `responseJsonSchema` accepts an OpenAPI 3.0 subset. The adapter +strips keys Gemini doesn't recognize: + +- **Stripped:** `oneOf`, `anyOf`, `$ref`, `$defs`, `definitions`, `pattern`, + `$schema`, `additionalProperties` + +Schemas with discriminated unions or shared definitions need to be inlined +before passing to Gemini. + +### xAI — `additionalProperties` default + +xAI inverts OpenAI's default: `additionalProperties` defaults to `false` and +must be explicitly set `true` if you want extra properties allowed. The +adapter passes your schema through unchanged, so be explicit. + +### Ollama — local only + +Ollama's `format` field requires Ollama v0.5+ for schema-constrained output +(string `"json"` works on older versions for free-form JSON). Ollama Cloud +does not support structured outputs at the time of writing. + +--- + +## Capability gate + +Each model in the registry carries a `supportsJsonMode` capability flag. +When you pass `responseFormat` to a model that doesn't support it, the SDK +logs a warning: + +``` +[llm-sdk] anthropic/claude-3-haiku-20240307 does not support structured +output (responseFormat); the request will be sent but the provider may +ignore it. +``` + +This is a warning, not an error — the request still goes through. Switch +to a supported model (e.g. `claude-3-5-sonnet-latest`) or open an issue if +you need fallback behavior. + +--- + +## Reasoning models — token semantics + +For OpenAI reasoning models (`o1`, `o3`, `o4`, `gpt-5.x`): + +- `maxTokens` is internally translated to `max_completion_tokens` +- `temperature` is silently dropped (these models reject it) +- `max_completion_tokens` includes BOTH reasoning tokens AND visible output + tokens — set generously (`maxTokens: 4000+`) or you may see truncated + responses + +```ts +const result = await generateText({ + model: openai('o3-mini'), + prompt: 'Solve: ...', + maxTokens: 4000, // → max_completion_tokens internally + temperature: 0.7, // → silently dropped + responseFormat: { ... }, +}); +``` + +--- + +## Fallback chains + +`responseFormat` works through fallback chains transparently. Each provider +in the chain receives the schema in its native format: + +```ts +import { createFallbackChain } from '@yourgpt/llm-sdk/fallback'; +import { createOpenAI } from '@yourgpt/llm-sdk/openai'; +import { createAnthropic } from '@yourgpt/llm-sdk/anthropic'; + +const chain = createFallbackChain({ + models: [ + openai.languageModel('gpt-4o'), + anthropic.languageModel('claude-3-5-sonnet-latest'), + ], + strategy: 'priority', +}); + +// Same responseFormat works on either hop +const result = await chain.chat({ + messages: [...], + config: { + responseFormat: { type: 'json_schema', json_schema: { ... } }, + }, +}); +``` + +A working end-to-end demo lives in `examples/fallback-demo` — see the +`/chat/structured` route. + +--- + +## Next Steps + +- [generateText()](/docs/llm-sdk/generate-text) — full text generation API +- [streamText()](/docs/llm-sdk/stream-text) — streaming variant +- [Tools](/docs/llm-sdk/tools) — function calling (orthogonal to structured output) diff --git a/examples/fallback-demo/src/index.ts b/examples/fallback-demo/src/index.ts index dc5a6aa..027530d 100644 --- a/examples/fallback-demo/src/index.ts +++ b/examples/fallback-demo/src/index.ts @@ -345,6 +345,76 @@ app.post("/chat/retry-test", async (req, res) => { } }); +// ─── Route 9: Structured output (responseFormat) ───────────────────────────── +// +// Exercises the unified `responseFormat` field across an OpenAI → Anthropic → +// Google fallback chain. Each adapter translates the OpenAI-shape JSON schema +// to its provider's native structured-output API (`response_format`, +// `output_config.format`, `responseJsonSchema`). +// +// Test: +// curl -s -X POST http://localhost:3000/chat/structured \ +// -H "Content-Type: application/json" \ +// -d '{"messages":[{"role":"user","content":"List the top 3 fastest land animals with their top speed in km/h."}]}' + +const google = createOpenAI({ + apiKey: process.env.GOOGLE_API_KEY, + baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai/", +}); + +const ANIMALS_SCHEMA = { + type: "object", + properties: { + animals: { + type: "array", + items: { + type: "object", + properties: { + name: { type: "string" }, + top_speed_kmh: { type: "number" }, + }, + required: ["name", "top_speed_kmh"], + }, + }, + }, + required: ["animals"], +} as const; + +const structuredRuntime = createRuntime({ + adapter: createFallbackChain({ + models: [ + openai.languageModel("gpt-4o"), + anthropic.languageModel("claude-3-5-sonnet-latest"), + google.languageModel("gemini-2.0-flash"), + ], + strategy: "priority", + onFallback: onFallbackLog("structured"), + }), + systemPrompt: "You return data as JSON matching the requested schema.", +}); + +app.post("/chat/structured", async (req, res) => { + try { + const result = await structuredRuntime.chat({ + ...req.body, + config: { + ...req.body.config, + responseFormat: { + type: "json_schema", + json_schema: { + name: "animals_response", + schema: ANIMALS_SCHEMA, + strict: true, + }, + }, + }, + }); + res.json(result); + } catch (err) { + handleError(err, res); + } +}); + // ─── Route 7: Tools + FORCED FALLBACK (dead primary) ───────────────────────── // // Same tools, but primary is a dead URL. @@ -416,4 +486,7 @@ app.listen(PORT, () => { console.log( " POST /chat/retry-test — Retries dead model 2x before falling back to Claude", ); + console.log( + " POST /chat/structured — JSON-schema response across OpenAI → Claude → Gemini", + ); }); diff --git a/packages/llm-sdk/src/adapters/anthropic.ts b/packages/llm-sdk/src/adapters/anthropic.ts index 6c84361..89093c6 100644 --- a/packages/llm-sdk/src/adapters/anthropic.ts +++ b/packages/llm-sdk/src/adapters/anthropic.ts @@ -15,6 +15,7 @@ import { formatMessagesForAnthropic, messageToAnthropicContent, logProviderPayload, + toAnthropicOutputConfig, type AnthropicContentBlock, } from "./base"; @@ -377,8 +378,14 @@ export class AnthropicAdapter implements LLMAdapter { options: Record; messages: Array>; } { - // Extract system message - const systemMessage = request.systemPrompt || ""; + // Extract system message; Anthropic has no schema-less JSON mode, so for + // `responseFormat.type === "json_object"` we coerce via a system suffix. + const responseFormat = request.config?.responseFormat; + const jsonObjectSuffix = + responseFormat?.type === "json_object" + ? "\n\nRespond with a single JSON object and no other text." + : ""; + const systemMessage = (request.systemPrompt || "") + jsonObjectSuffix; // Use raw messages if provided (for agent loop with tool calls) let messages: Array>; @@ -505,6 +512,14 @@ export class AnthropicAdapter implements LLMAdapter { options.server_tool_configuration = serverToolConfiguration; } + // Anthropic structured output (`output_config.format`) — GA on Claude API + // and Bedrock as of late 2025. Vertex AI does not support it; users on + // Vertex should use a forced-tool pattern via `actions` + `toolChoice`. + const outputConfig = toAnthropicOutputConfig(responseFormat); + if (outputConfig) { + options.output_config = outputConfig; + } + // Add thinking configuration if enabled if (this.config.thinking?.type === "enabled") { options.thinking = { diff --git a/packages/llm-sdk/src/adapters/azure.ts b/packages/llm-sdk/src/adapters/azure.ts index c0a7d53..63e16c4 100644 --- a/packages/llm-sdk/src/adapters/azure.ts +++ b/packages/llm-sdk/src/adapters/azure.ts @@ -19,6 +19,7 @@ import { formatMessagesForOpenAI, formatTools, logProviderPayload, + toOpenAIResponseFormat, } from "./base"; // ============================================ @@ -188,6 +189,7 @@ export class AzureAdapter implements LLMAdapter { tools, temperature: request.config?.temperature ?? this.config.temperature, max_tokens: request.config?.maxTokens ?? this.config.maxTokens, + response_format: toOpenAIResponseFormat(request.config?.responseFormat), stream: true, }; logProviderPayload("azure", "request payload", payload, request.debug); @@ -316,6 +318,7 @@ export class AzureAdapter implements LLMAdapter { tools, temperature: request.config?.temperature ?? this.config.temperature, max_tokens: request.config?.maxTokens ?? this.config.maxTokens, + response_format: toOpenAIResponseFormat(request.config?.responseFormat), }; logProviderPayload("azure", "request payload", payload, request.debug); const response = await client.chat.completions.create(payload); diff --git a/packages/llm-sdk/src/adapters/base.ts b/packages/llm-sdk/src/adapters/base.ts index dca81e2..0a38666 100644 --- a/packages/llm-sdk/src/adapters/base.ts +++ b/packages/llm-sdk/src/adapters/base.ts @@ -4,6 +4,7 @@ import type { ActionDefinition, StreamEvent, LLMConfig, + ResponseFormat, ToolDefinition, WebSearchConfig, ProviderToolRuntimeOptions, @@ -17,6 +18,7 @@ export interface RequestLLMConfig { model?: string; temperature?: number; maxTokens?: number; + responseFormat?: ResponseFormat; } /** @@ -285,6 +287,172 @@ export function normalizeObjectJsonSchema( return normalized; } +/** + * Newer OpenAI model families (o1/o3/o4 reasoning, gpt-5.x) require + * `max_completion_tokens` instead of `max_tokens` and reject `temperature` + * on the Chat Completions endpoint. + */ +export function isOpenAIReasoningModel(modelId: string | undefined): boolean { + if (!modelId) return false; + return /^(o1|o3|o4|gpt-5)/i.test(modelId); +} + +/** + * Build the token-limit + temperature fields for a Chat Completions payload, + * accounting for the o-series / gpt-5 parameter rename. + */ +export function buildOpenAITokenParams( + modelId: string | undefined, + maxTokens: number | undefined, + temperature: number | undefined, +): Record { + if (isOpenAIReasoningModel(modelId)) { + return { max_completion_tokens: maxTokens }; + } + return { max_tokens: maxTokens, temperature }; +} + +/** + * Recursively walk a JSON Schema and drop keys the provider rejects. + */ +function stripSchemaKeys( + schema: unknown, + keysToDrop: ReadonlySet, + options: { + forceAdditionalPropertiesFalse?: boolean; + renameKeys?: Record; + } = {}, +): unknown { + if (Array.isArray(schema)) { + return schema.map((item) => stripSchemaKeys(item, keysToDrop, options)); + } + if (!schema || typeof schema !== "object") return schema; + + const out: Record = {}; + for (const [key, value] of Object.entries( + schema as Record, + )) { + if (keysToDrop.has(key)) continue; + const renamed = options.renameKeys?.[key] ?? key; + out[renamed] = stripSchemaKeys(value, keysToDrop, options); + } + + if (options.forceAdditionalPropertiesFalse && out.type === "object") { + out.additionalProperties = false; + } + return out; +} + +/** OpenAI Chat Completions `response_format` payload. */ +export function toOpenAIResponseFormat( + rf: ResponseFormat | undefined, +): Record | undefined { + if (!rf) return undefined; + if (rf.type === "json_object") return { type: "json_object" }; + return { + type: "json_schema", + json_schema: { + name: rf.json_schema.name, + schema: normalizeObjectJsonSchema(rf.json_schema.schema), + strict: rf.json_schema.strict ?? true, + }, + }; +} + +/** OpenAI Responses API `text.format` payload (different shape than Chat Completions). */ +export function toOpenAIResponsesTextFormat( + rf: ResponseFormat | undefined, +): Record | undefined { + if (!rf || rf.type !== "json_schema") return undefined; + return { + type: "json_schema", + name: rf.json_schema.name, + schema: normalizeObjectJsonSchema(rf.json_schema.schema), + strict: rf.json_schema.strict ?? true, + }; +} + +/** + * Anthropic `output_config.format` payload. + * + * Anthropic's structured-output schema subset is narrower than OpenAI's: + * no numeric (minimum/maximum/multipleOf) or length (minLength/maxLength) + * constraints, and `additionalProperties: false` is required on every object. + */ +const ANTHROPIC_UNSUPPORTED_KEYS: ReadonlySet = new Set([ + "minimum", + "maximum", + "exclusiveMinimum", + "exclusiveMaximum", + "multipleOf", + "minLength", + "maxLength", + "minItems", + "maxItems", + "minProperties", + "maxProperties", + "pattern", + "$schema", +]); + +export function toAnthropicOutputConfig( + rf: ResponseFormat | undefined, +): Record | undefined { + if (!rf || rf.type !== "json_schema") return undefined; + // Anthropic accepts `anyOf` but rejects `oneOf` — convert rather than strip, + // otherwise discriminated-union schemas silently lose their union semantics. + const schema = stripSchemaKeys( + rf.json_schema.schema, + ANTHROPIC_UNSUPPORTED_KEYS, + { + forceAdditionalPropertiesFalse: true, + renameKeys: { oneOf: "anyOf" }, + }, + ) as Record; + return { + format: { + type: "json_schema", + schema, + }, + }; +} + +/** + * Gemini `responseJsonSchema` payload. + * + * Gemini accepts an OpenAPI 3.0 subset and silently ignores unknown keywords; + * `oneOf`, `anyOf`, `$ref`, and `pattern` are not supported. + */ +const GEMINI_UNSUPPORTED_KEYS: ReadonlySet = new Set([ + "oneOf", + "anyOf", + "$ref", + "$defs", + "definitions", + "pattern", + "$schema", + "additionalProperties", +]); + +export function toGeminiSchema( + rf: ResponseFormat | undefined, +): Record | undefined { + if (!rf || rf.type !== "json_schema") return undefined; + return stripSchemaKeys( + rf.json_schema.schema, + GEMINI_UNSUPPORTED_KEYS, + ) as Record; +} + +/** Ollama `format` field — `"json"` for free-form, schema object for constrained. */ +export function toOllamaFormat( + rf: ResponseFormat | undefined, +): string | Record | undefined { + if (!rf) return undefined; + if (rf.type === "json_object") return "json"; + return rf.json_schema.schema; +} + /** * Convert actions to OpenAI tool format */ diff --git a/packages/llm-sdk/src/adapters/google.ts b/packages/llm-sdk/src/adapters/google.ts index 83d44e1..373a93f 100644 --- a/packages/llm-sdk/src/adapters/google.ts +++ b/packages/llm-sdk/src/adapters/google.ts @@ -18,7 +18,7 @@ import type { ChatCompletionRequest, CompletionResult, } from "./base"; -import { formatTools, logProviderPayload } from "./base"; +import { formatTools, logProviderPayload, toGeminiSchema } from "./base"; // ============================================ // Types @@ -372,6 +372,15 @@ export class GoogleAdapter implements LLMAdapter { // Emit message start yield { type: "message:start", id: messageId }; + const responseFormat = request.config?.responseFormat; + const geminiSchema = toGeminiSchema(responseFormat); + const responseFormatGenConfig: Record = responseFormat + ? { + responseMimeType: "application/json", + ...(geminiSchema ? { responseJsonSchema: geminiSchema } : {}), + } + : {}; + try { logProviderPayload( "google", @@ -386,6 +395,7 @@ export class GoogleAdapter implements LLMAdapter { generationConfig: { temperature: request.config?.temperature ?? this.config.temperature, maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens, + ...responseFormatGenConfig, }, messageParts: mergedContents[mergedContents.length - 1]?.parts, }, @@ -401,6 +411,7 @@ export class GoogleAdapter implements LLMAdapter { generationConfig: { temperature: request.config?.temperature ?? this.config.temperature, maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens, + ...responseFormatGenConfig, }, }); @@ -647,6 +658,15 @@ export class GoogleAdapter implements LLMAdapter { const tools = formatToolsForGemini(request.actions); + const responseFormat = request.config?.responseFormat; + const geminiSchema = toGeminiSchema(responseFormat); + const responseFormatGenConfig: Record = responseFormat + ? { + responseMimeType: "application/json", + ...(geminiSchema ? { responseJsonSchema: geminiSchema } : {}), + } + : {}; + const payload = { model: modelId, history: mergedContents.slice(0, -1), @@ -657,6 +677,7 @@ export class GoogleAdapter implements LLMAdapter { generationConfig: { temperature: request.config?.temperature ?? this.config.temperature, maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens, + ...responseFormatGenConfig, }, messageParts: mergedContents[mergedContents.length - 1]?.parts, }; @@ -670,6 +691,7 @@ export class GoogleAdapter implements LLMAdapter { generationConfig: { temperature: request.config?.temperature ?? this.config.temperature, maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens, + ...responseFormatGenConfig, }, }); diff --git a/packages/llm-sdk/src/adapters/ollama.ts b/packages/llm-sdk/src/adapters/ollama.ts index 866a82c..88085ed 100644 --- a/packages/llm-sdk/src/adapters/ollama.ts +++ b/packages/llm-sdk/src/adapters/ollama.ts @@ -5,7 +5,12 @@ import type { } from "../core/stream-events"; import { generateMessageId, generateToolCallId } from "../core/utils"; import type { LLMAdapter, ChatCompletionRequest } from "./base"; -import { formatMessages, formatTools, logProviderPayload } from "./base"; +import { + formatMessages, + formatTools, + logProviderPayload, + toOllamaFormat, +} from "./base"; import type { OllamaModelOptions } from "../providers/types"; /** @@ -288,12 +293,14 @@ export class OllamaAdapter implements LLMAdapter { Object.assign(ollamaOptions, this.config.options); } + const ollamaFormat = toOllamaFormat(request.config?.responseFormat); const payload = { model: request.config?.model || this.model, messages, tools, stream: true, options: ollamaOptions, + ...(ollamaFormat !== undefined ? { format: ollamaFormat } : {}), }; logProviderPayload("ollama", "request payload", payload, request.debug); const response = await fetch(`${this.baseUrl}/api/chat`, { diff --git a/packages/llm-sdk/src/adapters/openai.ts b/packages/llm-sdk/src/adapters/openai.ts index d0795ec..114ecec 100644 --- a/packages/llm-sdk/src/adapters/openai.ts +++ b/packages/llm-sdk/src/adapters/openai.ts @@ -12,10 +12,13 @@ import type { CompletionResult, } from "./base"; import { + buildOpenAITokenParams, formatMessagesForOpenAI, formatTools, logProviderPayload, normalizeObjectJsonSchema, + toOpenAIResponseFormat, + toOpenAIResponsesTextFormat, } from "./base"; /** @@ -225,6 +228,9 @@ export class OpenAIAdapter implements LLMAdapter { ): Promise { const client = await this.getClient(); const openaiToolOptions = request.providerToolOptions?.openai; + const responsesTextFormat = toOpenAIResponsesTextFormat( + request.config?.responseFormat, + ); const payload = { model: request.config?.model || this.model, instructions: request.systemPrompt, @@ -239,6 +245,7 @@ export class OpenAIAdapter implements LLMAdapter { parallel_tool_calls: openaiToolOptions?.parallelToolCalls, temperature: request.config?.temperature ?? this.config.temperature, max_output_tokens: request.config?.maxTokens ?? this.config.maxTokens, + ...(responsesTextFormat ? { text: { format: responsesTextFormat } } : {}), stream: false, }; @@ -427,15 +434,20 @@ export class OpenAIAdapter implements LLMAdapter { }, } : openaiToolOptions?.toolChoice; + const modelIdForPayload = request.config?.model || this.model; const payload = { - model: request.config?.model || this.model, + model: modelIdForPayload, messages, tools: tools.length > 0 ? tools : undefined, tool_choice: tools.length > 0 ? toolChoice : undefined, parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : undefined, - temperature: request.config?.temperature ?? this.config.temperature, - max_tokens: request.config?.maxTokens ?? this.config.maxTokens, + ...buildOpenAITokenParams( + modelIdForPayload, + request.config?.maxTokens ?? this.config.maxTokens, + request.config?.temperature ?? this.config.temperature, + ), + response_format: toOpenAIResponseFormat(request.config?.responseFormat), stream: true, stream_options: { include_usage: true }, }; @@ -654,15 +666,20 @@ export class OpenAIAdapter implements LLMAdapter { } : openaiToolOptions?.toolChoice; + const modelIdForCompletePayload = request.config?.model || this.model; const payload = { - model: request.config?.model || this.model, + model: modelIdForCompletePayload, messages, tools: tools.length > 0 ? tools : undefined, tool_choice: tools.length > 0 ? toolChoice : undefined, parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : undefined, - temperature: request.config?.temperature ?? this.config.temperature, - max_tokens: request.config?.maxTokens ?? this.config.maxTokens, + ...buildOpenAITokenParams( + modelIdForCompletePayload, + request.config?.maxTokens ?? this.config.maxTokens, + request.config?.temperature ?? this.config.temperature, + ), + response_format: toOpenAIResponseFormat(request.config?.responseFormat), stream: false, }; diff --git a/packages/llm-sdk/src/core/generate-text.ts b/packages/llm-sdk/src/core/generate-text.ts index 0d9a099..6249969 100644 --- a/packages/llm-sdk/src/core/generate-text.ts +++ b/packages/llm-sdk/src/core/generate-text.ts @@ -47,6 +47,12 @@ export async function generateText( ): Promise { const { model, tools, maxSteps = 1, signal } = params; + if (params.responseFormat && model.capabilities.supportsJsonMode === false) { + console.warn( + `[llm-sdk] ${model.provider}/${model.modelId} does not support structured output (responseFormat); the request will be sent but the provider may ignore it.`, + ); + } + // Build initial messages let messages = buildMessages(params); const steps: GenerateStep[] = []; @@ -71,6 +77,7 @@ export async function generateText( tools: formattedTools, temperature: params.temperature, maxTokens: params.maxTokens, + responseFormat: params.responseFormat, signal, }); diff --git a/packages/llm-sdk/src/core/stream-events.ts b/packages/llm-sdk/src/core/stream-events.ts index 808fb87..d89f5be 100644 --- a/packages/llm-sdk/src/core/stream-events.ts +++ b/packages/llm-sdk/src/core/stream-events.ts @@ -286,12 +286,31 @@ export type StreamEvent = | ThreadCreatedEvent | DoneEvent; +/** + * Structured-output / JSON-mode request format. + * + * Uses OpenAI's `response_format` shape as the unified surface; each adapter + * translates to its provider's native field (Anthropic `output_config`, + * Gemini `responseJsonSchema`, Ollama `format`, etc.). + */ +export type ResponseFormat = + | { type: "json_object" } + | { + type: "json_schema"; + json_schema: { + name: string; + schema: Record; + strict?: boolean; + }; + }; + /** * LLM configuration */ export interface LLMConfig { temperature?: number; maxTokens?: number; + responseFormat?: ResponseFormat; } /** diff --git a/packages/llm-sdk/src/core/stream-text.ts b/packages/llm-sdk/src/core/stream-text.ts index 41ab6f2..8904d4f 100644 --- a/packages/llm-sdk/src/core/stream-text.ts +++ b/packages/llm-sdk/src/core/stream-text.ts @@ -50,6 +50,12 @@ export async function streamText( ): Promise { const { model, tools, maxSteps = 1, signal } = params; + if (params.responseFormat && model.capabilities.supportsJsonMode === false) { + console.warn( + `[llm-sdk] ${model.provider}/${model.modelId} does not support structured output (responseFormat); the request will be sent but the provider may ignore it.`, + ); + } + // State for collecting results let fullText = ""; let finalUsage: TokenUsage = { @@ -90,6 +96,7 @@ export async function streamText( tools: formattedTools, temperature: params.temperature, maxTokens: params.maxTokens, + responseFormat: params.responseFormat, signal, })) { switch (chunk.type) { diff --git a/packages/llm-sdk/src/core/types.ts b/packages/llm-sdk/src/core/types.ts index ae9dad7..53a16e9 100644 --- a/packages/llm-sdk/src/core/types.ts +++ b/packages/llm-sdk/src/core/types.ts @@ -207,6 +207,8 @@ export interface DoGenerateParams { temperature?: number; /** Maximum tokens to generate */ maxTokens?: number; + /** Structured-output / JSON-mode request format (provider-translated) */ + responseFormat?: import("./stream-events").ResponseFormat; /** Abort signal */ signal?: AbortSignal; } @@ -312,6 +314,8 @@ export interface GenerateTextParams { temperature?: number; /** Maximum tokens to generate */ maxTokens?: number; + /** Structured-output / JSON-mode request format */ + responseFormat?: import("./stream-events").ResponseFormat; /** Abort signal */ signal?: AbortSignal; } diff --git a/packages/llm-sdk/src/providers/anthropic/index.ts b/packages/llm-sdk/src/providers/anthropic/index.ts index 9e9c3e7..75e9021 100644 --- a/packages/llm-sdk/src/providers/anthropic/index.ts +++ b/packages/llm-sdk/src/providers/anthropic/index.ts @@ -157,7 +157,8 @@ export function createAnthropic( "image/gif", "image/webp", ], - supportsJsonMode: false, + // Native `output_config.format` — GA on Claude 3.5 and newer. + supportsJsonMode: true, supportsSystemMessages: true, }; }; diff --git a/packages/llm-sdk/src/providers/anthropic/provider.ts b/packages/llm-sdk/src/providers/anthropic/provider.ts index 4b48b6e..e802fe2 100644 --- a/packages/llm-sdk/src/providers/anthropic/provider.ts +++ b/packages/llm-sdk/src/providers/anthropic/provider.ts @@ -22,6 +22,7 @@ import type { FinishReason, CoreMessage, } from "../../core/types"; +import { toAnthropicOutputConfig } from "../../adapters/base"; // ============================================ // Model Definitions @@ -32,6 +33,12 @@ interface AnthropicModelConfig { tools: boolean; thinking: boolean; pdf: boolean; + /** + * Native structured-output (`output_config.format`) support — GA on Claude + * API and Bedrock as of late 2025 for Claude 3.5 and newer. Older Claude 3 + * base models must use a forced-tool fallback. + */ + jsonMode: boolean; maxTokens: number; } @@ -42,6 +49,7 @@ const ANTHROPIC_MODELS: Record = { tools: true, thinking: true, pdf: true, + jsonMode: true, maxTokens: 200000, }, "claude-opus-4-20250514": { @@ -49,6 +57,7 @@ const ANTHROPIC_MODELS: Record = { tools: true, thinking: true, pdf: true, + jsonMode: true, maxTokens: 200000, }, @@ -58,6 +67,7 @@ const ANTHROPIC_MODELS: Record = { tools: true, thinking: true, pdf: true, + jsonMode: true, maxTokens: 200000, }, "claude-3-7-sonnet-latest": { @@ -65,6 +75,7 @@ const ANTHROPIC_MODELS: Record = { tools: true, thinking: true, pdf: true, + jsonMode: true, maxTokens: 200000, }, @@ -74,6 +85,7 @@ const ANTHROPIC_MODELS: Record = { tools: true, thinking: false, pdf: true, + jsonMode: true, maxTokens: 200000, }, "claude-3-5-sonnet-latest": { @@ -81,6 +93,7 @@ const ANTHROPIC_MODELS: Record = { tools: true, thinking: false, pdf: true, + jsonMode: true, maxTokens: 200000, }, "claude-3-5-haiku-20241022": { @@ -88,6 +101,7 @@ const ANTHROPIC_MODELS: Record = { tools: true, thinking: false, pdf: false, + jsonMode: true, maxTokens: 200000, }, "claude-3-5-haiku-latest": { @@ -95,6 +109,7 @@ const ANTHROPIC_MODELS: Record = { tools: true, thinking: false, pdf: false, + jsonMode: true, maxTokens: 200000, }, @@ -104,6 +119,7 @@ const ANTHROPIC_MODELS: Record = { tools: true, thinking: false, pdf: false, + jsonMode: false, maxTokens: 200000, }, "claude-3-sonnet-20240229": { @@ -111,6 +127,7 @@ const ANTHROPIC_MODELS: Record = { tools: true, thinking: false, pdf: false, + jsonMode: false, maxTokens: 200000, }, "claude-3-haiku-20240307": { @@ -118,6 +135,7 @@ const ANTHROPIC_MODELS: Record = { tools: true, thinking: false, pdf: false, + jsonMode: false, maxTokens: 200000, }, }; @@ -175,7 +193,7 @@ export function anthropic( supportsVision: modelConfig.vision, supportsTools: modelConfig.tools, supportsStreaming: true, - supportsJsonMode: false, + supportsJsonMode: modelConfig.jsonMode, supportsThinking: modelConfig.thinking, supportsPDF: modelConfig.pdf, maxTokens: modelConfig.maxTokens, @@ -209,6 +227,11 @@ export function anthropic( }; } + const outputConfig = toAnthropicOutputConfig(params.responseFormat); + if (outputConfig) { + requestOptions.output_config = outputConfig; + } + const response = await client.messages.create(requestOptions); // Parse response @@ -266,6 +289,11 @@ export function anthropic( }; } + const outputConfig = toAnthropicOutputConfig(params.responseFormat); + if (outputConfig) { + requestOptions.output_config = outputConfig; + } + const stream = await client.messages.stream(requestOptions); let currentToolUse: { id: string; name: string; input: string } | null = diff --git a/packages/llm-sdk/src/providers/fireworks/provider.ts b/packages/llm-sdk/src/providers/fireworks/provider.ts index 78b3cdb..49d17de 100644 --- a/packages/llm-sdk/src/providers/fireworks/provider.ts +++ b/packages/llm-sdk/src/providers/fireworks/provider.ts @@ -25,6 +25,7 @@ import type { FinishReason, CoreMessage, } from "../../core/types"; +import { toOpenAIResponseFormat } from "../../adapters/base"; // ============================================ // Provider Options @@ -107,6 +108,11 @@ export function fireworks( requestBody.tools = params.tools; } + const responseFormat = toOpenAIResponseFormat(params.responseFormat); + if (responseFormat) { + requestBody.response_format = responseFormat; + } + const response = await client.chat.completions.create(requestBody); const choice = response.choices[0]; const message = choice.message; @@ -148,6 +154,11 @@ export function fireworks( requestBody.tools = params.tools; } + const responseFormat = toOpenAIResponseFormat(params.responseFormat); + if (responseFormat) { + requestBody.response_format = responseFormat; + } + const stream = await client.chat.completions.create(requestBody); // Track tool calls by index (Fireworks may repeat tc.id across chunks) diff --git a/packages/llm-sdk/src/providers/google/provider.ts b/packages/llm-sdk/src/providers/google/provider.ts index 2659ce6..6d55cf3 100644 --- a/packages/llm-sdk/src/providers/google/provider.ts +++ b/packages/llm-sdk/src/providers/google/provider.ts @@ -25,6 +25,7 @@ import type { FinishReason, CoreMessage, } from "../../core/types"; +import { toOpenAIResponseFormat } from "../../adapters/base"; // ============================================ // Model Definitions @@ -211,6 +212,7 @@ export function google( tools: params.tools as any, temperature: params.temperature, max_tokens: params.maxTokens, + response_format: toOpenAIResponseFormat(params.responseFormat), }); const choice = response.choices[0]; @@ -249,6 +251,7 @@ export function google( tools: params.tools as any, temperature: params.temperature, max_tokens: params.maxTokens, + response_format: toOpenAIResponseFormat(params.responseFormat), stream: true, }); diff --git a/packages/llm-sdk/src/providers/ollama/index.ts b/packages/llm-sdk/src/providers/ollama/index.ts index 91377e8..b87cc39 100644 --- a/packages/llm-sdk/src/providers/ollama/index.ts +++ b/packages/llm-sdk/src/providers/ollama/index.ts @@ -204,7 +204,8 @@ export function createOllama(config: OllamaProviderConfig = {}): AIProvider { supportedImageTypes: model.vision ? ["image/png", "image/jpeg", "image/gif"] : [], - supportsJsonMode: false, + // Ollama 0.5+ supports `format: "json"` and JSON-schema constrained output. + supportsJsonMode: true, supportsSystemMessages: true, }; }; diff --git a/packages/llm-sdk/src/providers/openai/provider.ts b/packages/llm-sdk/src/providers/openai/provider.ts index 4c6d6e8..fc9a153 100644 --- a/packages/llm-sdk/src/providers/openai/provider.ts +++ b/packages/llm-sdk/src/providers/openai/provider.ts @@ -26,6 +26,10 @@ import type { FinishReason, CoreMessage, } from "../../core/types"; +import { + buildOpenAITokenParams, + toOpenAIResponseFormat, +} from "../../adapters/base"; // ============================================ // Model Definitions @@ -194,8 +198,12 @@ export function openai( model: modelId, messages, tools: params.tools as any, - temperature: params.temperature, - max_tokens: params.maxTokens, + ...buildOpenAITokenParams( + modelId, + params.maxTokens, + params.temperature, + ), + response_format: toOpenAIResponseFormat(params.responseFormat), }); const choice = response.choices[0]; @@ -232,8 +240,12 @@ export function openai( model: modelId, messages, tools: params.tools as any, - temperature: params.temperature, - max_tokens: params.maxTokens, + ...buildOpenAITokenParams( + modelId, + params.maxTokens, + params.temperature, + ), + response_format: toOpenAIResponseFormat(params.responseFormat), stream: true, }); diff --git a/packages/llm-sdk/src/providers/openrouter/provider.ts b/packages/llm-sdk/src/providers/openrouter/provider.ts index f85d858..713ee90 100644 --- a/packages/llm-sdk/src/providers/openrouter/provider.ts +++ b/packages/llm-sdk/src/providers/openrouter/provider.ts @@ -25,6 +25,7 @@ import type { FinishReason, CoreMessage, } from "../../core/types"; +import { toOpenAIResponseFormat } from "../../adapters/base"; // ============================================ // Model Configuration @@ -170,6 +171,11 @@ export function openrouter( requestBody.provider = options.providerPreferences; } + const responseFormat = toOpenAIResponseFormat(params.responseFormat); + if (responseFormat) { + requestBody.response_format = responseFormat; + } + const response = await client.chat.completions.create(requestBody); const choice = response.choices[0]; @@ -221,6 +227,11 @@ export function openrouter( requestBody.provider = options.providerPreferences; } + const responseFormat = toOpenAIResponseFormat(params.responseFormat); + if (responseFormat) { + requestBody.response_format = responseFormat; + } + const stream = await client.chat.completions.create(requestBody); // Track current tool call being built diff --git a/packages/llm-sdk/src/providers/togetherai/provider.ts b/packages/llm-sdk/src/providers/togetherai/provider.ts index 7c1c718..e016cfa 100644 --- a/packages/llm-sdk/src/providers/togetherai/provider.ts +++ b/packages/llm-sdk/src/providers/togetherai/provider.ts @@ -29,6 +29,7 @@ import type { FinishReason, CoreMessage, } from "../../core/types"; +import { toOpenAIResponseFormat } from "../../adapters/base"; // ============================================ // Provider Options @@ -116,6 +117,11 @@ export function togetherai( requestBody.tools = params.tools; } + const responseFormat = toOpenAIResponseFormat(params.responseFormat); + if (responseFormat) { + requestBody.response_format = responseFormat; + } + const response = await client.chat.completions.create(requestBody); const choice = response.choices[0]; const message = choice.message; @@ -157,6 +163,11 @@ export function togetherai( requestBody.tools = params.tools; } + const responseFormat = toOpenAIResponseFormat(params.responseFormat); + if (responseFormat) { + requestBody.response_format = responseFormat; + } + const stream = await client.chat.completions.create(requestBody); // Track tool calls by index diff --git a/packages/llm-sdk/src/providers/xai/index.ts b/packages/llm-sdk/src/providers/xai/index.ts index 4bc8d00..0eca6df 100644 --- a/packages/llm-sdk/src/providers/xai/index.ts +++ b/packages/llm-sdk/src/providers/xai/index.ts @@ -180,7 +180,8 @@ export function createXAI(config: XAIProviderConfig = {}): AIProvider { supportedImageTypes: model.vision ? ["image/png", "image/jpeg", "image/gif", "image/webp"] : [], - supportsJsonMode: false, + // xAI accepts OpenAI-compatible `response_format` on grok-2-1212+. + supportsJsonMode: true, supportsSystemMessages: true, }; }; diff --git a/packages/llm-sdk/src/providers/xai/provider.ts b/packages/llm-sdk/src/providers/xai/provider.ts index ab773c6..959e135 100644 --- a/packages/llm-sdk/src/providers/xai/provider.ts +++ b/packages/llm-sdk/src/providers/xai/provider.ts @@ -25,6 +25,7 @@ import type { FinishReason, CoreMessage, } from "../../core/types"; +import { toOpenAIResponseFormat } from "../../adapters/base"; // ============================================ // Model Definitions @@ -136,7 +137,7 @@ export function xai( supportsVision: modelConfig.vision, supportsTools: modelConfig.tools, supportsStreaming: true, - supportsJsonMode: false, // xAI doesn't support JSON mode yet + supportsJsonMode: true, // OpenAI-compatible `response_format` supportsThinking: false, supportsPDF: false, maxTokens: modelConfig.maxTokens, @@ -156,6 +157,7 @@ export function xai( tools: params.tools as any, temperature: params.temperature, max_tokens: params.maxTokens, + response_format: toOpenAIResponseFormat(params.responseFormat), }); const choice = response.choices[0]; @@ -194,6 +196,7 @@ export function xai( tools: params.tools as any, temperature: params.temperature, max_tokens: params.maxTokens, + response_format: toOpenAIResponseFormat(params.responseFormat), stream: true, }); diff --git a/packages/llm-sdk/src/server/types.ts b/packages/llm-sdk/src/server/types.ts index a2c0d8e..d182309 100644 --- a/packages/llm-sdk/src/server/types.ts +++ b/packages/llm-sdk/src/server/types.ts @@ -1,6 +1,7 @@ import type { ActionDefinition, KnowledgeBaseConfig, + ResponseFormat, ToolDefinition, ToolProfile, WebSearchConfig, @@ -222,7 +223,11 @@ export interface ChatRequest { /** Bot ID (for cloud) */ botId?: string; /** LLM config overrides */ - config?: { temperature?: number; maxTokens?: number }; + config?: { + temperature?: number; + maxTokens?: number; + responseFormat?: ResponseFormat; + }; /** System prompt override */ systemPrompt?: string; /** Actions from client (legacy) */