diff --git a/apps/docs/content/docs/llm-sdk/generate-text.mdx b/apps/docs/content/docs/llm-sdk/generate-text.mdx
index ea6deb0..89c417f 100644
--- a/apps/docs/content/docs/llm-sdk/generate-text.mdx
+++ b/apps/docs/content/docs/llm-sdk/generate-text.mdx
@@ -42,9 +42,23 @@ const result = await generateText({
temperature: 0.7,
maxTokens: 4096,
signal: abortController.signal,
+
+ // Optional: Structured JSON output (provider-translated)
+ responseFormat: {
+ type: 'json_schema',
+ json_schema: {
+ name: 'response',
+ schema: { type: 'object', properties: { ... }, required: [...] },
+ strict: true,
+ },
+ },
});
```
+
+ `responseFormat` works across OpenAI, Anthropic, Google, Azure, xAI, Together, Fireworks, OpenRouter, and Ollama — each adapter translates to the provider's native field. See [Structured Output](/docs/llm-sdk/structured-output) for per-provider details and gotchas.
+
+
---
## Response Object
diff --git a/apps/docs/content/docs/llm-sdk/meta.json b/apps/docs/content/docs/llm-sdk/meta.json
index 16bae7a..b892436 100644
--- a/apps/docs/content/docs/llm-sdk/meta.json
+++ b/apps/docs/content/docs/llm-sdk/meta.json
@@ -1,5 +1,5 @@
{
"title": "LLM SDK",
"icon": "AiChip1",
- "pages": ["generate-text", "stream-text", "tools"]
+ "pages": ["generate-text", "stream-text", "structured-output", "tools"]
}
diff --git a/apps/docs/content/docs/llm-sdk/stream-text.mdx b/apps/docs/content/docs/llm-sdk/stream-text.mdx
index 17ef71e..49567dd 100644
--- a/apps/docs/content/docs/llm-sdk/stream-text.mdx
+++ b/apps/docs/content/docs/llm-sdk/stream-text.mdx
@@ -54,9 +54,23 @@ const result = await streamText({
// Optional: Generation settings
temperature: 0.7,
maxTokens: 4096,
+
+ // Optional: Structured JSON output (provider-translated)
+ responseFormat: {
+ type: 'json_schema',
+ json_schema: {
+ name: 'response',
+ schema: { type: 'object', properties: { ... }, required: [...] },
+ strict: true,
+ },
+ },
});
```
+
+ `responseFormat` works across all supported providers — each adapter translates to the provider's native field. See [Structured Output](/docs/llm-sdk/structured-output) for per-provider details and gotchas.
+
+
---
## Response Object
diff --git a/apps/docs/content/docs/llm-sdk/structured-output.mdx b/apps/docs/content/docs/llm-sdk/structured-output.mdx
new file mode 100644
index 0000000..7164e77
--- /dev/null
+++ b/apps/docs/content/docs/llm-sdk/structured-output.mdx
@@ -0,0 +1,219 @@
+---
+title: Structured Output
+description: Get JSON-schema-validated responses from any provider
+---
+
+import { Callout } from 'fumadocs-ui/components/callout';
+import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
+
+Pass `responseFormat` to `generateText()` or `streamText()` to get
+JSON-schema-validated responses. The SDK translates the unified shape to
+each provider's native API — works the same whether you're on OpenAI,
+Anthropic, Google, Azure, xAI, Together, Fireworks, OpenRouter, or Ollama.
+
+```ts
+import { generateText } from '@yourgpt/llm-sdk';
+import { openai } from '@yourgpt/llm-sdk/openai';
+
+const result = await generateText({
+ model: openai('gpt-4o'),
+ prompt: 'List the top 3 fastest land animals.',
+ responseFormat: {
+ type: 'json_schema',
+ json_schema: {
+ name: 'animals_response',
+ schema: {
+ type: 'object',
+ properties: {
+ animals: {
+ type: 'array',
+ items: {
+ type: 'object',
+ properties: {
+ name: { type: 'string' },
+ top_speed_kmh: { type: 'number' },
+ },
+ required: ['name', 'top_speed_kmh'],
+ },
+ },
+ },
+ required: ['animals'],
+ },
+ strict: true,
+ },
+ },
+});
+
+const data = JSON.parse(result.text);
+// → { animals: [{ name: 'Cheetah', top_speed_kmh: 120 }, ...] }
+```
+
+---
+
+## ResponseFormat shape
+
+The unified type uses OpenAI's `response_format` shape — callers who already
+write `response_format` for OpenAI can pass it through unchanged.
+
+```ts
+type ResponseFormat =
+ | { type: 'json_object' }
+ | {
+ type: 'json_schema';
+ json_schema: {
+ name: string;
+ schema: Record<string, unknown>; // JSON Schema
+ strict?: boolean; // default: true
+ };
+ };
+```
+
+- `type: 'json_object'` — free-form JSON, no schema enforcement. Adapters
+ that don't have a native "JSON mode without schema" (Anthropic) inject a
+ system-prompt suffix asking for JSON instead.
+- `type: 'json_schema'` — schema-validated output. Recommended.
+
+---
+
+## Per-provider translation
+
+Each adapter translates `responseFormat` to its provider's native field:
+
+| Provider | Native field |
+|---|---|
+| OpenAI Chat / Azure / xAI / Together / Fireworks / OpenRouter | `response_format` |
+| OpenAI Responses API | `text.format` (different shape) |
+| Anthropic Claude 3.5+ | `output_config.format` |
+| Google Gemini | `responseJsonSchema` |
+| Ollama 0.5+ | `format` |
+
+You don't need to think about this — the SDK handles it. The notes below
+matter only if you hit edge cases.
+
+---
+
+## Provider gotchas
+
+### Anthropic — schema sanitization
+
+Anthropic's structured-output schema subset is narrower than OpenAI's. The
+adapter automatically strips keys Anthropic rejects so your call doesn't 400:
+
+- **Stripped:** `minimum`, `maximum`, `exclusiveMinimum`, `exclusiveMaximum`,
+ `multipleOf`, `minLength`, `maxLength`, `minItems`, `maxItems`,
+ `minProperties`, `maxProperties`, `pattern`, `$schema`
+- **Converted:** `oneOf` → `anyOf` (Anthropic accepts the latter, not the former)
+- **Forced:** `additionalProperties: false` on every object
+
+If you rely on numeric or length constraints for validation, do that
+client-side after `JSON.parse()` rather than encoding it in the schema.
+
+<Callout type="warn">
+ Anthropic's `output_config.format` is GA on Claude API and AWS Bedrock for
+ Claude 3.5 / 3.7 / 4 series. It is NOT available on Google Vertex AI. Older
+ Claude 3 base models (`claude-3-opus-20240229` etc.) are not supported either.
+ Tracking: [issue #96](https://github.com/YourGPT/copilot-sdk/issues/96).
+</Callout>
+
+### Google Gemini — OpenAPI subset
+
+Gemini's `responseJsonSchema` accepts an OpenAPI 3.0 subset. The adapter
+strips keys Gemini doesn't recognize:
+
+- **Stripped:** `oneOf`, `anyOf`, `$ref`, `$defs`, `definitions`, `pattern`,
+ `$schema`, `additionalProperties`
+
+Schemas with discriminated unions or shared definitions need to be inlined
+before passing to Gemini.
+
+### xAI — `additionalProperties` default
+
+xAI inverts OpenAI's default: `additionalProperties` defaults to `false` and
+must be explicitly set `true` if you want extra properties allowed. The
+adapter passes your schema through unchanged, so be explicit.
+
+### Ollama — local only
+
+Ollama's `format` field requires Ollama v0.5+ for schema-constrained output
+(string `"json"` works on older versions for free-form JSON). Ollama Cloud
+does not support structured outputs at the time of writing.
+
+---
+
+## Capability gate
+
+Each model in the registry carries a `supportsJsonMode` capability flag.
+When you pass `responseFormat` to a model that doesn't support it, the SDK
+logs a warning:
+
+```
+[llm-sdk] anthropic/claude-3-haiku-20240307 does not support structured
+output (responseFormat); the request will be sent but the provider may
+ignore it.
+```
+
+This is a warning, not an error — the request still goes through. Switch
+to a supported model (e.g. `claude-3-5-sonnet-latest`) or open an issue if
+you need fallback behavior.
+
+---
+
+## Reasoning models — token semantics
+
+For OpenAI reasoning models (`o1`, `o3`, `o4`, `gpt-5.x`):
+
+- `maxTokens` is internally translated to `max_completion_tokens`
+- `temperature` is silently dropped (these models reject it)
+- `max_completion_tokens` includes BOTH reasoning tokens AND visible output
+ tokens — set generously (`maxTokens: 4000+`) or you may see truncated
+ responses
+
+```ts
+const result = await generateText({
+ model: openai('o3-mini'),
+ prompt: 'Solve: ...',
+ maxTokens: 4000, // → max_completion_tokens internally
+ temperature: 0.7, // → silently dropped
+ responseFormat: { ... },
+});
+```
+
+---
+
+## Fallback chains
+
+`responseFormat` works through fallback chains transparently. Each provider
+in the chain receives the schema in its native format:
+
+```ts
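+import { createFallbackChain } from '@yourgpt/llm-sdk/fallback';
+import { createOpenAI } from '@yourgpt/llm-sdk/openai';
+import { createAnthropic } from '@yourgpt/llm-sdk/anthropic';
+
+const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });
+const anthropic = createAnthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
+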
+const chain = createFallbackChain({
+ models: [
+ openai.languageModel('gpt-4o'),
+ anthropic.languageModel('claude-3-5-sonnet-latest'),
+ ],
+ strategy: 'priority',
+});
+
+// Same responseFormat works on either hop
+const result = await chain.chat({
+ messages: [...],
+ config: {
+ responseFormat: { type: 'json_schema', json_schema: { ... } },
+ },
+});
+```
+
+A working end-to-end demo lives in `examples/fallback-demo` — see the
+`/chat/structured` route.
+
+---
+
+## Next Steps
+
+- [generateText()](/docs/llm-sdk/generate-text) — full text generation API
+- [streamText()](/docs/llm-sdk/stream-text) — streaming variant
+- [Tools](/docs/llm-sdk/tools) — function calling (orthogonal to structured output)
diff --git a/examples/fallback-demo/src/index.ts b/examples/fallback-demo/src/index.ts
index dc5a6aa..027530d 100644
--- a/examples/fallback-demo/src/index.ts
+++ b/examples/fallback-demo/src/index.ts
@@ -345,6 +345,76 @@ app.post("/chat/retry-test", async (req, res) => {
}
});
+// ─── Route 9: Structured output (responseFormat) ─────────────────────────────
+//
+// Exercises the unified `responseFormat` field across an OpenAI → Anthropic →
+// Gemini fallback chain. The OpenAI hop receives `response_format` and the
+// Anthropic hop `output_config.format`. The Gemini hop in this demo is built
+// with `createOpenAI` against Google's OpenAI-compatible endpoint, so it goes
+// through the OpenAI-shaped path as well; the native `responseJsonSchema`
+// translation is not exercised by this route.
+//
+// Test:
+// curl -s -X POST http://localhost:3000/chat/structured \
+// -H "Content-Type: application/json" \
+// -d '{"messages":[{"role":"user","content":"List the top 3 fastest land animals with their top speed in km/h."}]}'
+
+// Gemini client for the last hop of the chain. It is created with
+// `createOpenAI` and a Google base URL, i.e. it talks to Gemini through the
+// OpenAI-compatible endpoint rather than through a native Google provider.
+const google = createOpenAI({
+ apiKey: process.env.GOOGLE_API_KEY,
+ baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai/",
+});
+
+// JSON Schema enforced on this route's responses:
+// `{ animals: [{ name: string, top_speed_kmh: number }, ...] }`.
+// `as const` keeps the literal `type` strings from widening to `string`.
+const ANIMALS_SCHEMA = {
+ type: "object",
+ properties: {
+ animals: {
+ type: "array",
+ items: {
+ type: "object",
+ properties: {
+ name: { type: "string" },
+ top_speed_kmh: { type: "number" },
+ },
+ required: ["name", "top_speed_kmh"],
+ },
+ },
+ },
+ required: ["animals"],
+} as const;
+
+// Runtime backed by a three-model fallback chain (GPT-4o, Claude 3.5 Sonnet,
+// Gemini 2.0 Flash). NOTE(review): with `strategy: "priority"` the models are
+// presumably tried in the listed order — confirm against createFallbackChain.
+// Fallback events are handed to the shared `onFallbackLog` helper.
+const structuredRuntime = createRuntime({
+ adapter: createFallbackChain({
+ models: [
+ openai.languageModel("gpt-4o"),
+ anthropic.languageModel("claude-3-5-sonnet-latest"),
+ google.languageModel("gemini-2.0-flash"),
+ ],
+ strategy: "priority",
+ onFallback: onFallbackLog("structured"),
+ }),
+ systemPrompt: "You return data as JSON matching the requested schema.",
+});
+
+// POST /chat/structured — forwards the request body to the runtime with a
+// fixed JSON-schema `responseFormat`.
+app.post("/chat/structured", async (req, res) => {
+ try {
+ const result = await structuredRuntime.chat({
+ ...req.body,
+ config: {
+ // Caller-supplied config is spread first, so the `responseFormat` below
+ // always wins over any `responseFormat` sent in the request body.
+ ...req.body.config,
+ responseFormat: {
+ type: "json_schema",
+ json_schema: {
+ name: "animals_response",
+ schema: ANIMALS_SCHEMA,
+ strict: true,
+ },
+ },
+ },
+ });
+ res.json(result);
+ } catch (err) {
+ // Error-to-response mapping is delegated to the shared handler.
+ handleError(err, res);
+ }
+});
+
// ─── Route 7: Tools + FORCED FALLBACK (dead primary) ─────────────────────────
//
// Same tools, but primary is a dead URL.
@@ -416,4 +486,7 @@ app.listen(PORT, () => {
console.log(
" POST /chat/retry-test — Retries dead model 2x before falling back to Claude",
);
+ console.log(
+ " POST /chat/structured — JSON-schema response across OpenAI → Claude → Gemini",
+ );
});
diff --git a/packages/llm-sdk/src/adapters/anthropic.ts b/packages/llm-sdk/src/adapters/anthropic.ts
index 6c84361..89093c6 100644
--- a/packages/llm-sdk/src/adapters/anthropic.ts
+++ b/packages/llm-sdk/src/adapters/anthropic.ts
@@ -15,6 +15,7 @@ import {
formatMessagesForAnthropic,
messageToAnthropicContent,
logProviderPayload,
+ toAnthropicOutputConfig,
type AnthropicContentBlock,
} from "./base";
@@ -377,8 +378,14 @@ export class AnthropicAdapter implements LLMAdapter {
options: Record;
messages: Array>;
} {
- // Extract system message
- const systemMessage = request.systemPrompt || "";
+ // Extract system message; Anthropic has no schema-less JSON mode, so for
+ // `responseFormat.type === "json_object"` we coerce via a system suffix.
+ const responseFormat = request.config?.responseFormat;
+ const jsonObjectSuffix =
+ responseFormat?.type === "json_object"
+ ? "\n\nRespond with a single JSON object and no other text."
+ : "";
+ const systemMessage = (request.systemPrompt || "") + jsonObjectSuffix;
// Use raw messages if provided (for agent loop with tool calls)
let messages: Array>;
@@ -505,6 +512,14 @@ export class AnthropicAdapter implements LLMAdapter {
options.server_tool_configuration = serverToolConfiguration;
}
+ // Anthropic structured output (`output_config.format`) — GA on Claude API
+ // and Bedrock as of late 2025. Vertex AI does not support it; users on
+ // Vertex should use a forced-tool pattern via `actions` + `toolChoice`.
+ const outputConfig = toAnthropicOutputConfig(responseFormat);
+ if (outputConfig) {
+ options.output_config = outputConfig;
+ }
+
// Add thinking configuration if enabled
if (this.config.thinking?.type === "enabled") {
options.thinking = {
diff --git a/packages/llm-sdk/src/adapters/azure.ts b/packages/llm-sdk/src/adapters/azure.ts
index c0a7d53..63e16c4 100644
--- a/packages/llm-sdk/src/adapters/azure.ts
+++ b/packages/llm-sdk/src/adapters/azure.ts
@@ -19,6 +19,7 @@ import {
formatMessagesForOpenAI,
formatTools,
logProviderPayload,
+ toOpenAIResponseFormat,
} from "./base";
// ============================================
@@ -188,6 +189,7 @@ export class AzureAdapter implements LLMAdapter {
tools,
temperature: request.config?.temperature ?? this.config.temperature,
max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
stream: true,
};
logProviderPayload("azure", "request payload", payload, request.debug);
@@ -316,6 +318,7 @@ export class AzureAdapter implements LLMAdapter {
tools,
temperature: request.config?.temperature ?? this.config.temperature,
max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
};
logProviderPayload("azure", "request payload", payload, request.debug);
const response = await client.chat.completions.create(payload);
diff --git a/packages/llm-sdk/src/adapters/base.ts b/packages/llm-sdk/src/adapters/base.ts
index dca81e2..0a38666 100644
--- a/packages/llm-sdk/src/adapters/base.ts
+++ b/packages/llm-sdk/src/adapters/base.ts
@@ -4,6 +4,7 @@ import type {
ActionDefinition,
StreamEvent,
LLMConfig,
+ ResponseFormat,
ToolDefinition,
WebSearchConfig,
ProviderToolRuntimeOptions,
@@ -17,6 +18,7 @@ export interface RequestLLMConfig {
model?: string;
temperature?: number;
maxTokens?: number;
+ responseFormat?: ResponseFormat;
}
/**
@@ -285,6 +287,172 @@ export function normalizeObjectJsonSchema(
return normalized;
}
+/**
+ * Newer OpenAI model families (o1/o3/o4 reasoning, gpt-5.x) require
+ * `max_completion_tokens` instead of `max_tokens` and reject `temperature`
+ * on the Chat Completions endpoint.
+ *
+ * @param modelId - Model identifier to classify. `undefined` and the empty
+ *   string are treated as "not a reasoning model".
+ * @returns `true` when the id starts with `o1`, `o3`, `o4` or `gpt-5`. The
+ *   match is a case-insensitive prefix test anchored at the start of the
+ *   string, so `o3-mini` matches while a provider-prefixed id such as
+ *   `openai/o3-mini` does not.
+ */
+export function isOpenAIReasoningModel(modelId: string | undefined): boolean {
+ if (!modelId) return false;
+ return /^(o1|o3|o4|gpt-5)/i.test(modelId);
+}
+
+/**
+ * Build the token-limit + temperature fields for a Chat Completions payload,
+ * accounting for the o-series / gpt-5 parameter rename.
+ *
+ * @param modelId - Model the payload is being built for.
+ * @param maxTokens - Requested output-token cap; passed through as-is
+ *   (including `undefined`).
+ * @param temperature - Requested sampling temperature; only forwarded for
+ *   non-reasoning models.
+ * @returns An object meant to be spread into the request payload:
+ *   `{ max_completion_tokens }` for reasoning models, otherwise
+ *   `{ max_tokens, temperature }`.
+ */
+export function buildOpenAITokenParams(
+  modelId: string | undefined,
+  maxTokens: number | undefined,
+  temperature: number | undefined,
+): Record<string, unknown> {
+  if (isOpenAIReasoningModel(modelId)) {
+    // Reasoning models reject `temperature`, so it is deliberately left out.
+    return { max_completion_tokens: maxTokens };
+  }
+  return { max_tokens: maxTokens, temperature };
+}
+
+/**
+ * JSON Schema keywords whose value is a map from caller-chosen names to
+ * subschemas. The names in these maps are data, not keywords, so they must
+ * never be dropped or renamed — only the subschemas they point to are walked.
+ */
+const SCHEMA_NAME_MAP_KEYWORDS: ReadonlySet<string> = new Set([
+  "properties",
+  "patternProperties",
+  "$defs",
+  "definitions",
+  "dependentSchemas",
+]);
+
+/**
+ * JSON Schema keywords whose value is literal instance data rather than a
+ * subschema. Their contents are copied through untouched.
+ */
+const SCHEMA_LITERAL_KEYWORDS: ReadonlySet<string> = new Set([
+  "enum",
+  "const",
+  "default",
+  "examples",
+]);
+
+/**
+ * Recursively walk a JSON Schema and drop keys the provider rejects.
+ *
+ * Only schema keywords are filtered and renamed. Property names inside
+ * `properties` / `$defs`-style maps and literal values under `enum` /
+ * `const` / `default` / `examples` are preserved, so a field that happens to
+ * be called `pattern` or `minimum` is not deleted while `required` still
+ * lists it.
+ *
+ * @param schema - Schema node to process. Arrays are mapped element-wise;
+ *   primitives are returned unchanged.
+ * @param keysToDrop - Keyword names to remove wherever they appear as
+ *   schema keywords.
+ * @param options.forceAdditionalPropertiesFalse - When set, every node whose
+ *   `type` is exactly `"object"` gets `additionalProperties: false`.
+ * @param options.renameKeys - Keyword renames applied after dropping
+ *   (e.g. `{ oneOf: "anyOf" }`).
+ * @returns A new schema value; the input is not mutated.
+ */
+function stripSchemaKeys(
+  schema: unknown,
+  keysToDrop: ReadonlySet<string>,
+  options: {
+    forceAdditionalPropertiesFalse?: boolean;
+    renameKeys?: Record<string, string>;
+  } = {},
+): unknown {
+  if (Array.isArray(schema)) {
+    return schema.map((item) => stripSchemaKeys(item, keysToDrop, options));
+  }
+  if (!schema || typeof schema !== "object") return schema;
+
+  const { renameKeys } = options;
+  const out: Record<string, unknown> = {};
+  for (const [key, value] of Object.entries(
+    schema as Record<string, unknown>,
+  )) {
+    if (keysToDrop.has(key)) continue;
+
+    // Own-property check so keys like "constructor" never resolve to
+    // something inherited from Object.prototype.
+    const mapped =
+      renameKeys && Object.prototype.hasOwnProperty.call(renameKeys, key)
+        ? renameKeys[key]
+        : undefined;
+    const renamed = mapped ?? key;
+
+    if (SCHEMA_LITERAL_KEYWORDS.has(key)) {
+      // Instance data: not a schema, so nothing inside it is a keyword.
+      out[renamed] = value;
+    } else if (
+      SCHEMA_NAME_MAP_KEYWORDS.has(key) &&
+      value !== null &&
+      typeof value === "object" &&
+      !Array.isArray(value)
+    ) {
+      // Name → subschema map: keep every name, recurse into each subschema.
+      const named: Record<string, unknown> = {};
+      for (const [name, subschema] of Object.entries(
+        value as Record<string, unknown>,
+      )) {
+        named[name] = stripSchemaKeys(subschema, keysToDrop, options);
+      }
+      out[renamed] = named;
+    } else {
+      out[renamed] = stripSchemaKeys(value, keysToDrop, options);
+    }
+  }
+
+  if (options.forceAdditionalPropertiesFalse && out.type === "object") {
+    out.additionalProperties = false;
+  }
+  return out;
+}
+
+/**
+ * OpenAI Chat Completions `response_format` payload.
+ *
+ * @param rf - Unified response format, or `undefined` when the caller did
+ *   not request structured output.
+ * @returns `undefined` when `rf` is absent, `{ type: "json_object" }` for
+ *   free-form JSON mode, otherwise a `json_schema` payload whose schema has
+ *   been passed through `normalizeObjectJsonSchema` and whose `strict` flag
+ *   defaults to `true`.
+ */
+export function toOpenAIResponseFormat(
+  rf: ResponseFormat | undefined,
+): Record<string, unknown> | undefined {
+  if (!rf) return undefined;
+  if (rf.type === "json_object") return { type: "json_object" };
+  return {
+    type: "json_schema",
+    json_schema: {
+      name: rf.json_schema.name,
+      schema: normalizeObjectJsonSchema(rf.json_schema.schema),
+      // `??` rather than `||` so an explicit `strict: false` is respected.
+      strict: rf.json_schema.strict ?? true,
+    },
+  };
+}
+
+/**
+ * OpenAI Responses API `text.format` payload (different shape than Chat
+ * Completions: `name` / `schema` / `strict` sit directly next to `type`
+ * instead of under a nested `json_schema` key).
+ *
+ * @param rf - Unified response format, or `undefined` when the caller did
+ *   not request structured output.
+ * @returns `undefined` when `rf` is absent, `{ type: "json_object" }` for
+ *   free-form JSON mode, otherwise the flattened `json_schema` format with
+ *   `strict` defaulting to `true`.
+ */
+export function toOpenAIResponsesTextFormat(
+  rf: ResponseFormat | undefined,
+): Record<string, unknown> | undefined {
+  if (!rf) return undefined;
+  // `text.format` accepts plain JSON mode too. Returning `undefined` here
+  // would silently drop the caller's request for JSON output.
+  if (rf.type === "json_object") return { type: "json_object" };
+  return {
+    type: "json_schema",
+    name: rf.json_schema.name,
+    schema: normalizeObjectJsonSchema(rf.json_schema.schema),
+    strict: rf.json_schema.strict ?? true,
+  };
+}
+
+/**
+ * JSON Schema keywords removed before a schema is sent to Anthropic.
+ *
+ * Anthropic's structured-output schema subset is narrower than OpenAI's:
+ * no numeric (minimum/maximum/multipleOf) or length (minLength/maxLength)
+ * constraints, and `additionalProperties: false` is required on every object.
+ */
+const ANTHROPIC_UNSUPPORTED_KEYS: ReadonlySet<string> = new Set([
+  "minimum",
+  "maximum",
+  "exclusiveMinimum",
+  "exclusiveMaximum",
+  "multipleOf",
+  "minLength",
+  "maxLength",
+  "minItems",
+  "maxItems",
+  "minProperties",
+  "maxProperties",
+  "pattern",
+  "$schema",
+]);
+
+/**
+ * Anthropic `output_config.format` payload.
+ *
+ * @param rf - Unified response format. Only `json_schema` produces a
+ *   payload; `json_object` and `undefined` yield `undefined` (callers handle
+ *   schema-less JSON mode separately).
+ * @returns `{ format: { type: "json_schema", schema } }` with the schema
+ *   sanitized for Anthropic: unsupported keywords stripped, `oneOf` renamed
+ *   to `anyOf`, and `additionalProperties: false` forced on object nodes.
+ *   `json_schema.name` and `json_schema.strict` are not forwarded.
+ */
+export function toAnthropicOutputConfig(
+  rf: ResponseFormat | undefined,
+): Record<string, unknown> | undefined {
+  if (!rf || rf.type !== "json_schema") return undefined;
+  // Anthropic accepts `anyOf` but rejects `oneOf` — convert rather than strip,
+  // otherwise discriminated-union schemas silently lose their union semantics.
+  const schema = stripSchemaKeys(
+    rf.json_schema.schema,
+    ANTHROPIC_UNSUPPORTED_KEYS,
+    {
+      forceAdditionalPropertiesFalse: true,
+      renameKeys: { oneOf: "anyOf" },
+    },
+  ) as Record<string, unknown>;
+  return {
+    format: {
+      type: "json_schema",
+      schema,
+    },
+  };
+}
+
+/**
+ * JSON Schema keywords removed before a schema is sent to Gemini.
+ *
+ * Gemini accepts an OpenAPI 3.0 subset and silently ignores unknown keywords;
+ * `oneOf`, `anyOf`, `$ref`, and `pattern` are not supported.
+ */
+const GEMINI_UNSUPPORTED_KEYS: ReadonlySet<string> = new Set([
+  "oneOf",
+  "anyOf",
+  "$ref",
+  "$defs",
+  "definitions",
+  "pattern",
+  "$schema",
+  "additionalProperties",
+]);
+
+/**
+ * Gemini `responseJsonSchema` payload.
+ *
+ * @param rf - Unified response format. Only `json_schema` produces a schema;
+ *   `json_object` and `undefined` yield `undefined`.
+ * @returns A copy of `json_schema.schema` with the unsupported keywords
+ *   removed at every level. Because `$ref` / `$defs` are stripped rather
+ *   than resolved, schemas relying on shared definitions must be inlined by
+ *   the caller beforehand.
+ */
+export function toGeminiSchema(
+  rf: ResponseFormat | undefined,
+): Record<string, unknown> | undefined {
+  if (!rf || rf.type !== "json_schema") return undefined;
+  return stripSchemaKeys(
+    rf.json_schema.schema,
+    GEMINI_UNSUPPORTED_KEYS,
+  ) as Record<string, unknown>;
+}
+
+/**
+ * Ollama `format` field — `"json"` for free-form, schema object for constrained.
+ *
+ * @param rf - Unified response format, or `undefined` when the caller did
+ *   not request structured output.
+ * @returns `undefined` when `rf` is absent, the string `"json"` for
+ *   `json_object`, otherwise `json_schema.schema` itself (passed through
+ *   unmodified; `name` and `strict` are not used).
+ */
+export function toOllamaFormat(
+  rf: ResponseFormat | undefined,
+): string | Record<string, unknown> | undefined {
+  if (!rf) return undefined;
+  if (rf.type === "json_object") return "json";
+  return rf.json_schema.schema;
+}
+
/**
* Convert actions to OpenAI tool format
*/
diff --git a/packages/llm-sdk/src/adapters/google.ts b/packages/llm-sdk/src/adapters/google.ts
index 83d44e1..373a93f 100644
--- a/packages/llm-sdk/src/adapters/google.ts
+++ b/packages/llm-sdk/src/adapters/google.ts
@@ -18,7 +18,7 @@ import type {
ChatCompletionRequest,
CompletionResult,
} from "./base";
-import { formatTools, logProviderPayload } from "./base";
+import { formatTools, logProviderPayload, toGeminiSchema } from "./base";
// ============================================
// Types
@@ -372,6 +372,15 @@ export class GoogleAdapter implements LLMAdapter {
// Emit message start
yield { type: "message:start", id: messageId };
+ const responseFormat = request.config?.responseFormat;
+ const geminiSchema = toGeminiSchema(responseFormat);
+ const responseFormatGenConfig: Record = responseFormat
+ ? {
+ responseMimeType: "application/json",
+ ...(geminiSchema ? { responseJsonSchema: geminiSchema } : {}),
+ }
+ : {};
+
try {
logProviderPayload(
"google",
@@ -386,6 +395,7 @@ export class GoogleAdapter implements LLMAdapter {
generationConfig: {
temperature: request.config?.temperature ?? this.config.temperature,
maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
+ ...responseFormatGenConfig,
},
messageParts: mergedContents[mergedContents.length - 1]?.parts,
},
@@ -401,6 +411,7 @@ export class GoogleAdapter implements LLMAdapter {
generationConfig: {
temperature: request.config?.temperature ?? this.config.temperature,
maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
+ ...responseFormatGenConfig,
},
});
@@ -647,6 +658,15 @@ export class GoogleAdapter implements LLMAdapter {
const tools = formatToolsForGemini(request.actions);
+ const responseFormat = request.config?.responseFormat;
+ const geminiSchema = toGeminiSchema(responseFormat);
+ const responseFormatGenConfig: Record = responseFormat
+ ? {
+ responseMimeType: "application/json",
+ ...(geminiSchema ? { responseJsonSchema: geminiSchema } : {}),
+ }
+ : {};
+
const payload = {
model: modelId,
history: mergedContents.slice(0, -1),
@@ -657,6 +677,7 @@ export class GoogleAdapter implements LLMAdapter {
generationConfig: {
temperature: request.config?.temperature ?? this.config.temperature,
maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
+ ...responseFormatGenConfig,
},
messageParts: mergedContents[mergedContents.length - 1]?.parts,
};
@@ -670,6 +691,7 @@ export class GoogleAdapter implements LLMAdapter {
generationConfig: {
temperature: request.config?.temperature ?? this.config.temperature,
maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
+ ...responseFormatGenConfig,
},
});
diff --git a/packages/llm-sdk/src/adapters/ollama.ts b/packages/llm-sdk/src/adapters/ollama.ts
index 866a82c..88085ed 100644
--- a/packages/llm-sdk/src/adapters/ollama.ts
+++ b/packages/llm-sdk/src/adapters/ollama.ts
@@ -5,7 +5,12 @@ import type {
} from "../core/stream-events";
import { generateMessageId, generateToolCallId } from "../core/utils";
import type { LLMAdapter, ChatCompletionRequest } from "./base";
-import { formatMessages, formatTools, logProviderPayload } from "./base";
+import {
+ formatMessages,
+ formatTools,
+ logProviderPayload,
+ toOllamaFormat,
+} from "./base";
import type { OllamaModelOptions } from "../providers/types";
/**
@@ -288,12 +293,14 @@ export class OllamaAdapter implements LLMAdapter {
Object.assign(ollamaOptions, this.config.options);
}
+ const ollamaFormat = toOllamaFormat(request.config?.responseFormat);
const payload = {
model: request.config?.model || this.model,
messages,
tools,
stream: true,
options: ollamaOptions,
+ ...(ollamaFormat !== undefined ? { format: ollamaFormat } : {}),
};
logProviderPayload("ollama", "request payload", payload, request.debug);
const response = await fetch(`${this.baseUrl}/api/chat`, {
diff --git a/packages/llm-sdk/src/adapters/openai.ts b/packages/llm-sdk/src/adapters/openai.ts
index d0795ec..114ecec 100644
--- a/packages/llm-sdk/src/adapters/openai.ts
+++ b/packages/llm-sdk/src/adapters/openai.ts
@@ -12,10 +12,13 @@ import type {
CompletionResult,
} from "./base";
import {
+ buildOpenAITokenParams,
formatMessagesForOpenAI,
formatTools,
logProviderPayload,
normalizeObjectJsonSchema,
+ toOpenAIResponseFormat,
+ toOpenAIResponsesTextFormat,
} from "./base";
/**
@@ -225,6 +228,9 @@ export class OpenAIAdapter implements LLMAdapter {
): Promise {
const client = await this.getClient();
const openaiToolOptions = request.providerToolOptions?.openai;
+ const responsesTextFormat = toOpenAIResponsesTextFormat(
+ request.config?.responseFormat,
+ );
const payload = {
model: request.config?.model || this.model,
instructions: request.systemPrompt,
@@ -239,6 +245,7 @@ export class OpenAIAdapter implements LLMAdapter {
parallel_tool_calls: openaiToolOptions?.parallelToolCalls,
temperature: request.config?.temperature ?? this.config.temperature,
max_output_tokens: request.config?.maxTokens ?? this.config.maxTokens,
+ ...(responsesTextFormat ? { text: { format: responsesTextFormat } } : {}),
stream: false,
};
@@ -427,15 +434,20 @@ export class OpenAIAdapter implements LLMAdapter {
},
}
: openaiToolOptions?.toolChoice;
+ const modelIdForPayload = request.config?.model || this.model;
const payload = {
- model: request.config?.model || this.model,
+ model: modelIdForPayload,
messages,
tools: tools.length > 0 ? tools : undefined,
tool_choice: tools.length > 0 ? toolChoice : undefined,
parallel_tool_calls:
tools.length > 0 ? openaiToolOptions?.parallelToolCalls : undefined,
- temperature: request.config?.temperature ?? this.config.temperature,
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
+ ...buildOpenAITokenParams(
+ modelIdForPayload,
+ request.config?.maxTokens ?? this.config.maxTokens,
+ request.config?.temperature ?? this.config.temperature,
+ ),
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
stream: true,
stream_options: { include_usage: true },
};
@@ -654,15 +666,20 @@ export class OpenAIAdapter implements LLMAdapter {
}
: openaiToolOptions?.toolChoice;
+ const modelIdForCompletePayload = request.config?.model || this.model;
const payload = {
- model: request.config?.model || this.model,
+ model: modelIdForCompletePayload,
messages,
tools: tools.length > 0 ? tools : undefined,
tool_choice: tools.length > 0 ? toolChoice : undefined,
parallel_tool_calls:
tools.length > 0 ? openaiToolOptions?.parallelToolCalls : undefined,
- temperature: request.config?.temperature ?? this.config.temperature,
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
+ ...buildOpenAITokenParams(
+ modelIdForCompletePayload,
+ request.config?.maxTokens ?? this.config.maxTokens,
+ request.config?.temperature ?? this.config.temperature,
+ ),
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
stream: false,
};
diff --git a/packages/llm-sdk/src/core/generate-text.ts b/packages/llm-sdk/src/core/generate-text.ts
index 0d9a099..6249969 100644
--- a/packages/llm-sdk/src/core/generate-text.ts
+++ b/packages/llm-sdk/src/core/generate-text.ts
@@ -47,6 +47,12 @@ export async function generateText(
): Promise {
const { model, tools, maxSteps = 1, signal } = params;
+ if (params.responseFormat && model.capabilities.supportsJsonMode === false) {
+ console.warn(
+ `[llm-sdk] ${model.provider}/${model.modelId} does not support structured output (responseFormat); the request will be sent but the provider may ignore it.`,
+ );
+ }
+
// Build initial messages
let messages = buildMessages(params);
const steps: GenerateStep[] = [];
@@ -71,6 +77,7 @@ export async function generateText(
tools: formattedTools,
temperature: params.temperature,
maxTokens: params.maxTokens,
+ responseFormat: params.responseFormat,
signal,
});
diff --git a/packages/llm-sdk/src/core/stream-events.ts b/packages/llm-sdk/src/core/stream-events.ts
index 808fb87..d89f5be 100644
--- a/packages/llm-sdk/src/core/stream-events.ts
+++ b/packages/llm-sdk/src/core/stream-events.ts
@@ -286,12 +286,31 @@ export type StreamEvent =
| ThreadCreatedEvent
| DoneEvent;
+/**
+ * Structured-output / JSON-mode request format.
+ *
+ * Uses OpenAI's `response_format` shape as the unified surface; each adapter
+ * translates to its provider's native field (Anthropic `output_config`,
+ * Gemini `responseJsonSchema`, Ollama `format`, etc.).
+ *
+ * - `json_object` — free-form JSON, no schema attached.
+ * - `json_schema` — output constrained by the supplied JSON Schema.
+ */
+export type ResponseFormat =
+  | { type: "json_object" }
+  | {
+      type: "json_schema";
+      json_schema: {
+        /** Name of the schema, forwarded to providers that accept one. */
+        name: string;
+        /** JSON Schema document describing the expected response. */
+        schema: Record<string, unknown>;
+        /** Strict schema adherence; optional. */
+        strict?: boolean;
+      };
+    };
+
/**
* LLM configuration
*/
export interface LLMConfig {
temperature?: number;
maxTokens?: number;
+ responseFormat?: ResponseFormat;
}
/**
diff --git a/packages/llm-sdk/src/core/stream-text.ts b/packages/llm-sdk/src/core/stream-text.ts
index 41ab6f2..8904d4f 100644
--- a/packages/llm-sdk/src/core/stream-text.ts
+++ b/packages/llm-sdk/src/core/stream-text.ts
@@ -50,6 +50,12 @@ export async function streamText(
): Promise {
const { model, tools, maxSteps = 1, signal } = params;
+ if (params.responseFormat && model.capabilities.supportsJsonMode === false) {
+ console.warn(
+ `[llm-sdk] ${model.provider}/${model.modelId} does not support structured output (responseFormat); the request will be sent but the provider may ignore it.`,
+ );
+ }
+
// State for collecting results
let fullText = "";
let finalUsage: TokenUsage = {
@@ -90,6 +96,7 @@ export async function streamText(
tools: formattedTools,
temperature: params.temperature,
maxTokens: params.maxTokens,
+ responseFormat: params.responseFormat,
signal,
})) {
switch (chunk.type) {
diff --git a/packages/llm-sdk/src/core/types.ts b/packages/llm-sdk/src/core/types.ts
index ae9dad7..53a16e9 100644
--- a/packages/llm-sdk/src/core/types.ts
+++ b/packages/llm-sdk/src/core/types.ts
@@ -207,6 +207,8 @@ export interface DoGenerateParams {
temperature?: number;
/** Maximum tokens to generate */
maxTokens?: number;
+ /** Structured-output / JSON-mode request format (provider-translated) */
+ responseFormat?: import("./stream-events").ResponseFormat;
/** Abort signal */
signal?: AbortSignal;
}
@@ -312,6 +314,8 @@ export interface GenerateTextParams {
temperature?: number;
/** Maximum tokens to generate */
maxTokens?: number;
+ /** Structured-output / JSON-mode request format */
+ responseFormat?: import("./stream-events").ResponseFormat;
/** Abort signal */
signal?: AbortSignal;
}
diff --git a/packages/llm-sdk/src/providers/anthropic/index.ts b/packages/llm-sdk/src/providers/anthropic/index.ts
index 9e9c3e7..75e9021 100644
--- a/packages/llm-sdk/src/providers/anthropic/index.ts
+++ b/packages/llm-sdk/src/providers/anthropic/index.ts
@@ -157,7 +157,8 @@ export function createAnthropic(
"image/gif",
"image/webp",
],
- supportsJsonMode: false,
+ // Native `output_config.format` — GA on Claude 3.5 and newer.
+ supportsJsonMode: true,
supportsSystemMessages: true,
};
};
diff --git a/packages/llm-sdk/src/providers/anthropic/provider.ts b/packages/llm-sdk/src/providers/anthropic/provider.ts
index 4b48b6e..e802fe2 100644
--- a/packages/llm-sdk/src/providers/anthropic/provider.ts
+++ b/packages/llm-sdk/src/providers/anthropic/provider.ts
@@ -22,6 +22,7 @@ import type {
FinishReason,
CoreMessage,
} from "../../core/types";
+import { toAnthropicOutputConfig } from "../../adapters/base";
// ============================================
// Model Definitions
@@ -32,6 +33,12 @@ interface AnthropicModelConfig {
tools: boolean;
thinking: boolean;
pdf: boolean;
+ /**
+ * Native structured-output (`output_config.format`) support — GA on Claude
+ * API and Bedrock as of late 2025 for Claude 3.5 and newer. Older Claude 3
+ * base models must use a forced-tool fallback.
+ */
+ jsonMode: boolean;
maxTokens: number;
}
@@ -42,6 +49,7 @@ const ANTHROPIC_MODELS: Record = {
tools: true,
thinking: true,
pdf: true,
+ jsonMode: true,
maxTokens: 200000,
},
"claude-opus-4-20250514": {
@@ -49,6 +57,7 @@ const ANTHROPIC_MODELS: Record = {
tools: true,
thinking: true,
pdf: true,
+ jsonMode: true,
maxTokens: 200000,
},
@@ -58,6 +67,7 @@ const ANTHROPIC_MODELS: Record = {
tools: true,
thinking: true,
pdf: true,
+ jsonMode: true,
maxTokens: 200000,
},
"claude-3-7-sonnet-latest": {
@@ -65,6 +75,7 @@ const ANTHROPIC_MODELS: Record = {
tools: true,
thinking: true,
pdf: true,
+ jsonMode: true,
maxTokens: 200000,
},
@@ -74,6 +85,7 @@ const ANTHROPIC_MODELS: Record = {
tools: true,
thinking: false,
pdf: true,
+ jsonMode: true,
maxTokens: 200000,
},
"claude-3-5-sonnet-latest": {
@@ -81,6 +93,7 @@ const ANTHROPIC_MODELS: Record = {
tools: true,
thinking: false,
pdf: true,
+ jsonMode: true,
maxTokens: 200000,
},
"claude-3-5-haiku-20241022": {
@@ -88,6 +101,7 @@ const ANTHROPIC_MODELS: Record = {
tools: true,
thinking: false,
pdf: false,
+ jsonMode: true,
maxTokens: 200000,
},
"claude-3-5-haiku-latest": {
@@ -95,6 +109,7 @@ const ANTHROPIC_MODELS: Record = {
tools: true,
thinking: false,
pdf: false,
+ jsonMode: true,
maxTokens: 200000,
},
@@ -104,6 +119,7 @@ const ANTHROPIC_MODELS: Record = {
tools: true,
thinking: false,
pdf: false,
+ jsonMode: false,
maxTokens: 200000,
},
"claude-3-sonnet-20240229": {
@@ -111,6 +127,7 @@ const ANTHROPIC_MODELS: Record = {
tools: true,
thinking: false,
pdf: false,
+ jsonMode: false,
maxTokens: 200000,
},
"claude-3-haiku-20240307": {
@@ -118,6 +135,7 @@ const ANTHROPIC_MODELS: Record = {
tools: true,
thinking: false,
pdf: false,
+ jsonMode: false,
maxTokens: 200000,
},
};
@@ -175,7 +193,7 @@ export function anthropic(
supportsVision: modelConfig.vision,
supportsTools: modelConfig.tools,
supportsStreaming: true,
- supportsJsonMode: false,
+ supportsJsonMode: modelConfig.jsonMode,
supportsThinking: modelConfig.thinking,
supportsPDF: modelConfig.pdf,
maxTokens: modelConfig.maxTokens,
@@ -209,6 +227,11 @@ export function anthropic(
};
}
+ const outputConfig = toAnthropicOutputConfig(params.responseFormat);
+ if (outputConfig) {
+ requestOptions.output_config = outputConfig;
+ }
+
const response = await client.messages.create(requestOptions);
// Parse response
@@ -266,6 +289,11 @@ export function anthropic(
};
}
+ const outputConfig = toAnthropicOutputConfig(params.responseFormat);
+ if (outputConfig) {
+ requestOptions.output_config = outputConfig;
+ }
+
const stream = await client.messages.stream(requestOptions);
let currentToolUse: { id: string; name: string; input: string } | null =
diff --git a/packages/llm-sdk/src/providers/fireworks/provider.ts b/packages/llm-sdk/src/providers/fireworks/provider.ts
index 78b3cdb..49d17de 100644
--- a/packages/llm-sdk/src/providers/fireworks/provider.ts
+++ b/packages/llm-sdk/src/providers/fireworks/provider.ts
@@ -25,6 +25,7 @@ import type {
FinishReason,
CoreMessage,
} from "../../core/types";
+import { toOpenAIResponseFormat } from "../../adapters/base";
// ============================================
// Provider Options
@@ -107,6 +108,11 @@ export function fireworks(
requestBody.tools = params.tools;
}
+ const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+ if (responseFormat) {
+ requestBody.response_format = responseFormat;
+ }
+
const response = await client.chat.completions.create(requestBody);
const choice = response.choices[0];
const message = choice.message;
@@ -148,6 +154,11 @@ export function fireworks(
requestBody.tools = params.tools;
}
+ const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+ if (responseFormat) {
+ requestBody.response_format = responseFormat;
+ }
+
const stream = await client.chat.completions.create(requestBody);
// Track tool calls by index (Fireworks may repeat tc.id across chunks)
diff --git a/packages/llm-sdk/src/providers/google/provider.ts b/packages/llm-sdk/src/providers/google/provider.ts
index 2659ce6..6d55cf3 100644
--- a/packages/llm-sdk/src/providers/google/provider.ts
+++ b/packages/llm-sdk/src/providers/google/provider.ts
@@ -25,6 +25,7 @@ import type {
FinishReason,
CoreMessage,
} from "../../core/types";
+import { toOpenAIResponseFormat } from "../../adapters/base";
// ============================================
// Model Definitions
@@ -211,6 +212,7 @@ export function google(
tools: params.tools as any,
temperature: params.temperature,
max_tokens: params.maxTokens,
+ response_format: toOpenAIResponseFormat(params.responseFormat),
});
const choice = response.choices[0];
@@ -249,6 +251,7 @@ export function google(
tools: params.tools as any,
temperature: params.temperature,
max_tokens: params.maxTokens,
+ response_format: toOpenAIResponseFormat(params.responseFormat),
stream: true,
});
diff --git a/packages/llm-sdk/src/providers/ollama/index.ts b/packages/llm-sdk/src/providers/ollama/index.ts
index 91377e8..b87cc39 100644
--- a/packages/llm-sdk/src/providers/ollama/index.ts
+++ b/packages/llm-sdk/src/providers/ollama/index.ts
@@ -204,7 +204,8 @@ export function createOllama(config: OllamaProviderConfig = {}): AIProvider {
supportedImageTypes: model.vision
? ["image/png", "image/jpeg", "image/gif"]
: [],
- supportsJsonMode: false,
+ // Ollama supports `format: "json"`; JSON-schema constrained output requires Ollama 0.5+.
+ supportsJsonMode: true,
supportsSystemMessages: true,
};
};
diff --git a/packages/llm-sdk/src/providers/openai/provider.ts b/packages/llm-sdk/src/providers/openai/provider.ts
index 4c6d6e8..fc9a153 100644
--- a/packages/llm-sdk/src/providers/openai/provider.ts
+++ b/packages/llm-sdk/src/providers/openai/provider.ts
@@ -26,6 +26,10 @@ import type {
FinishReason,
CoreMessage,
} from "../../core/types";
+import {
+ buildOpenAITokenParams,
+ toOpenAIResponseFormat,
+} from "../../adapters/base";
// ============================================
// Model Definitions
@@ -194,8 +198,12 @@ export function openai(
model: modelId,
messages,
tools: params.tools as any,
- temperature: params.temperature,
- max_tokens: params.maxTokens,
+ ...buildOpenAITokenParams(
+ modelId,
+ params.maxTokens,
+ params.temperature,
+ ),
+ response_format: toOpenAIResponseFormat(params.responseFormat),
});
const choice = response.choices[0];
@@ -232,8 +240,12 @@ export function openai(
model: modelId,
messages,
tools: params.tools as any,
- temperature: params.temperature,
- max_tokens: params.maxTokens,
+ ...buildOpenAITokenParams(
+ modelId,
+ params.maxTokens,
+ params.temperature,
+ ),
+ response_format: toOpenAIResponseFormat(params.responseFormat),
stream: true,
});
diff --git a/packages/llm-sdk/src/providers/openrouter/provider.ts b/packages/llm-sdk/src/providers/openrouter/provider.ts
index f85d858..713ee90 100644
--- a/packages/llm-sdk/src/providers/openrouter/provider.ts
+++ b/packages/llm-sdk/src/providers/openrouter/provider.ts
@@ -25,6 +25,7 @@ import type {
FinishReason,
CoreMessage,
} from "../../core/types";
+import { toOpenAIResponseFormat } from "../../adapters/base";
// ============================================
// Model Configuration
@@ -170,6 +171,11 @@ export function openrouter(
requestBody.provider = options.providerPreferences;
}
+ const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+ if (responseFormat) {
+ requestBody.response_format = responseFormat;
+ }
+
const response = await client.chat.completions.create(requestBody);
const choice = response.choices[0];
@@ -221,6 +227,11 @@ export function openrouter(
requestBody.provider = options.providerPreferences;
}
+ const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+ if (responseFormat) {
+ requestBody.response_format = responseFormat;
+ }
+
const stream = await client.chat.completions.create(requestBody);
// Track current tool call being built
diff --git a/packages/llm-sdk/src/providers/togetherai/provider.ts b/packages/llm-sdk/src/providers/togetherai/provider.ts
index 7c1c718..e016cfa 100644
--- a/packages/llm-sdk/src/providers/togetherai/provider.ts
+++ b/packages/llm-sdk/src/providers/togetherai/provider.ts
@@ -29,6 +29,7 @@ import type {
FinishReason,
CoreMessage,
} from "../../core/types";
+import { toOpenAIResponseFormat } from "../../adapters/base";
// ============================================
// Provider Options
@@ -116,6 +117,11 @@ export function togetherai(
requestBody.tools = params.tools;
}
+ const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+ if (responseFormat) {
+ requestBody.response_format = responseFormat;
+ }
+
const response = await client.chat.completions.create(requestBody);
const choice = response.choices[0];
const message = choice.message;
@@ -157,6 +163,11 @@ export function togetherai(
requestBody.tools = params.tools;
}
+ const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+ if (responseFormat) {
+ requestBody.response_format = responseFormat;
+ }
+
const stream = await client.chat.completions.create(requestBody);
// Track tool calls by index
diff --git a/packages/llm-sdk/src/providers/xai/index.ts b/packages/llm-sdk/src/providers/xai/index.ts
index 4bc8d00..0eca6df 100644
--- a/packages/llm-sdk/src/providers/xai/index.ts
+++ b/packages/llm-sdk/src/providers/xai/index.ts
@@ -180,7 +180,8 @@ export function createXAI(config: XAIProviderConfig = {}): AIProvider {
supportedImageTypes: model.vision
? ["image/png", "image/jpeg", "image/gif", "image/webp"]
: [],
- supportsJsonMode: false,
+ // xAI accepts OpenAI-compatible `response_format` on grok-2-1212+.
+ supportsJsonMode: true,
supportsSystemMessages: true,
};
};
diff --git a/packages/llm-sdk/src/providers/xai/provider.ts b/packages/llm-sdk/src/providers/xai/provider.ts
index ab773c6..959e135 100644
--- a/packages/llm-sdk/src/providers/xai/provider.ts
+++ b/packages/llm-sdk/src/providers/xai/provider.ts
@@ -25,6 +25,7 @@ import type {
FinishReason,
CoreMessage,
} from "../../core/types";
+import { toOpenAIResponseFormat } from "../../adapters/base";
// ============================================
// Model Definitions
@@ -136,7 +137,7 @@ export function xai(
supportsVision: modelConfig.vision,
supportsTools: modelConfig.tools,
supportsStreaming: true,
- supportsJsonMode: false, // xAI doesn't support JSON mode yet
+ supportsJsonMode: true, // OpenAI-compatible `response_format`
supportsThinking: false,
supportsPDF: false,
maxTokens: modelConfig.maxTokens,
@@ -156,6 +157,7 @@ export function xai(
tools: params.tools as any,
temperature: params.temperature,
max_tokens: params.maxTokens,
+ response_format: toOpenAIResponseFormat(params.responseFormat),
});
const choice = response.choices[0];
@@ -194,6 +196,7 @@ export function xai(
tools: params.tools as any,
temperature: params.temperature,
max_tokens: params.maxTokens,
+ response_format: toOpenAIResponseFormat(params.responseFormat),
stream: true,
});
diff --git a/packages/llm-sdk/src/server/types.ts b/packages/llm-sdk/src/server/types.ts
index a2c0d8e..d182309 100644
--- a/packages/llm-sdk/src/server/types.ts
+++ b/packages/llm-sdk/src/server/types.ts
@@ -1,6 +1,7 @@
import type {
ActionDefinition,
KnowledgeBaseConfig,
+ ResponseFormat,
ToolDefinition,
ToolProfile,
WebSearchConfig,
@@ -222,7 +223,11 @@ export interface ChatRequest {
/** Bot ID (for cloud) */
botId?: string;
/** LLM config overrides */
- config?: { temperature?: number; maxTokens?: number };
+ config?: {
+ temperature?: number;
+ maxTokens?: number;
+ responseFormat?: ResponseFormat;
+ };
/** System prompt override */
systemPrompt?: string;
/** Actions from client (legacy) */