diff --git a/apps/docs/content/docs/llm-sdk/generate-text.mdx b/apps/docs/content/docs/llm-sdk/generate-text.mdx
index ea6deb0..89c417f 100644
--- a/apps/docs/content/docs/llm-sdk/generate-text.mdx
+++ b/apps/docs/content/docs/llm-sdk/generate-text.mdx
@@ -42,9 +42,23 @@ const result = await generateText({
   temperature: 0.7,
   maxTokens: 4096,
   signal: abortController.signal,
+
+  // Optional: Structured JSON output (provider-translated)
+  responseFormat: {
+    type: 'json_schema',
+    json_schema: {
+      name: 'response',
+      schema: { type: 'object', properties: { ... }, required: [...] },
+      strict: true,
+    },
+  },
 });
 ```
 
+<Callout>
+  `responseFormat` works across OpenAI, Anthropic, Google, Azure, xAI, Together, Fireworks, OpenRouter, and Ollama — each adapter translates to the provider's native field. See [Structured Output](/docs/llm-sdk/structured-output) for per-provider details and gotchas.
+</Callout>
+
 ---
 
 ## Response Object
diff --git a/apps/docs/content/docs/llm-sdk/meta.json b/apps/docs/content/docs/llm-sdk/meta.json
index 16bae7a..b892436 100644
--- a/apps/docs/content/docs/llm-sdk/meta.json
+++ b/apps/docs/content/docs/llm-sdk/meta.json
@@ -1,5 +1,5 @@
 {
   "title": "LLM SDK",
   "icon": "AiChip1",
-  "pages": ["generate-text", "stream-text", "tools"]
+  "pages": ["generate-text", "stream-text", "structured-output", "tools"]
 }
diff --git a/apps/docs/content/docs/llm-sdk/stream-text.mdx b/apps/docs/content/docs/llm-sdk/stream-text.mdx
index 17ef71e..49567dd 100644
--- a/apps/docs/content/docs/llm-sdk/stream-text.mdx
+++ b/apps/docs/content/docs/llm-sdk/stream-text.mdx
@@ -54,9 +54,23 @@ const result = await streamText({
   // Optional: Generation settings
   temperature: 0.7,
   maxTokens: 4096,
+
+  // Optional: Structured JSON output (provider-translated)
+  responseFormat: {
+    type: 'json_schema',
+    json_schema: {
+      name: 'response',
+      schema: { type: 'object', properties: { ... }, required: [...] },
+      strict: true,
+    },
+  },
 });
 ```
 
+<Callout>
+  `responseFormat` works across all supported providers — each adapter translates to the provider's native field. See [Structured Output](/docs/llm-sdk/structured-output) for per-provider details and gotchas.
+</Callout>
+
 ---
 
 ## Response Object
diff --git a/apps/docs/content/docs/llm-sdk/structured-output.mdx b/apps/docs/content/docs/llm-sdk/structured-output.mdx
new file mode 100644
index 0000000..7164e77
--- /dev/null
+++ b/apps/docs/content/docs/llm-sdk/structured-output.mdx
@@ -0,0 +1,219 @@
+---
+title: Structured Output
+description: Get JSON-schema-validated responses from any provider
+---
+
+import { Callout } from 'fumadocs-ui/components/callout';
+import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
+
+Pass `responseFormat` to `generateText()` or `streamText()` to get
+JSON-schema-validated responses. The SDK translates the unified shape to
+each provider's native API — works the same whether you're on OpenAI,
+Anthropic, Google, Azure, xAI, Together, Fireworks, OpenRouter, or Ollama.
+
+```ts
+import { generateText } from '@yourgpt/llm-sdk';
+import { openai } from '@yourgpt/llm-sdk/openai';
+
+const result = await generateText({
+  model: openai('gpt-4o'),
+  prompt: 'List the top 3 fastest land animals.',
+  responseFormat: {
+    type: 'json_schema',
+    json_schema: {
+      name: 'animals_response',
+      schema: {
+        type: 'object',
+        properties: {
+          animals: {
+            type: 'array',
+            items: {
+              type: 'object',
+              properties: {
+                name: { type: 'string' },
+                top_speed_kmh: { type: 'number' },
+              },
+              required: ['name', 'top_speed_kmh'],
+            },
+          },
+        },
+        required: ['animals'],
+      },
+      strict: true,
+    },
+  },
+});
+
+const data = JSON.parse(result.text);
+// → { animals: [{ name: 'Cheetah', top_speed_kmh: 120 }, ...] }
+```
+
+---
+
+## ResponseFormat shape
+
+The unified type uses OpenAI's `response_format` shape — callers who already
+write `response_format` for OpenAI can pass it through unchanged.
+
+```ts
+type ResponseFormat =
+  | { type: 'json_object' }
+  | {
+      type: 'json_schema';
+      json_schema: {
+        name: string;
+        schema: Record<string, unknown>; // JSON Schema
+        strict?: boolean;                // default: true
+      };
+    };
+```
+
+- `type: 'json_object'` — free-form JSON, no schema enforcement. Adapters
+  that don't have a native "JSON mode without schema" (Anthropic) inject a
+  system-prompt suffix asking for JSON instead.
+- `type: 'json_schema'` — schema-validated output. Recommended.
+
+---
+
+## Per-provider translation
+
+Each adapter translates `responseFormat` to its provider's native field:
+
+| Provider | Native field |
+|---|---|
+| OpenAI Chat / Azure / xAI / Together / Fireworks / OpenRouter | `response_format` |
+| OpenAI Responses API | `text.format` (different shape) |
+| Anthropic Claude 3.5+ | `output_config.format` |
+| Google Gemini | `responseJsonSchema` |
+| Ollama 0.5+ | `format` |
+
+You don't need to think about this — the SDK handles it. The notes below
+matter only if you hit edge cases.
+
+---
+
+## Provider gotchas
+
+### Anthropic — schema sanitization
+
+Anthropic's structured-output schema subset is narrower than OpenAI's. The
+adapter automatically strips keys Anthropic rejects so your call doesn't 400:
+
+- **Stripped:** `minimum`, `maximum`, `exclusiveMinimum`, `exclusiveMaximum`,
+  `multipleOf`, `minLength`, `maxLength`, `minItems`, `maxItems`,
+  `minProperties`, `maxProperties`, `pattern`, `$schema`
+- **Converted:** `oneOf` → `anyOf` (Anthropic accepts the latter, not the former)
+- **Forced:** `additionalProperties: false` on every object
+
+If you rely on numeric or length constraints for validation, do that
+client-side after `JSON.parse()` rather than encoding it in the schema.
+
+<Callout type="warn">
+  Anthropic's `output_config.format` is GA on Claude API and AWS Bedrock for
+  Claude 3.5 / 3.7 / 4 series. It is NOT available on Google Vertex AI. Older
+  Claude 3 base models (`claude-3-opus-20240229` etc.) are not supported either.
+  Tracking: [issue #96](https://github.com/YourGPT/copilot-sdk/issues/96).
+</Callout>
+
+### Google Gemini — OpenAPI subset
+
+Gemini's `responseJsonSchema` accepts an OpenAPI 3.0 subset. The adapter
+strips keys Gemini doesn't recognize:
+
+- **Stripped:** `oneOf`, `anyOf`, `$ref`, `$defs`, `definitions`, `pattern`,
+  `$schema`, `additionalProperties`
+
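+Because these keywords are dropped rather than translated, a schema that
+relies on unions (`oneOf` / `anyOf`) loses that constraint on Gemini, and
+shared definitions (`$ref` / `$defs`) must be inlined before you pass the
+schema in.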
+
+### xAI — `additionalProperties` default
+
+xAI inverts OpenAI's default: `additionalProperties` defaults to `false` and
+must be explicitly set `true` if you want extra properties allowed. The
+adapter passes your schema through unchanged, so be explicit.
+
+### Ollama — local only
+
+Ollama's `format` field requires Ollama v0.5+ for schema-constrained output
+(string `"json"` works on older versions for free-form JSON). Ollama Cloud
+does not support structured outputs at the time of writing.
+
+---
+
+## Capability gate
+
+Each model in the registry carries a `supportsJsonMode` capability flag.
+When you pass `responseFormat` to a model that doesn't support it, the SDK
+logs a warning:
+
+```
+[llm-sdk] anthropic/claude-3-haiku-20240307 does not support structured
+output (responseFormat); the request will be sent but the provider may
+ignore it.
+```
+
+This is a warning, not an error — the request still goes through. Switch
+to a supported model (e.g. `claude-3-5-sonnet-latest`) or open an issue if
+you need fallback behavior.
+
+---
+
+## Reasoning models — token semantics
+
+For OpenAI reasoning models (`o1`, `o3`, `o4`, `gpt-5.x`):
+
+- `maxTokens` is internally translated to `max_completion_tokens`
+- `temperature` is silently dropped (these models reject it)
+- `max_completion_tokens` includes BOTH reasoning tokens AND visible output
+  tokens — set generously (`maxTokens: 4000+`) or you may see truncated
+  responses
+
+```ts
+const result = await generateText({
+  model: openai('o3-mini'),
+  prompt: 'Solve: ...',
+  maxTokens: 4000,           // → max_completion_tokens internally
+  temperature: 0.7,          // → silently dropped
+  responseFormat: { ... },
+});
+```
+
+---
+
+## Fallback chains
+
+`responseFormat` works through fallback chains transparently. Each provider
+in the chain receives the schema in its native format:
+
+```ts
+import { createFallbackChain } from '@yourgpt/llm-sdk/fallback';
+import { createOpenAI } from '@yourgpt/llm-sdk/openai';
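+import { createAnthropic } from '@yourgpt/llm-sdk/anthropic';
+
+const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });
+const anthropic = createAnthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
+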
+const chain = createFallbackChain({
+  models: [
+    openai.languageModel('gpt-4o'),
+    anthropic.languageModel('claude-3-5-sonnet-latest'),
+  ],
+  strategy: 'priority',
+});
+
+// Same responseFormat works on either hop
+const result = await chain.chat({
+  messages: [...],
+  config: {
+    responseFormat: { type: 'json_schema', json_schema: { ... } },
+  },
+});
+```
+
+A working end-to-end demo lives in `examples/fallback-demo` — see the
+`/chat/structured` route.
+
+---
+
+## Next Steps
+
+- [generateText()](/docs/llm-sdk/generate-text) — full text generation API
+- [streamText()](/docs/llm-sdk/stream-text) — streaming variant
+- [Tools](/docs/llm-sdk/tools) — function calling (orthogonal to structured output)
diff --git a/examples/fallback-demo/src/index.ts b/examples/fallback-demo/src/index.ts
index dc5a6aa..027530d 100644
--- a/examples/fallback-demo/src/index.ts
+++ b/examples/fallback-demo/src/index.ts
@@ -345,6 +345,76 @@ app.post("/chat/retry-test", async (req, res) => {
   }
 });
 
+// ─── Route 9: Structured output (responseFormat) ─────────────────────────────
+//
+// Exercises the unified `responseFormat` field across an OpenAI → Anthropic →
+// Gemini fallback chain. OpenAI and Gemini (reached through its
+// OpenAI-compatible endpoint below) receive `response_format`; the Anthropic
+// adapter translates the same schema to `output_config.format`.
+//
+// Test:
+//   curl -s -X POST http://localhost:3000/chat/structured \
+//     -H "Content-Type: application/json" \
+//     -d '{"messages":[{"role":"user","content":"List the top 3 fastest land animals with their top speed in km/h."}]}'
+
+// Gemini provider for the structured-output demo. It is built with
+// `createOpenAI` and pointed at the `/v1beta/openai/` path of the Generative
+// Language API, so requests on this hop use the OpenAI wire format.
+const google = createOpenAI({
+  apiKey: process.env.GOOGLE_API_KEY,
+  baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai/",
+});
+
+// JSON Schema for the demo response: an object with a required `animals`
+// array whose items each require a string `name` and a numeric
+// `top_speed_kmh`.
+const ANIMALS_SCHEMA = {
+  type: "object",
+  properties: {
+    animals: {
+      type: "array",
+      items: {
+        type: "object",
+        properties: {
+          name: { type: "string" },
+          top_speed_kmh: { type: "number" },
+        },
+        required: ["name", "top_speed_kmh"],
+      },
+    },
+  },
+  required: ["animals"],
+} as const;
+
+// Runtime for the /chat/structured route. Its adapter is a fallback chain
+// over gpt-4o, claude-3-5-sonnet-latest and gemini-2.0-flash using the
+// "priority" strategy, with `onFallbackLog("structured")` registered as the
+// `onFallback` callback. The system prompt asks every model for JSON output.
+const structuredRuntime = createRuntime({
+  adapter: createFallbackChain({
+    models: [
+      openai.languageModel("gpt-4o"),
+      anthropic.languageModel("claude-3-5-sonnet-latest"),
+      google.languageModel("gemini-2.0-flash"),
+    ],
+    strategy: "priority",
+    onFallback: onFallbackLog("structured"),
+  }),
+  systemPrompt: "You return data as JSON matching the requested schema.",
+});
+
+// POST /chat/structured — forwards the request body to the structured
+// runtime with the animals schema attached as `responseFormat`.
+app.post("/chat/structured", async (req, res) => {
+  try {
+    // Caller-supplied config is kept, but `responseFormat` is always replaced
+    // with the demo schema so the route's output shape is fixed.
+    const config = {
+      ...req.body.config,
+      responseFormat: {
+        type: "json_schema" as const,
+        json_schema: {
+          name: "animals_response",
+          schema: ANIMALS_SCHEMA,
+          strict: true,
+        },
+      },
+    };
+
+    const result = await structuredRuntime.chat({ ...req.body, config });
+    res.json(result);
+  } catch (err) {
+    handleError(err, res);
+  }
+});
+
 // ─── Route 7: Tools + FORCED FALLBACK (dead primary) ─────────────────────────
 //
 // Same tools, but primary is a dead URL.
@@ -416,4 +486,7 @@ app.listen(PORT, () => {
   console.log(
     "  POST /chat/retry-test          — Retries dead model 2x before falling back to Claude",
   );
+  console.log(
+    "  POST /chat/structured          — JSON-schema response across OpenAI → Claude → Gemini",
+  );
 });
diff --git a/packages/llm-sdk/src/adapters/anthropic.ts b/packages/llm-sdk/src/adapters/anthropic.ts
index 6c84361..89093c6 100644
--- a/packages/llm-sdk/src/adapters/anthropic.ts
+++ b/packages/llm-sdk/src/adapters/anthropic.ts
@@ -15,6 +15,7 @@ import {
   formatMessagesForAnthropic,
   messageToAnthropicContent,
   logProviderPayload,
+  toAnthropicOutputConfig,
   type AnthropicContentBlock,
 } from "./base";
 
@@ -377,8 +378,14 @@ export class AnthropicAdapter implements LLMAdapter {
     options: Record<string, unknown>;
     messages: Array<Record<string, unknown>>;
   } {
-    // Extract system message
-    const systemMessage = request.systemPrompt || "";
+    // Extract system message; Anthropic has no schema-less JSON mode, so for
+    // `responseFormat.type === "json_object"` we coerce via a system suffix.
+    const responseFormat = request.config?.responseFormat;
+    const jsonObjectSuffix =
+      responseFormat?.type === "json_object"
+        ? "\n\nRespond with a single JSON object and no other text."
+        : "";
+    const systemMessage = (request.systemPrompt || "") + jsonObjectSuffix;
 
     // Use raw messages if provided (for agent loop with tool calls)
     let messages: Array<Record<string, unknown>>;
@@ -505,6 +512,14 @@ export class AnthropicAdapter implements LLMAdapter {
       options.server_tool_configuration = serverToolConfiguration;
     }
 
+    // Anthropic structured output (`output_config.format`) — GA on Claude API
+    // and Bedrock as of late 2025. Vertex AI does not support it; users on
+    // Vertex should use a forced-tool pattern via `actions` + `toolChoice`.
+    const outputConfig = toAnthropicOutputConfig(responseFormat);
+    if (outputConfig) {
+      options.output_config = outputConfig;
+    }
+
     // Add thinking configuration if enabled
     if (this.config.thinking?.type === "enabled") {
       options.thinking = {
diff --git a/packages/llm-sdk/src/adapters/azure.ts b/packages/llm-sdk/src/adapters/azure.ts
index c0a7d53..63e16c4 100644
--- a/packages/llm-sdk/src/adapters/azure.ts
+++ b/packages/llm-sdk/src/adapters/azure.ts
@@ -19,6 +19,7 @@ import {
   formatMessagesForOpenAI,
   formatTools,
   logProviderPayload,
+  toOpenAIResponseFormat,
 } from "./base";
 
 // ============================================
@@ -188,6 +189,7 @@ export class AzureAdapter implements LLMAdapter {
         tools,
         temperature: request.config?.temperature ?? this.config.temperature,
         max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
+        response_format: toOpenAIResponseFormat(request.config?.responseFormat),
         stream: true,
       };
       logProviderPayload("azure", "request payload", payload, request.debug);
@@ -316,6 +318,7 @@ export class AzureAdapter implements LLMAdapter {
       tools,
       temperature: request.config?.temperature ?? this.config.temperature,
       max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
+      response_format: toOpenAIResponseFormat(request.config?.responseFormat),
     };
     logProviderPayload("azure", "request payload", payload, request.debug);
     const response = await client.chat.completions.create(payload);
diff --git a/packages/llm-sdk/src/adapters/base.ts b/packages/llm-sdk/src/adapters/base.ts
index dca81e2..0a38666 100644
--- a/packages/llm-sdk/src/adapters/base.ts
+++ b/packages/llm-sdk/src/adapters/base.ts
@@ -4,6 +4,7 @@ import type {
   ActionDefinition,
   StreamEvent,
   LLMConfig,
+  ResponseFormat,
   ToolDefinition,
   WebSearchConfig,
   ProviderToolRuntimeOptions,
@@ -17,6 +18,7 @@ export interface RequestLLMConfig {
   model?: string;
   temperature?: number;
   maxTokens?: number;
+  responseFormat?: ResponseFormat;
 }
 
 /**
@@ -285,6 +287,172 @@ export function normalizeObjectJsonSchema(
   return normalized;
 }
 
+/**
+ * Reports whether a model id belongs to one of the newer OpenAI families
+ * (o1 / o3 / o4 reasoning models, gpt-5.x). On the Chat Completions endpoint
+ * those families take `max_completion_tokens` in place of `max_tokens` and
+ * reject `temperature`.
+ *
+ * @param modelId - Model identifier; matched case-insensitively by prefix.
+ * @returns `true` when the id starts with `o1`, `o3`, `o4` or `gpt-5`;
+ *   `false` otherwise, including for an empty or missing id.
+ */
+export function isOpenAIReasoningModel(modelId: string | undefined): boolean {
+  const reasoningPrefix = /^(o1|o3|o4|gpt-5)/i;
+  return modelId ? reasoningPrefix.test(modelId) : false;
+}
+
+/**
+ * Produces the token-limit and sampling fields to spread into a Chat
+ * Completions payload.
+ *
+ * @param modelId - Model the payload targets; decides which field names apply.
+ * @param maxTokens - Requested output-token ceiling, if any.
+ * @param temperature - Requested sampling temperature, if any.
+ * @returns For o-series / gpt-5 models, only `max_completion_tokens` (the
+ *   temperature is left out). For every other model, `max_tokens` plus
+ *   `temperature`.
+ */
+export function buildOpenAITokenParams(
+  modelId: string | undefined,
+  maxTokens: number | undefined,
+  temperature: number | undefined,
+): Record<string, number | undefined> {
+  return isOpenAIReasoningModel(modelId)
+    ? { max_completion_tokens: maxTokens }
+    : { max_tokens: maxTokens, temperature };
+}
+
+/**
+ * Keywords whose value maps caller-chosen names to sub-schemas. The names are
+ * data (for example property names), so they are never dropped or renamed.
+ */
+const SCHEMA_MAP_KEYWORDS: ReadonlySet<string> = new Set([
+  "properties",
+  "patternProperties",
+  "$defs",
+  "definitions",
+  "dependentSchemas",
+]);
+
+/** Keywords whose value is instance data rather than a schema. */
+const SCHEMA_LITERAL_KEYWORDS: ReadonlySet<string> = new Set([
+  "enum",
+  "const",
+  "default",
+  "examples",
+]);
+
+/**
+ * Recursively walk a JSON Schema and drop keys the provider rejects.
+ *
+ * The walk distinguishes schema keywords from user data:
+ * - entries of `properties`, `patternProperties`, `$defs`, `definitions` and
+ *   `dependentSchemas` keep their names untouched (a property called
+ *   `pattern` or `minimum` is not a keyword) while their values are walked
+ *   as schemas;
+ * - values of `enum`, `const`, `default` and `examples` are copied as-is.
+ *
+ * The input is never mutated.
+ *
+ * @param schema - Schema node, array of nodes, or primitive.
+ * @param keysToDrop - Keywords to remove wherever they appear as keywords.
+ * @param options - `renameKeys` maps a keyword to its replacement name;
+ *   `forceAdditionalPropertiesFalse` sets `additionalProperties: false` on
+ *   every node whose `type` is `"object"`.
+ * @returns A sanitized copy of `schema`; primitives are returned unchanged.
+ */
+function stripSchemaKeys(
+  schema: unknown,
+  keysToDrop: ReadonlySet<string>,
+  options: {
+    forceAdditionalPropertiesFalse?: boolean;
+    renameKeys?: Record<string, string>;
+  } = {},
+): unknown {
+  if (Array.isArray(schema)) {
+    return schema.map((item) => stripSchemaKeys(item, keysToDrop, options));
+  }
+  if (!schema || typeof schema !== "object") return schema;
+
+  const renames = options.renameKeys;
+  const out: Record<string, unknown> = {};
+  for (const [key, value] of Object.entries(
+    schema as Record<string, unknown>,
+  )) {
+    if (keysToDrop.has(key)) continue;
+
+    // Own-property lookup only: a bare index would resolve inherited names
+    // such as "constructor" to Object.prototype members.
+    const renamed =
+      (renames && Object.prototype.hasOwnProperty.call(renames, key)
+        ? renames[key]
+        : undefined) ?? key;
+
+    if (SCHEMA_LITERAL_KEYWORDS.has(key)) {
+      // Instance data, not a schema — must reach the provider unmodified.
+      out[renamed] = value;
+    } else if (
+      SCHEMA_MAP_KEYWORDS.has(key) &&
+      value !== null &&
+      typeof value === "object" &&
+      !Array.isArray(value)
+    ) {
+      // Name → sub-schema map: keep every name, sanitize only the values.
+      const named: Record<string, unknown> = {};
+      for (const [name, sub] of Object.entries(
+        value as Record<string, unknown>,
+      )) {
+        named[name] = stripSchemaKeys(sub, keysToDrop, options);
+      }
+      out[renamed] = named;
+    } else {
+      out[renamed] = stripSchemaKeys(value, keysToDrop, options);
+    }
+  }
+
+  if (options.forceAdditionalPropertiesFalse && out.type === "object") {
+    out.additionalProperties = false;
+  }
+  return out;
+}
+
+/**
+ * Translates the unified `ResponseFormat` into the `response_format` field of
+ * an OpenAI Chat Completions request.
+ *
+ * @param rf - Unified response format, or `undefined` when none was requested.
+ * @returns `undefined` when `rf` is absent; `{ type: "json_object" }` for
+ *   schema-less JSON mode; otherwise a `json_schema` payload whose schema has
+ *   been passed through `normalizeObjectJsonSchema` and whose `strict` flag
+ *   defaults to `true`.
+ */
+export function toOpenAIResponseFormat(
+  rf: ResponseFormat | undefined,
+): Record<string, unknown> | undefined {
+  if (!rf) return undefined;
+  if (rf.type === "json_object") return { type: "json_object" };
+
+  const { name, schema, strict } = rf.json_schema;
+  return {
+    type: "json_schema",
+    json_schema: {
+      name,
+      schema: normalizeObjectJsonSchema(schema),
+      strict: strict ?? true,
+    },
+  };
+}
+
+/**
+ * Translates the unified `ResponseFormat` into the `text.format` value of an
+ * OpenAI Responses API request. Unlike Chat Completions, the schema fields
+ * sit directly on the format object instead of under a `json_schema` key.
+ *
+ * @param rf - Unified response format, or `undefined` when none was requested.
+ * @returns `undefined` when `rf` is absent; `{ type: "json_object" }` for
+ *   schema-less JSON mode (previously dropped, which made the Responses path
+ *   ignore a request the Chat Completions path honours); otherwise a
+ *   flattened `json_schema` format with `strict` defaulting to `true`.
+ */
+export function toOpenAIResponsesTextFormat(
+  rf: ResponseFormat | undefined,
+): Record<string, unknown> | undefined {
+  if (!rf) return undefined;
+  if (rf.type === "json_object") return { type: "json_object" };
+
+  const { name, schema, strict } = rf.json_schema;
+  return {
+    type: "json_schema",
+    name,
+    schema: normalizeObjectJsonSchema(schema),
+    strict: strict ?? true,
+  };
+}
+
+/**
+ * JSON Schema keywords removed before a schema is sent to Anthropic, whose
+ * structured-output subset is narrower than OpenAI's: numeric bounds
+ * (minimum/maximum/multipleOf), length and size limits, `pattern` and
+ * `$schema` are all dropped.
+ */
+const ANTHROPIC_UNSUPPORTED_KEYS: ReadonlySet<string> = new Set([
+  "minimum",
+  "maximum",
+  "exclusiveMinimum",
+  "exclusiveMaximum",
+  "multipleOf",
+  "minLength",
+  "maxLength",
+  "minItems",
+  "maxItems",
+  "minProperties",
+  "maxProperties",
+  "pattern",
+  "$schema",
+]);
+
+/**
+ * Builds the Anthropic `output_config` value for a schema-constrained request.
+ *
+ * @param rf - Unified response format, or `undefined` when none was requested.
+ * @returns `undefined` unless `rf` is a `json_schema` format; otherwise
+ *   `{ format: { type: "json_schema", schema } }` where `schema` has the
+ *   unsupported keywords removed, `oneOf` renamed to `anyOf`, and
+ *   `additionalProperties: false` set on every node typed `"object"`.
+ */
+export function toAnthropicOutputConfig(
+  rf: ResponseFormat | undefined,
+): Record<string, unknown> | undefined {
+  if (rf?.type !== "json_schema") return undefined;
+
+  const sanitized = stripSchemaKeys(
+    rf.json_schema.schema,
+    ANTHROPIC_UNSUPPORTED_KEYS,
+    {
+      // `oneOf` is rejected but `anyOf` is accepted, so rename instead of
+      // dropping — dropping would silently erase discriminated unions.
+      renameKeys: { oneOf: "anyOf" },
+      forceAdditionalPropertiesFalse: true,
+    },
+  );
+
+  return {
+    format: {
+      type: "json_schema",
+      schema: sanitized as Record<string, unknown>,
+    },
+  };
+}
+
+/**
+ * JSON Schema keywords removed before a schema is sent to Gemini. Gemini
+ * accepts an OpenAPI 3.0 subset and silently ignores unknown keywords; union
+ * keywords, references/definitions, `pattern`, `$schema` and
+ * `additionalProperties` are stripped up front.
+ */
+const GEMINI_UNSUPPORTED_KEYS: ReadonlySet<string> = new Set([
+  "oneOf",
+  "anyOf",
+  "$ref",
+  "$defs",
+  "definitions",
+  "pattern",
+  "$schema",
+  "additionalProperties",
+]);
+
+/**
+ * Builds the Gemini `responseJsonSchema` value for a schema-constrained
+ * request.
+ *
+ * @param rf - Unified response format, or `undefined` when none was requested.
+ * @returns `undefined` unless `rf` is a `json_schema` format; otherwise a copy
+ *   of the caller's schema with the unsupported keywords removed.
+ */
+export function toGeminiSchema(
+  rf: ResponseFormat | undefined,
+): Record<string, unknown> | undefined {
+  if (rf?.type !== "json_schema") return undefined;
+
+  const sanitized = stripSchemaKeys(
+    rf.json_schema.schema,
+    GEMINI_UNSUPPORTED_KEYS,
+  );
+  return sanitized as Record<string, unknown>;
+}
+
+/**
+ * Translates the unified `ResponseFormat` into Ollama's `format` field.
+ *
+ * @param rf - Unified response format, or `undefined` when none was requested.
+ * @returns `undefined` when `rf` is absent, the string `"json"` for
+ *   schema-less JSON mode, or the caller's schema object (passed through
+ *   unchanged) for schema-constrained output.
+ */
+export function toOllamaFormat(
+  rf: ResponseFormat | undefined,
+): string | Record<string, unknown> | undefined {
+  if (!rf) return undefined;
+  return rf.type === "json_object" ? "json" : rf.json_schema.schema;
+}
+
 /**
  * Convert actions to OpenAI tool format
  */
diff --git a/packages/llm-sdk/src/adapters/google.ts b/packages/llm-sdk/src/adapters/google.ts
index 83d44e1..373a93f 100644
--- a/packages/llm-sdk/src/adapters/google.ts
+++ b/packages/llm-sdk/src/adapters/google.ts
@@ -18,7 +18,7 @@ import type {
   ChatCompletionRequest,
   CompletionResult,
 } from "./base";
-import { formatTools, logProviderPayload } from "./base";
+import { formatTools, logProviderPayload, toGeminiSchema } from "./base";
 
 // ============================================
 // Types
@@ -372,6 +372,15 @@ export class GoogleAdapter implements LLMAdapter {
     // Emit message start
     yield { type: "message:start", id: messageId };
 
+    const responseFormat = request.config?.responseFormat;
+    const geminiSchema = toGeminiSchema(responseFormat);
+    const responseFormatGenConfig: Record<string, unknown> = responseFormat
+      ? {
+          responseMimeType: "application/json",
+          ...(geminiSchema ? { responseJsonSchema: geminiSchema } : {}),
+        }
+      : {};
+
     try {
       logProviderPayload(
         "google",
@@ -386,6 +395,7 @@ export class GoogleAdapter implements LLMAdapter {
           generationConfig: {
             temperature: request.config?.temperature ?? this.config.temperature,
             maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
+            ...responseFormatGenConfig,
           },
           messageParts: mergedContents[mergedContents.length - 1]?.parts,
         },
@@ -401,6 +411,7 @@ export class GoogleAdapter implements LLMAdapter {
         generationConfig: {
           temperature: request.config?.temperature ?? this.config.temperature,
           maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
+          ...responseFormatGenConfig,
         },
       });
 
@@ -647,6 +658,15 @@ export class GoogleAdapter implements LLMAdapter {
 
     const tools = formatToolsForGemini(request.actions);
 
+    const responseFormat = request.config?.responseFormat;
+    const geminiSchema = toGeminiSchema(responseFormat);
+    const responseFormatGenConfig: Record<string, unknown> = responseFormat
+      ? {
+          responseMimeType: "application/json",
+          ...(geminiSchema ? { responseJsonSchema: geminiSchema } : {}),
+        }
+      : {};
+
     const payload = {
       model: modelId,
       history: mergedContents.slice(0, -1),
@@ -657,6 +677,7 @@ export class GoogleAdapter implements LLMAdapter {
       generationConfig: {
         temperature: request.config?.temperature ?? this.config.temperature,
         maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
+        ...responseFormatGenConfig,
       },
       messageParts: mergedContents[mergedContents.length - 1]?.parts,
     };
@@ -670,6 +691,7 @@ export class GoogleAdapter implements LLMAdapter {
       generationConfig: {
         temperature: request.config?.temperature ?? this.config.temperature,
         maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
+        ...responseFormatGenConfig,
       },
     });
 
diff --git a/packages/llm-sdk/src/adapters/ollama.ts b/packages/llm-sdk/src/adapters/ollama.ts
index 866a82c..88085ed 100644
--- a/packages/llm-sdk/src/adapters/ollama.ts
+++ b/packages/llm-sdk/src/adapters/ollama.ts
@@ -5,7 +5,12 @@ import type {
 } from "../core/stream-events";
 import { generateMessageId, generateToolCallId } from "../core/utils";
 import type { LLMAdapter, ChatCompletionRequest } from "./base";
-import { formatMessages, formatTools, logProviderPayload } from "./base";
+import {
+  formatMessages,
+  formatTools,
+  logProviderPayload,
+  toOllamaFormat,
+} from "./base";
 import type { OllamaModelOptions } from "../providers/types";
 
 /**
@@ -288,12 +293,14 @@ export class OllamaAdapter implements LLMAdapter {
         Object.assign(ollamaOptions, this.config.options);
       }
 
+      const ollamaFormat = toOllamaFormat(request.config?.responseFormat);
       const payload = {
         model: request.config?.model || this.model,
         messages,
         tools,
         stream: true,
         options: ollamaOptions,
+        ...(ollamaFormat !== undefined ? { format: ollamaFormat } : {}),
       };
       logProviderPayload("ollama", "request payload", payload, request.debug);
       const response = await fetch(`${this.baseUrl}/api/chat`, {
diff --git a/packages/llm-sdk/src/adapters/openai.ts b/packages/llm-sdk/src/adapters/openai.ts
index d0795ec..114ecec 100644
--- a/packages/llm-sdk/src/adapters/openai.ts
+++ b/packages/llm-sdk/src/adapters/openai.ts
@@ -12,10 +12,13 @@ import type {
   CompletionResult,
 } from "./base";
 import {
+  buildOpenAITokenParams,
   formatMessagesForOpenAI,
   formatTools,
   logProviderPayload,
   normalizeObjectJsonSchema,
+  toOpenAIResponseFormat,
+  toOpenAIResponsesTextFormat,
 } from "./base";
 
 /**
@@ -225,6 +228,9 @@ export class OpenAIAdapter implements LLMAdapter {
   ): Promise<CompletionResult> {
     const client = await this.getClient();
     const openaiToolOptions = request.providerToolOptions?.openai;
+    const responsesTextFormat = toOpenAIResponsesTextFormat(
+      request.config?.responseFormat,
+    );
     const payload = {
       model: request.config?.model || this.model,
       instructions: request.systemPrompt,
@@ -239,6 +245,7 @@ export class OpenAIAdapter implements LLMAdapter {
       parallel_tool_calls: openaiToolOptions?.parallelToolCalls,
       temperature: request.config?.temperature ?? this.config.temperature,
       max_output_tokens: request.config?.maxTokens ?? this.config.maxTokens,
+      ...(responsesTextFormat ? { text: { format: responsesTextFormat } } : {}),
       stream: false,
     };
 
@@ -427,15 +434,20 @@ export class OpenAIAdapter implements LLMAdapter {
               },
             }
           : openaiToolOptions?.toolChoice;
+      const modelIdForPayload = request.config?.model || this.model;
       const payload = {
-        model: request.config?.model || this.model,
+        model: modelIdForPayload,
         messages,
         tools: tools.length > 0 ? tools : undefined,
         tool_choice: tools.length > 0 ? toolChoice : undefined,
         parallel_tool_calls:
           tools.length > 0 ? openaiToolOptions?.parallelToolCalls : undefined,
-        temperature: request.config?.temperature ?? this.config.temperature,
-        max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
+        ...buildOpenAITokenParams(
+          modelIdForPayload,
+          request.config?.maxTokens ?? this.config.maxTokens,
+          request.config?.temperature ?? this.config.temperature,
+        ),
+        response_format: toOpenAIResponseFormat(request.config?.responseFormat),
         stream: true,
         stream_options: { include_usage: true },
       };
@@ -654,15 +666,20 @@ export class OpenAIAdapter implements LLMAdapter {
           }
         : openaiToolOptions?.toolChoice;
 
+    const modelIdForCompletePayload = request.config?.model || this.model;
     const payload = {
-      model: request.config?.model || this.model,
+      model: modelIdForCompletePayload,
       messages,
       tools: tools.length > 0 ? tools : undefined,
       tool_choice: tools.length > 0 ? toolChoice : undefined,
       parallel_tool_calls:
         tools.length > 0 ? openaiToolOptions?.parallelToolCalls : undefined,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
+      ...buildOpenAITokenParams(
+        modelIdForCompletePayload,
+        request.config?.maxTokens ?? this.config.maxTokens,
+        request.config?.temperature ?? this.config.temperature,
+      ),
+      response_format: toOpenAIResponseFormat(request.config?.responseFormat),
       stream: false,
     };
 
diff --git a/packages/llm-sdk/src/core/generate-text.ts b/packages/llm-sdk/src/core/generate-text.ts
index 0d9a099..6249969 100644
--- a/packages/llm-sdk/src/core/generate-text.ts
+++ b/packages/llm-sdk/src/core/generate-text.ts
@@ -47,6 +47,12 @@ export async function generateText(
 ): Promise<GenerateTextResult> {
   const { model, tools, maxSteps = 1, signal } = params;
 
+  if (params.responseFormat && model.capabilities.supportsJsonMode === false) {
+    console.warn(
+      `[llm-sdk] ${model.provider}/${model.modelId} does not support structured output (responseFormat); the request will be sent but the provider may ignore it.`,
+    );
+  }
+
   // Build initial messages
   let messages = buildMessages(params);
   const steps: GenerateStep[] = [];
@@ -71,6 +77,7 @@ export async function generateText(
       tools: formattedTools,
       temperature: params.temperature,
       maxTokens: params.maxTokens,
+      responseFormat: params.responseFormat,
       signal,
     });
 
diff --git a/packages/llm-sdk/src/core/stream-events.ts b/packages/llm-sdk/src/core/stream-events.ts
index 808fb87..d89f5be 100644
--- a/packages/llm-sdk/src/core/stream-events.ts
+++ b/packages/llm-sdk/src/core/stream-events.ts
@@ -286,12 +286,31 @@ export type StreamEvent =
   | ThreadCreatedEvent
   | DoneEvent;
 
+/**
+ * Structured-output / JSON-mode request format.
+ *
+ * Uses OpenAI's `response_format` shape as the unified surface; each adapter
+ * translates to its provider's native field (Anthropic `output_config`,
+ * Gemini `responseJsonSchema`, Ollama `format`, etc.).
+ *
+ * - `json_object`: JSON output without a schema.
+ * - `json_schema`: output constrained to the supplied JSON Schema.
+ */
+export type ResponseFormat =
+  | { type: "json_object" }
+  | {
+      type: "json_schema";
+      json_schema: {
+        /** Schema identifier; forwarded on the OpenAI-shaped payloads. */
+        name: string;
+        /** JSON Schema describing the expected response. */
+        schema: Record<string, unknown>;
+        /** Strict schema adherence; the OpenAI translators default it to `true`. */
+        strict?: boolean;
+      };
+    };
+
 /**
  * LLM configuration
  */
 export interface LLMConfig {
   temperature?: number;
   maxTokens?: number;
+  /** Optional structured-output / JSON-mode request format. */
+  responseFormat?: ResponseFormat;
 }
 
 /**
diff --git a/packages/llm-sdk/src/core/stream-text.ts b/packages/llm-sdk/src/core/stream-text.ts
index 41ab6f2..8904d4f 100644
--- a/packages/llm-sdk/src/core/stream-text.ts
+++ b/packages/llm-sdk/src/core/stream-text.ts
@@ -50,6 +50,12 @@ export async function streamText(
 ): Promise<StreamTextResult> {
   const { model, tools, maxSteps = 1, signal } = params;
 
+  if (params.responseFormat && model.capabilities.supportsJsonMode === false) {
+    console.warn(
+      `[llm-sdk] ${model.provider}/${model.modelId} does not support structured output (responseFormat); the request will be sent but the provider may ignore it.`,
+    );
+  }
+
   // State for collecting results
   let fullText = "";
   let finalUsage: TokenUsage = {
@@ -90,6 +96,7 @@ export async function streamText(
           tools: formattedTools,
           temperature: params.temperature,
           maxTokens: params.maxTokens,
+          responseFormat: params.responseFormat,
           signal,
         })) {
           switch (chunk.type) {
diff --git a/packages/llm-sdk/src/core/types.ts b/packages/llm-sdk/src/core/types.ts
index ae9dad7..53a16e9 100644
--- a/packages/llm-sdk/src/core/types.ts
+++ b/packages/llm-sdk/src/core/types.ts
@@ -207,6 +207,8 @@ export interface DoGenerateParams {
   temperature?: number;
   /** Maximum tokens to generate */
   maxTokens?: number;
+  /** Structured-output / JSON-mode request format (provider-translated) */
+  responseFormat?: import("./stream-events").ResponseFormat;
   /** Abort signal */
   signal?: AbortSignal;
 }
@@ -312,6 +314,8 @@ export interface GenerateTextParams {
   temperature?: number;
   /** Maximum tokens to generate */
   maxTokens?: number;
+  /** Structured-output / JSON-mode request format */
+  responseFormat?: import("./stream-events").ResponseFormat;
   /** Abort signal */
   signal?: AbortSignal;
 }
diff --git a/packages/llm-sdk/src/providers/anthropic/index.ts b/packages/llm-sdk/src/providers/anthropic/index.ts
index 9e9c3e7..75e9021 100644
--- a/packages/llm-sdk/src/providers/anthropic/index.ts
+++ b/packages/llm-sdk/src/providers/anthropic/index.ts
@@ -157,7 +157,8 @@ export function createAnthropic(
         "image/gif",
         "image/webp",
       ],
-      supportsJsonMode: false,
+      // Native `output_config.format` — GA on Claude 3.5 and newer.
+      supportsJsonMode: true,
       supportsSystemMessages: true,
     };
   };
diff --git a/packages/llm-sdk/src/providers/anthropic/provider.ts b/packages/llm-sdk/src/providers/anthropic/provider.ts
index 4b48b6e..e802fe2 100644
--- a/packages/llm-sdk/src/providers/anthropic/provider.ts
+++ b/packages/llm-sdk/src/providers/anthropic/provider.ts
@@ -22,6 +22,7 @@ import type {
   FinishReason,
   CoreMessage,
 } from "../../core/types";
+import { toAnthropicOutputConfig } from "../../adapters/base";
 
 // ============================================
 // Model Definitions
@@ -32,6 +33,12 @@ interface AnthropicModelConfig {
   tools: boolean;
   thinking: boolean;
   pdf: boolean;
+  /**
+   * Native structured-output (`output_config.format`) support — GA on Claude
+   * API and Bedrock as of late 2025 for Claude 3.5 and newer. Older Claude 3
+   * base models must use a forced-tool fallback.
+   */
+  jsonMode: boolean;
   maxTokens: number;
 }
 
@@ -42,6 +49,7 @@ const ANTHROPIC_MODELS: Record<string, AnthropicModelConfig> = {
     tools: true,
     thinking: true,
     pdf: true,
+    jsonMode: true,
     maxTokens: 200000,
   },
   "claude-opus-4-20250514": {
@@ -49,6 +57,7 @@ const ANTHROPIC_MODELS: Record<string, AnthropicModelConfig> = {
     tools: true,
     thinking: true,
     pdf: true,
+    jsonMode: true,
     maxTokens: 200000,
   },
 
@@ -58,6 +67,7 @@ const ANTHROPIC_MODELS: Record<string, AnthropicModelConfig> = {
     tools: true,
     thinking: true,
     pdf: true,
+    jsonMode: true,
     maxTokens: 200000,
   },
   "claude-3-7-sonnet-latest": {
@@ -65,6 +75,7 @@ const ANTHROPIC_MODELS: Record<string, AnthropicModelConfig> = {
     tools: true,
     thinking: true,
     pdf: true,
+    jsonMode: true,
     maxTokens: 200000,
   },
 
@@ -74,6 +85,7 @@ const ANTHROPIC_MODELS: Record<string, AnthropicModelConfig> = {
     tools: true,
     thinking: false,
     pdf: true,
+    jsonMode: true,
     maxTokens: 200000,
   },
   "claude-3-5-sonnet-latest": {
@@ -81,6 +93,7 @@ const ANTHROPIC_MODELS: Record<string, AnthropicModelConfig> = {
     tools: true,
     thinking: false,
     pdf: true,
+    jsonMode: true,
     maxTokens: 200000,
   },
   "claude-3-5-haiku-20241022": {
@@ -88,6 +101,7 @@ const ANTHROPIC_MODELS: Record<string, AnthropicModelConfig> = {
     tools: true,
     thinking: false,
     pdf: false,
+    jsonMode: true,
     maxTokens: 200000,
   },
   "claude-3-5-haiku-latest": {
@@ -95,6 +109,7 @@ const ANTHROPIC_MODELS: Record<string, AnthropicModelConfig> = {
     tools: true,
     thinking: false,
     pdf: false,
+    jsonMode: true,
     maxTokens: 200000,
   },
 
@@ -104,6 +119,7 @@ const ANTHROPIC_MODELS: Record<string, AnthropicModelConfig> = {
     tools: true,
     thinking: false,
     pdf: false,
+    jsonMode: false,
     maxTokens: 200000,
   },
   "claude-3-sonnet-20240229": {
@@ -111,6 +127,7 @@ const ANTHROPIC_MODELS: Record<string, AnthropicModelConfig> = {
     tools: true,
     thinking: false,
     pdf: false,
+    jsonMode: false,
     maxTokens: 200000,
   },
   "claude-3-haiku-20240307": {
@@ -118,6 +135,7 @@ const ANTHROPIC_MODELS: Record<string, AnthropicModelConfig> = {
     tools: true,
     thinking: false,
     pdf: false,
+    jsonMode: false,
     maxTokens: 200000,
   },
 };
@@ -175,7 +193,7 @@ export function anthropic(
       supportsVision: modelConfig.vision,
       supportsTools: modelConfig.tools,
       supportsStreaming: true,
-      supportsJsonMode: false,
+      supportsJsonMode: modelConfig.jsonMode,
       supportsThinking: modelConfig.thinking,
       supportsPDF: modelConfig.pdf,
       maxTokens: modelConfig.maxTokens,
@@ -209,6 +227,11 @@ export function anthropic(
         };
       }
 
+      const outputConfig = toAnthropicOutputConfig(params.responseFormat);
+      if (outputConfig) {
+        requestOptions.output_config = outputConfig;
+      }
+
       const response = await client.messages.create(requestOptions);
 
       // Parse response
@@ -266,6 +289,11 @@ export function anthropic(
         };
       }
 
+      const outputConfig = toAnthropicOutputConfig(params.responseFormat);
+      if (outputConfig) {
+        requestOptions.output_config = outputConfig;
+      }
+
       const stream = await client.messages.stream(requestOptions);
 
       let currentToolUse: { id: string; name: string; input: string } | null =
diff --git a/packages/llm-sdk/src/providers/fireworks/provider.ts b/packages/llm-sdk/src/providers/fireworks/provider.ts
index 78b3cdb..49d17de 100644
--- a/packages/llm-sdk/src/providers/fireworks/provider.ts
+++ b/packages/llm-sdk/src/providers/fireworks/provider.ts
@@ -25,6 +25,7 @@ import type {
   FinishReason,
   CoreMessage,
 } from "../../core/types";
+import { toOpenAIResponseFormat } from "../../adapters/base";
 
 // ============================================
 // Provider Options
@@ -107,6 +108,11 @@ export function fireworks(
         requestBody.tools = params.tools;
       }
 
+      const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+      if (responseFormat) {
+        requestBody.response_format = responseFormat;
+      }
+
       const response = await client.chat.completions.create(requestBody);
       const choice = response.choices[0];
       const message = choice.message;
@@ -148,6 +154,11 @@ export function fireworks(
         requestBody.tools = params.tools;
       }
 
+      const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+      if (responseFormat) {
+        requestBody.response_format = responseFormat;
+      }
+
       const stream = await client.chat.completions.create(requestBody);
 
       // Track tool calls by index (Fireworks may repeat tc.id across chunks)
diff --git a/packages/llm-sdk/src/providers/google/provider.ts b/packages/llm-sdk/src/providers/google/provider.ts
index 2659ce6..6d55cf3 100644
--- a/packages/llm-sdk/src/providers/google/provider.ts
+++ b/packages/llm-sdk/src/providers/google/provider.ts
@@ -25,6 +25,7 @@ import type {
   FinishReason,
   CoreMessage,
 } from "../../core/types";
+import { toOpenAIResponseFormat } from "../../adapters/base";
 
 // ============================================
 // Model Definitions
@@ -211,6 +212,7 @@ export function google(
         tools: params.tools as any,
         temperature: params.temperature,
         max_tokens: params.maxTokens,
+        response_format: toOpenAIResponseFormat(params.responseFormat),
       });
 
       const choice = response.choices[0];
@@ -249,6 +251,7 @@ export function google(
         tools: params.tools as any,
         temperature: params.temperature,
         max_tokens: params.maxTokens,
+        response_format: toOpenAIResponseFormat(params.responseFormat),
         stream: true,
       });
 
diff --git a/packages/llm-sdk/src/providers/ollama/index.ts b/packages/llm-sdk/src/providers/ollama/index.ts
index 91377e8..b87cc39 100644
--- a/packages/llm-sdk/src/providers/ollama/index.ts
+++ b/packages/llm-sdk/src/providers/ollama/index.ts
@@ -204,7 +204,8 @@ export function createOllama(config: OllamaProviderConfig = {}): AIProvider {
       supportedImageTypes: model.vision
         ? ["image/png", "image/jpeg", "image/gif"]
         : [],
-      supportsJsonMode: false,
+      // Ollama 0.5+ supports `format: "json"` and JSON-schema constrained output.
+      supportsJsonMode: true,
       supportsSystemMessages: true,
     };
   };
diff --git a/packages/llm-sdk/src/providers/openai/provider.ts b/packages/llm-sdk/src/providers/openai/provider.ts
index 4c6d6e8..fc9a153 100644
--- a/packages/llm-sdk/src/providers/openai/provider.ts
+++ b/packages/llm-sdk/src/providers/openai/provider.ts
@@ -26,6 +26,10 @@ import type {
   FinishReason,
   CoreMessage,
 } from "../../core/types";
+import {
+  buildOpenAITokenParams,
+  toOpenAIResponseFormat,
+} from "../../adapters/base";
 
 // ============================================
 // Model Definitions
@@ -194,8 +198,12 @@ export function openai(
         model: modelId,
         messages,
         tools: params.tools as any,
-        temperature: params.temperature,
-        max_tokens: params.maxTokens,
+        ...buildOpenAITokenParams(
+          modelId,
+          params.maxTokens,
+          params.temperature,
+        ),
+        response_format: toOpenAIResponseFormat(params.responseFormat),
       });
 
       const choice = response.choices[0];
@@ -232,8 +240,12 @@ export function openai(
         model: modelId,
         messages,
         tools: params.tools as any,
-        temperature: params.temperature,
-        max_tokens: params.maxTokens,
+        ...buildOpenAITokenParams(
+          modelId,
+          params.maxTokens,
+          params.temperature,
+        ),
+        response_format: toOpenAIResponseFormat(params.responseFormat),
         stream: true,
       });
 
diff --git a/packages/llm-sdk/src/providers/openrouter/provider.ts b/packages/llm-sdk/src/providers/openrouter/provider.ts
index f85d858..713ee90 100644
--- a/packages/llm-sdk/src/providers/openrouter/provider.ts
+++ b/packages/llm-sdk/src/providers/openrouter/provider.ts
@@ -25,6 +25,7 @@ import type {
   FinishReason,
   CoreMessage,
 } from "../../core/types";
+import { toOpenAIResponseFormat } from "../../adapters/base";
 
 // ============================================
 // Model Configuration
@@ -170,6 +171,11 @@ export function openrouter(
         requestBody.provider = options.providerPreferences;
       }
 
+      const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+      if (responseFormat) {
+        requestBody.response_format = responseFormat;
+      }
+
       const response = await client.chat.completions.create(requestBody);
 
       const choice = response.choices[0];
@@ -221,6 +227,11 @@ export function openrouter(
         requestBody.provider = options.providerPreferences;
       }
 
+      const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+      if (responseFormat) {
+        requestBody.response_format = responseFormat;
+      }
+
       const stream = await client.chat.completions.create(requestBody);
 
       // Track current tool call being built
diff --git a/packages/llm-sdk/src/providers/togetherai/provider.ts b/packages/llm-sdk/src/providers/togetherai/provider.ts
index 7c1c718..e016cfa 100644
--- a/packages/llm-sdk/src/providers/togetherai/provider.ts
+++ b/packages/llm-sdk/src/providers/togetherai/provider.ts
@@ -29,6 +29,7 @@ import type {
   FinishReason,
   CoreMessage,
 } from "../../core/types";
+import { toOpenAIResponseFormat } from "../../adapters/base";
 
 // ============================================
 // Provider Options
@@ -116,6 +117,11 @@ export function togetherai(
         requestBody.tools = params.tools;
       }
 
+      const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+      if (responseFormat) {
+        requestBody.response_format = responseFormat;
+      }
+
       const response = await client.chat.completions.create(requestBody);
       const choice = response.choices[0];
       const message = choice.message;
@@ -157,6 +163,11 @@ export function togetherai(
         requestBody.tools = params.tools;
       }
 
+      const responseFormat = toOpenAIResponseFormat(params.responseFormat);
+      if (responseFormat) {
+        requestBody.response_format = responseFormat;
+      }
+
       const stream = await client.chat.completions.create(requestBody);
 
       // Track tool calls by index
diff --git a/packages/llm-sdk/src/providers/xai/index.ts b/packages/llm-sdk/src/providers/xai/index.ts
index 4bc8d00..0eca6df 100644
--- a/packages/llm-sdk/src/providers/xai/index.ts
+++ b/packages/llm-sdk/src/providers/xai/index.ts
@@ -180,7 +180,8 @@ export function createXAI(config: XAIProviderConfig = {}): AIProvider {
       supportedImageTypes: model.vision
         ? ["image/png", "image/jpeg", "image/gif", "image/webp"]
         : [],
-      supportsJsonMode: false,
+      // xAI accepts OpenAI-compatible `response_format` on grok-2-1212+.
+      supportsJsonMode: true,
       supportsSystemMessages: true,
     };
   };
diff --git a/packages/llm-sdk/src/providers/xai/provider.ts b/packages/llm-sdk/src/providers/xai/provider.ts
index ab773c6..959e135 100644
--- a/packages/llm-sdk/src/providers/xai/provider.ts
+++ b/packages/llm-sdk/src/providers/xai/provider.ts
@@ -25,6 +25,7 @@ import type {
   FinishReason,
   CoreMessage,
 } from "../../core/types";
+import { toOpenAIResponseFormat } from "../../adapters/base";
 
 // ============================================
 // Model Definitions
@@ -136,7 +137,7 @@ export function xai(
       supportsVision: modelConfig.vision,
       supportsTools: modelConfig.tools,
       supportsStreaming: true,
-      supportsJsonMode: false, // xAI doesn't support JSON mode yet
+      supportsJsonMode: true, // OpenAI-compatible `response_format`
       supportsThinking: false,
       supportsPDF: false,
       maxTokens: modelConfig.maxTokens,
@@ -156,6 +157,7 @@ export function xai(
         tools: params.tools as any,
         temperature: params.temperature,
         max_tokens: params.maxTokens,
+        response_format: toOpenAIResponseFormat(params.responseFormat),
       });
 
       const choice = response.choices[0];
@@ -194,6 +196,7 @@ export function xai(
         tools: params.tools as any,
         temperature: params.temperature,
         max_tokens: params.maxTokens,
+        response_format: toOpenAIResponseFormat(params.responseFormat),
         stream: true,
       });
 
diff --git a/packages/llm-sdk/src/server/types.ts b/packages/llm-sdk/src/server/types.ts
index a2c0d8e..d182309 100644
--- a/packages/llm-sdk/src/server/types.ts
+++ b/packages/llm-sdk/src/server/types.ts
@@ -1,6 +1,7 @@
 import type {
   ActionDefinition,
   KnowledgeBaseConfig,
+  ResponseFormat,
   ToolDefinition,
   ToolProfile,
   WebSearchConfig,
@@ -222,7 +223,11 @@ export interface ChatRequest {
   /** Bot ID (for cloud) */
   botId?: string;
   /** LLM config overrides */
-  config?: { temperature?: number; maxTokens?: number };
+  config?: {
+    temperature?: number;
+    maxTokens?: number;
+    responseFormat?: ResponseFormat;
+  };
   /** System prompt override */
   systemPrompt?: string;
   /** Actions from client (legacy) */