
Commit d03a379

feat: [ENG-3645] AI Gateway Reasoning Support - Anthropic (#5303)
* anthropic thinking support on AI Gateway
* update responses<->completions mappers for reasoning options and effort
* nit
* nits
* fix accumulation of thinking details bug
1 parent c7f9651 commit d03a379

14 files changed: +177 -14 lines


packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap

Lines changed: 1 addition & 1 deletion
@@ -706,7 +706,7 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = `
       "context": 200000,
       "crossRegion": false,
       "maxTokens": 8192,
-      "modelId": "anthropic/claude-4.5-haiku",
+      "modelId": "anthropic/claude-haiku-4.5",
       "parameters": [
         "max_tokens",
         "stop",

packages/cost/models/authors/anthropic/claude-4.5-haiku/endpoints.ts

Lines changed: 1 addition & 1 deletion
@@ -110,7 +110,7 @@ export const endpoints = {
   "claude-4.5-haiku:openrouter": {
     provider: "openrouter",
     author: "anthropic",
-    providerModelId: "anthropic/claude-4.5-haiku",
+    providerModelId: "anthropic/claude-haiku-4.5",
     pricing: [
       {
         threshold: 0,

packages/llm-mapper/transform/providers/anthropic/response/toOpenai.ts

Lines changed: 10 additions & 0 deletions
@@ -16,16 +16,26 @@ export function toOpenAI(response: AnthropicResponseBody): OpenAIResponseBody {
   const toolUseBlocks = response.content.filter(
     (block) => block.type === "tool_use"
   );
+  const thinkingBlocks = response.content.filter(
+    (block) => block.type === "thinking"
+  );

   const { content, annotations } = buildContentAndAnnotations(textBlocks);

   const tool_calls = mapToolCalls(toolUseBlocks);
+  const reasoning = thinkingBlocks.map((b) => b.thinking || "").join("");
+  const reasoning_details = thinkingBlocks.map((b) => ({
+    thinking: b.thinking || "",
+    signature: b.signature || "",
+  }));

   const choice: OpenAIChoice = {
     index: 0,
     message: {
       role: "assistant",
       content: content || null,
+      ...(reasoning && { reasoning }),
+      ...(reasoning_details.length > 0 && { reasoning_details }),
       ...(tool_calls.length > 0 && { tool_calls }),
       ...(annotations.length > 0 && { annotations }),
     },
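
A minimal before/after sketch of this mapping (field subset only; the signature value and response text are invented for illustration, not taken from the commit):

// An Anthropic response mixing a thinking block and a text block...
const anthropicResponse = {
  content: [
    { type: "thinking", thinking: "Compare both options first.", signature: "sig_abc" },
    { type: "text", text: "Option B is cheaper." },
  ],
};
// ...would map to an OpenAI-shaped choices[0].message roughly like:
// {
//   role: "assistant",
//   content: "Option B is cheaper.",
//   reasoning: "Compare both options first.",
//   reasoning_details: [{ thinking: "Compare both options first.", signature: "sig_abc" }],
// }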

packages/llm-mapper/transform/providers/anthropic/streamedResponse/toOpenai.ts

Lines changed: 37 additions & 0 deletions
@@ -25,6 +25,7 @@ export class AnthropicToOpenAIStreamConverter {
   private nextToolCallIndex: number = 0;
   private annotations: OpenAIAnnotation[] = [];
   private currentContentLength: number = 0;
+  private thinkingBlockState: Map<number, { thinking: string; signature: string }> = new Map(); // Track thinking blocks with their signatures

   constructor() {
     this.created = Math.floor(Date.now() / 1000);
@@ -72,6 +73,7 @@
     this.nextToolCallIndex = 0;
     this.annotations = [];
     this.currentContentLength = 0;
+    this.thinkingBlockState.clear();

     chunks.push(
       this.createChunk({
@@ -153,6 +155,9 @@
           ],
         })
       );
+    } else if (event.content_block.type === "thinking") {
+      // Initialize thinking block state for this index
+      this.thinkingBlockState.set(event.index, { thinking: "", signature: "" });
     } else if (
       event.content_block.type === "web_search_tool_result" ||
       event.content_block.type === "server_tool_use"
@@ -218,6 +223,31 @@
         );
       }
     }
+    } else if (event.delta.type === "thinking_delta") {
+      // Accumulate thinking content for this block
+      const thinkingState = this.thinkingBlockState.get(event.index);
+      if (thinkingState) {
+        thinkingState.thinking += event.delta.thinking;
+      }
+
+      chunks.push(
+        this.createChunk({
+          choices: [
+            {
+              index: 0,
+              delta: { reasoning: event.delta.thinking },
+              logprobs: null,
+              finish_reason: null,
+            },
+          ],
+        })
+      );
+    } else if (event.delta.type === "signature_delta") {
+      // Accumulate signature for this thinking block
+      const thinkingState = this.thinkingBlockState.get(event.index);
+      if (thinkingState) {
+        thinkingState.signature += event.delta.signature;
+      }
     } else if (event.delta.type === "citations_delta") {
       // Collect citations - will be sent at the end in message_delta
       const citation = event.delta.citation;
@@ -287,6 +317,10 @@

     const finishReason = this.mapStopReason(event.delta.stop_reason);

+    // Collect reasoning_details from accumulated thinking blocks
+    const reasoning_details = Array.from(this.thinkingBlockState.values())
+      .filter(state => state.thinking || state.signature);
+
     chunks.push(
       this.createChunk({
         choices: [
@@ -296,6 +330,9 @@
             ...(this.annotations.length > 0 && {
               annotations: this.annotations,
             }),
+            ...(reasoning_details.length > 0 && {
+              reasoning_details,
+            }),
           },
           logprobs: null,
           finish_reason: finishReason,
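
To make the streaming path concrete, here is a hedged sketch of the event sequence for a single thinking block and what the converter emits for it (payloads abbreviated; exact shapes follow Anthropic's streaming API):

const events = [
  { type: "content_block_start", index: 0, content_block: { type: "thinking" } },
  { type: "content_block_delta", index: 0, delta: { type: "thinking_delta", thinking: "Check the units. " } },
  { type: "content_block_delta", index: 0, delta: { type: "signature_delta", signature: "sig_xyz" } },
  { type: "message_delta", delta: { stop_reason: "end_turn" } },
];
// Emitted, in order:
// 1. a chunk with delta: { reasoning: "Check the units. " } (one per thinking_delta)
// 2. nothing for signature_delta (the signature is only accumulated internally)
// 3. a final chunk whose delta carries reasoning_details
//    ([{ thinking: "Check the units. ", signature: "sig_xyz" }]) alongside the mapped finish_reason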

packages/llm-mapper/transform/providers/openai/request/toAnthropic.ts

Lines changed: 46 additions & 7 deletions
@@ -31,6 +31,24 @@ export function toAnthropic(
     stream: openAIBody.stream ?? undefined,
   };

+  if (openAIBody.reasoning_effort) {
+    if (!openAIBody.reasoning_options) {
+      throw new Error("reasoning_options are required for Anthropic models");
+    }
+
+    if (!openAIBody.reasoning_options.budget_tokens) {
+      throw new Error("reasoning_options.budget_tokens are required for Anthropic models");
+    }
+
+    antBody.thinking = {
+      type: "enabled",
+      budget_tokens: openAIBody.reasoning_options.budget_tokens,
+    };
+
+    // temperature 1 is required for Anthropic models to enable reasoning
+    antBody.temperature = 1;
+  }
+
   if (openAIBody.stop) {
     antBody.stop_sequences = Array.isArray(openAIBody.stop)
       ? openAIBody.stop
@@ -299,8 +317,8 @@ function extractSystemMessage(
     systemMessageBlocks.push({
       type: "text",
       text: convertedBlock,
-      ...(includeCache && (msg as any).cache_control
-        ? { cache_control: (msg as any).cache_control }
+      ...(includeCache && msg.cache_control
+        ? { cache_control: msg.cache_control }
         : {}),
     });
   } else {
@@ -335,8 +353,8 @@ function mapMessages(
         type: "tool_result",
         tool_use_id: message.tool_call_id,
         content: typeof message.content === "string" ? message.content : "",
-        ...(includeCache && (message as any).cache_control
-          ? { cache_control: (message as any).cache_control }
+        ...(includeCache && message.cache_control
+          ? { cache_control: message.cache_control }
           : {}),
       },
     ],
@@ -364,8 +382,29 @@
       }
     });
   }
-
+
+  const hasReasoningDetails = message.role === "assistant" && message.reasoning_details && message.reasoning_details?.length > 0;
+  const hasReasoning = message.role === "assistant" && !!message.reasoning;
   let processedContent: string | AnthropicContentBlock[] = [];
+
+  // Thinking blocks MUST be first in Anthropic format to be valid
+  if (message.reasoning_details && hasReasoningDetails) {
+    // Use reasoning_details when available (includes signatures for multi-turn)
+    for (const detail of message.reasoning_details) {
+      processedContent.push({
+        type: "thinking",
+        thinking: detail.thinking,
+        signature: detail.signature,
+      });
+    }
+  } else if (hasReasoning) {
+    // Fallback to simple reasoning string (no signature - may fail on multi-turn)
+    processedContent.push({
+      type: "thinking",
+      thinking: message.reasoning,
+    });
+  }
+
   if (message.content) {
     const convertedContent = openAIContentToAnthropicContent(
       message.content,
@@ -374,7 +413,7 @@
     if (typeof convertedContent === "string") {
       // if the message requires forming a content array
       const hasMsgCache = includeCache && !!(message as any).cache_control;
-      if (hasMsgCache || processedToolCallContent.length > 0) {
+      if (hasMsgCache || processedToolCallContent.length > 0 || hasReasoning) {
         processedContent.push({
           type: "text",
           text: convertedContent,
@@ -384,7 +423,7 @@
       });
     } else {
       // there was no cache control breakpoint, the content was just string,
-      // and no tool calls, so we create a non-content array message.
+      // no tool calls, and no reasoning, so we create a non-content array message.
       antMessage.content = convertedContent;
       return antMessage;
     }
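
A request-side sketch of the new branch, assuming the caller sets both fields the mapper validates (the model id is hypothetical):

const openAIBody = {
  model: "claude-sonnet-4-5",
  reasoning_effort: "medium",
  reasoning_options: { budget_tokens: 2048 },
  messages: [{ role: "user", content: "Plan the rollout." }],
};
// toAnthropic would then set, among the other mapped fields:
//   thinking: { type: "enabled", budget_tokens: 2048 }
//   temperature: 1   // forced, since Anthropic requires temperature 1 with thinking enabled
// Setting reasoning_effort without reasoning_options.budget_tokens throws instead.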

packages/llm-mapper/transform/providers/responses/request/toChatCompletions.ts

Lines changed: 8 additions & 1 deletion
@@ -2,7 +2,6 @@ import {
   ResponsesRequestBody,
   ResponsesInputItem,
   ResponsesMessageInputItem,
-  ResponsesInputContentPart,
 } from "../../../types/responses";
 import {
   HeliconeChatCreateParams,
@@ -138,6 +137,11 @@
     }
   }

+  let reasoning_effort: HeliconeChatCreateParams["reasoning_effort"] | undefined;
+  if (body.reasoning) {
+    reasoning_effort = body.reasoning.effort === "minimal" ? "low" : body.reasoning.effort;
+  }
+
   const heliconeBody: HeliconeChatCreateParams = {
     model: body.model,
     messages,
@@ -148,6 +152,8 @@
     stream: body.stream,
     tools,
     tool_choice,
+    reasoning_effort,
+    reasoning_options: body.reasoning_options,
     frequency_penalty: body.frequency_penalty,
     presence_penalty: body.presence_penalty,
     logit_bias: body.logit_bias,
@@ -162,6 +168,7 @@
     // Deprecated passthroughs (supported by Chat Completions clients)
     function_call: (body as any).function_call,
     functions: (body as any).functions,
+    ...(body.stream ? { stream_options: { include_usage: true } } : {}),
   } as HeliconeChatCreateParams;

   return heliconeBody;
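
The effort remap is the only lossy step here: the Responses API accepts "minimal", which Chat Completions does not, so it is coerced to "low". A sketch with invented values (the model id and input field are illustrative):

const responsesBody = {
  model: "some-reasoning-model",
  input: "Summarize the incident.",
  stream: true,
  reasoning: { effort: "minimal" },
  reasoning_options: { budget_tokens: 1024 },
};
// toChatCompletions(responsesBody) would yield a body containing:
//   reasoning_effort: "low"                       // "minimal" coerced; low/medium/high pass through
//   reasoning_options: { budget_tokens: 1024 }    // forwarded unchanged
//   stream_options: { include_usage: true }       // added whenever stream is true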

packages/llm-mapper/transform/types/anthropic.ts

Lines changed: 8 additions & 0 deletions
@@ -26,8 +26,16 @@ export interface AnthropicRequestBody {
   stream?: boolean;
   tools?: (AnthropicTool | AnthropicWebSearchTool)[];
   tool_choice?: AnthropicToolChoice;
+  thinking?: AnthropicThinkingConfig;
 }

+export type AnthropicThinkingConfig = {
+  type: "enabled";
+  budget_tokens: number;
+} | {
+  type: "disabled";
+};
+
 export interface AnthropicMessage {
   role: "user" | "assistant";
   content: string | AnthropicContentBlock[];
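
For reference, the two shapes the new union admits (a sketch; values are arbitrary):

const enabled: AnthropicThinkingConfig = { type: "enabled", budget_tokens: 4096 };
const disabled: AnthropicThinkingConfig = { type: "disabled" }; // budget_tokens is not permitted here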

packages/llm-mapper/transform/types/openai.ts

Lines changed: 9 additions & 0 deletions
@@ -26,9 +26,16 @@ export interface OpenAIChoice {
   logprobs: null | OpenAILogProbs;
 }

+export interface OpenAIReasoningDetail {
+  thinking: string;
+  signature: string;
+}
+
 export interface OpenAIResponseMessage {
   role: "assistant" | "system" | "user" | "function" | "tool";
   content: string | null;
+  reasoning?: string;
+  reasoning_details?: OpenAIReasoningDetail[];
   function_call?: OpenAIFunctionCall;
   tool_calls?: OpenAIToolCall[];
   annotations?: OpenAIAnnotation[];
@@ -65,6 +72,8 @@ export interface OpenAIStreamChoice {
 export interface OpenAIDelta {
   role?: Role;
   content?: string;
+  reasoning?: string;
+  reasoning_details?: OpenAIReasoningDetail[];
   function_call?: {
     name?: string;
     arguments?: string;
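
A consumer-side sketch of the new delta fields, assuming a chunk stream shaped like the converter's output (saveForNextTurn is a hypothetical helper):

let reasoningText = "";
for await (const chunk of stream) {
  const delta = chunk.choices[0]?.delta;
  if (delta?.reasoning) reasoningText += delta.reasoning; // incremental thinking text
  if (delta?.reasoning_details) {
    // arrives once, on the final chunk, carrying thinking/signature pairs
    saveForNextTurn(delta.reasoning_details); // hypothetical helper for multi-turn replay
  }
}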

packages/llm-mapper/transform/types/responses.ts

Lines changed: 5 additions & 0 deletions
@@ -85,6 +85,11 @@ export interface ResponsesRequestBody {
   previous_response_id?: string;
   reasoning?: {
     effort?: "low" | "medium" | "high" | "minimal";
+    summary?: "auto" | "concise" | "detailed";
+    generate_summary?: "auto" | "concise" | "detailed";
+  };
+  reasoning_options?: {
+    budget_tokens?: number;
   };
   text?: {
     verbosity?: "low" | "medium" | "high";
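
A request-body sketch exercising the new fields (the model id is illustrative, and Partial is used since fields outside this diff are not shown):

const body: Partial<ResponsesRequestBody> = {
  model: "claude-sonnet-4-5",                        // illustrative id
  reasoning: { effort: "high", generate_summary: "concise" },
  reasoning_options: { budget_tokens: 1500 },        // read by the Anthropic request mapper
};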

packages/prompts/types.ts

Lines changed: 24 additions & 1 deletion
@@ -117,12 +117,23 @@ export type HeliconeChatCompletionContentPart = (ChatCompletionContentPart | Cha
   cache_control?: CacheControl;
 };

+/**
+ * Reasoning detail for Anthropic thinking blocks with signatures.
+ * Required for multi-turn conversations with thinking enabled.
+ */
+export interface ReasoningDetail {
+  thinking: string;
+  signature: string;
+}
+
 /**
  * OpenAI message with optional cache control support
  */
 type HeliconeMessageParam<T> = Omit<T, 'content'> & {
   content: string | HeliconeChatCompletionContentPart[] | null;
   cache_control?: CacheControl;
+  reasoning?: string;
+  reasoning_details?: ReasoningDetail[];
 };

 export type HeliconeChatCompletionMessageParam =
@@ -177,6 +188,12 @@ export type HeliconePromptParams = {
   inputs?: Record<string, any>;
 };

+export interface HeliconeReasoningOptions {
+  reasoning_options?: {
+    budget_tokens: number;
+  }
+};
+
 /**
  * OpenAI ChatCompletion parameters extended with Helicone prompt template support.
  * Use this type when creating non-streaming chat completions with Helicone prompts.
@@ -186,6 +203,12 @@
  * const response = await openai.chat.completions.create({
  *   prompt_id: "123",
  *   model: "gpt-4o",
+ *
+ *   // Optional: only for reasoning models
+ *   reasoning_options: {
+ *     // For Anthropic models
+ *     budget_tokens: 1000,
+ *   },
  *   messages: [
  *     // Message-level cache control (string content)
  *     {
@@ -217,7 +240,7 @@
  * ```
  */
 export type HeliconeChatCreateParams = ChatCompletionCreateParamsNonStreamingPartialMessages &
-  HeliconePromptParams;
+  HeliconePromptParams & HeliconeReasoningOptions;

 /**
  * OpenAI ChatCompletion parameters extended with Helicone prompt template support for streaming responses.
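
Putting the prompt-level pieces together, a hedged multi-turn sketch in which the previous turn's reasoning_details are echoed back so the signature can be validated (all values invented):

const params: HeliconeChatCreateParams = {
  model: "claude-sonnet-4-5",
  reasoning_options: { budget_tokens: 1000 },
  messages: [
    { role: "user", content: "What is 17 * 24?" },
    {
      role: "assistant",
      content: "408",
      reasoning_details: [
        { thinking: "17 * 24 = 340 + 68 = 408", signature: "sig_prev_turn" },
      ],
    },
    { role: "user", content: "Now halve it." },
  ],
};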
