
Commit d03a379

feat: [ENG-3645] AI Gateway Reasoning Support - Anthropic (#5303)
* anthropic thinking support on AI Gateway
* update responses<->completions mappers for reasoning options and effort
* nit
* nits
* fix accumulation of thinking details bug
1 parent c7f9651 commit d03a379

14 files changed: +177 -14 lines


packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap

Lines changed: 1 addition & 1 deletion
@@ -706,7 +706,7 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = `
       "context": 200000,
       "crossRegion": false,
       "maxTokens": 8192,
-      "modelId": "anthropic/claude-4.5-haiku",
+      "modelId": "anthropic/claude-haiku-4.5",
       "parameters": [
         "max_tokens",
         "stop",

packages/cost/models/authors/anthropic/claude-4.5-haiku/endpoints.ts

Lines changed: 1 addition & 1 deletion
@@ -110,7 +110,7 @@ export const endpoints = {
   "claude-4.5-haiku:openrouter": {
     provider: "openrouter",
     author: "anthropic",
-    providerModelId: "anthropic/claude-4.5-haiku",
+    providerModelId: "anthropic/claude-haiku-4.5",
     pricing: [
       {
         threshold: 0,

packages/llm-mapper/transform/providers/anthropic/response/toOpenai.ts

Lines changed: 10 additions & 0 deletions
@@ -16,16 +16,26 @@ export function toOpenAI(response: AnthropicResponseBody): OpenAIResponseBody {
   const toolUseBlocks = response.content.filter(
     (block) => block.type === "tool_use"
   );
+  const thinkingBlocks = response.content.filter(
+    (block) => block.type === "thinking"
+  );

   const { content, annotations } = buildContentAndAnnotations(textBlocks);

   const tool_calls = mapToolCalls(toolUseBlocks);
+  const reasoning = thinkingBlocks.map((b) => b.thinking || "").join("");
+  const reasoning_details = thinkingBlocks.map((b) => ({
+    thinking: b.thinking || "",
+    signature: b.signature || "",
+  }));

   const choice: OpenAIChoice = {
     index: 0,
     message: {
       role: "assistant",
       content: content || null,
+      ...(reasoning && { reasoning }),
+      ...(reasoning_details.length > 0 && { reasoning_details }),
       ...(tool_calls.length > 0 && { tool_calls }),
       ...(annotations.length > 0 && { annotations }),
     },
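
A minimal before/after sketch of this mapping (field subset only; the signature value and response text are invented for illustration, not taken from the commit):

// An Anthropic response mixing a thinking block and a text block...
const anthropicResponse = {
  content: [
    { type: "thinking", thinking: "Compare both options first.", signature: "sig_abc" },
    { type: "text", text: "Option B is cheaper." },
  ],
};
// ...would map to an OpenAI-shaped choices[0].message roughly like:
// {
//   role: "assistant",
//   content: "Option B is cheaper.",
//   reasoning: "Compare both options first.",
//   reasoning_details: [{ thinking: "Compare both options first.", signature: "sig_abc" }],
// }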

packages/llm-mapper/transform/providers/anthropic/streamedResponse/toOpenai.ts

Lines changed: 37 additions & 0 deletions
@@ -25,6 +25,7 @@ export class AnthropicToOpenAIStreamConverter {
   private nextToolCallIndex: number = 0;
   private annotations: OpenAIAnnotation[] = [];
   private currentContentLength: number = 0;
+  private thinkingBlockState: Map<number, { thinking: string; signature: string }> = new Map(); // Track thinking blocks with their signatures

   constructor() {
     this.created = Math.floor(Date.now() / 1000);
@@ -72,6 +73,7 @@
     this.nextToolCallIndex = 0;
     this.annotations = [];
     this.currentContentLength = 0;
+    this.thinkingBlockState.clear();

     chunks.push(
       this.createChunk({
@@ -153,6 +155,9 @@
           ],
         })
       );
+    } else if (event.content_block.type === "thinking") {
+      // Initialize thinking block state for this index
+      this.thinkingBlockState.set(event.index, { thinking: "", signature: "" });
     } else if (
       event.content_block.type === "web_search_tool_result" ||
       event.content_block.type === "server_tool_use"
@@ -218,6 +223,31 @@
         );
       }
     }
+    } else if (event.delta.type === "thinking_delta") {
+      // Accumulate thinking content for this block
+      const thinkingState = this.thinkingBlockState.get(event.index);
+      if (thinkingState) {
+        thinkingState.thinking += event.delta.thinking;
+      }
+
+      chunks.push(
+        this.createChunk({
+          choices: [
+            {
+              index: 0,
+              delta: { reasoning: event.delta.thinking },
+              logprobs: null,
+              finish_reason: null,
+            },
+          ],
+        })
+      );
+    } else if (event.delta.type === "signature_delta") {
+      // Accumulate signature for this thinking block
+      const thinkingState = this.thinkingBlockState.get(event.index);
+      if (thinkingState) {
+        thinkingState.signature += event.delta.signature;
+      }
     } else if (event.delta.type === "citations_delta") {
       // Collect citations - will be sent at the end in message_delta
       const citation = event.delta.citation;
@@ -287,6 +317,10 @@

     const finishReason = this.mapStopReason(event.delta.stop_reason);

+    // Collect reasoning_details from accumulated thinking blocks
+    const reasoning_details = Array.from(this.thinkingBlockState.values())
+      .filter(state => state.thinking || state.signature);
+
     chunks.push(
       this.createChunk({
         choices: [
@@ -296,6 +330,9 @@
             ...(this.annotations.length > 0 && {
               annotations: this.annotations,
             }),
+            ...(reasoning_details.length > 0 && {
+              reasoning_details,
+            }),
           },
           logprobs: null,
           finish_reason: finishReason,
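
To make the streaming path concrete, here is a hedged sketch of the event sequence for a single thinking block and what the converter emits for it (payloads abbreviated; exact shapes follow Anthropic's streaming API):

const events = [
  { type: "content_block_start", index: 0, content_block: { type: "thinking" } },
  { type: "content_block_delta", index: 0, delta: { type: "thinking_delta", thinking: "Check the units. " } },
  { type: "content_block_delta", index: 0, delta: { type: "signature_delta", signature: "sig_xyz" } },
  { type: "message_delta", delta: { stop_reason: "end_turn" } },
];
// Emitted, in order:
// 1. a chunk with delta: { reasoning: "Check the units. " } (one per thinking_delta)
// 2. nothing for signature_delta (the signature is only accumulated internally)
// 3. a final chunk whose delta carries reasoning_details
//    ([{ thinking: "Check the units. ", signature: "sig_xyz" }]) alongside the mapped finish_reason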

packages/llm-mapper/transform/providers/openai/request/toAnthropic.ts

Lines changed: 46 additions & 7 deletions
@@ -31,6 +31,24 @@ export function toAnthropic(
     stream: openAIBody.stream ?? undefined,
   };

+  if (openAIBody.reasoning_effort) {
+    if (!openAIBody.reasoning_options) {
+      throw new Error("reasoning_options are required for Anthropic models");
+    }
+
+    if (!openAIBody.reasoning_options.budget_tokens) {
+      throw new Error("reasoning_options.budget_tokens are required for Anthropic models");
+    }
+
+    antBody.thinking = {
+      type: "enabled",
+      budget_tokens: openAIBody.reasoning_options.budget_tokens,
+    };
+
+    // temperature 1 is required for Anthropic models to enable reasoning
+    antBody.temperature = 1;
+  }
+
   if (openAIBody.stop) {
     antBody.stop_sequences = Array.isArray(openAIBody.stop)
       ? openAIBody.stop
@@ -299,8 +317,8 @@ function extractSystemMessage(
     systemMessageBlocks.push({
       type: "text",
       text: convertedBlock,
-      ...(includeCache && (msg as any).cache_control
-        ? { cache_control: (msg as any).cache_control }
+      ...(includeCache && msg.cache_control
+        ? { cache_control: msg.cache_control }
         : {}),
     });
   } else {
@@ -335,8 +353,8 @@ function mapMessages(
         type: "tool_result",
         tool_use_id: message.tool_call_id,
         content: typeof message.content === "string" ? message.content : "",
-        ...(includeCache && (message as any).cache_control
-          ? { cache_control: (message as any).cache_control }
+        ...(includeCache && message.cache_control
+          ? { cache_control: message.cache_control }
           : {}),
       },
     ],
@@ -364,8 +382,29 @@
       }
     });
   }
-
+
+  const hasReasoningDetails = message.role === "assistant" && message.reasoning_details && message.reasoning_details?.length > 0;
+  const hasReasoning = message.role === "assistant" && !!message.reasoning;
   let processedContent: string | AnthropicContentBlock[] = [];
+
+  // Thinking blocks MUST be first in Anthropic format to be valid
+  if (message.reasoning_details && hasReasoningDetails) {
+    // Use reasoning_details when available (includes signatures for multi-turn)
+    for (const detail of message.reasoning_details) {
+      processedContent.push({
+        type: "thinking",
+        thinking: detail.thinking,
+        signature: detail.signature,
+      });
+    }
+  } else if (hasReasoning) {
+    // Fallback to simple reasoning string (no signature - may fail on multi-turn)
+    processedContent.push({
+      type: "thinking",
+      thinking: message.reasoning,
+    });
+  }
+
   if (message.content) {
     const convertedContent = openAIContentToAnthropicContent(
       message.content,
@@ -374,7 +413,7 @@
     if (typeof convertedContent === "string") {
       // if the message requires forming a content array
       const hasMsgCache = includeCache && !!(message as any).cache_control;
-      if (hasMsgCache || processedToolCallContent.length > 0) {
+      if (hasMsgCache || processedToolCallContent.length > 0 || hasReasoning) {
         processedContent.push({
           type: "text",
           text: convertedContent,
@@ -384,7 +423,7 @@
       });
     } else {
       // there was no cache control breakpoint, the content was just string,
-      // and no tool calls, so we create a non-content array message.
+      // no tool calls, and no reasoning, so we create a non-content array message.
       antMessage.content = convertedContent;
       return antMessage;
     }
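
A request-side sketch of the new branch, assuming the caller sets both fields the mapper validates (the model id is hypothetical):

const openAIBody = {
  model: "claude-sonnet-4-5",
  reasoning_effort: "medium",
  reasoning_options: { budget_tokens: 2048 },
  messages: [{ role: "user", content: "Plan the rollout." }],
};
// toAnthropic would then set, among the other mapped fields:
//   thinking: { type: "enabled", budget_tokens: 2048 }
//   temperature: 1   // forced, since Anthropic requires temperature 1 with thinking enabled
// Setting reasoning_effort without reasoning_options.budget_tokens throws instead.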

packages/llm-mapper/transform/providers/responses/request/toChatCompletions.ts

Lines changed: 8 additions & 1 deletion
@@ -2,7 +2,6 @@ import {
   ResponsesRequestBody,
   ResponsesInputItem,
   ResponsesMessageInputItem,
-  ResponsesInputContentPart,
 } from "../../../types/responses";
 import {
   HeliconeChatCreateParams,
@@ -138,6 +137,11 @@
     }
   }

+  let reasoning_effort: HeliconeChatCreateParams["reasoning_effort"] | undefined;
+  if (body.reasoning) {
+    reasoning_effort = body.reasoning.effort === "minimal" ? "low" : body.reasoning.effort;
+  }
+
   const heliconeBody: HeliconeChatCreateParams = {
     model: body.model,
     messages,
@@ -148,6 +152,8 @@
     stream: body.stream,
     tools,
     tool_choice,
+    reasoning_effort,
+    reasoning_options: body.reasoning_options,
     frequency_penalty: body.frequency_penalty,
     presence_penalty: body.presence_penalty,
     logit_bias: body.logit_bias,
@@ -162,6 +168,7 @@
     // Deprecated passthroughs (supported by Chat Completions clients)
     function_call: (body as any).function_call,
     functions: (body as any).functions,
+    ...(body.stream ? { stream_options: { include_usage: true } } : {}),
   } as HeliconeChatCreateParams;

   return heliconeBody;
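
The effort remap is the only lossy step here: the Responses API accepts "minimal", which Chat Completions does not, so it is coerced to "low". A sketch with invented values (the model id and input field are illustrative):

const responsesBody = {
  model: "some-reasoning-model",
  input: "Summarize the incident.",
  stream: true,
  reasoning: { effort: "minimal" },
  reasoning_options: { budget_tokens: 1024 },
};
// toChatCompletions(responsesBody) would yield a body containing:
//   reasoning_effort: "low"                       // "minimal" coerced; low/medium/high pass through
//   reasoning_options: { budget_tokens: 1024 }    // forwarded unchanged
//   stream_options: { include_usage: true }       // added whenever stream is true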

packages/llm-mapper/transform/types/anthropic.ts

Lines changed: 8 additions & 0 deletions
@@ -26,8 +26,16 @@ export interface AnthropicRequestBody {
   stream?: boolean;
   tools?: (AnthropicTool | AnthropicWebSearchTool)[];
   tool_choice?: AnthropicToolChoice;
+  thinking?: AnthropicThinkingConfig;
 }

+export type AnthropicThinkingConfig = {
+  type: "enabled";
+  budget_tokens: number;
+} | {
+  type: "disabled";
+};
+
 export interface AnthropicMessage {
   role: "user" | "assistant";
   content: string | AnthropicContentBlock[];
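
For reference, the two shapes the new union admits (a sketch; values are arbitrary):

const enabled: AnthropicThinkingConfig = { type: "enabled", budget_tokens: 4096 };
const disabled: AnthropicThinkingConfig = { type: "disabled" }; // budget_tokens is not permitted here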

packages/llm-mapper/transform/types/openai.ts

Lines changed: 9 additions & 0 deletions
@@ -26,9 +26,16 @@ export interface OpenAIChoice {
   logprobs: null | OpenAILogProbs;
 }

+export interface OpenAIReasoningDetail {
+  thinking: string;
+  signature: string;
+}
+
 export interface OpenAIResponseMessage {
   role: "assistant" | "system" | "user" | "function" | "tool";
   content: string | null;
+  reasoning?: string;
+  reasoning_details?: OpenAIReasoningDetail[];
   function_call?: OpenAIFunctionCall;
   tool_calls?: OpenAIToolCall[];
   annotations?: OpenAIAnnotation[];
@@ -65,6 +72,8 @@ export interface OpenAIStreamChoice {
 export interface OpenAIDelta {
   role?: Role;
   content?: string;
+  reasoning?: string;
+  reasoning_details?: OpenAIReasoningDetail[];
   function_call?: {
     name?: string;
     arguments?: string;
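
A consumer-side sketch of the new delta fields, assuming a chunk stream shaped like the converter's output (saveForNextTurn is a hypothetical helper):

let reasoningText = "";
for await (const chunk of stream) {
  const delta = chunk.choices[0]?.delta;
  if (delta?.reasoning) reasoningText += delta.reasoning; // incremental thinking text
  if (delta?.reasoning_details) {
    // arrives once, on the final chunk, carrying thinking/signature pairs
    saveForNextTurn(delta.reasoning_details); // hypothetical helper for multi-turn replay
  }
}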

packages/llm-mapper/transform/types/responses.ts

Lines changed: 5 additions & 0 deletions
@@ -85,6 +85,11 @@ export interface ResponsesRequestBody {
   previous_response_id?: string;
   reasoning?: {
     effort?: "low" | "medium" | "high" | "minimal";
+    summary?: "auto" | "concise" | "detailed";
+    generate_summary?: "auto" | "concise" | "detailed";
+  };
+  reasoning_options?: {
+    budget_tokens?: number;
   };
   text?: {
     verbosity?: "low" | "medium" | "high";
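
A request-body sketch exercising the new fields (the model id is illustrative, and Partial is used since fields outside this diff are not shown):

const body: Partial<ResponsesRequestBody> = {
  model: "claude-sonnet-4-5",                        // illustrative id
  reasoning: { effort: "high", generate_summary: "concise" },
  reasoning_options: { budget_tokens: 1500 },        // read by the Anthropic request mapper
};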

packages/prompts/types.ts

Lines changed: 24 additions & 1 deletion
@@ -117,12 +117,23 @@ export type HeliconeChatCompletionContentPart = (ChatCompletionContentPart | Cha
   cache_control?: CacheControl;
 };

+/**
+ * Reasoning detail for Anthropic thinking blocks with signatures.
+ * Required for multi-turn conversations with thinking enabled.
+ */
+export interface ReasoningDetail {
+  thinking: string;
+  signature: string;
+}
+
 /**
  * OpenAI message with optional cache control support
  */
 type HeliconeMessageParam<T> = Omit<T, 'content'> & {
   content: string | HeliconeChatCompletionContentPart[] | null;
   cache_control?: CacheControl;
+  reasoning?: string;
+  reasoning_details?: ReasoningDetail[];
 };

 export type HeliconeChatCompletionMessageParam =
@@ -177,6 +188,12 @@ export type HeliconePromptParams = {
   inputs?: Record<string, any>;
 };

+export interface HeliconeReasoningOptions {
+  reasoning_options?: {
+    budget_tokens: number;
+  }
+};
+
 /**
  * OpenAI ChatCompletion parameters extended with Helicone prompt template support.
  * Use this type when creating non-streaming chat completions with Helicone prompts.
@@ -186,6 +203,12 @@
  * const response = await openai.chat.completions.create({
  *   prompt_id: "123",
  *   model: "gpt-4o",
+ *
+ *   // Optional: only for reasoning models
+ *   reasoning_options: {
+ *     // For Anthropic models
+ *     budget_tokens: 1000,
+ *   },
  *   messages: [
  *     // Message-level cache control (string content)
  *     {
@@ -217,7 +240,7 @@
  * ```
  */
 export type HeliconeChatCreateParams = ChatCompletionCreateParamsNonStreamingPartialMessages &
-  HeliconePromptParams;
+  HeliconePromptParams & HeliconeReasoningOptions;

 /**
  * OpenAI ChatCompletion parameters extended with Helicone prompt template support for streaming responses.
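
Putting the prompt-level pieces together, a hedged multi-turn sketch in which the previous turn's reasoning_details are echoed back so the signature can be validated (all values invented):

const params: HeliconeChatCreateParams = {
  model: "claude-sonnet-4-5",
  reasoning_options: { budget_tokens: 1000 },
  messages: [
    { role: "user", content: "What is 17 * 24?" },
    {
      role: "assistant",
      content: "408",
      reasoning_details: [
        { thinking: "17 * 24 = 340 + 68 = 408", signature: "sig_prev_turn" },
      ],
    },
    { role: "user", content: "Now halve it." },
  ],
};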
