39 changes: 0 additions & 39 deletions src/models/chat.rs
@@ -43,45 +43,6 @@ impl ReasoningConfig {

Ok(())
}

// For OpenAI/Azure - Direct passthrough (but prioritize max_tokens over effort)
pub fn to_openai_effort(&self) -> Option<String> {
if self.max_tokens.is_some() {
// If max_tokens is specified, don't use effort for OpenAI
None
} else {
// Only return effort if it's not empty
self.effort
.as_ref()
.filter(|e| !e.trim().is_empty())
.cloned()
}
}

// For Vertex AI (Gemini) - Use max_tokens directly
pub fn to_gemini_thinking_budget(&self) -> Option<i32> {
self.max_tokens.map(|tokens| tokens as i32)
}

// For Anthropic/Bedrock - Custom prompt generation (prioritize max_tokens over effort)
pub fn to_thinking_prompt(&self) -> Option<String> {
if self.max_tokens.is_some() {
// If max_tokens is specified, use a generic thinking prompt
Some("Think through this step-by-step with detailed reasoning.".to_string())
} else {
match self.effort.as_deref() {
Some(effort) if !effort.trim().is_empty() => match effort {
"high" => {
Some("Think through this step-by-step with detailed reasoning.".to_string())
}
"medium" => Some("Consider this problem thoughtfully.".to_string()),
"low" => Some("Think about this briefly.".to_string()),
_ => None,
},
_ => None,
}
}
}
}

#[derive(Deserialize, Serialize, Clone, ToSchema)]
20 changes: 19 additions & 1 deletion src/providers/anthropic/models.rs
@@ -105,7 +105,25 @@ impl From<ChatCompletionRequest> for AnthropicChatCompletionRequest {

// Add reasoning prompt if reasoning is requested
if let Some(reasoning_config) = &request.reasoning {
if let Some(thinking_prompt) = reasoning_config.to_thinking_prompt() {
// Handle Anthropic reasoning logic inline
Review comment (Member): why are we adding this? 🤔

let thinking_prompt = if reasoning_config.max_tokens.is_some() {
// If max_tokens is specified, use a generic thinking prompt
Some("Think through this step-by-step with detailed reasoning.".to_string())
Review comment (Contributor): Inline reasoning logic now duplicates the prompt strings (e.g. 'Think through this step-by-step with detailed reasoning.'). Consider extracting these magic strings to constants or a shared function for maintainability (see the sketch after this diff).

} else {
match reasoning_config.effort.as_deref() {
Some(effort) if !effort.trim().is_empty() => match effort {
"high" => Some(
"Think through this step-by-step with detailed reasoning.".to_string(),
),
"medium" => Some("Consider this problem thoughtfully.".to_string()),
"low" => Some("Think about this briefly.".to_string()),
_ => None,
},
_ => None,
}
};

if let Some(thinking_prompt) = thinking_prompt {
system = Some(match system {
Some(existing) => format!("{}\n\n{}", existing, thinking_prompt),
None => thinking_prompt,
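
The duplication the reviewer flags above could be addressed with a small shared module. A minimal sketch, assuming a hypothetical src/models/reasoning.rs (the module path, constant names, and the u32 max_tokens type are illustrative, not part of this PR):

// Hypothetical shared home for the prompt strings and the effort-to-prompt
// mapping that this PR inlines into each provider.
pub const HIGH_EFFORT_PROMPT: &str =
    "Think through this step-by-step with detailed reasoning.";
pub const MEDIUM_EFFORT_PROMPT: &str = "Consider this problem thoughtfully.";
pub const LOW_EFFORT_PROMPT: &str = "Think about this briefly.";

/// Mirrors the inlined Anthropic logic: max_tokens takes priority and maps to
/// the generic prompt; otherwise a non-empty effort level selects a prompt.
pub fn thinking_prompt(effort: Option<&str>, max_tokens: Option<u32>) -> Option<String> {
    if max_tokens.is_some() {
        return Some(HIGH_EFFORT_PROMPT.to_string());
    }
    match effort.map(str::trim) {
        Some("high") => Some(HIGH_EFFORT_PROMPT.to_string()),
        Some("medium") => Some(MEDIUM_EFFORT_PROMPT.to_string()),
        Some("low") => Some(LOW_EFFORT_PROMPT.to_string()),
        _ => None,
    }
}

The call site here would then shrink to a one-liner such as reasoning::thinking_prompt(reasoning_config.effort.as_deref(), reasoning_config.max_tokens), and the identical effort passthrough duplicated in the OpenAI and Azure providers could share a similar helper.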
19 changes: 0 additions & 19 deletions src/providers/anthropic/provider.rs
@@ -46,25 +46,6 @@ impl Provider for AnthropicProvider {
tracing::error!("Invalid reasoning config: {}", e);
return Err(StatusCode::BAD_REQUEST);
}

if let Some(max_tokens) = reasoning.max_tokens {
info!(
"✅ Anthropic reasoning enabled with max_tokens: {}",
max_tokens
);
} else if let Some(thinking_prompt) = reasoning.to_thinking_prompt() {
info!(
"✅ Anthropic reasoning enabled with effort level: {:?} -> prompt: \"{}\"",
reasoning.effort,
thinking_prompt.chars().take(50).collect::<String>() + "..."
);
} else {
tracing::debug!(
"ℹ️ Anthropic reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
reasoning.effort,
reasoning.max_tokens
);
}
}

let request = AnthropicChatCompletionRequest::from(payload);
33 changes: 14 additions & 19 deletions src/providers/azure/provider.rs
@@ -24,7 +24,20 @@ struct AzureChatCompletionRequest

impl From<ChatCompletionRequest> for AzureChatCompletionRequest {
fn from(mut base: ChatCompletionRequest) -> Self {
let reasoning_effort = base.reasoning.as_ref().and_then(|r| r.to_openai_effort());
// Handle Azure reasoning effort logic inline (same as OpenAI)
let reasoning_effort = base.reasoning.as_ref().and_then(|reasoning| {
if reasoning.max_tokens.is_some() {
// If max_tokens is specified, don't use effort for Azure
None
} else {
// Only return effort if it's not empty
reasoning
.effort
.as_ref()
.filter(|e| !e.trim().is_empty())
.cloned()
}
});

// Remove reasoning field from base request since Azure uses reasoning_effort
base.reasoning = None;
@@ -85,24 +98,6 @@ impl Provider for AzureProvider {
tracing::error!("Invalid reasoning config: {}", e);
return Err(StatusCode::BAD_REQUEST);
}

if let Some(max_tokens) = reasoning.max_tokens {
info!(
"✅ Azure reasoning with max_tokens: {} (note: Azure uses effort levels, max_tokens ignored)",
max_tokens
);
} else if let Some(effort) = reasoning.to_openai_effort() {
info!(
"✅ Azure reasoning enabled with effort level: \"{}\"",
effort
);
} else {
tracing::debug!(
"ℹ️ Azure reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
reasoning.effort,
reasoning.max_tokens
);
}
}

let deployment = model_config.params.get("deployment").unwrap();
108 changes: 86 additions & 22 deletions src/providers/bedrock/test.rs
@@ -708,35 +708,99 @@ mod arn_tests {

#[test]
fn test_reasoning_config_to_thinking_prompt() {
// Test effort-based prompts
let high_effort_config = crate::models::chat::ReasoningConfig {
effort: Some("high".to_string()),
max_tokens: None,
exclude: None,
};
assert!(high_effort_config.to_thinking_prompt().is_some());
use crate::models::chat::ChatCompletionRequest;
use crate::models::content::ChatCompletionMessage;
use crate::providers::anthropic::AnthropicChatCompletionRequest;

let medium_effort_config = crate::models::chat::ReasoningConfig {
effort: Some("medium".to_string()),
// Test effort-based prompts by converting through AnthropicChatCompletionRequest
let high_effort_request = ChatCompletionRequest {
model: "test".to_string(),
messages: vec![ChatCompletionMessage {
role: "user".to_string(),
content: Some(crate::models::content::ChatMessageContent::String(
"test".to_string(),
)),
name: None,
tool_calls: None,
refusal: None,
}],
reasoning: Some(crate::models::chat::ReasoningConfig {
effort: Some("high".to_string()),
max_tokens: None,
exclude: None,
}),
temperature: None,
top_p: None,
n: None,
stream: None,
stop: None,
max_tokens: None,
exclude: None,
max_completion_tokens: None,
parallel_tool_calls: None,
presence_penalty: None,
frequency_penalty: None,
logit_bias: None,
tool_choice: None,
tools: None,
user: None,
logprobs: None,
top_logprobs: None,
response_format: None,
};
assert!(medium_effort_config.to_thinking_prompt().is_some());

let low_effort_config = crate::models::chat::ReasoningConfig {
effort: Some("low".to_string()),
max_tokens: None,
exclude: None,
};
assert!(low_effort_config.to_thinking_prompt().is_some());
let anthropic_request = AnthropicChatCompletionRequest::from(high_effort_request);
assert!(anthropic_request.system.is_some());
assert!(
anthropic_request
.system
.unwrap()
.contains("Think through this step-by-step")
);

// Test max_tokens takes priority over effort
let max_tokens_config = crate::models::chat::ReasoningConfig {
effort: Some("high".to_string()),
max_tokens: Some(1000),
exclude: None,
let max_tokens_request = ChatCompletionRequest {
model: "test".to_string(),
messages: vec![ChatCompletionMessage {
role: "user".to_string(),
content: Some(crate::models::content::ChatMessageContent::String(
"test".to_string(),
)),
name: None,
tool_calls: None,
refusal: None,
}],
reasoning: Some(crate::models::chat::ReasoningConfig {
effort: Some("high".to_string()),
max_tokens: Some(1000),
exclude: None,
}),
temperature: None,
top_p: None,
n: None,
stream: None,
stop: None,
max_tokens: None,
max_completion_tokens: None,
parallel_tool_calls: None,
presence_penalty: None,
frequency_penalty: None,
logit_bias: None,
tool_choice: None,
tools: None,
user: None,
logprobs: None,
top_logprobs: None,
response_format: None,
};
assert!(max_tokens_config.to_thinking_prompt().is_some());

let anthropic_request = AnthropicChatCompletionRequest::from(max_tokens_request);
assert!(anthropic_request.system.is_some());
assert!(
anthropic_request
.system
.unwrap()
.contains("Think through this step-by-step")
);
}

#[tokio::test]
27 changes: 18 additions & 9 deletions src/providers/openai/provider.rs
@@ -23,16 +23,25 @@ struct OpenAIChatCompletionRequest {

impl From<ChatCompletionRequest> for OpenAIChatCompletionRequest {
fn from(mut base: ChatCompletionRequest) -> Self {
let reasoning_effort = base.reasoning.as_ref().and_then(|r| r.to_openai_effort());

// Handle max_completion_tokens logic - use max_completion_tokens if provided and > 0,
// otherwise fall back to max_tokens
base.max_completion_tokens = match (base.max_completion_tokens, base.max_tokens) {
(Some(v), _) if v > 0 => Some(v),
(_, Some(v)) if v > 0 => Some(v),
_ => None,
};
// Handle OpenAI reasoning effort logic inline
let reasoning_effort = base.reasoning.as_ref().and_then(|reasoning| {
if reasoning.max_tokens.is_some() {
// If max_tokens is specified, don't use effort for OpenAI
None
} else {
// Only return effort if it's not empty
reasoning
.effort
.as_ref()
.filter(|e| !e.trim().is_empty())
.cloned()
}
});

// Convert max_tokens to max_completion_tokens if present
if base.max_tokens.is_some() && base.max_completion_tokens.is_none() {
base.max_completion_tokens = base.max_tokens;
}
base.max_tokens = None;

// Remove reasoning field from base request since OpenAI uses reasoning_effort
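
Worth noting for reviewers: the replaced match also guarded against zero values (Some(0) in max_completion_tokens fell through to max_tokens), whereas the new form keeps an explicit max_completion_tokens as-is and only inherits max_tokens when max_completion_tokens is absent. A minimal sketch of the new fallback semantics as a standalone function (the function name and u32 type are illustrative):

// Prefer an explicit max_completion_tokens; otherwise inherit max_tokens.
// The caller then clears max_tokens, since OpenAI's chat API expects
// max_completion_tokens for newer models.
fn resolve_max_completion_tokens(
    max_completion_tokens: Option<u32>,
    max_tokens: Option<u32>,
) -> Option<u32> {
    max_completion_tokens.or(max_tokens)
}

fn main() {
    assert_eq!(resolve_max_completion_tokens(None, Some(256)), Some(256));
    // Under the old logic this returned Some(256); now the explicit 0 wins.
    assert_eq!(resolve_max_completion_tokens(Some(0), Some(256)), Some(0));
}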
14 changes: 2 additions & 12 deletions src/providers/vertexai/models.rs
@@ -333,10 +333,6 @@ impl GeminiSchema {

impl From<ChatCompletionRequest> for GeminiChatRequest {
fn from(req: ChatCompletionRequest) -> Self {
tracing::debug!(
"🔄 Converting ChatCompletionRequest to GeminiChatRequest, reasoning: {:?}",
req.reasoning
);
let system_instruction = req
.messages
.iter()
@@ -415,17 +411,15 @@ impl From<ChatCompletionRequest> for GeminiChatRequest {
.reasoning
.as_ref()
.and_then(|r| {
tracing::debug!("📝 Processing reasoning config for thinkingConfig: {:?}", r);
r.to_gemini_thinking_budget()
// Handle Gemini thinking budget logic inline
r.max_tokens.map(|tokens| tokens as i32)
})
.map(|budget| {
tracing::debug!("🎛️ Creating ThinkingConfig with budget: {} tokens", budget);
ThinkingConfig {
thinking_budget: Some(budget),
}
});

tracing::debug!("🔧 Final thinking_config: {:?}", thinking_config);

let generation_config = Some(GenerationConfig {
temperature: req.temperature,
@@ -470,10 +464,6 @@ impl From<ChatCompletionRequest> for GeminiChatRequest {
system_instruction,
};

tracing::debug!(
"📦 Created GeminiChatRequest with generation_config: {:?}",
result.generation_config
);
result
}
}
19 changes: 1 addition & 18 deletions src/providers/vertexai/provider.rs
@@ -145,27 +145,10 @@ impl Provider for VertexAIProvider {

// Validate reasoning config if present
if let Some(reasoning) = &payload.reasoning {
tracing::debug!("🧠 VertexAI processing reasoning config: {:?}", reasoning);

if let Err(e) = reasoning.validate() {
tracing::error!("VertexAI reasoning validation failed: {}", e);
return Err(StatusCode::BAD_REQUEST);
}

if let Some(thinking_budget) = reasoning.to_gemini_thinking_budget() {
tracing::info!(
"✅ VertexAI reasoning enabled with thinking_budget: {} tokens",
thinking_budget
);
} else {
tracing::debug!(
"ℹ️ VertexAI reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
reasoning.effort,
reasoning.max_tokens
);
}
} else {
tracing::debug!("ℹ️ VertexAI no reasoning config provided");
}

let auth_token = self.get_auth_token().await?;