39 changes: 0 additions & 39 deletions src/models/chat.rs
@@ -43,45 +43,6 @@ impl ReasoningConfig {

Ok(())
}

// For OpenAI/Azure - Direct passthrough (but prioritize max_tokens over effort)
pub fn to_openai_effort(&self) -> Option<String> {
if self.max_tokens.is_some() {
// If max_tokens is specified, don't use effort for OpenAI
None
} else {
// Only return effort if it's not empty
self.effort
.as_ref()
.filter(|e| !e.trim().is_empty())
.cloned()
}
}

// For Vertex AI (Gemini) - Use max_tokens directly
pub fn to_gemini_thinking_budget(&self) -> Option<i32> {
self.max_tokens.map(|tokens| tokens as i32)
}

// For Anthropic/Bedrock - Custom prompt generation (prioritize max_tokens over effort)
pub fn to_thinking_prompt(&self) -> Option<String> {
if self.max_tokens.is_some() {
// If max_tokens is specified, use a generic thinking prompt
Some("Think through this step-by-step with detailed reasoning.".to_string())
} else {
match self.effort.as_deref() {
Some(effort) if !effort.trim().is_empty() => match effort {
"high" => {
Some("Think through this step-by-step with detailed reasoning.".to_string())
}
"medium" => Some("Consider this problem thoughtfully.".to_string()),
"low" => Some("Think about this briefly.".to_string()),
_ => None,
},
_ => None,
}
}
}
}

#[derive(Deserialize, Serialize, Clone, ToSchema)]
20 changes: 19 additions & 1 deletion src/providers/anthropic/models.rs
@@ -105,7 +105,25 @@ impl From<ChatCompletionRequest> for AnthropicChatCompletionRequest {

// Add reasoning prompt if reasoning is requested
if let Some(reasoning_config) = &request.reasoning {
if let Some(thinking_prompt) = reasoning_config.to_thinking_prompt() {
// Handle Anthropic reasoning logic inline
Review comment (Member): why are we adding this? 🤔

let thinking_prompt = if reasoning_config.max_tokens.is_some() {
// If max_tokens is specified, use a generic thinking prompt
Some("Think through this step-by-step with detailed reasoning.".to_string())
Review comment (Contributor): Inline reasoning logic now duplicates the prompt strings (e.g. 'Think through this step-by-step with detailed reasoning.'). Consider extracting these magic strings to constants or a shared function for maintainability (see the sketch after this diff).

} else {
match reasoning_config.effort.as_deref() {
Some(effort) if !effort.trim().is_empty() => match effort {
"high" => Some(
"Think through this step-by-step with detailed reasoning.".to_string(),
),
"medium" => Some("Consider this problem thoughtfully.".to_string()),
"low" => Some("Think about this briefly.".to_string()),
_ => None,
},
_ => None,
}
};

if let Some(thinking_prompt) = thinking_prompt {
system = Some(match system {
Some(existing) => format!("{}\n\n{}", existing, thinking_prompt),
None => thinking_prompt,
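
The duplication the reviewer flags above could be addressed with a small shared module. A minimal sketch, assuming a hypothetical src/models/reasoning.rs (the module path, constant names, and the u32 max_tokens type are illustrative, not part of this PR):

// Hypothetical shared home for the prompt strings and the effort-to-prompt
// mapping that this PR inlines into each provider.
pub const HIGH_EFFORT_PROMPT: &str =
    "Think through this step-by-step with detailed reasoning.";
pub const MEDIUM_EFFORT_PROMPT: &str = "Consider this problem thoughtfully.";
pub const LOW_EFFORT_PROMPT: &str = "Think about this briefly.";

/// Mirrors the inlined Anthropic logic: max_tokens takes priority and maps to
/// the generic prompt; otherwise a non-empty effort level selects a prompt.
pub fn thinking_prompt(effort: Option<&str>, max_tokens: Option<u32>) -> Option<String> {
    if max_tokens.is_some() {
        return Some(HIGH_EFFORT_PROMPT.to_string());
    }
    match effort.map(str::trim) {
        Some("high") => Some(HIGH_EFFORT_PROMPT.to_string()),
        Some("medium") => Some(MEDIUM_EFFORT_PROMPT.to_string()),
        Some("low") => Some(LOW_EFFORT_PROMPT.to_string()),
        _ => None,
    }
}

The call site here would then shrink to a one-liner such as reasoning::thinking_prompt(reasoning_config.effort.as_deref(), reasoning_config.max_tokens), and the identical effort passthrough duplicated in the OpenAI and Azure providers could share a similar helper.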
19 changes: 0 additions & 19 deletions src/providers/anthropic/provider.rs
@@ -46,25 +46,6 @@ impl Provider for AnthropicProvider {
tracing::error!("Invalid reasoning config: {}", e);
return Err(StatusCode::BAD_REQUEST);
}

if let Some(max_tokens) = reasoning.max_tokens {
info!(
"✅ Anthropic reasoning enabled with max_tokens: {}",
max_tokens
);
} else if let Some(thinking_prompt) = reasoning.to_thinking_prompt() {
info!(
"✅ Anthropic reasoning enabled with effort level: {:?} -> prompt: \"{}\"",
reasoning.effort,
thinking_prompt.chars().take(50).collect::<String>() + "..."
);
} else {
tracing::debug!(
"ℹ️ Anthropic reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
reasoning.effort,
reasoning.max_tokens
);
}
}

let request = AnthropicChatCompletionRequest::from(payload);
33 changes: 14 additions & 19 deletions src/providers/azure/provider.rs
@@ -24,7 +24,20 @@ struct AzureChatCompletionRequest

impl From<ChatCompletionRequest> for AzureChatCompletionRequest {
fn from(mut base: ChatCompletionRequest) -> Self {
let reasoning_effort = base.reasoning.as_ref().and_then(|r| r.to_openai_effort());
// Handle Azure reasoning effort logic inline (same as OpenAI)
let reasoning_effort = base.reasoning.as_ref().and_then(|reasoning| {
if reasoning.max_tokens.is_some() {
// If max_tokens is specified, don't use effort for Azure
None
} else {
// Only return effort if it's not empty
reasoning
.effort
.as_ref()
.filter(|e| !e.trim().is_empty())
.cloned()
}
});

// Remove reasoning field from base request since Azure uses reasoning_effort
base.reasoning = None;
@@ -85,24 +98,6 @@ impl Provider for AzureProvider {
tracing::error!("Invalid reasoning config: {}", e);
return Err(StatusCode::BAD_REQUEST);
}

if let Some(max_tokens) = reasoning.max_tokens {
info!(
"✅ Azure reasoning with max_tokens: {} (note: Azure uses effort levels, max_tokens ignored)",
max_tokens
);
} else if let Some(effort) = reasoning.to_openai_effort() {
info!(
"✅ Azure reasoning enabled with effort level: \"{}\"",
effort
);
} else {
tracing::debug!(
"ℹ️ Azure reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
reasoning.effort,
reasoning.max_tokens
);
}
}

let deployment = model_config.params.get("deployment").unwrap();
108 changes: 86 additions & 22 deletions src/providers/bedrock/test.rs
@@ -708,35 +708,99 @@ mod arn_tests {

#[test]
fn test_reasoning_config_to_thinking_prompt() {
// Test effort-based prompts
let high_effort_config = crate::models::chat::ReasoningConfig {
effort: Some("high".to_string()),
max_tokens: None,
exclude: None,
};
assert!(high_effort_config.to_thinking_prompt().is_some());
use crate::models::chat::ChatCompletionRequest;
use crate::models::content::ChatCompletionMessage;
use crate::providers::anthropic::AnthropicChatCompletionRequest;

let medium_effort_config = crate::models::chat::ReasoningConfig {
effort: Some("medium".to_string()),
// Test effort-based prompts by converting through AnthropicChatCompletionRequest
let high_effort_request = ChatCompletionRequest {
model: "test".to_string(),
messages: vec![ChatCompletionMessage {
role: "user".to_string(),
content: Some(crate::models::content::ChatMessageContent::String(
"test".to_string(),
)),
name: None,
tool_calls: None,
refusal: None,
}],
reasoning: Some(crate::models::chat::ReasoningConfig {
effort: Some("high".to_string()),
max_tokens: None,
exclude: None,
}),
temperature: None,
top_p: None,
n: None,
stream: None,
stop: None,
max_tokens: None,
exclude: None,
max_completion_tokens: None,
parallel_tool_calls: None,
presence_penalty: None,
frequency_penalty: None,
logit_bias: None,
tool_choice: None,
tools: None,
user: None,
logprobs: None,
top_logprobs: None,
response_format: None,
};
assert!(medium_effort_config.to_thinking_prompt().is_some());

let low_effort_config = crate::models::chat::ReasoningConfig {
effort: Some("low".to_string()),
max_tokens: None,
exclude: None,
};
assert!(low_effort_config.to_thinking_prompt().is_some());
let anthropic_request = AnthropicChatCompletionRequest::from(high_effort_request);
assert!(anthropic_request.system.is_some());
assert!(
anthropic_request
.system
.unwrap()
.contains("Think through this step-by-step")
);

// Test max_tokens takes priority over effort
let max_tokens_config = crate::models::chat::ReasoningConfig {
effort: Some("high".to_string()),
max_tokens: Some(1000),
exclude: None,
let max_tokens_request = ChatCompletionRequest {
model: "test".to_string(),
messages: vec![ChatCompletionMessage {
role: "user".to_string(),
content: Some(crate::models::content::ChatMessageContent::String(
"test".to_string(),
)),
name: None,
tool_calls: None,
refusal: None,
}],
reasoning: Some(crate::models::chat::ReasoningConfig {
effort: Some("high".to_string()),
max_tokens: Some(1000),
exclude: None,
}),
temperature: None,
top_p: None,
n: None,
stream: None,
stop: None,
max_tokens: None,
max_completion_tokens: None,
parallel_tool_calls: None,
presence_penalty: None,
frequency_penalty: None,
logit_bias: None,
tool_choice: None,
tools: None,
user: None,
logprobs: None,
top_logprobs: None,
response_format: None,
};
assert!(max_tokens_config.to_thinking_prompt().is_some());

let anthropic_request = AnthropicChatCompletionRequest::from(max_tokens_request);
assert!(anthropic_request.system.is_some());
assert!(
anthropic_request
.system
.unwrap()
.contains("Think through this step-by-step")
);
}

#[tokio::test]
27 changes: 18 additions & 9 deletions src/providers/openai/provider.rs
@@ -23,16 +23,25 @@ struct OpenAIChatCompletionRequest {

impl From<ChatCompletionRequest> for OpenAIChatCompletionRequest {
fn from(mut base: ChatCompletionRequest) -> Self {
let reasoning_effort = base.reasoning.as_ref().and_then(|r| r.to_openai_effort());

// Handle max_completion_tokens logic - use max_completion_tokens if provided and > 0,
// otherwise fall back to max_tokens
base.max_completion_tokens = match (base.max_completion_tokens, base.max_tokens) {
(Some(v), _) if v > 0 => Some(v),
(_, Some(v)) if v > 0 => Some(v),
_ => None,
};
// Handle OpenAI reasoning effort logic inline
let reasoning_effort = base.reasoning.as_ref().and_then(|reasoning| {
if reasoning.max_tokens.is_some() {
// If max_tokens is specified, don't use effort for OpenAI
None
} else {
// Only return effort if it's not empty
reasoning
.effort
.as_ref()
.filter(|e| !e.trim().is_empty())
.cloned()
}
});

// Convert max_tokens to max_completion_tokens if present
if base.max_tokens.is_some() && base.max_completion_tokens.is_none() {
base.max_completion_tokens = base.max_tokens;
}
base.max_tokens = None;

// Remove reasoning field from base request since OpenAI uses reasoning_effort
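
Worth noting for reviewers: the replaced match also guarded against zero values (Some(0) in max_completion_tokens fell through to max_tokens), whereas the new form keeps an explicit max_completion_tokens as-is and only inherits max_tokens when max_completion_tokens is absent. A minimal sketch of the new fallback semantics as a standalone function (the function name and u32 type are illustrative):

// Prefer an explicit max_completion_tokens; otherwise inherit max_tokens.
// The caller then clears max_tokens, since OpenAI's chat API expects
// max_completion_tokens for newer models.
fn resolve_max_completion_tokens(
    max_completion_tokens: Option<u32>,
    max_tokens: Option<u32>,
) -> Option<u32> {
    max_completion_tokens.or(max_tokens)
}

fn main() {
    assert_eq!(resolve_max_completion_tokens(None, Some(256)), Some(256));
    // Under the old logic this returned Some(256); now the explicit 0 wins.
    assert_eq!(resolve_max_completion_tokens(Some(0), Some(256)), Some(0));
}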
14 changes: 2 additions & 12 deletions src/providers/vertexai/models.rs
@@ -333,10 +333,6 @@ impl GeminiSchema {

impl From<ChatCompletionRequest> for GeminiChatRequest {
fn from(req: ChatCompletionRequest) -> Self {
tracing::debug!(
"🔄 Converting ChatCompletionRequest to GeminiChatRequest, reasoning: {:?}",
req.reasoning
);
let system_instruction = req
.messages
.iter()
@@ -415,17 +411,15 @@ impl From<ChatCompletionRequest> for GeminiChatRequest {
.reasoning
.as_ref()
.and_then(|r| {
tracing::debug!("📝 Processing reasoning config for thinkingConfig: {:?}", r);
r.to_gemini_thinking_budget()
// Handle Gemini thinking budget logic inline
r.max_tokens.map(|tokens| tokens as i32)
})
.map(|budget| {
tracing::debug!("🎛️ Creating ThinkingConfig with budget: {} tokens", budget);
ThinkingConfig {
thinking_budget: Some(budget),
}
});

tracing::debug!("🔧 Final thinking_config: {:?}", thinking_config);

let generation_config = Some(GenerationConfig {
temperature: req.temperature,
@@ -470,10 +464,6 @@ impl From<ChatCompletionRequest> for GeminiChatRequest {
system_instruction,
};

tracing::debug!(
"📦 Created GeminiChatRequest with generation_config: {:?}",
result.generation_config
);
result
}
}
19 changes: 1 addition & 18 deletions src/providers/vertexai/provider.rs
@@ -145,27 +145,10 @@ impl Provider for VertexAIProvider {

// Validate reasoning config if present
if let Some(reasoning) = &payload.reasoning {
tracing::debug!("🧠 VertexAI processing reasoning config: {:?}", reasoning);

if let Err(e) = reasoning.validate() {
tracing::error!("VertexAI reasoning validation failed: {}", e);
return Err(StatusCode::BAD_REQUEST);
}

if let Some(thinking_budget) = reasoning.to_gemini_thinking_budget() {
tracing::info!(
"✅ VertexAI reasoning enabled with thinking_budget: {} tokens",
thinking_budget
);
} else {
tracing::debug!(
"ℹ️ VertexAI reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
reasoning.effort,
reasoning.max_tokens
);
}
} else {
tracing::debug!("ℹ️ VertexAI no reasoning config provided");
}

let auth_token = self.get_auth_token().await?;