
Commit 4b0f484

feat: add reasoning support (#63)
1 parent 7af3a85 commit 4b0f484

10 files changed: +659, -11 lines

src/models/chat.rs

Lines changed: 72 additions & 0 deletions
@@ -12,6 +12,76 @@ use super::tool_choice::ToolChoice;
 use super::tool_definition::ToolDefinition;
 use super::usage::Usage;
 
+#[derive(Debug, Serialize, Deserialize, Clone, ToSchema)]
+pub struct ReasoningConfig {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub effort: Option<String>, // "low" | "medium" | "high"
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tokens: Option<u32>, // Alternative to effort
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub exclude: Option<bool>, // Whether to exclude from response (default: false)
+}
+
+impl ReasoningConfig {
+    pub fn validate(&self) -> Result<(), String> {
+        if self.effort.is_some() && self.max_tokens.is_some() {
+            tracing::warn!("Both effort and max_tokens specified - prioritizing max_tokens");
+        }
+
+        // Only validate effort if max_tokens is not present (since max_tokens takes priority)
+        if let Some(effort) = &self.effort {
+            if effort.trim().is_empty() {
+                return Err("Effort cannot be empty string".to_string());
+            } else if self.max_tokens.is_none()
+                && !["low", "medium", "high"].contains(&effort.as_str())
+            {
+                return Err("Invalid effort value. Must be 'low', 'medium', or 'high'".to_string());
+            }
+        }
+
+        Ok(())
+    }
+
+    // For OpenAI/Azure - Direct passthrough (but prioritize max_tokens over effort)
+    pub fn to_openai_effort(&self) -> Option<String> {
+        if self.max_tokens.is_some() {
+            // If max_tokens is specified, don't use effort for OpenAI
+            None
+        } else {
+            // Only return effort if it's not empty
+            self.effort
+                .as_ref()
+                .filter(|e| !e.trim().is_empty())
+                .cloned()
+        }
+    }
+
+    // For Vertex AI (Gemini) - Use max_tokens directly
+    pub fn to_gemini_thinking_budget(&self) -> Option<i32> {
+        self.max_tokens.map(|tokens| tokens as i32)
+    }
+
+    // For Anthropic/Bedrock - Custom prompt generation (prioritize max_tokens over effort)
+    pub fn to_thinking_prompt(&self) -> Option<String> {
+        if self.max_tokens.is_some() {
+            // If max_tokens is specified, use a generic thinking prompt
+            Some("Think through this step-by-step with detailed reasoning.".to_string())
+        } else {
+            match self.effort.as_deref() {
+                Some(effort) if !effort.trim().is_empty() => match effort {
+                    "high" => {
+                        Some("Think through this step-by-step with detailed reasoning.".to_string())
+                    }
+                    "medium" => Some("Consider this problem thoughtfully.".to_string()),
+                    "low" => Some("Think about this briefly.".to_string()),
+                    _ => None,
+                },
+                _ => None,
+            }
+        }
+    }
+}
+
 #[derive(Deserialize, Serialize, Clone, ToSchema)]
 pub struct ChatCompletionRequest {
     pub model: String,
@@ -50,6 +120,8 @@ pub struct ChatCompletionRequest {
     pub top_logprobs: Option<u32>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub response_format: Option<ResponseFormat>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<ReasoningConfig>,
 }
 
 // Note: ChatCompletionResponse cannot derive ToSchema due to BoxStream
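
To make the mapping concrete, here is a minimal sketch (not part of the commit) exercising the three helpers on a single ReasoningConfig; it assumes ReasoningConfig is in scope from this module:

    fn reasoning_mapping_demo() {
        // Effort-only config: passes through to OpenAI/Azure, maps to a
        // canned prompt for Anthropic/Bedrock, and yields no Gemini budget.
        let by_effort = ReasoningConfig {
            effort: Some("medium".to_string()),
            max_tokens: None,
            exclude: None,
        };
        assert!(by_effort.validate().is_ok());
        assert_eq!(by_effort.to_openai_effort(), Some("medium".to_string()));
        assert_eq!(by_effort.to_gemini_thinking_budget(), None);
        assert_eq!(
            by_effort.to_thinking_prompt(),
            Some("Consider this problem thoughtfully.".to_string())
        );

        // When both are set, max_tokens wins: effort is suppressed for
        // OpenAI/Azure and the generic thinking prompt is used instead.
        let by_budget = ReasoningConfig {
            effort: Some("high".to_string()),
            max_tokens: Some(1024),
            exclude: None,
        };
        assert_eq!(by_budget.to_openai_effort(), None);
        assert_eq!(by_budget.to_gemini_thinking_budget(), Some(1024));
    }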

src/models/streaming.rs

Lines changed: 2 additions & 0 deletions
@@ -21,6 +21,8 @@ pub struct ChoiceDelta {
     pub role: Option<String>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tool_calls: Option<Vec<ChatMessageToolCall>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<String>,
 }
 
 #[derive(Deserialize, Serialize, Clone, Debug, ToSchema)]
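
Illustrative only (hypothetical payload, not from the commit): a streamed delta carrying reasoning text would deserialize as below, assuming ChoiceDelta's other fields are likewise optional and serde_json is available:

    fn reasoning_delta_demo() {
        // Hypothetical chunk fragment: reasoning arrives in the delta
        // alongside the role, before any visible content.
        let raw = r#"{"role":"assistant","reasoning":"Check the premises first."}"#;
        let delta: ChoiceDelta = serde_json::from_str(raw).expect("delta should parse");
        assert_eq!(delta.reasoning.as_deref(), Some("Check the premises first."));
        assert!(delta.tool_calls.is_none()); // absent optional fields stay None
    }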

src/providers/anthropic/models.rs

Lines changed: 11 additions & 1 deletion
@@ -90,7 +90,7 @@ impl From<ChatCompletionRequest> for AnthropicChatCompletionRequest {
             ))
         );
 
-        let system = request
+        let mut system = request
            .messages
            .iter()
            .find(|msg| msg.role == "system")
@@ -103,6 +103,16 @@ impl From<ChatCompletionRequest> for AnthropicChatCompletionRequest {
            _ => None,
        });
 
+        // Add reasoning prompt if reasoning is requested
+        if let Some(reasoning_config) = &request.reasoning {
+            if let Some(thinking_prompt) = reasoning_config.to_thinking_prompt() {
+                system = Some(match system {
+                    Some(existing) => format!("{}\n\n{}", existing, thinking_prompt),
+                    None => thinking_prompt,
+                });
+            }
+        }
+
        let messages: Vec<ChatCompletionMessage> = request
            .messages
            .into_iter()
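
The merge rule above is small enough to restate standalone; this sketch (assumed behavior, mirroring the diff rather than importing it) shows how an existing system prompt gains the thinking prompt after a blank-line separator:

    fn append_thinking(system: Option<String>, thinking: Option<String>) -> Option<String> {
        match (system, thinking) {
            // Existing system prompt: append the thinking prompt after a blank line.
            (Some(existing), Some(prompt)) => Some(format!("{}\n\n{}", existing, prompt)),
            // No system prompt yet: the thinking prompt becomes the system prompt.
            (None, Some(prompt)) => Some(prompt),
            // No reasoning requested: leave the system prompt untouched.
            (system, None) => system,
        }
    }

    fn merge_demo() {
        assert_eq!(
            append_thinking(Some("You are terse.".into()), Some("Think about this briefly.".into())),
            Some("You are terse.\n\nThink about this briefly.".into())
        );
    }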

src/providers/anthropic/provider.rs

Lines changed: 28 additions & 0 deletions
@@ -1,6 +1,7 @@
 use async_trait::async_trait;
 use axum::http::StatusCode;
 use reqwest::Client;
+use tracing::info;
 
 use super::models::{AnthropicChatCompletionRequest, AnthropicChatCompletionResponse};
 use crate::config::models::{ModelConfig, Provider as ProviderConfig};
@@ -38,6 +39,33 @@ impl Provider for AnthropicProvider {
         payload: ChatCompletionRequest,
         _model_config: &ModelConfig,
     ) -> Result<ChatCompletionResponse, StatusCode> {
+        // Validate reasoning config if present
+        if let Some(reasoning) = &payload.reasoning {
+            if let Err(e) = reasoning.validate() {
+                tracing::error!("Invalid reasoning config: {}", e);
+                return Err(StatusCode::BAD_REQUEST);
+            }
+
+            if let Some(max_tokens) = reasoning.max_tokens {
+                info!(
+                    "✅ Anthropic reasoning enabled with max_tokens: {}",
+                    max_tokens
+                );
+            } else if let Some(thinking_prompt) = reasoning.to_thinking_prompt() {
+                info!(
+                    "✅ Anthropic reasoning enabled with effort level: {:?} -> prompt: \"{}\"",
+                    reasoning.effort,
+                    thinking_prompt.chars().take(50).collect::<String>() + "..."
+                );
+            } else {
+                tracing::debug!(
+                    "ℹ️ Anthropic reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
+                    reasoning.effort,
+                    reasoning.max_tokens
+                );
+            }
+        }
+
         let request = AnthropicChatCompletionRequest::from(payload);
         let response = self
             .http_client
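
Given the validate() shown in src/models/chat.rs, the gate above maps to concrete outcomes; a sketch (assumes ReasoningConfig is in scope):

    fn validation_gate_demo() {
        // Whitespace-only effort is rejected -> handler returns BAD_REQUEST.
        let blank = ReasoningConfig { effort: Some("  ".into()), max_tokens: None, exclude: None };
        assert!(blank.validate().is_err());

        // Unknown effort level with no budget is rejected -> BAD_REQUEST.
        let unknown = ReasoningConfig { effort: Some("extreme".into()), max_tokens: None, exclude: None };
        assert!(unknown.validate().is_err());

        // The same unknown effort passes once max_tokens is set, because
        // max_tokens takes priority and the effort-level check is skipped.
        let with_budget = ReasoningConfig { effort: Some("extreme".into()), max_tokens: Some(512), exclude: None };
        assert!(with_budget.validate().is_ok());
    }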

src/providers/azure/provider.rs

Lines changed: 53 additions & 1 deletion
@@ -1,6 +1,7 @@
 use async_trait::async_trait;
 use axum::http::StatusCode;
 use reqwest_streams::JsonStreamResponse;
+use serde::{Deserialize, Serialize};
 
 use crate::config::constants::stream_buffer_size_bytes;
 use crate::config::models::{ModelConfig, Provider as ProviderConfig};
@@ -12,6 +13,28 @@ use crate::providers::provider::Provider;
 use reqwest::Client;
 use tracing::info;
 
+#[derive(Serialize, Deserialize, Clone)]
+struct AzureChatCompletionRequest {
+    #[serde(flatten)]
+    base: ChatCompletionRequest,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    reasoning_effort: Option<String>,
+}
+
+impl From<ChatCompletionRequest> for AzureChatCompletionRequest {
+    fn from(mut base: ChatCompletionRequest) -> Self {
+        let reasoning_effort = base.reasoning.as_ref().and_then(|r| r.to_openai_effort());
+
+        // Remove reasoning field from base request since Azure uses reasoning_effort
+        base.reasoning = None;
+
+        Self {
+            base,
+            reasoning_effort,
+        }
+    }
+}
+
 pub struct AzureProvider {
     config: ProviderConfig,
     http_client: Client,
@@ -55,6 +78,32 @@ impl Provider for AzureProvider {
         payload: ChatCompletionRequest,
         model_config: &ModelConfig,
     ) -> Result<ChatCompletionResponse, StatusCode> {
+        // Validate reasoning config if present
+        if let Some(reasoning) = &payload.reasoning {
+            if let Err(e) = reasoning.validate() {
+                tracing::error!("Invalid reasoning config: {}", e);
+                return Err(StatusCode::BAD_REQUEST);
+            }
+
+            if let Some(max_tokens) = reasoning.max_tokens {
+                info!(
+                    "✅ Azure reasoning with max_tokens: {} (note: Azure uses effort levels, max_tokens ignored)",
+                    max_tokens
+                );
+            } else if let Some(effort) = reasoning.to_openai_effort() {
+                info!(
+                    "✅ Azure reasoning enabled with effort level: \"{}\"",
+                    effort
+                );
+            } else {
+                tracing::debug!(
+                    "ℹ️ Azure reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
+                    reasoning.effort,
+                    reasoning.max_tokens
+                );
+            }
+        }
+
         let deployment = model_config.params.get("deployment").unwrap();
         let api_version = self.api_version();
         let url = format!(
@@ -64,11 +113,14 @@ impl Provider for AzureProvider {
             api_version
         );
 
+        // Convert to Azure-specific request format
+        let azure_request = AzureChatCompletionRequest::from(payload.clone());
+
         let response = self
             .http_client
             .post(&url)
             .header("api-key", &self.config.api_key)
-            .json(&payload)
+            .json(&azure_request)
             .send()
             .await
             .map_err(|e| {
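
To see what #[serde(flatten)] buys here, a sketch of the request-body transformation. Assumptions: it runs inside this module (AzureChatCompletionRequest is private), ChatCompletionRequest accepts this minimal JSON, and the model name is a placeholder:

    fn azure_body_demo() {
        let payload: ChatCompletionRequest = serde_json::from_value(serde_json::json!({
            "model": "my-reasoning-model", // placeholder deployment/model name
            "messages": [{ "role": "user", "content": "hi" }],
            "reasoning": { "effort": "high" }
        }))
        .expect("minimal request should deserialize");

        let azure_request = AzureChatCompletionRequest::from(payload);
        let body = serde_json::to_value(&azure_request).expect("request should serialize");

        // The flat Azure key replaces the gateway-level `reasoning` object.
        assert_eq!(body["reasoning_effort"], "high");
        assert!(body.get("reasoning").is_none());
    }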
