@@ -12,6 +12,76 @@ use super::tool_choice::ToolChoice;
1212use super :: tool_definition:: ToolDefinition ;
1313use super :: usage:: Usage ;
1414
15+ #[ derive( Debug , Serialize , Deserialize , Clone , ToSchema ) ]
16+ pub struct ReasoningConfig {
17+ #[ serde( skip_serializing_if = "Option::is_none" ) ]
18+ pub effort : Option < String > , // "low" | "medium" | "high"
19+ #[ serde( skip_serializing_if = "Option::is_none" ) ]
20+ pub max_tokens : Option < u32 > , // Alternative to effort
21+ #[ serde( skip_serializing_if = "Option::is_none" ) ]
22+ pub exclude : Option < bool > , // Whether to exclude from response (default: false)
23+ }
24+
25+ impl ReasoningConfig {
26+ pub fn validate ( & self ) -> Result < ( ) , String > {
27+ if self . effort . is_some ( ) && self . max_tokens . is_some ( ) {
28+ tracing:: warn!( "Both effort and max_tokens specified - prioritizing max_tokens" ) ;
29+ }
30+
31+ // Only validate effort if max_tokens is not present (since max_tokens takes priority)
32+ if let Some ( effort) = & self . effort {
33+ if effort. trim ( ) . is_empty ( ) {
34+ return Err ( "Effort cannot be empty string" . to_string ( ) ) ;
35+ } else if self . max_tokens . is_none ( )
36+ && ![ "low" , "medium" , "high" ] . contains ( & effort. as_str ( ) )
37+ {
38+ return Err ( "Invalid effort value. Must be 'low', 'medium', or 'high'" . to_string ( ) ) ;
39+ }
40+ }
41+
42+ Ok ( ( ) )
43+ }
44+
45+ // For OpenAI/Azure - Direct passthrough (but prioritize max_tokens over effort)
46+ pub fn to_openai_effort ( & self ) -> Option < String > {
47+ if self . max_tokens . is_some ( ) {
48+ // If max_tokens is specified, don't use effort for OpenAI
49+ None
50+ } else {
51+ // Only return effort if it's not empty
52+ self . effort
53+ . as_ref ( )
54+ . filter ( |e| !e. trim ( ) . is_empty ( ) )
55+ . cloned ( )
56+ }
57+ }
58+
59+ // For Vertex AI (Gemini) - Use max_tokens directly
60+ pub fn to_gemini_thinking_budget ( & self ) -> Option < i32 > {
61+ self . max_tokens . map ( |tokens| tokens as i32 )
62+ }
63+
64+ // For Anthropic/Bedrock - Custom prompt generation (prioritize max_tokens over effort)
65+ pub fn to_thinking_prompt ( & self ) -> Option < String > {
66+ if self . max_tokens . is_some ( ) {
67+ // If max_tokens is specified, use a generic thinking prompt
68+ Some ( "Think through this step-by-step with detailed reasoning." . to_string ( ) )
69+ } else {
70+ match self . effort . as_deref ( ) {
71+ Some ( effort) if !effort. trim ( ) . is_empty ( ) => match effort {
72+ "high" => {
73+ Some ( "Think through this step-by-step with detailed reasoning." . to_string ( ) )
74+ }
75+ "medium" => Some ( "Consider this problem thoughtfully." . to_string ( ) ) ,
76+ "low" => Some ( "Think about this briefly." . to_string ( ) ) ,
77+ _ => None ,
78+ } ,
79+ _ => None ,
80+ }
81+ }
82+ }
83+ }
84+
1585#[ derive( Deserialize , Serialize , Clone , ToSchema ) ]
1686pub struct ChatCompletionRequest {
1787 pub model : String ,
@@ -50,6 +120,8 @@ pub struct ChatCompletionRequest {
50120 pub top_logprobs : Option < u32 > ,
51121 #[ serde( skip_serializing_if = "Option::is_none" ) ]
52122 pub response_format : Option < ResponseFormat > ,
123+ #[ serde( skip_serializing_if = "Option::is_none" ) ]
124+ pub reasoning : Option < ReasoningConfig > ,
53125}
54126
55127// Note: ChatCompletionResponse cannot derive ToSchema due to BoxStream
0 commit comments