diff --git a/crates/spark-server/src/tokenizer.rs b/crates/spark-server/src/tokenizer.rs index dc93d9ba..dbb35404 100644 --- a/crates/spark-server/src/tokenizer.rs +++ b/crates/spark-server/src/tokenizer.rs @@ -105,6 +105,40 @@ mod tests { use super::*; use serde_json::json; + fn render_minimax_openai_template( + messages: &[serde_json::Value], + tools: Option<&[serde_json::Value]>, + enable_thinking: bool, + ) -> String { + let template_path = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../jinja-templates/openai/minimax_m2.jinja" + ); + let raw = std::fs::read_to_string(template_path) + .expect("bundled MiniMax OpenAI template must be present in the repo"); + let converted = super::jinja_helpers::convert_python_jinja_to_minijinja(&raw); + let env = super::jinja_helpers::build_jinja_env(&converted).expect("template compiles"); + let tmpl = env.get_template("chat").unwrap(); + let messages_for_render = normalize_tool_call_arguments(messages); + let messages_val = minijinja::Value::from_serialize(&messages_for_render); + let tools_val = tools.map(minijinja::Value::from_serialize); + let reasoning_effort: minijinja::Value = if enable_thinking { + "high".into() + } else { + "none".into() + }; + let ctx = minijinja::context! 
{ + messages => messages_val, + tools => tools_val.unwrap_or(minijinja::Value::UNDEFINED), + add_generation_prompt => true, + enable_thinking => enable_thinking, + reasoning_effort => reasoning_effort, + disable_tool_steering => false, + add_vision_id => false, + }; + tmpl.render(ctx).expect("template renders") + } + #[test] fn normalize_tool_call_arguments_parses_string_to_dict() { // The shape opencode sends back on the second turn: assistant @@ -217,6 +251,66 @@ mod tests { ); } + #[test] + fn render_minimax_openai_template_closes_think_prompt_when_disabled() { + let messages = vec![json!({"role": "user", "content": "Reply with exactly: OK"})]; + let rendered = render_minimax_openai_template(&messages, None, false); + assert!( + rendered.ends_with("]~b]ai\n\n\n\n\n"), + "expected closed-thinking assistant generation prompt: {rendered}" + ); + let generation_tail = rendered + .rsplit_once("]~b]ai\n") + .map(|(_, tail)| tail) + .expect("assistant generation prompt is present"); + assert_eq!( + generation_tail, "\n\n\n\n", + "disabled thinking must not leave the model inside : {rendered}" + ); + } + + #[test] + fn render_minimax_openai_template_opens_think_prompt_when_enabled() { + let messages = vec![json!({"role": "user", "content": "Think before answering"})]; + let rendered = render_minimax_openai_template(&messages, None, true); + assert!( + rendered.ends_with("]~b]ai\n\n"), + "expected thinking assistant generation prompt: {rendered}" + ); + } + + #[test] + fn render_minimax_openai_template_omits_think_prompt_with_tools_when_disabled() { + let messages = vec![json!({"role": "user", "content": "List the current directory"})]; + let tools = vec![json!({ + "type": "function", + "function": { + "name": "shell", + "description": "Run a shell command", + "parameters": { + "type": "object", + "properties": { + "command": {"type": "string"} + }, + "required": ["command"] + } + } + })]; + let rendered = render_minimax_openai_template(&messages, Some(&tools), false); + 
assert!( + rendered.contains(""), + "expected tool schema block in render: {rendered}" + ); + assert!( + rendered.contains(""), + "expected MiniMax tool-call instructions in render: {rendered}" + ); + assert!( + rendered.ends_with("]~b]ai\n\n\n\n\n"), + "tool-active disabled-thinking requests must use a closed-thinking assistant prompt: {rendered}" + ); + } + #[test] fn normalize_tool_call_arguments_invalid_json_string_left_alone() { // If args is a string but not valid JSON, leave as-is so the diff --git a/jinja-templates/openai/minimax_m2.jinja b/jinja-templates/openai/minimax_m2.jinja new file mode 100644 index 00000000..744093cd --- /dev/null +++ b/jinja-templates/openai/minimax_m2.jinja @@ -0,0 +1,164 @@ +{# ---------- special token variables ---------- #} +{%- set toolcall_begin_token = '' -%} +{%- set toolcall_end_token = '' -%} +{#- Tool Rendering Functions ============================================== -#} +{%- macro render_tool_namespace(namespace_name, tool_list) -%} +{%- for tool in tool_list -%} +{{ tool.function | tojson(ensure_ascii=False) }} +{% endfor -%} +{%- endmacro -%} +{%- macro visible_text(content) -%} + {%- if content is string -%} + {{ content }} + {%- elif content is iterable and content is not mapping -%} + {%- for item in content -%} + {%- if item is mapping and item.type == 'text' -%} + {{- item.text }} + {%- elif item is string -%} + {{- item }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{- content }} + {%- endif -%} +{%- endmacro -%} +{#- System Message Construction ============================================ -#} +{%- macro build_system_message(system_message) -%} + {%- if system_message and system_message.content -%} + {{- visible_text(system_message.content) }} + {%- else -%} + {%- if model_identity is not defined -%} + {%- set model_identity = "You are a helpful assistant. Your name is MiniMax-M2.7 and is built by MiniMax." 
-%} + {%- endif -%} + {{- model_identity }} + {%- endif -%} + + {#- Handle current_date -#} + {%- if system_message and system_message.current_date -%} + {{- '\n' ~ 'Current date: ' + system_message.current_date }} + {%- endif -%} + {#- Handle current_location -#} + {%- if system_message and system_message.current_location -%} + {{- '\n' ~ 'Current location: ' + system_message.current_location }} + {%- endif -%} +{%- endmacro -%} +{#- Main Template Logic ================================================= -#} +{#- Extract system message (only first message if it's system) -#} +{%- set system_message = none -%} +{%- set conversation_messages = messages -%} +{%- if messages and messages[0].role == "system" -%} + {%- set system_message = messages[0] -%} + {%- set conversation_messages = messages[1:] -%} +{%- endif -%} +{#- Get the last user message turn, for interleaved thinking -#} +{%- set ns = namespace(last_user_index=-1) %} +{% for m in conversation_messages %} + {%- if m.role == 'user' %} + {% set ns.last_user_index = loop.index0 -%} + {%- endif %} +{%- endfor %} +{#- Render system message -#} +{{- ']~!b[' ~ ']~b]system' ~ '\n' }} +{{- build_system_message(system_message) }} +{#- Render tools if available -#} +{%- if tools -%} + {{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }} + {{- '\n' ~ '' ~ '\n' }} + {{- render_tool_namespace("functions", tools) }} + {{- '' ~ '\n\n' }} +{{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\n' }} +{{- '\n' ~ toolcall_begin_token }} + +param-value-1 +param-value-2 +... 
+ +{{- '\n' ~ toolcall_end_token }} +{%- endif -%} +{{- '[e~[\n' }} + +{#- Render messages -#} +{%- set last_tool_call = namespace(name=none) -%} +{%- for message in conversation_messages -%} + {%- if message.role == 'assistant' -%} + {#- Only render reasoning_content if no user message follows -#} + {{- ']~b]ai' ~ '\n' }} + + {%- set reasoning_content = '' %} + {%- set content = visible_text(message.content) %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].strip('\n').split('')[-1].strip('\n') %} + {%- set content = content.split('')[-1].strip('\n') %} + {%- endif %} + {%- endif %} + {%- if reasoning_content and loop.index0 > ns.last_user_index -%} + {{- '' ~ '\n' ~ reasoning_content ~ '\n' ~ '' ~ '\n\n' }} + {%- endif -%} + {%- if content -%} + {{- content }} + {%- endif -%} + {%- if message.tool_calls -%} + {{- '\n' ~ toolcall_begin_token ~ '\n' }} + + {%- for tool_call in message.tool_calls -%} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '' }} + {% set _args = tool_call.arguments %} + {%- for k, v in _args.items() %} + {{- '' }} + {{- v | tojson(ensure_ascii=False) if v is not string else v }} + {{- '' }} + {% endfor %} + {{- '' ~ '\n' }} + {%- endfor -%} + + {{- toolcall_end_token}} + {%- set last_tool_call.name = message.tool_calls[-1].name -%} + {%- else -%} + {%- set last_tool_call.name = none -%} + {%- endif -%} + {{- '[e~[' ~ '\n' }} + + {%- elif message.role == 'tool' -%} + {%- if last_tool_call.name is none -%} + {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }} + {%- endif -%} + {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%} + {{- ']~b]tool' }} + {%- endif -%} + {%- if message.content is string -%} + {{- '\n' }} + {{- message.content }} + {{- '' }} + {%- else 
-%} + {%- for tr in message.content -%} + {{- '\n' }} + {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }} + {{- '\n' }} + {%- endfor -%} + {%- endif -%} + {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%} + {{- '[e~[\n' -}} + {%- endif -%} + + {%- elif message.role == 'user' -%} + {{- ']~b]user' ~ '\n' }} + {{- visible_text(message.content) }} + {{- '[e~[' ~ '\n' }} + {%- endif -%} +{%- endfor -%} + +{#- Generation prompt -#} +{%- if add_generation_prompt -%} +{{- ']~b]ai' ~ '\n' }} +{%- if enable_thinking is defined and enable_thinking is true -%} +{{- '' ~ '\n' }} +{%- else -%} +{{- '' ~ '\n\n' ~ '' ~ '\n\n' }} +{%- endif -%} +{%- endif -%} diff --git a/kernels/gb10/minimax-m2-229b/MODEL.toml b/kernels/gb10/minimax-m2-229b/MODEL.toml index 601c4236..b954fe38 100644 --- a/kernels/gb10/minimax-m2-229b/MODEL.toml +++ b/kernels/gb10/minimax-m2-229b/MODEL.toml @@ -55,20 +55,15 @@ top_p = 0.95 top_k = 20 [behavior] -# MiniMax M2's chat_template.jinja appends `\n` unconditionally -# at `add_generation_prompt` (line 164). With `thinking_in_tools=false` -# the reasoning parser strips `...` from `content`, but -# the model still spends tokens inside `` before emitting either -# (a) a tool call (`...`) or (b) free-form code/text. -# `max_thinking_budget` caps that spontaneous-thinking phase: when the -# scheduler detects `` without an explicit per-request budget, -# it uses this number as the cap. Hit the cap → `` is force- -# emitted → real content begins. 1024 tokens at ~17 tok/s ≈ 60 s of -# reasoning headroom: long enough for fib's chain-of-thought trace, -# short enough that Search emits its tool call inside the test timeout. +# MiniMax M2's native HF chat_template.jinja appends `\n` +# unconditionally at `add_generation_prompt`. Atlas uses an OpenAI- +# variant template that gates this generation prompt on the resolved +# per-request thinking setting. 
`max_thinking_budget` still caps explicit +# or spontaneous thinking: when the scheduler detects `<think>` without +# an explicit per-request budget, it uses this number as the cap. Hit +# the cap -> `</think>` is force-emitted -> real content begins. # `thinking_default = false` makes the OpenAI-style API treat -# `extra_body.thinking.budget_tokens` as opt-in (matches MiniMax's -# `thinking_in_tools=false` story for tool-active requests). +# `extra_body.thinking.budget_tokens` as opt-in. thinking_in_tools = false max_thinking_budget = 1024 thinking_default = false