diff --git a/crates/spark-server/src/tokenizer.rs b/crates/spark-server/src/tokenizer.rs
index dc93d9ba..dbb35404 100644
--- a/crates/spark-server/src/tokenizer.rs
+++ b/crates/spark-server/src/tokenizer.rs
@@ -105,6 +105,40 @@ mod tests {
use super::*;
use serde_json::json;
+ /// Renders the bundled MiniMax OpenAI chat template for a test conversation.
+ ///
+ /// Reads `jinja-templates/openai/minimax_m2.jinja` relative to the crate
+ /// manifest directory, runs it through `convert_python_jinja_to_minijinja`,
+ /// builds an environment from the result, and renders the template named
+ /// `chat` with `add_generation_prompt` set to `true`.
+ ///
+ /// * `messages` - chat messages; passed through
+ ///   `normalize_tool_call_arguments` before being handed to the template.
+ /// * `tools` - optional tool definitions; the `tools` context variable is
+ ///   left undefined when this is `None`.
+ /// * `enable_thinking` - forwarded as `enable_thinking` and also mapped to
+ ///   `reasoning_effort` (`"high"` when true, `"none"` when false).
+ ///
+ /// Panics if the template cannot be read, compiled, looked up, or rendered.
+ fn render_minimax_openai_template(
+     messages: &[serde_json::Value],
+     tools: Option<&[serde_json::Value]>,
+     enable_thinking: bool,
+ ) -> String {
+     // Load and compile the template straight from the repository checkout.
+     let source = std::fs::read_to_string(concat!(
+         env!("CARGO_MANIFEST_DIR"),
+         "/../../jinja-templates/openai/minimax_m2.jinja"
+     ))
+     .expect("bundled MiniMax OpenAI template must be present in the repo");
+     let minijinja_source = super::jinja_helpers::convert_python_jinja_to_minijinja(&source);
+     let jinja_env =
+         super::jinja_helpers::build_jinja_env(&minijinja_source).expect("template compiles");
+     let chat_template = jinja_env.get_template("chat").unwrap();
+
+     // Build the individual context values.
+     let normalized = normalize_tool_call_arguments(messages);
+     let messages_value = minijinja::Value::from_serialize(&normalized);
+     let tools_value = match tools {
+         Some(list) => minijinja::Value::from_serialize(list),
+         None => minijinja::Value::UNDEFINED,
+     };
+     let effort = if enable_thinking { "high" } else { "none" };
+
+     let render_ctx = minijinja::context! {
+         messages => messages_value,
+         tools => tools_value,
+         add_generation_prompt => true,
+         enable_thinking => enable_thinking,
+         reasoning_effort => minijinja::Value::from(effort),
+         disable_tool_steering => false,
+         add_vision_id => false,
+     };
+     chat_template.render(render_ctx).expect("template renders")
+ }
+
#[test]
fn normalize_tool_call_arguments_parses_string_to_dict() {
// The shape opencode sends back on the second turn: assistant
@@ -217,6 +251,66 @@ mod tests {
);
}
+ /// With thinking disabled, the render must end with the assistant header
+ /// followed by exactly the tail asserted below.
+ #[test]
+ fn render_minimax_openai_template_closes_think_prompt_when_disabled() {
+     let conversation = [json!({"role": "user", "content": "Reply with exactly: OK"})];
+     let rendered = render_minimax_openai_template(&conversation, None, false);
+
+     assert!(
+         rendered.ends_with("]~b]ai\n\n\n\n\n"),
+         "expected closed-thinking assistant generation prompt: {rendered}"
+     );
+
+     // Everything after the last assistant header is the generation tail.
+     // NOTE(review): the failure message below reads "inside :" -- a marker
+     // name appears to be missing from the literal; confirm the intended text.
+     let (_, generation_tail) = rendered
+         .rsplit_once("]~b]ai\n")
+         .expect("assistant generation prompt is present");
+     assert_eq!(
+         generation_tail, "\n\n\n\n",
+         "disabled thinking must not leave the model inside : {rendered}"
+     );
+ }
+
+ /// With thinking enabled, the render must end with the assistant
+ /// generation prompt asserted below.
+ #[test]
+ fn render_minimax_openai_template_opens_think_prompt_when_enabled() {
+     let conversation = [json!({"role": "user", "content": "Think before answering"})];
+     let rendered = render_minimax_openai_template(&conversation, None, true);
+     let ends_with_open_prompt = rendered.ends_with("]~b]ai\n\n");
+     assert!(
+         ends_with_open_prompt,
+         "expected thinking assistant generation prompt: {rendered}"
+     );
+ }
+
+ /// With tools supplied and thinking disabled, the render must contain the
+ /// tool section and tool-call instructions, and must still end with the
+ /// closed-thinking assistant generation prompt.
+ #[test]
+ fn render_minimax_openai_template_omits_think_prompt_with_tools_when_disabled() {
+     let messages = vec![json!({"role": "user", "content": "List the current directory"})];
+     let tools = vec![json!({
+         "type": "function",
+         "function": {
+             "name": "shell",
+             "description": "Run a shell command",
+             "parameters": {
+                 "type": "object",
+                 "properties": {
+                     "command": {"type": "string"}
+                 },
+                 "required": ["command"]
+             }
+         }
+     })];
+     let rendered = render_minimax_openai_template(&messages, Some(&tools), false);
+
+     // `str::contains("")` is true for every string, so an empty needle proves
+     // nothing. Assert on non-empty text that the template only emits when
+     // tools are present: the section heading plus the serialized tool
+     // description (which does not occur in the user message).
+     assert!(
+         rendered.contains("# Tools") && rendered.contains("Run a shell command"),
+         "expected tool schema block in render: {rendered}"
+     );
+     assert!(
+         rendered.contains(
+             "When making tool calls, use XML format to invoke tools and pass parameters:"
+         ),
+         "expected MiniMax tool-call instructions in render: {rendered}"
+     );
+     assert!(
+         rendered.ends_with("]~b]ai\n\n\n\n\n"),
+         "tool-active disabled-thinking requests must use a closed-thinking assistant prompt: {rendered}"
+     );
+ }
+
#[test]
fn normalize_tool_call_arguments_invalid_json_string_left_alone() {
// If args is a string but not valid JSON, leave as-is so the
diff --git a/jinja-templates/openai/minimax_m2.jinja b/jinja-templates/openai/minimax_m2.jinja
new file mode 100644
index 00000000..744093cd
--- /dev/null
+++ b/jinja-templates/openai/minimax_m2.jinja
@@ -0,0 +1,164 @@
+{# ---------- special token variables ---------- #}
+{#- Literals emitted around the example in the tool instructions and around tool calls in assistant turns. NOTE(review): both are empty strings as shown here; confirm the intended token text. -#}
+{%- set toolcall_begin_token = '' -%}
+{%- set toolcall_end_token = '' -%}
+{#- Tool Rendering Functions ============================================== -#}
+{#- Emits the `function` object of every entry in `tool_list` as JSON, one per line. `namespace_name` is accepted but not referenced in the body. -#}
+{%- macro render_tool_namespace(namespace_name, tool_list) -%}
+{%- for tool in tool_list -%}
+{{ tool.function | tojson(ensure_ascii=False) }}
+{% endfor -%}
+{%- endmacro -%}
+{#- Emits the text of `content`. A string is emitted as-is. For a non-mapping iterable, the `text` of each mapping item whose `type` is 'text' and each bare string item are emitted in order; other items are skipped. Any other value is emitted unchanged. -#}
+{%- macro visible_text(content) -%}
+ {%- if content is string -%}
+ {{ content }}
+ {%- elif content is iterable and content is not mapping -%}
+ {%- for item in content -%}
+ {%- if item is mapping and item.type == 'text' -%}
+ {{- item.text }}
+ {%- elif item is string -%}
+ {{- item }}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- else -%}
+ {{- content }}
+ {%- endif -%}
+{%- endmacro -%}
+{#- System Message Construction ============================================ -#}
+{#- Emits the system text: the visible text of `system_message.content` when it is set, otherwise `model_identity` (assigned a default sentence when it is not defined). Then appends a 'Current date' and/or 'Current location' line when the matching field is set on `system_message`. -#}
+{%- macro build_system_message(system_message) -%}
+ {%- if system_message and system_message.content -%}
+ {{- visible_text(system_message.content) }}
+ {%- else -%}
+ {%- if model_identity is not defined -%}
+ {%- set model_identity = "You are a helpful assistant. Your name is MiniMax-M2.7 and is built by MiniMax." -%}
+ {%- endif -%}
+ {{- model_identity }}
+ {%- endif -%}
+
+ {#- Handle current_date -#}
+ {%- if system_message and system_message.current_date -%}
+ {{- '\n' ~ 'Current date: ' + system_message.current_date }}
+ {%- endif -%}
+ {#- Handle current_location -#}
+ {%- if system_message and system_message.current_location -%}
+ {{- '\n' ~ 'Current location: ' + system_message.current_location }}
+ {%- endif -%}
+{%- endmacro -%}
+{#- Main Template Logic ================================================= -#}
+{#- Extract system message (only first message if it's system) -#}
+{#- Preamble flow: split off a leading system message, record the index of the last user message in `ns.last_user_index`, then emit the system turn. When `tools` is truthy the turn also carries a '# Tools' section listing each tool and an example of the tool-call format. The turn is closed with '[e~[' and a newline. NOTE(review): the literals wrapped around the tool list are empty strings as shown; confirm the intended marker text. -#}
+{%- set system_message = none -%}
+{%- set conversation_messages = messages -%}
+{%- if messages and messages[0].role == "system" -%}
+ {%- set system_message = messages[0] -%}
+ {%- set conversation_messages = messages[1:] -%}
+{%- endif -%}
+{#- Get the last user message turn, for interleaved thinking -#}
+{%- set ns = namespace(last_user_index=-1) %}
+{% for m in conversation_messages %}
+ {%- if m.role == 'user' %}
+ {% set ns.last_user_index = loop.index0 -%}
+ {%- endif %}
+{%- endfor %}
+{#- Render system message -#}
+{{- ']~!b[' ~ ']~b]system' ~ '\n' }}
+{{- build_system_message(system_message) }}
+{#- Render tools if available -#}
+{%- if tools -%}
+ {{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }}
+ {{- '\n' ~ '' ~ '\n' }}
+ {{- render_tool_namespace("functions", tools) }}
+ {{- '' ~ '\n\n' }}
+{{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\n' }}
+{{- '\n' ~ toolcall_begin_token }}
+
+param-value-1
+param-value-2
+...
+
+{{- '\n' ~ toolcall_end_token }}
+{%- endif -%}
+{{- '[e~[\n' }}
+
+{#- Render messages -#}
+{#- Per-message rendering. Assistant turns: reasoning text comes from `message.reasoning_content` when it is a string, otherwise it is split out of the content; it is emitted only for messages positioned after the last user message. Content and any tool calls follow, and the turn ends with '[e~['. Tool turns: consecutive tool messages share a single ']~b]tool' header and a single '[e~[' terminator, and an exception is raised when `last_tool_call.name` is none. User turns: header, visible text, terminator. NOTE(review): several string literals in this block are empty as shown (the containment test and the split arguments on the content, plus the wrappers around reasoning, tool calls and tool results); confirm the intended marker text. NOTE(review): `last_tool_call.name` is read from `message.tool_calls[-1].name`, while the tool-call loop unwraps `tool_call.function` first; for function-wrapped entries that name may be undefined rather than none -- verify the guard in the tool branch still fires as intended. -#}
+{%- set last_tool_call = namespace(name=none) -%}
+{%- for message in conversation_messages -%}
+ {%- if message.role == 'assistant' -%}
+ {#- Only render reasoning_content if no user message follows -#}
+ {{- ']~b]ai' ~ '\n' }}
+
+ {%- set reasoning_content = '' %}
+ {%- set content = visible_text(message.content) %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].strip('\n').split('')[-1].strip('\n') %}
+ {%- set content = content.split('')[-1].strip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- if reasoning_content and loop.index0 > ns.last_user_index -%}
+ {{- '' ~ '\n' ~ reasoning_content ~ '\n' ~ '' ~ '\n\n' }}
+ {%- endif -%}
+ {%- if content -%}
+ {{- content }}
+ {%- endif -%}
+ {%- if message.tool_calls -%}
+ {{- '\n' ~ toolcall_begin_token ~ '\n' }}
+
+ {%- for tool_call in message.tool_calls -%}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '' }}
+ {% set _args = tool_call.arguments %}
+ {%- for k, v in _args.items() %}
+ {{- '' }}
+ {{- v | tojson(ensure_ascii=False) if v is not string else v }}
+ {{- '' }}
+ {% endfor %}
+ {{- '' ~ '\n' }}
+ {%- endfor -%}
+
+ {{- toolcall_end_token}}
+ {%- set last_tool_call.name = message.tool_calls[-1].name -%}
+ {%- else -%}
+ {%- set last_tool_call.name = none -%}
+ {%- endif -%}
+ {{- '[e~[' ~ '\n' }}
+
+ {%- elif message.role == 'tool' -%}
+ {%- if last_tool_call.name is none -%}
+ {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
+ {%- endif -%}
+ {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}
+ {{- ']~b]tool' }}
+ {%- endif -%}
+ {%- if message.content is string -%}
+ {{- '\n' }}
+ {{- message.content }}
+ {{- '' }}
+ {%- else -%}
+ {%- for tr in message.content -%}
+ {{- '\n' }}
+ {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}
+ {{- '\n' }}
+ {%- endfor -%}
+ {%- endif -%}
+ {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}
+ {{- '[e~[\n' -}}
+ {%- endif -%}
+
+ {%- elif message.role == 'user' -%}
+ {{- ']~b]user' ~ '\n' }}
+ {{- visible_text(message.content) }}
+ {{- '[e~[' ~ '\n' }}
+ {%- endif -%}
+{%- endfor -%}
+
+{#- Generation prompt -#}
+{#- When `add_generation_prompt` is truthy, opens an assistant turn. What follows the header depends on whether `enable_thinking` is defined and true: that branch emits one literal plus a newline, the other branch emits two literals each followed by a blank line. NOTE(review): those literals are empty strings as shown; confirm the intended marker text. -#}
+{%- if add_generation_prompt -%}
+{{- ']~b]ai' ~ '\n' }}
+{%- if enable_thinking is defined and enable_thinking is true -%}
+{{- '' ~ '\n' }}
+{%- else -%}
+{{- '' ~ '\n\n' ~ '' ~ '\n\n' }}
+{%- endif -%}
+{%- endif -%}
diff --git a/kernels/gb10/minimax-m2-229b/MODEL.toml b/kernels/gb10/minimax-m2-229b/MODEL.toml
index 601c4236..b954fe38 100644
--- a/kernels/gb10/minimax-m2-229b/MODEL.toml
+++ b/kernels/gb10/minimax-m2-229b/MODEL.toml
@@ -55,20 +55,15 @@ top_p = 0.95
top_k = 20
[behavior]
-# MiniMax M2's chat_template.jinja appends `\n` unconditionally
-# at `add_generation_prompt` (line 164). With `thinking_in_tools=false`
-# the reasoning parser strips `...` from `content`, but
-# the model still spends tokens inside `` before emitting either
-# (a) a tool call (`...`) or (b) free-form code/text.
-# `max_thinking_budget` caps that spontaneous-thinking phase: when the
-# scheduler detects `` without an explicit per-request budget,
-# it uses this number as the cap. Hit the cap → `` is force-
-# emitted → real content begins. 1024 tokens at ~17 tok/s ≈ 60 s of
-# reasoning headroom: long enough for fib's chain-of-thought trace,
-# short enough that Search emits its tool call inside the test timeout.
+# MiniMax M2's native HF chat_template.jinja appends `<think>\n`
+# unconditionally at `add_generation_prompt`. Atlas uses an OpenAI-
+# variant template that gates this generation prompt on the resolved
+# per-request thinking setting. `max_thinking_budget` still caps explicit
+# or spontaneous thinking: when the scheduler detects `<think>` without
+# an explicit per-request budget, it uses this number as the cap. Hit
+# the cap -> `</think>` is force-emitted -> real content begins.
# `thinking_default = false` makes the OpenAI-style API treat
-# `extra_body.thinking.budget_tokens` as opt-in (matches MiniMax's
-# `thinking_in_tools=false` story for tool-active requests).
+# `extra_body.thinking.budget_tokens` as opt-in.
thinking_in_tools = false
max_thinking_budget = 1024
thinking_default = false