-
Notifications
You must be signed in to change notification settings - Fork 95
refactor(tool-parser): rename Glm4MoeParser to GlmParser with catch-all "glm" pattern #1568
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,8 +9,8 @@ use tokio::sync::Mutex; | |
|
|
||
| use crate::{ | ||
| parsers::{ | ||
| CohereParser, DeepSeek31Parser, DeepSeekDsmlParser, DeepSeekParser, Glm4MoeParser, | ||
| JsonParser, KimiK2Parser, LlamaParser, MinimaxM2Parser, MistralParser, PassthroughParser, | ||
| CohereParser, DeepSeek31Parser, DeepSeekDsmlParser, DeepSeekParser, GlmParser, JsonParser, | ||
| KimiK2Parser, LlamaParser, MinimaxM2Parser, MistralParser, PassthroughParser, | ||
| PythonicParser, QwenParser, QwenXmlParser, Step3Parser, | ||
| }, | ||
| traits::ToolParser, | ||
|
|
@@ -318,8 +318,8 @@ impl ParserFactory { | |
| registry.register_parser("deepseek31", || Box::new(DeepSeek31Parser::new())); | ||
| registry.register_parser("deepseek32", || Box::new(DeepSeekDsmlParser::v32())); | ||
| registry.register_parser("deepseek_v4", || Box::new(DeepSeekDsmlParser::v4())); | ||
| registry.register_parser("glm45_moe", || Box::new(Glm4MoeParser::glm45())); | ||
| registry.register_parser("glm47_moe", || Box::new(Glm4MoeParser::glm47())); | ||
| registry.register_parser("glm", || Box::new(GlmParser::default())); | ||
| registry.register_parser("glm45", || Box::new(GlmParser::glm45())); | ||
| registry.register_parser("step3", || Box::new(Step3Parser::new())); | ||
| registry.register_parser_with_structural_tag( | ||
| "kimik2", | ||
|
|
@@ -386,11 +386,10 @@ impl ParserFactory { | |
| registry.map_model("deepseek-ai/DeepSeek-V4*", "deepseek_v4"); | ||
| registry.map_model("deepseek-*", "pythonic"); | ||
|
|
||
| // GLM models | ||
| registry.map_model("glm-4.5*", "glm45_moe"); | ||
| registry.map_model("glm-4.6*", "glm45_moe"); | ||
| registry.map_model("glm-4.7*", "glm47_moe"); | ||
| registry.map_model("glm-*", "json"); | ||
| // GLM models (4.5/4.6 use newline format, 4.7+ uses whitespace-only format) | ||
| registry.map_model("glm-4.5*", "glm45"); | ||
| registry.map_model("glm-4.6*", "glm45"); | ||
| registry.map_model("glm-*", "glm"); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This catch-all now routes every Useful? React with 👍 / 👎. |
||
|
|
||
| // Step3 models | ||
| registry.map_model("step3*", "step3"); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,17 +10,14 @@ use crate::{ | |
| types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem}, | ||
| }; | ||
|
|
||
| /// GLM-4 MoE format parser for tool calls | ||
| /// GLM tool call format parser. | ||
| /// | ||
| /// Handles both GLM-4 MoE and GLM-4.7 MoE formats: | ||
| /// - GLM-4: `<tool_call>{name}\n<arg_key>{key}</arg_key>\n<arg_value>{value}</arg_value>\n</tool_call>` | ||
| /// - GLM-4.7: `<tool_call>{name}<arg_key>{key}</arg_key><arg_value>{value}</arg_value></tool_call>` | ||
| /// Handles the XML-style `<tool_call>` format used by GLM-4.5 through GLM-5.1: | ||
| /// - GLM-4.5/4.6: `<tool_call>{name}\n<arg_key>{key}</arg_key>\n<arg_value>{value}</arg_value>\n</tool_call>` | ||
| /// - GLM-4.7/5/5.1: `<tool_call>{name}<arg_key>{key}</arg_key><arg_value>{value}</arg_value></tool_call>` | ||
| /// | ||
| /// Features: | ||
| /// - XML-style tags for tool calls | ||
| /// - Key-value pairs for arguments | ||
| /// - Support for multiple sequential tool calls | ||
| pub struct Glm4MoeParser { | ||
| /// The default constructor uses the 4.7+ format (no newline between function name and args). | ||
| pub struct GlmParser { | ||
| /// Regex for extracting complete tool calls | ||
| tool_call_extractor: Regex, | ||
| /// Regex for extracting function details | ||
|
|
@@ -45,19 +42,13 @@ pub struct Glm4MoeParser { | |
| eot_token: &'static str, | ||
| } | ||
|
|
||
| impl Glm4MoeParser { | ||
| /// Create a new generic GLM MoE parser with a custom func_detail_extractor pattern | ||
| /// | ||
| /// # Arguments | ||
| /// - `func_detail_pattern`: Regex pattern for extracting function name and arguments | ||
| /// - For GLM-4: `r"(?s)<tool_call>([^\n]*)\n(.*)</tool_call>"` | ||
| /// - For GLM-4.7: `r"(?s)<tool_call>\s*([^<\s]+)\s*(.*?)</tool_call>"` | ||
| impl GlmParser { | ||
| /// Create a new GLM parser with a custom func_detail_extractor pattern. | ||
| #[expect( | ||
| clippy::expect_used, | ||
| reason = "regex patterns are compile-time string literals" | ||
| )] | ||
| pub(crate) fn new(func_detail_pattern: &str) -> Self { | ||
| // Use (?s) flag for DOTALL mode to handle newlines | ||
| let tool_call_pattern = r"(?s)<tool_call>.*?</tool_call>"; | ||
| let tool_call_extractor = Regex::new(tool_call_pattern).expect("Valid regex pattern"); | ||
|
|
||
|
|
@@ -79,13 +70,14 @@ impl Glm4MoeParser { | |
| } | ||
| } | ||
|
|
||
| /// Create a new GLM-4.5/4.6 MoE parser (with newline-based format) | ||
| /// Create a GLM-4.5/4.6 parser (newline between function name and args). | ||
| pub fn glm45() -> Self { | ||
| Self::new(r"(?s)<tool_call>([^\n]*)\n(.*)</tool_call>") | ||
| } | ||
|
|
||
| /// Create a new GLM-4.7 MoE parser (with whitespace-based format) | ||
| pub fn glm47() -> Self { | ||
| /// Create a GLM-4.7+ parser (no newline required between function name and args). | ||
| /// Compatible with GLM-4.7, GLM-5, GLM-5.1. | ||
| pub(crate) fn glm47() -> Self { | ||
| Self::new(r"(?s)<tool_call>\s*([^<\s]+)\s*(.*?)</tool_call>") | ||
|
Comment on lines
+80
to
81
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
For downstream Rust users of the published Useful? React with 👍 / 👎. |
||
| } | ||
|
|
||
|
|
@@ -172,16 +164,16 @@ impl Glm4MoeParser { | |
| } | ||
| } | ||
|
|
||
| impl Default for Glm4MoeParser { | ||
| impl Default for GlmParser { | ||
| fn default() -> Self { | ||
| Self::glm45() | ||
| Self::glm47() | ||
| } | ||
| } | ||
|
|
||
| #[async_trait] | ||
| impl ToolParser for Glm4MoeParser { | ||
| impl ToolParser for GlmParser { | ||
| async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> { | ||
| // Check if text contains GLM-4 MoE format | ||
| // Check if text contains GLM format | ||
| if !self.has_tool_markers(text) { | ||
| return Ok((text.to_string(), vec![])); | ||
| } | ||
|
|
@@ -276,7 +268,7 @@ impl ToolParser for Glm4MoeParser { | |
| tracing::debug!("Invalid tool name '{}' - skipping", tool_call.function.name); | ||
| helpers::reset_current_tool_state( | ||
| &mut self.buffer, | ||
| &mut false, // glm45_moe/glm47_moe doesn't track name_sent per tool | ||
| &mut false, | ||
| &mut self.streamed_args_for_tool, | ||
| &self.prev_tool_call_arr, | ||
| ); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Deployments that explicitly configure the previously registered parser names (`--tool-call-parser glm45_moe` or `glm47_moe`) now fail startup because `AppContextBuilder` validates the configured name with `factory.has_parser(name)` and rejects unknown tool-call parsers. This change only registers the new `glm`/`glm45` names, so existing configs that worked before this refactor cannot start unless they are updated; keeping the old names as aliases would avoid that compatibility break.
Useful? React with 👍 / 👎.