diff --git a/crates/zeroclaw-api/src/model_provider.rs b/crates/zeroclaw-api/src/model_provider.rs index 3a5f60d7340..55d75ba48df 100644 --- a/crates/zeroclaw-api/src/model_provider.rs +++ b/crates/zeroclaw-api/src/model_provider.rs @@ -166,6 +166,16 @@ pub struct ChatRequest<'a> { pub struct ToolResultMessage { pub tool_call_id: String, pub content: String, + /// Name of the tool that produced this result, retained so downstream + /// media-marker canonicalization stays provenance-aware: path-listing + /// tools (`content_search`, `glob_search`) must not have incidental image + /// paths promoted to routable `[IMAGE:...]` markers (PR #7345). Empty when + /// the producing tool is unknown (e.g. results reconstructed from a + /// provider-wire `tool` message that never carried the name), in which case + /// the blind canonicalizer runs exactly as before (PR #6183). + /// `#[serde(default)]` keeps older serialized session records readable. + #[serde(default)] + pub tool_name: String, } /// A message in a multi-turn conversation, including tool interactions. diff --git a/crates/zeroclaw-infra/src/acp_session_store.rs b/crates/zeroclaw-infra/src/acp_session_store.rs index 47ba1fc3fb7..eaf18c6cfe6 100644 --- a/crates/zeroclaw-infra/src/acp_session_store.rs +++ b/crates/zeroclaw-infra/src/acp_session_store.rs @@ -386,6 +386,11 @@ impl AcpSessionStore { "out" => outs.push(ToolResultMessage { tool_call_id, content: payload, + // Carry the producing tool name (looked up from the + // matching 'in' row on write) so a resumed session + // stays provenance-aware for media-marker + // canonicalization (PR #7345). + tool_name, }), other => { ::zeroclaw_log::record!( @@ -925,6 +930,7 @@ mod tests { ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc-1".into(), content: "file.txt\n".into(), + tool_name: String::new(), }]), ConversationMessage::Chat(ChatMessage::assistant("done")), ]; @@ -1076,6 +1082,7 @@ mod tests { ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc-1".into(), content: "ok".into(), + tool_name: String::new(), }]), ], ) diff --git a/crates/zeroclaw-runtime/src/agent/agent.rs b/crates/zeroclaw-runtime/src/agent/agent.rs index 189493ab0de..c1f4dd51b87 100644 --- a/crates/zeroclaw-runtime/src/agent/agent.rs +++ b/crates/zeroclaw-runtime/src/agent/agent.rs @@ -1776,6 +1776,10 @@ impl Agent { .and_then(|c| c.as_str()) .unwrap_or_default() .to_string(), + // Provider-wire tool messages do not carry the + // producing tool name; replayed results fall back + // to blind canonicalization (PR #6183, #7345). + tool_name: String::new(), }) }) .collect(); @@ -1796,6 +1800,9 @@ impl Agent { .and_then(|c| c.as_str()) .unwrap_or_default() .to_string(), + // No provenance on the provider-wire shape; blind canon + // applies as before (PR #6183, #7345). + tool_name: String::new(), }; push_tool_results(&mut replayed, vec![result]); continue; @@ -3958,6 +3965,7 @@ mod tests { ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc-1".into(), content: "ok".into(), + tool_name: String::new(), }]), ConversationMessage::Chat(ChatMessage::assistant("done")), ]; @@ -4700,6 +4708,7 @@ mod tests { .push(ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: format!("tc{i}"), content: format!("result{i}"), + tool_name: String::new(), }])); } } @@ -4805,6 +4814,7 @@ mod tests { .push(ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc1".into(), content: "result1".into(), + tool_name: String::new(), }])); // AC2, TR2 agent.history.push(ConversationMessage::AssistantToolCalls { @@ -4822,6 +4832,7 @@ mod tests { .push(ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc2".into(), content: "result2".into(), + tool_name: String::new(), }])); // AC3, TR3 agent.history.push(ConversationMessage::AssistantToolCalls { @@ -4839,6 +4850,7 @@ mod tests { .push(ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc3".into(), content: "result3".into(), + tool_name: String::new(), }])); assert_eq!(agent.history.len(), 7); @@ -4957,6 +4969,7 @@ mod tests { .push(ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc1".into(), content: "result1".into(), + tool_name: String::new(), }])); // AC2, TR2 agent.history.push(ConversationMessage::AssistantToolCalls { @@ -4974,6 +4987,7 @@ mod tests { .push(ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc2".into(), content: "result2".into(), + tool_name: String::new(), }])); assert_eq!(agent.history.len(), 5); @@ -5089,6 +5103,7 @@ mod tests { .push(ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc1".into(), content: "result1".into(), + tool_name: String::new(), }])); // AC2, TR2 agent.history.push(ConversationMessage::AssistantToolCalls { @@ -5106,6 +5121,7 @@ mod tests { .push(ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc2".into(), content: "result2".into(), + tool_name: String::new(), }])); assert_eq!(agent.history.len(), 6); diff --git a/crates/zeroclaw-runtime/src/agent/dispatcher.rs b/crates/zeroclaw-runtime/src/agent/dispatcher.rs index 4bcf2103188..aefb7fc7293 100644 --- a/crates/zeroclaw-runtime/src/agent/dispatcher.rs +++ b/crates/zeroclaw-runtime/src/agent/dispatcher.rs @@ -1,4 +1,4 @@ -use super::history::canonicalize_tool_result_media_markers; +use super::history::canonicalize_tool_result_media_markers_for; use crate::tools::{Tool, ToolSpec}; use serde_json::Value; use std::fmt::Write; @@ -130,7 +130,12 @@ impl ToolDispatcher for XmlToolDispatcher { let mut content = String::new(); for result in results { let status = if result.success { "ok" } else { "error" }; - let output = canonicalize_tool_result_media_markers(&result.output); + // Provenance-gated: search/listing tools (content_search, + // glob_search) must not have incidental image paths promoted to + // routable [IMAGE:...] markers (PR #7345). The producing tool name is + // known here, so canonicalize through the same shared helper the + // turn loop uses. + let output = canonicalize_tool_result_media_markers_for(&result.name, &result.output); let _ = writeln!( content, "\n{}\n", @@ -167,7 +172,16 @@ impl ToolDispatcher for XmlToolDispatcher { ConversationMessage::ToolResults(results) => { let mut content = String::new(); for result in results { - let output = canonicalize_tool_result_media_markers(&result.content); + // Provenance-aware (PR #7345). XML format_results stores + // a Chat message rather than ToolResults, so this branch + // only fires for ToolResults reconstructed elsewhere + // (ACP/RPC resume) and rendered through an XML agent; + // gate it for the same reason as the native path. Empty + // `tool_name` falls back to blind canon (PR #6183). + let output = canonicalize_tool_result_media_markers_for( + &result.tool_name, + &result.content, + ); let _ = writeln!( content, "\n{}\n", @@ -213,7 +227,13 @@ impl ToolDispatcher for NativeToolDispatcher { .tool_call_id .clone() .unwrap_or_else(|| "unknown".to_string()), - content: canonicalize_tool_result_media_markers(&result.output), + // Retain the producing tool name so the read path + // (`to_provider_messages`) can re-canonicalize provenance-aware + // instead of blindly re-promoting a search/listing path back + // into an `[IMAGE:...]` marker (PR #7345). + tool_name: result.name.clone(), + // Provenance-gated (PR #7345): see the XML dispatcher above. + content: canonicalize_tool_result_media_markers_for(&result.name, &result.output), }) .collect(); ConversationMessage::ToolResults(messages) @@ -248,7 +268,16 @@ impl ToolDispatcher for NativeToolDispatcher { ChatMessage::tool( serde_json::json!({ "tool_call_id": result.tool_call_id, - "content": canonicalize_tool_result_media_markers(&result.content), + // Provenance-aware (PR #7345): a stored + // search/listing result keeps its literal path + // instead of being re-promoted to `[IMAGE:...]`. + // Empty `tool_name` (results with no recorded + // provenance) falls back to the blind + // canonicalizer, preserving PR #6183. + "content": canonicalize_tool_result_media_markers_for( + &result.tool_name, + &result.content, + ), }) .to_string(), ) @@ -387,6 +416,228 @@ mod tests { } } + // ═══════════════════════════════════════════════════════════════════════ + // provenance-gated media-marker canonicalization (PR #7345) + // ═══════════════════════════════════════════════════════════════════════ + // The dispatcher result-formatting path is reachable from `Agent::turn` + // / `Agent::turn_streamed` (ACP, gateway WebSocket + RPC). A search/listing + // tool that merely *lists* a local image path must NOT have that path + // rewritten into a routable `[IMAGE:...]` marker - otherwise it falsely + // triggers vision routing and a provider-capability error on a text-only + // provider. A genuine image-producing tool (e.g. `image_gen`) MUST still be + // canonicalized. Both dispatchers gate via the shared + // `canonicalize_tool_result_media_markers_for(tool_name, ...)` helper. + + /// Write a throwaway PNG and return its absolute path string. An existing + /// local image path is required for canonicalization to fire at all. + fn write_temp_image(dir: &std::path::Path, name: &str) -> String { + let image = dir.join(name); + std::fs::write(&image, [0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n']).unwrap(); + image.display().to_string() + } + + fn xml_format_results_text( + dispatcher: &XmlToolDispatcher, + result: ToolExecutionResult, + ) -> String { + match dispatcher.format_results(&[result]) { + ConversationMessage::Chat(chat) => chat.content, + _ => panic!("XmlToolDispatcher::format_results must return a Chat message"), + } + } + + fn native_format_results_content( + dispatcher: &NativeToolDispatcher, + result: ToolExecutionResult, + ) -> String { + match dispatcher.format_results(&[result]) { + ConversationMessage::ToolResults(results) => results[0].content.clone(), + _ => panic!("NativeToolDispatcher::format_results must return ToolResults"), + } + } + + #[test] + fn xml_format_results_does_not_promote_search_tool_image_paths() { + let dir = tempfile::tempdir().unwrap(); + let path = write_temp_image(dir.path(), "hit.png"); + let xml = XmlToolDispatcher; + + for tool in ["content_search", "glob_search"] { + let rendered = xml_format_results_text( + &xml, + ToolExecutionResult { + name: tool.into(), + output: format!("match: {path}"), + success: true, + tool_call_id: None, + }, + ); + assert!( + !rendered.contains("[IMAGE:"), + "{tool} output must not be promoted to an image marker" + ); + assert!( + rendered.contains(&path), + "{tool} output must still carry the literal path text" + ); + } + } + + #[test] + fn native_format_results_does_not_promote_search_tool_image_paths() { + let dir = tempfile::tempdir().unwrap(); + let path = write_temp_image(dir.path(), "hit.png"); + let native = NativeToolDispatcher; + + for tool in ["content_search", "glob_search"] { + let content = native_format_results_content( + &native, + ToolExecutionResult { + name: tool.into(), + output: format!("found: {path}"), + success: true, + tool_call_id: Some("tc1".into()), + }, + ); + assert!( + !content.contains("[IMAGE:"), + "{tool} output must not be promoted to an image marker" + ); + assert!(content.contains(&path)); + } + } + + #[test] + fn format_results_still_promotes_image_producing_tool_paths() { + // Default-allow: a genuinely image-producing tool keeps canonicalization + // in BOTH dispatchers, so real tool-produced images still route to a + // vision provider. + let dir = tempfile::tempdir().unwrap(); + let path = write_temp_image(dir.path(), "generated.png"); + let expected = format!("[IMAGE:{path}]"); + + let xml = XmlToolDispatcher; + let rendered = xml_format_results_text( + &xml, + ToolExecutionResult { + name: "image_gen".into(), + output: format!("saved to {path}"), + success: true, + tool_call_id: None, + }, + ); + assert!( + rendered.contains(&expected), + "image_gen output must be canonicalized into a marker (XML)" + ); + + let native = NativeToolDispatcher; + let content = native_format_results_content( + &native, + ToolExecutionResult { + name: "image_gen".into(), + output: format!("saved to {path}"), + success: true, + tool_call_id: Some("tc1".into()), + }, + ); + assert!( + content.contains(&expected), + "image_gen output must be canonicalized into a marker (native)" + ); + } + + /// Round-trip the native tool-result history shape: `format_results` + /// (write) -> `to_provider_messages` (read). This is the path the agent + /// loop actually exercises (`Agent::turn` serializes `self.history` via + /// `to_provider_messages` before handing it to `run_tool_call_loop`). + /// Without provenance carried on `ToolResultMessage`, the provenance-blind + /// read-side serializer re-promoted a stored search path back into a + /// routable `[IMAGE:...]` marker. (PR #7345 blocker.) + fn native_round_trip_tool_content( + dispatcher: &NativeToolDispatcher, + result: ToolExecutionResult, + ) -> String { + let stored = dispatcher.format_results(&[result]); + let messages = dispatcher.to_provider_messages(&[stored]); + assert_eq!(messages.len(), 1, "one tool result -> one provider message"); + assert_eq!(messages[0].role, "tool"); + messages[0].content.clone() + } + + #[test] + fn native_search_path_survives_format_then_to_provider_messages() { + let dir = tempfile::tempdir().unwrap(); + let path = write_temp_image(dir.path(), "hit.png"); + let native = NativeToolDispatcher; + + for tool in ["content_search", "glob_search"] { + let rendered = native_round_trip_tool_content( + &native, + ToolExecutionResult { + name: tool.into(), + output: format!("found: {path}"), + success: true, + tool_call_id: Some("tc1".into()), + }, + ); + assert!( + !rendered.contains("[IMAGE:"), + "{tool} path must not be re-promoted on the read side" + ); + assert!( + rendered.contains(&path), + "{tool} provider-visible content must keep the literal path" + ); + } + } + + #[test] + fn native_image_gen_path_still_promotes_through_to_provider_messages() { + // Default-allow preserved across the round trip: a real generated image + // still becomes an `[IMAGE:...]` marker, so it routes to a vision + // provider as before. + let dir = tempfile::tempdir().unwrap(); + let path = write_temp_image(dir.path(), "generated.png"); + let native = NativeToolDispatcher; + + let rendered = native_round_trip_tool_content( + &native, + ToolExecutionResult { + name: "image_gen".into(), + output: format!("saved to {path}"), + success: true, + tool_call_id: Some("tc1".into()), + }, + ); + assert!( + rendered.contains(&format!("[IMAGE:{path}]")), + "image_gen image must still canonicalize through the round trip" + ); + } + + #[test] + fn native_unknown_provenance_still_promotes_on_read() { + // PR #6183 contract preserved: a tool result stored WITHOUT provenance + // (empty `tool_name`, e.g. reconstructed from a provider-wire message) + // still has a genuine image path canonicalized on the read side. + let dir = tempfile::tempdir().unwrap(); + let path = write_temp_image(dir.path(), "history.png"); + let native = NativeToolDispatcher; + + let history = vec![ConversationMessage::ToolResults(vec![ToolResultMessage { + tool_call_id: "tc1".into(), + content: format!("Saved image to {path}"), + tool_name: String::new(), + }])]; + let messages = native.to_provider_messages(&history); + assert_eq!(messages.len(), 1); + assert!( + messages[0].content.contains(&format!("[IMAGE:{path}]")), + "unknown-provenance result must still promote a real image path" + ); + } + // ═══════════════════════════════════════════════════════════════════════ // reasoning_content pass-through tests // ═══════════════════════════════════════════════════════════════════════ diff --git a/crates/zeroclaw-runtime/src/agent/history.rs b/crates/zeroclaw-runtime/src/agent/history.rs index e2381ccf682..abeea897c71 100644 --- a/crates/zeroclaw-runtime/src/agent/history.rs +++ b/crates/zeroclaw-runtime/src/agent/history.rs @@ -227,6 +227,39 @@ pub fn canonicalize_tool_result_media_markers(output: &str) -> String { rewritten } +/// Tools whose output merely *lists* or *quotes* local filesystem paths +/// (search hits, glob matches) rather than presenting an image as visual +/// content. Their incidental image-file paths must NOT be auto-promoted to +/// `[IMAGE:...]` markers: the agent loop counts the current iteration's +/// tool-result markers (`multimodal::count_image_markers`) when deciding +/// whether to switch to a vision provider, so a path echo here falsely +/// triggers vision routing - producing a provider-capability error on a +/// text-only provider. See PR #7345. +/// +/// This is a denylist (default-allow): any other tool - including ones that +/// genuinely *generate* or *fetch* an image and print its path (e.g. +/// `image_gen`, `file_download`) - keeps canonicalization, so real +/// tool-produced images still route to a configured vision provider. +fn is_path_listing_tool(tool_name: &str) -> bool { + matches!( + tool_name.to_ascii_lowercase().as_str(), + "content_search" | "glob_search" + ) +} + +/// Provenance-aware wrapper around [`canonicalize_tool_result_media_markers`]. +/// +/// Returns the output unchanged for path-listing tools ([`is_path_listing_tool`]) +/// so their incidental image paths never become routable `[IMAGE:...]` markers; +/// all other tools are canonicalized exactly as before. +pub fn canonicalize_tool_result_media_markers_for(tool_name: &str, output: &str) -> String { + if is_path_listing_tool(tool_name) { + output.to_string() + } else { + canonicalize_tool_result_media_markers(output) + } +} + /// Truncate a tool message's content, preserving JSON structure when the /// message stores `tool_call_id` alongside `content` (native tool-call /// format). Without this, `truncate_tool_result` destroys the JSON envelope @@ -531,6 +564,42 @@ mod tests { assert_eq!(output, input); } + #[test] + fn canonicalize_for_skips_path_listing_tools() { + // A search/listing tool that surfaces a real image path must be left + // untouched - promoting it to [IMAGE:...] would falsely trigger vision + // routing (PR #7345). + let dir = tempfile::tempdir().unwrap(); + let image = dir.path().join("hit.png"); + std::fs::write(&image, [0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n']).unwrap(); + let input = format!("match: {}", image.display()); + + for tool in ["content_search", "glob_search", "GLOB_SEARCH"] { + let output = canonicalize_tool_result_media_markers_for(tool, &input); + assert_eq!(output, input, "{tool} output must be left untouched"); + assert!(!output.contains("[IMAGE:")); + } + } + + #[test] + fn canonicalize_for_wraps_image_producing_and_fetching_tools() { + // Default-allow: image_gen (produces) and file_download (fetches) keep + // canonicalization so a genuinely produced/fetched image still routes. + let dir = tempfile::tempdir().unwrap(); + let image = dir.path().join("generated.png"); + std::fs::write(&image, [0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n']).unwrap(); + let input = format!("Saved to {}", image.display()); + let expected = format!("[IMAGE:{}]", image.display()); + + for tool in ["image_gen", "file_download", "some_future_tool"] { + let output = canonicalize_tool_result_media_markers_for(tool, &input); + assert!( + output.contains(&expected), + "{tool} output should be canonicalized into a marker" + ); + } + } + #[test] fn canonicalize_tool_result_media_markers_dedups_path_already_in_marker() { // `image_info` emits a durable `File: ` line *and* an explicit diff --git a/crates/zeroclaw-runtime/src/agent/tests.rs b/crates/zeroclaw-runtime/src/agent/tests.rs index 286424a1ca9..3f82d2d37e1 100644 --- a/crates/zeroclaw-runtime/src/agent/tests.rs +++ b/crates/zeroclaw-runtime/src/agent/tests.rs @@ -1271,6 +1271,7 @@ fn conversation_message_serialization_roundtrip() { ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc1".into(), content: "ok".into(), + tool_name: String::new(), }]), ConversationMessage::Chat(ChatMessage::assistant("done")), ]; @@ -1434,6 +1435,7 @@ fn xml_dispatcher_converts_history_to_provider_messages() { ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc1".into(), content: "ok".into(), + tool_name: String::new(), }]), ConversationMessage::Chat(ChatMessage::assistant("done")), ]; @@ -1461,10 +1463,12 @@ fn native_dispatcher_converts_tool_results_to_tool_messages() { ToolResultMessage { tool_call_id: "tc1".into(), content: format!("Saved image to {}", image_path.display().to_string()), + tool_name: String::new(), }, ToolResultMessage { tool_call_id: "tc2".into(), content: "output2".into(), + tool_name: String::new(), }, ])]; diff --git a/crates/zeroclaw-runtime/src/agent/turn/results_collect.rs b/crates/zeroclaw-runtime/src/agent/turn/results_collect.rs index 581c13505f7..908ab743141 100644 --- a/crates/zeroclaw-runtime/src/agent/turn/results_collect.rs +++ b/crates/zeroclaw-runtime/src/agent/turn/results_collect.rs @@ -3,7 +3,8 @@ //! identical-output abort. use crate::agent::history::{ - append_or_merge_system_message, canonicalize_tool_result_media_markers, truncate_tool_result, + append_or_merge_system_message, canonicalize_tool_result_media_markers_for, + truncate_tool_result, }; use crate::agent::loop_detector::LoopDetector; use crate::agent::tool_execution::ToolExecutionOutcome; @@ -115,7 +116,13 @@ pub(crate) fn collect_tool_results( } } } - let canonical_output = canonicalize_tool_result_media_markers(&outcome.output); + // Provenance-gated: search/listing tools (content_search, glob_search) + // must not have incidental image paths promoted to routable [IMAGE:...] + // markers, or they falsely trigger vision routing on a text-only + // provider. Image-producing/fetching tools keep canonicalization. + // See PR #7345. + let canonical_output = + canonicalize_tool_result_media_markers_for(&tool_name, &outcome.output); let mut result_output = truncate_tool_result(&canonical_output, max_tool_result_chars); // Append HMAC receipt to tool result when receipts are enabled if let Some(ref receipt) = outcome.receipt { diff --git a/crates/zeroclaw-runtime/src/rpc/dispatch.rs b/crates/zeroclaw-runtime/src/rpc/dispatch.rs index 7e43f299316..e65ae57d558 100644 --- a/crates/zeroclaw-runtime/src/rpc/dispatch.rs +++ b/crates/zeroclaw-runtime/src/rpc/dispatch.rs @@ -4451,6 +4451,7 @@ mod tests { ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc-1".into(), content: "log contents".into(), + tool_name: String::new(), }]), ConversationMessage::AssistantToolCalls { text: None, @@ -4465,6 +4466,7 @@ mod tests { ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "tc-2".into(), content: "no errors".into(), + tool_name: String::new(), }]), ConversationMessage::Chat(ChatMessage { role: "assistant".into(), diff --git a/src/providers/traits.rs b/src/providers/traits.rs index bbae7012632..e5c70f98f2d 100644 --- a/src/providers/traits.rs +++ b/src/providers/traits.rs @@ -140,6 +140,7 @@ mod tests { let tool_result = ConversationMessage::ToolResults(vec![ToolResultMessage { tool_call_id: "1".into(), content: "done".into(), + tool_name: String::new(), }]); let json = serde_json::to_string(&tool_result).unwrap(); assert!(json.contains("\"type\":\"ToolResults\""));