From 111d104f32c1e0e365b73e90bc6c5c45f0a3ce4c Mon Sep 17 00:00:00 2001
From: Yijun Yu <yijun.yu@open.ac.uk>
Date: Wed, 13 May 2026 13:44:28 +0000
Subject: [PATCH 1/2] fix(channels/orchestrator): keep system prompt
 byte-stable for prefix caching (#6360)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

build_channel_system_prompt embedded two per-turn pieces into the cached
system prompt: a ## Current Date & Time section refreshed to second precision
and a Channel context: paragraph carrying reply_target / sender. Every reply
mutated the system prefix, so provider prompt caches (llama.cpp prefix cache,
Anthropic/OpenAI cached prefixes) missed unconditionally — the reporter saw
'forcing full prompt re-processing due to lack of cache data' and ~12k tokens
reprocessed every turn. CLI did not exhibit the bug because it never appended
these fields to the system prompt.

Move both pieces to a per-turn preamble that rides on the **user message** of
the current turn instead. The system prompt now stays byte-stable for the
lifetime of a session, so prefix caches hit. The cached conversation copy
stored at append_sender_turn time is left untouched — only the outgoing wire
copy carries the [turn-context] marker.

New tests:
- build_channel_system_prompt_is_byte_stable_across_calls (helper-level)
- build_channel_system_prompt_excludes_per_turn_context / _per_second_datetime
- build_channel_turn_context_preamble_carries_dynamic_fields / _empty_when_reply_target_empty
- process_channel_message_telegram_system_prompt_is_byte_stable_across_turns
  (end-to-end: two consecutive process_channel_message calls 1.1 s apart must
  produce byte-identical system prompts — under the pre-fix behavior the
  ## Current Date & Time second field alone would have mutated)

Inverted the existing process_channel_message_enriches_current_turn_without_persisting_context
assertion to pin the new contract (user message carries the turn-context
preamble; cached history copy stays clean).
---
 .../zeroclaw-channels/src/orchestrator/mod.rs | 313 ++++++++++++++----
 1 file changed, 245 insertions(+), 68 deletions(-)

diff --git a/crates/zeroclaw-channels/src/orchestrator/mod.rs b/crates/zeroclaw-channels/src/orchestrator/mod.rs
index 1bbd76cec82..de5f4d890a7 100644
--- a/crates/zeroclaw-channels/src/orchestrator/mod.rs
+++ b/crates/zeroclaw-channels/src/orchestrator/mod.rs
@@ -657,56 +657,60 @@ fn channel_delivery_instructions(channel_name: &str) -> Option<&'static str> {
     }
 }
 
-fn build_channel_system_prompt(
-    base_prompt: &str,
-    channel_name: &str,
-    reply_target: &str,
-    sender: &str,
-) -> String {
-    let mut prompt = base_prompt.to_string();
-
-    // Refresh the stale datetime in the cached system prompt
-    {
-        let now = chrono::Local::now();
-        let fresh = format!(
-            "## Current Date & Time\n\n{} ({})\n",
-            now.format("%Y-%m-%d %H:%M:%S"),
-            now.format("%Z"),
-        );
-        if let Some(start) = prompt.find("## Current Date & Time\n\n") {
-            // Find the end of this section (next "## " heading or end of string)
-            let rest = &prompt[start + 24..]; // skip past "## Current Date & Time\n\n"
-            let section_end = rest
-                .find("\n## ")
-                .map(|i| start + 24 + i)
-                .unwrap_or(prompt.len());
-            prompt.replace_range(start..section_end, fresh.trim_end());
-        }
-    }
+/// Build the channel-specific system prompt.
+///
+/// The result is intentionally **stable across turns of the same channel**: it
+/// embeds only the base prompt and the channel-delivery instructions. Per-turn
+/// pieces (current date/time, reply_target, sender) are kept out of the system
+/// prompt entirely so that provider-side prompt caching (e.g. llama.cpp prefix
+/// cache, Anthropic/OpenAI cached prefixes) can hit on the system prefix. See
+/// `build_channel_turn_context_preamble` for the per-turn dynamic block that
+/// now rides on the user message instead.
+///
+/// Issue #6360: the previous implementation refreshed `## Current Date & Time`
+/// to *second* precision and appended a per-turn channel-context paragraph
+/// containing `reply_target` and `sender`, which caused the entire system
+/// prompt to mutate on every turn and forced a full prompt re-tokenization.
+fn build_channel_system_prompt(base_prompt: &str, channel_name: &str) -> String {
+    let prompt = base_prompt.to_string();
 
     if let Some(instructions) = channel_delivery_instructions(channel_name) {
         if prompt.is_empty() {
-            prompt = instructions.to_string();
+            instructions.to_string()
         } else {
-            prompt = format!("{prompt}\n\n{instructions}");
+            format!("{prompt}\n\n{instructions}")
         }
+    } else {
+        prompt
     }
+}
 
-    if !reply_target.is_empty() {
-        let context = format!(
-            "\n\nChannel context: You are currently responding on channel={channel_name}, \
-             reply_target={reply_target}, sender={sender}. \
-             The sender field is the platform-specific user ID of the person who sent \
-             this message. Use it to distinguish between different users. \
-             When scheduling delayed messages or reminders \
-             via cron_add for this conversation, use delivery={{\"mode\":\"announce\",\
-             \"channel\":\"{channel_name}\",\"to\":\"{reply_target}\"}} so the message \
-             reaches the user."
-        );
-        prompt.push_str(&context);
+/// Build the per-turn channel-context preamble that used to be appended to
+/// the system prompt. Returns an empty string when there is no useful context
+/// to attach so the caller can skip emitting the marker entirely.
+///
+/// This text is meant to ride on the **user message** of the current turn,
+/// not the system prompt — it changes every turn (datetime, sender, reply
+/// target) and must not invalidate the cached system prefix (#6360).
+fn build_channel_turn_context_preamble(
+    channel_name: &str,
+    reply_target: &str,
+    sender: &str,
+) -> String {
+    if reply_target.is_empty() {
+        return String::new();
     }
-
-    prompt
+    let now = chrono::Local::now();
+    format!(
+        "[turn-context] time={} ({}) channel={channel_name} reply_target={reply_target} sender={sender}. \
+         The sender field is the platform-specific user ID of the person who sent \
+         this message. Use it to distinguish between different users. \
+         When scheduling delayed messages or reminders via cron_add for this \
+         conversation, use delivery={{\"mode\":\"announce\",\"channel\":\"{channel_name}\",\
+         \"to\":\"{reply_target}\"}} so the message reaches the user.",
+        now.format("%Y-%m-%d %H:%M:%S"),
+        now.format("%Z"),
+    )
 }
 
 fn normalize_cached_channel_turns(turns: Vec<ChatMessage>) -> Vec<ChatMessage> {
@@ -3021,18 +3025,31 @@ async fn process_channel_message(
     } else {
         refreshed_new_session_system_prompt(ctx.as_ref())
     };
-    let mut system_prompt = build_channel_system_prompt(
-        &base_system_prompt,
-        &msg.channel,
-        &msg.reply_target,
-        &msg.sender,
-    );
+    let mut system_prompt = build_channel_system_prompt(&base_system_prompt, &msg.channel);
     if !memory_context.is_empty() {
         let _ = write!(system_prompt, "\n\n{memory_context}");
     }
     let mut history = vec![ChatMessage::system(system_prompt)];
     history.extend(prior_turns);
 
+    // Prepend the per-turn channel context (datetime, reply_target, sender) to
+    // the *last* user turn — never to the system prompt — so that the system
+    // prefix stays byte-stable across turns and provider-side prompt caches
+    // can hit on it (#6360). The cached history copy stored at line ~2903 is
+    // untouched: only the outgoing LLM `history` carries the preamble.
+    let turn_preamble =
+        build_channel_turn_context_preamble(&msg.channel, &msg.reply_target, &msg.sender);
+    if !turn_preamble.is_empty() {
+        if let Some(last) = history
+            .iter_mut()
+            .rev()
+            .find(|m| m.role == "user")
+            .filter(|m| !m.content.starts_with("[turn-context]"))
+        {
+            last.content = format!("{turn_preamble}\n\n{}", last.content);
+        }
+    }
+
     // ── Proactive context compression ────────────────────────────
     // Use the existing ContextCompressor to summarize older history
     // before the LLM call, preventing context-window-exceeded errors
@@ -11379,8 +11396,32 @@ BTC is currently around $65,000 based on latest tool output."#
         assert_eq!(calls[0][0].0, "system");
         assert!(calls[0][0].1.contains(MEMORY_CONTEXT_OPEN));
         assert!(calls[0][0].1.contains("Age is 45"));
+        // Per-turn channel-context (datetime, reply_target, sender) rides on the
+        // user message — NOT the system prompt — so the system prefix stays
+        // byte-stable and provider prompt caches hit on it (#6360).
+        assert!(
+            !calls[0][0].1.contains("reply_target="),
+            "system prompt must not carry per-turn reply_target",
+        );
+        assert!(
+            !calls[0][0].1.contains("## Current Date & Time"),
+            "system prompt must not carry per-second timestamp",
+        );
         assert_eq!(calls[0][1].0, "user");
-        assert_eq!(calls[0][1].1, "hello");
+        assert!(
+            calls[0][1].1.starts_with("[turn-context]"),
+            "user turn must be prefixed with the turn-context preamble, got: {}",
+            calls[0][1].1
+        );
+        assert!(
+            calls[0][1].1.contains("reply_target=chat-ctx"),
+            "user turn preamble carries the reply target"
+        );
+        assert!(
+            calls[0][1].1.ends_with("\n\nhello"),
+            "original user content must be preserved verbatim after the preamble, got: {}",
+            calls[0][1].1
+        );
 
         let histories = runtime_ctx
             .conversation_histories
@@ -11390,8 +11431,10 @@ BTC is currently around $65,000 based on latest tool output."#
             .peek("test-channel_chat-ctx_alice")
             .expect("history should be stored for sender");
         assert_eq!(turns[0].role, "user");
+        // The cached copy stays clean — only the outgoing wire copy is enriched.
         assert_eq!(turns[0].content, "hello");
         assert!(!turns[0].content.contains(MEMORY_CONTEXT_OPEN));
+        assert!(!turns[0].content.contains("[turn-context]"));
     }
 
     #[tokio::test]
@@ -11512,6 +11555,119 @@ BTC is currently around $65,000 based on latest tool output."#
         assert!(!calls[0].iter().skip(1).any(|(role, _)| role == "system"));
     }
 
+    /// #6360 end-to-end pin: two consecutive process_channel_message calls for
+    /// the same sender/room must produce byte-identical system prompts so that
+    /// the upstream LLM's prompt cache can reuse the prefix.
+    #[tokio::test]
+    async fn process_channel_message_telegram_system_prompt_is_byte_stable_across_turns() {
+        let channel_impl = Arc::new(TelegramRecordingChannel::default());
+        let channel: Arc<dyn Channel> = channel_impl.clone();
+        let mut channels_by_name = HashMap::new();
+        channels_by_name.insert(channel.name().to_string(), channel);
+        let provider_impl = Arc::new(HistoryCaptureProvider::default());
+
+        let runtime_ctx = Arc::new(ChannelRuntimeContext {
+            channels_by_name: Arc::new(channels_by_name),
+            provider: provider_impl.clone(),
+            default_provider: Arc::new("test-provider".to_string()),
+            memory: Arc::new(NoopMemory),
+            tools_registry: Arc::new(vec![]),
+            observer: Arc::new(NoopObserver),
+            system_prompt: Arc::new("test-system-prompt".to_string()),
+            model: Arc::new("test-model".to_string()),
+            temperature: 0.0,
+            auto_save_memory: false,
+            max_tool_iterations: 5,
+            min_relevance_score: 0.0,
+            conversation_histories: Arc::new(Mutex::new(lru::LruCache::new(
+                std::num::NonZeroUsize::new(MAX_CONVERSATION_SENDERS).unwrap(),
+            ))),
+            pending_new_sessions: Arc::new(Mutex::new(HashSet::new())),
+            provider_cache: Arc::new(Mutex::new(HashMap::new())),
+            route_overrides: Arc::new(Mutex::new(HashMap::new())),
+            api_key: None,
+            api_url: None,
+            reliability: Arc::new(zeroclaw_config::schema::ReliabilityConfig::default()),
+            provider_runtime_options: zeroclaw_providers::ProviderRuntimeOptions::default(),
+            workspace_dir: Arc::new(std::env::temp_dir()),
+            prompt_config: Arc::new(zeroclaw_config::schema::Config::default()),
+            message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
+            interrupt_on_new_message: InterruptOnNewMessageConfig {
+                telegram: false,
+                slack: false,
+                discord: false,
+                mattermost: false,
+                matrix: false,
+            },
+            multimodal: zeroclaw_config::schema::MultimodalConfig::default(),
+            media_pipeline: zeroclaw_config::schema::MediaPipelineConfig::default(),
+            transcription_config: zeroclaw_config::schema::TranscriptionConfig::default(),
+            hooks: None,
+            non_cli_excluded_tools: Arc::new(Vec::new()),
+            autonomy_level: AutonomyLevel::default(),
+            tool_call_dedup_exempt: Arc::new(Vec::new()),
+            model_routes: Arc::new(Vec::new()),
+            query_classification: zeroclaw_config::schema::QueryClassificationConfig::default(),
+            ack_reactions: true,
+            show_tool_calls: true,
+            session_store: None,
+            approval_manager: Arc::new(ApprovalManager::for_non_interactive(
+                &zeroclaw_config::schema::AutonomyConfig::default(),
+            )),
+            activated_tools: None,
+            cost_tracking: None,
+            pacing: zeroclaw_config::schema::PacingConfig::default(),
+            max_tool_result_chars: 0,
+            context_token_budget: 0,
+            debouncer: Arc::new(zeroclaw_infra::debounce::MessageDebouncer::new(
+                Duration::ZERO,
+            )),
+            receipt_generator: None,
+            show_receipts_in_response: false,
+        });
+
+        let mk_msg = |id: &str, content: &str| zeroclaw_api::channel::ChannelMessage {
+            id: id.to_string(),
+            sender: "alice".to_string(),
+            reply_target: "chat-cache".to_string(),
+            content: content.to_string(),
+            channel: "telegram".to_string(),
+            timestamp: 1,
+            thread_ts: None,
+            interruption_scope_id: None,
+            attachments: vec![],
+        };
+
+        process_channel_message(
+            runtime_ctx.clone(),
+            mk_msg("m1", "first"),
+            CancellationToken::new(),
+        )
+        .await;
+        // Sleep across a second boundary — under the pre-fix behavior this would
+        // mutate `## Current Date & Time` in the system prompt by at least one
+        // second and invalidate the prefix cache.
+        tokio::time::sleep(Duration::from_millis(1100)).await;
+        process_channel_message(
+            runtime_ctx.clone(),
+            mk_msg("m2", "second"),
+            CancellationToken::new(),
+        )
+        .await;
+
+        let calls = provider_impl
+            .calls
+            .lock()
+            .unwrap_or_else(|e| e.into_inner());
+        assert_eq!(calls.len(), 2, "expected exactly two provider invocations");
+        assert_eq!(calls[0][0].0, "system");
+        assert_eq!(calls[1][0].0, "system");
+        assert_eq!(
+            calls[0][0].1, calls[1][0].1,
+            "system prompt must be byte-identical across turns for prompt caching to hit (#6360)"
+        );
+    }
+
     #[test]
     fn channel_delivery_instructions_for_discord_mandates_absolute_paths() {
         let block = channel_delivery_instructions("discord")
@@ -13423,31 +13579,52 @@ This is an example JSON object for profile settings."#;
     }
 
     #[test]
-    fn build_channel_system_prompt_includes_sender_id() {
-        let prompt = build_channel_system_prompt(
-            "You are a helpful assistant.",
-            "mattermost",
-            "channel123:root456",
-            "user_abc123",
-        );
-        assert!(prompt.contains("sender=user_abc123"));
-        assert!(prompt.contains("channel=mattermost"));
-        assert!(prompt.contains("reply_target=channel123:root456"));
+    fn build_channel_system_prompt_is_byte_stable_across_calls() {
+        // #6360: the system prompt must not embed any per-turn state. Two
+        // back-to-back calls with the same inputs must yield identical bytes,
+        // even though wall-clock time has advanced between them.
+        let a = build_channel_system_prompt("Base.", "telegram");
+        std::thread::sleep(std::time::Duration::from_millis(5));
+        let b = build_channel_system_prompt("Base.", "telegram");
+        assert_eq!(a, b, "system prompt must be byte-stable across turns");
     }
 
     #[test]
-    fn build_channel_system_prompt_omits_context_when_reply_target_empty() {
-        let prompt = build_channel_system_prompt("Base prompt.", "mattermost", "", "user_abc123");
-        assert!(!prompt.contains("sender="));
+    fn build_channel_system_prompt_excludes_per_turn_context() {
+        // The Channel context: paragraph and any `sender=`/`reply_target=` token
+        // belong on the user turn, never on the system prompt — otherwise
+        // provider prefix caches re-tokenize every reply (#6360).
+        let prompt = build_channel_system_prompt("You are helpful.", "telegram");
         assert!(!prompt.contains("Channel context:"));
+        assert!(!prompt.contains("sender="));
+        assert!(!prompt.contains("reply_target="));
+    }
+
+    #[test]
+    fn build_channel_system_prompt_excludes_per_second_datetime() {
+        // The previous implementation injected a `## Current Date & Time` section
+        // with second precision into the cached system prompt. That alone was
+        // enough to invalidate every cache hit. Pin that it's no longer added.
+        let prompt = build_channel_system_prompt("Base.", "telegram");
+        assert!(!prompt.contains("## Current Date & Time"));
+    }
+
+    #[test]
+    fn build_channel_turn_context_preamble_carries_dynamic_fields() {
+        let p = build_channel_turn_context_preamble("telegram", "chat:42", "user_xyz");
+        assert!(p.contains("channel=telegram"));
+        assert!(p.contains("reply_target=chat:42"));
+        assert!(p.contains("sender=user_xyz"));
+        // The preamble starts with the [turn-context] marker so the duplicate-guard
+        // at the call site can detect already-prefixed user turns.
+        assert!(p.starts_with("[turn-context]"));
     }
 
     #[test]
-    fn build_channel_system_prompt_sender_distinguishes_users() {
-        let prompt_a = build_channel_system_prompt("Base.", "mattermost", "ch:thread", "user_aaa");
-        let prompt_b = build_channel_system_prompt("Base.", "mattermost", "ch:thread", "user_bbb");
-        assert!(prompt_a.contains("sender=user_aaa"));
-        assert!(prompt_b.contains("sender=user_bbb"));
-        assert_ne!(prompt_a, prompt_b);
+    fn build_channel_turn_context_preamble_empty_when_reply_target_empty() {
+        // No reply target → CLI-style invocation → no preamble at all, mirroring
+        // CLI behavior which has always worked with prompt caching.
+        let p = build_channel_turn_context_preamble("telegram", "", "user_xyz");
+        assert!(p.is_empty());
     }
 }

From 1e0ae6f87bef3d448d2efd20c996b474c86f28c6 Mon Sep 17 00:00:00 2001
From: Yijun Yu <yijun.yu@open.ac.uk>
Date: Wed, 13 May 2026 14:15:25 +0000
Subject: [PATCH 2/2] fix(channels/orchestrator): collapse nested if-let to
 satisfy clippy::collapsible_if (#6360)

CI Lint failed with clippy::collapsible_if on the turn-context preamble
injection. Fold the !turn_preamble.is_empty() guard into the .filter()
predicate of the existing find chain so a single if let suffices.

Behavior unchanged; all 992 zeroclaw-channels tests still pass.
---
 crates/zeroclaw-channels/src/orchestrator/mod.rs | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/crates/zeroclaw-channels/src/orchestrator/mod.rs b/crates/zeroclaw-channels/src/orchestrator/mod.rs
index de5f4d890a7..359efc77a33 100644
--- a/crates/zeroclaw-channels/src/orchestrator/mod.rs
+++ b/crates/zeroclaw-channels/src/orchestrator/mod.rs
@@ -3039,15 +3039,13 @@ async fn process_channel_message(
     // untouched: only the outgoing LLM `history` carries the preamble.
     let turn_preamble =
         build_channel_turn_context_preamble(&msg.channel, &msg.reply_target, &msg.sender);
-    if !turn_preamble.is_empty() {
-        if let Some(last) = history
-            .iter_mut()
-            .rev()
-            .find(|m| m.role == "user")
-            .filter(|m| !m.content.starts_with("[turn-context]"))
-        {
-            last.content = format!("{turn_preamble}\n\n{}", last.content);
-        }
+    if let Some(last) = history
+        .iter_mut()
+        .rev()
+        .find(|m| m.role == "user")
+        .filter(|m| !turn_preamble.is_empty() && !m.content.starts_with("[turn-context]"))
+    {
+        last.content = format!("{turn_preamble}\n\n{}", last.content);
     }
 
     // ── Proactive context compression ────────────────────────────