diff --git a/apps/memos-local-plugin/adapters/openclaw/bridge.ts b/apps/memos-local-plugin/adapters/openclaw/bridge.ts
index 9c527f30c..f8932d86d 100644
--- a/apps/memos-local-plugin/adapters/openclaw/bridge.ts
+++ b/apps/memos-local-plugin/adapters/openclaw/bridge.ts
@@ -189,10 +189,15 @@ export function flattenMessages(input: unknown[] | undefined): FlatMessage[] {
     }
     for (const tc of inlineToolCalls) out.push(tc);
-    // OpenAI legacy: assistant has a top-level `tool_calls` array
-    // (separate from content). Fold these in after pi-ai inline
-    // tool calls so order is preserved when both shapes coexist.
-    if (Array.isArray(m.tool_calls)) {
+    // OpenAI-legacy fallback only: when the message has NO pi-ai
+    // inline tool calls but does have a top-level `tool_calls` array
+    // (pure OpenAI Function-Calling shape). When both shapes coexist
+    // (as OpenClaw's pi-ai bundled OpenAI adapter does), pi-ai
+    // already populated `content[].toolCall`, so re-reading the
+    // top-level field would emit each call twice — which in turn
+    // causes `extractTurn`'s `pendingCalls.set(key, …)` to clobber
+    // the first stub's `thinkingBefore` with an empty second stub.
+    if (inlineToolCalls.length === 0 && Array.isArray(m.tool_calls)) {
       for (const tc of m.tool_calls as Array<Record<string, unknown>>) {
         const fn = tc.function as Record<string, unknown> | undefined;
         if (!fn) continue;
@@ -476,35 +481,51 @@ export function extractTurn(messages: FlatMessage[], now: number): CapturedTurn
   const userText = messages[lastUserIdx].content.trim();
   const tail = messages.slice(lastUserIdx + 1);
-  const assistantParts: string[] = [];
-  const thinkingParts: string[] = [];
   const pendingCalls = new Map<string, Partial<ToolCallDTO> & { _id?: string }>();
   const toolCalls: ToolCallDTO[] = [];
+  // Two separate buffers accumulate content not yet assigned to a tool.
+  //
+  // `pendingThinking`: Claude extended-thinking blocks (`ThinkingContent`)
+  // `pendingAssistant`: regular model text (`TextContent`)
+  //
+  // When a `tool_call` arrives, BOTH buffers are flushed together into
+  // that tool's `thinkingBefore` — this is the reasoning (structured OR
+  // natural language) the model did before deciding to invoke the tool.
+  //
+  // After all messages are processed, whatever remains in the buffers
+  // forms the final output: `pendingAssistant` → `agentText` (the
+  // reply) and `pendingThinking` → `agentThinking` (model reasoning
+  // shown in a dedicated bubble for non-tool turns).
+  let pendingThinking: string[] = [];
+  let pendingAssistant: string[] = [];
+
   for (const m of tail) {
     if (m.role === "assistant") {
-      if (m.content) assistantParts.push(m.content);
+      if (m.content) pendingAssistant.push(m.content);
       continue;
     }
     if (m.role === "thinking") {
-      if (m.content) thinkingParts.push(m.content);
+      if (m.content) pendingThinking.push(m.content);
       continue;
     }
     if (m.role === "tool_call" && m.toolName) {
-      // Assistant decided to call a tool. Stash until the matching
-      // tool_result lands so we can stitch the full ToolCallDTO.
+      const parts = [...pendingThinking, ...pendingAssistant];
+      const thinkingBefore = parts.join("\n\n").trim() || undefined;
+      pendingThinking = [];
+      pendingAssistant = [];
+
       const key = m.toolCallId ?? m.toolName;
       pendingCalls.set(key, {
         _id: m.toolCallId,
         name: m.toolName,
         input: m.toolInput,
         startedAt: m.ts ?? now,
+        thinkingBefore,
       });
       continue;
     }
     if (m.role === "tool_result") {
-      // Pair by id (preferred — works even when two parallel calls hit
-      // the same tool name) or fall back to toolName.
      const key = m.toolCallId ?? m.toolName ??
""; const stub = pendingCalls.get(key); const errorCode = stub @@ -517,16 +538,13 @@ export function extractTurn(messages: FlatMessage[], now: number): CapturedTurn errorCode, startedAt: stub?.startedAt ?? (m.ts ?? now), endedAt: m.ts ?? now, + thinkingBefore: stub?.thinkingBefore, }); if (key) pendingCalls.delete(key); continue; } - // system / unknown: ignore for the purpose of extractTurn. } - // Any tool call that never received a paired tool_result still lands - // in the trace (with `output: undefined`) so the viewer can show - // "tool was invoked but produced no result". for (const stub of pendingCalls.values()) { if (!stub.name) continue; toolCalls.push({ @@ -535,14 +553,15 @@ export function extractTurn(messages: FlatMessage[], now: number): CapturedTurn output: undefined, startedAt: stub.startedAt ?? now, endedAt: now, + thinkingBefore: stub.thinkingBefore, }); } - const agentThinking = thinkingParts.join("\n\n").trim(); + const agentThinking = pendingThinking.join("\n\n").trim(); return { userText, - agentText: assistantParts.join("\n\n").trim(), - agentThinking: agentThinking ? agentThinking : undefined, + agentText: pendingAssistant.join("\n\n").trim(), + agentThinking: agentThinking || undefined, toolCalls, }; } @@ -796,6 +815,7 @@ export function createOpenClawBridge(opts: BridgeOptions): BridgeHandle { hasError: !!event.error, }); + try { // Legacy adapter parity: even when `success === false` we still // enqueue the user's message (and whatever the assistant managed diff --git a/apps/memos-local-plugin/agent-contract/dto.ts b/apps/memos-local-plugin/agent-contract/dto.ts index a76beead3..7b2353a71 100644 --- a/apps/memos-local-plugin/agent-contract/dto.ts +++ b/apps/memos-local-plugin/agent-contract/dto.ts @@ -40,6 +40,16 @@ export interface ToolCallDTO { errorCode?: string; startedAt: EpochMs; endedAt: EpochMs; + /** + * LLM-native thinking emitted *before* the model decided to invoke this + * tool — e.g. "I got an error from tool_1, let me try a different + * approach". Populated by the adapter when the model interleaves + * thinking blocks between tool calls. `undefined` for legacy data or + * when no thinking preceded this particular call. + * + * Stored inside `tool_calls_json` (no schema migration needed). + */ + thinkingBefore?: string; } export interface TurnInputDTO { diff --git a/apps/memos-local-plugin/core/capture/normalizer.ts b/apps/memos-local-plugin/core/capture/normalizer.ts index bb4bebdb8..2e773f673 100644 --- a/apps/memos-local-plugin/core/capture/normalizer.ts +++ b/apps/memos-local-plugin/core/capture/normalizer.ts @@ -36,15 +36,25 @@ export function normalizeSteps( continue; } - const last = out[out.length - 1]; - if ( - last && - last.agentText === agentText && - last.userText === userText && - sameToolCalls(last.toolCalls, toolCalls) - ) { - log.debug("normalize.skip_duplicate", { key: step.key }); - continue; + // Sub-steps produced by the per-tool-call extractor (V7 §0.1) have + // intentionally-identical userText="" / agentText="" and carry only + // a single tool call each — but two different tools can still share + // a short input fingerprint, which the generic dedup path below + // would incorrectly collapse. Skip dedup for sub-steps; the key + // uniqueness guarantees they can't be genuine duplicates. 
+      const isSubStep = (step.meta as Record<string, unknown> | undefined)?.subStep === true;
+
+      if (!isSubStep) {
+        const last = out[out.length - 1];
+        if (
+          last &&
+          last.agentText === agentText &&
+          last.userText === userText &&
+          sameToolCalls(last.toolCalls, toolCalls)
+        ) {
+          log.debug("normalize.skip_duplicate", { key: step.key });
+          continue;
+        }
+      }
 
     out.push({
diff --git a/apps/memos-local-plugin/core/capture/step-extractor.ts b/apps/memos-local-plugin/core/capture/step-extractor.ts
index 89bca5aa2..2e566930f 100644
--- a/apps/memos-local-plugin/core/capture/step-extractor.ts
+++ b/apps/memos-local-plugin/core/capture/step-extractor.ts
@@ -175,7 +175,11 @@ function segmentToSteps(
     out.push({
       key: `${episode.id}:${ts}:tool:${i}`,
       ts,
-      userText,
+      // Only the first sub-step carries the user query; subsequent
+      // sub-steps leave `userText` empty so the viewer's flattenChat
+      // doesn't render the same user bubble N times. The turn's
+      // provenance (episodeId) still links them together.
+      userText: i === 0 ? userText : "",
       agentText: "",
       agentThinking: i === 0 ? fullThinking : null,
       toolCalls: [tc],
@@ -232,6 +236,7 @@ function toolCallFromTurn(turn: EpisodeTurn): ToolCallDTO | null {
   const endedAt = typeof meta.endedAt === "number" ? meta.endedAt : turn.ts;
   const input = meta.input ?? meta.args ?? undefined;
   const errorCode = typeof meta.errorCode === "string" ? meta.errorCode : undefined;
+  const thinkingBefore = typeof meta.thinkingBefore === "string" ? meta.thinkingBefore : undefined;
   return {
     name,
     input,
@@ -239,6 +244,7 @@
     errorCode,
     startedAt,
     endedAt,
+    thinkingBefore,
   };
 }
@@ -264,7 +270,8 @@ function coerceToolCall(raw: unknown): ToolCallDTO | null {
   const startedAt = typeof r.startedAt === "number" ? r.startedAt : Date.now();
   const endedAt = typeof r.endedAt === "number" ? r.endedAt : startedAt;
-  return { name, input, output, errorCode, startedAt, endedAt };
+  const thinkingBefore = typeof r.thinkingBefore === "string" ? r.thinkingBefore : undefined;
+  return { name, input, output, errorCode, startedAt, endedAt, thinkingBefore };
 }
 
 function depthFromMeta(meta: Record<string, unknown>): number {
diff --git a/apps/memos-local-plugin/core/config/defaults.ts b/apps/memos-local-plugin/core/config/defaults.ts
index 4e24e7436..971f86a66 100644
--- a/apps/memos-local-plugin/core/config/defaults.ts
+++ b/apps/memos-local-plugin/core/config/defaults.ts
@@ -157,21 +157,27 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
     episodeGoalMinSim: 0.45,
     tagFilter: "auto",
     keywordTopK: 20,
-    relativeThresholdFloor: 0.4,
+    // Lowered from 0.4 → 0.2 with the 2026 ranker overhaul: the new
+    // base relevance already uses channel rank as a first-class
+    // signal, so the old 0.4 floor was over-pruning keyword hits
+    // with modest V·decay.
+    relativeThresholdFloor: 0.2,
     skillEtaBlend: 0.15,
     smartSeed: true,
+    smartSeedRatio: 0.7,
+    multiChannelBypass: true,
     skillInjectionMode: "summary",
     skillSummaryChars: 200,
     llmFilterEnabled: true,
     // Tighter than the legacy default (5) so the LLM filter has a
-    // budget that forces "drop, don't pad". Combined with the
-    // few-shot prompt this dramatically improves precision.
+    // small budget; combined with the richer prompt (v3) this keeps
+    // packets concise without over-dropping.
     llmFilterMaxKeep: 4,
-    // Lowered from 3 → 2: small packets (e.g. just a Tier-1 skill +
-    // a Tier-2 trace) used to skip the LLM filter entirely and ship
-    // both items even when one was tangential.
Now anything > 1 - // candidate gets a precision pass. - llmFilterMinCandidates: 2, + // Lowered from 2 → 1: even a single candidate gets a precision + // pass. Mirrors `memos-local-openclaw`'s tool-level filter and + // prevents a lone off-topic memory from sneaking through unchecked. + llmFilterMinCandidates: 1, + llmFilterCandidateBodyChars: 500, }, }, hub: { diff --git a/apps/memos-local-plugin/core/config/schema.ts b/apps/memos-local-plugin/core/config/schema.ts index 0736ceff9..11280b23c 100644 --- a/apps/memos-local-plugin/core/config/schema.ts +++ b/apps/memos-local-plugin/core/config/schema.ts @@ -322,8 +322,14 @@ const AlgorithmSchema = Type.Object({ * `minTraceSim` — when the best hit is weak, we keep more (lower * absolute floor); when there's a clear winner, we drop noise. * Set to 0 to disable the relative cutoff entirely. + * + * Default lowered to 0.2 with the 2026 ranker overhaul: the new + * base formula already weighs channel-rank evidence (so a raw + * FTS-only hit lands in a comparable range to a cosine-0.8 hit), + * and the old 0.4 floor was over-pruning keyword matches with + * modest V·decay. */ - relativeThresholdFloor: NumberInRange(0.4, 0, 1), + relativeThresholdFloor: NumberInRange(0.2, 0, 1), /** * Tier-1 skill relevance blend weight for `η` (skill reliability). * Old default `0.4` made well-trodden skills outrank obviously-more- @@ -333,12 +339,28 @@ const AlgorithmSchema = Type.Object({ skillEtaBlend: NumberInRange(0.15, 0, 1), /** * MMR Phase-A seed-by-tier policy. When `true`, only seed a tier - * if its best candidate's relevance ≥ `relativeThresholdFloor * - * topRelevance`. This prevents the ranker from force-injecting a - * stale Tier-1 skill / Tier-3 world-model just because it cleared - * the absolute floors. + * if its best candidate's relevance ≥ `poolTopRelevance * + * smartSeedRatio` (see below). This prevents the ranker from + * force-injecting a stale Tier-1 skill / Tier-3 world-model just + * because it cleared the absolute floors. */ smartSeed: Bool(true), + /** + * Seed cutoff for smart-seed MMR — tier is seeded iff its best + * candidate's relevance ≥ `poolTopRelevance * smartSeedRatio`. + * Independent of `relativeThresholdFloor` so the seed gate can be + * stricter than the generic drop floor (0.7 is "within 30% of the + * best available candidate anywhere in the pool"). + */ + smartSeedRatio: NumberInRange(0.7, 0, 1), + /** + * When a candidate is surfaced by ≥ 2 retrieval channels (e.g. + * both vec and fts hit the same trace), bypass the relative + * threshold. Multi-channel agreement is a strong signal, and + * without this keyword-only matches with modest V·decay often + * get dropped by a noisy `topRelevance`. + */ + multiChannelBypass: Bool(true), /** * How Tier-1 skills are surfaced in the injected prompt: * - "summary" (default): inject only `name + η + 1-line summary + @@ -368,10 +390,21 @@ const AlgorithmSchema = Type.Object({ /** Keep at most this many candidates after the LLM filter. */ llmFilterMaxKeep: NumberInRange(5, 1, 30), /** - * Skip the filter when the ranked list already has ≤ this many - * items — no point paying an LLM round-trip to prune 3 candidates. + * Skip the filter when the ranked list has fewer than this many + * items. Default 1 — even a single candidate gets a precision + * pass, matching `memos-local-openclaw`'s tool-level filter and + * preventing a lone off-topic memory from sneaking through + * unchecked. 
+ */ + llmFilterMinCandidates: NumberInRange(1, 1, 50), + /** + * Body-text budget per candidate when building the LLM filter + * prompt. Higher = more context for precise judgement, at the + * cost of more tokens per round-trip. Default 500 (openclaw uses + * 300 without tags/channels; we include richer metadata, so a + * slightly larger window pays for itself). */ - llmFilterMinCandidates: NumberInRange(4, 1, 50), + llmFilterCandidateBodyChars: NumberInRange(500, 120, 2000), }, { default: {} }), }, { default: {} }); diff --git a/apps/memos-local-plugin/core/llm/prompts/retrieval-filter.ts b/apps/memos-local-plugin/core/llm/prompts/retrieval-filter.ts index a73c2fbc9..3db213888 100644 --- a/apps/memos-local-plugin/core/llm/prompts/retrieval-filter.ts +++ b/apps/memos-local-plugin/core/llm/prompts/retrieval-filter.ts @@ -3,89 +3,128 @@ import type { PromptDef } from "./index.js"; /** * Relevance-filter prompt for retrieved candidates. * - * Mirrors the legacy `memos-local-openclaw` `unifiedLLMFilter`, but with - * three deliberate changes baked into the prompt itself: + * Mirrors the legacy `memos-local-openclaw` `unifiedLLMFilter`, but + * tuned for the plugin's tier-aware candidate labels (SKILL / TRACE / + * EPISODE / WORLD-MODEL). Key design choices: * - * 1. **Few-shot examples.** Two cases — one ACCEPT, one REJECT — pin - * down what "tangentially-related but should drop" means. Without - * this LLMs often pad to the maximum allowed selection. - * 2. **"Drop, don't pad" instruction.** Explicit: returning fewer - * items (or `[]`) is preferred over including marginal hits. - * 3. **Hard upper bound on output.** We say `≤ 4 items` (caller still - * enforces via `llmFilterMaxKeep`). + * 1. **Four few-shot examples** — two KEEP, two DROP — so the model + * sees both "useful fact that should survive" and "surface-similar + * but wrong sub-problem". Earlier two-example versions were too + * conservative and dropped genuinely relevant traces. + * 2. **Informational tone, not strict gatekeeping.** The filter is + * the *precision* pass, not a second retrieval — we lean towards + * keeping anything that could plausibly help, because the ranker + * already pruned the obvious noise. + * 3. **`sufficient` self-report.** The model reports whether the + * kept set is enough to answer the query; callers surface this so + * the agent can decide whether to widen recall. * - * Bumping `version` here also rotates the prompt-fingerprint id used by - * `core/llm` audit trails. + * Bumping `version` rotates the prompt-fingerprint id used by + * `core/llm` audit trails, so A/B data from v2 and v3 stays separable. */ export const RETRIEVAL_FILTER_PROMPT: PromptDef = { id: "retrieval.filter", - version: 2, + version: 3, description: - "Pick only the candidates that are genuinely useful for the user query before injection.", - system: `You are a strict relevance gatekeeper for an AI agent's memory retrieval. - -Given: -- QUERY: the user's current request -- CANDIDATES: a numbered list of items the retriever surfaced, each - labelled with a kind (SKILL / TRACE / EPISODE / WORLD-MODEL). - -Your job: pick ONLY the candidates that are genuinely useful for answering -THIS query. Vector retrieval over-matches on surface similarity — most of -your candidates will be tangentially related and should be DROPPED. - -Decision rules (apply in order): -- KEEP a SKILL only if its name + description directly addresses the - exact sub-problem the user is asking about, NOT just the same domain. 
-- KEEP a WORLD-MODEL only if its title's domain matches the query's - domain AND the body provides a structural fact the agent would - otherwise need to re-discover. -- KEEP a TRACE / EPISODE only if its content contains specific evidence - (a fact, a command, a snippet, a name) the agent could cite or reuse - verbatim. Vague topical similarity is NOT enough. -- DROP items in the same broad area but on a different sub-problem - (e.g. query asks "write a pytest test", candidate is "write a Python - JWT validator" — same language, different problem → DROP). -- DROP "scaffolding" memories (greetings, throwaway acks, capability - questions) even when topically related. - -PREFERENCE: drop, don't pad. Returning 1 truly useful item is better -than returning 4 marginal ones. Returning [] is the right answer when -nothing is genuinely relevant. - -HARD LIMITS: keep at most 4 candidates total. - -──── Example 1 ──── + "Pick the retrieved candidates that are plausibly useful for the user query, and report whether that set is sufficient.", + system: `You are the relevance check for an AI agent's memory retrieval. A +mechanical retriever has already surfaced candidates by vector / keyword +hit. Your job is to keep the ones that a helpful assistant would want to +read before answering, and drop the ones that merely share surface +keywords. + +Input: +- QUERY: the user's current request (or a tool-driven retrieval query). +- CANDIDATES: a numbered list. Each item is labelled with a kind + (SKILL / TRACE / EPISODE / WORLD-MODEL) and metadata such as + \`time\`, \`tags\`, \`via\` (which channels hit — vec / fts / pattern), + and \`score\` (the ranker's relevance). + +Decision guidance: +- KEEP a TRACE / EPISODE when it carries a concrete fact the agent + could use: a name, number, file path, command, preference, or a + specific past exchange that answers the query. Surface-similar chat + without such facts should be dropped. +- KEEP a SKILL when its name / description plausibly addresses the + user's sub-problem. The agent decides later whether to call + \`skill_get\` for the full procedure — err on the side of keeping + one candidate skill. +- KEEP a WORLD-MODEL when its topic matches the domain of the query + and the body contains structural information the agent would + otherwise have to re-derive. +- DROP items in the same broad area but a different sub-problem + (e.g. query asks "write a pytest test", candidate is "write a + Python JWT validator" — same language, different problem). +- DROP scaffolding chatter (greetings, capability questions, acks) + unless the query is explicitly about the chat history. +- Prefer keeping an item when uncertain — you are the precision pass, + not a second retriever. + +After choosing, self-report whether the kept set is enough: +- \`sufficient: true\` when the kept items plausibly answer the QUERY + as-is. +- \`sufficient: false\` when the kept items are only a starting point + and the agent should broaden recall (e.g. run \`memory_search\` with + a different query). + +──── Example 1 (React dark mode, KEEP 2) ──── QUERY: 把这个 React 组件改成支持暗黑模式 CANDIDATES: -1. [SKILL] React Tailwind dark-mode toggle — adds class="dark" toggling and useTheme hook for any React project -2. [TRACE] [user] 我喜欢的运动是游泳 [assistant] 记住了 -3. [SKILL] Python JWT validator — verifies HS256 / RS256 tokens via PyJWT -4. [TRACE] [user] 上次我们用 React Context 写了 ThemeProvider,文件在 src/theme/ [assistant] 记得,要继续用同样的模式吗? +1. 
[SKILL time=2026-03-01 10:00 via=vec+fts score=0.84] React Tailwind dark-mode toggle · η=0.82 · active
+   adds class="dark" toggling and useTheme hook for any React project
+2. [TRACE time=2026-02-14 09:30 tags=[chit-chat] via=vec score=0.41] [user] 我喜欢的运动是游泳 [assistant] 记住了
+3. [SKILL time=2026-01-11 08:10 via=vec score=0.51] Python JWT validator · η=0.75 · active
+   verifies HS256 / RS256 tokens via PyJWT
+4. [TRACE time=2026-03-04 14:20 tags=[react,theme] via=vec+pattern score=0.79] [user] 上次我们用 React Context 写了 ThemeProvider,文件在 src/theme/ [assistant] 记得,要继续用同样的模式吗?
+
+Correct output: {"selected": [1, 4], "sufficient": true}
+
+──── Example 2 (phone number lookup, KEEP 1 via FTS only) ────
+QUERY: 还记得我的手机号吗?
+
+CANDIDATES:
+1. [TRACE time=2026-02-20 21:05 tags=[profile] via=fts score=0.18] [user] 我的手机号是 13800001234 [assistant] 已记住
+2. [TRACE time=2026-02-10 09:30 tags=[chit-chat] via=vec score=0.35] [user] 今天天气怎么样 [assistant] 杭州小雨
+3. [SKILL time=2025-12-01 11:00 via=vec score=0.22] phone-number-validator · η=0.88
 
-Correct output: {"selected": [1, 4]}
-Reasoning: 1 directly addresses dark-mode in React; 4 contains the
-exact file path the agent will need. 2 is unrelated. 3 is wrong language
-+ wrong sub-problem.
+Correct output: {"selected": [1], "sufficient": true}
+Reasoning: candidate 1 is only surfaced by FTS with a modest score, but
+it carries the exact fact the user is asking about. Keep it.
 
-──── Example 2 ────
+──── Example 3 (weather lookup, KEEP 1 fact) ────
 QUERY: 帮我看下今天天气
 
 CANDIDATES:
-1. [TRACE] [user] 我住在杭州 [assistant] 已记住
-2. [SKILL] Docker container syslib install fix
-3. [WORLD-MODEL] React project layout — components in src/components/
+1. [TRACE time=2026-01-04 18:05 tags=[profile] via=fts score=0.22] [user] 我住在杭州 [assistant] 已记住
+2. [SKILL time=2025-10-02 09:10 via=vec score=0.31] Docker container syslib install fix · η=0.77
+3. [WORLD-MODEL time=2025-09-11 16:00 via=vec score=0.29] React project layout — components in src/components/
+
+Correct output: {"selected": [1], "sufficient": false}
+Reasoning: only 1 carries a fact the agent needs (location). The agent
+still needs a live weather lookup tool, so the kept set alone is not
+enough.
+
+──── Example 4 (quicksort, DROP everything) ────
+QUERY: 写一个快速排序的 Python 实现
+
+CANDIDATES:
+1. [TRACE time=2026-03-02 11:00 tags=[chit-chat] via=vec score=0.40] [user] 你好 [assistant] 你好!今天想做什么?
+2. [TRACE time=2026-01-19 22:00 tags=[japanese] via=fts score=0.21] [user] 「クイック」は何の意味? [assistant] fast / quick
+3. [SKILL time=2025-08-01 09:00 via=vec score=0.33] Python JWT validator · η=0.70
 
-Correct output: {"selected": [1]}
-Reasoning: only 1 carries a fact the agent needs (location for weather
-lookup). 2 and 3 are completely unrelated.
+Correct output: {"selected": [], "sufficient": false}
+Reasoning: no candidate carries information the agent needs to produce
+the answer. The chit-chat and translation traces share only surface
+keywords. Drop all and let the agent answer from its own knowledge.
 
 ──── Output format ────
 Return JSON only, no prose:
 {
-  "selected": [1, 3]
+  "selected": [1, 3],
+  "sufficient": true
 }
-where each number is the 1-based index in the CANDIDATES list.
+where each number is the 1-based index into CANDIDATES.
-If nothing is truly relevant, return {"selected": []}.`,
+If nothing is truly relevant, return {"selected": [], "sufficient": false}.`,
 };
diff --git a/apps/memos-local-plugin/core/pipeline/deps.ts b/apps/memos-local-plugin/core/pipeline/deps.ts
index 5322bf0d4..28fa9f79c 100644
--- a/apps/memos-local-plugin/core/pipeline/deps.ts
+++ b/apps/memos-local-plugin/core/pipeline/deps.ts
@@ -147,12 +147,15 @@ export function extractAlgorithmConfig(
       relativeThresholdFloor: alg.retrieval.relativeThresholdFloor,
       skillEtaBlend: alg.retrieval.skillEtaBlend,
       smartSeed: alg.retrieval.smartSeed,
+      smartSeedRatio: alg.retrieval.smartSeedRatio,
+      multiChannelBypass: alg.retrieval.multiChannelBypass,
       skillInjectionMode: alg.retrieval.skillInjectionMode,
       skillSummaryChars: alg.retrieval.skillSummaryChars,
       decayHalfLifeDays: alg.reward.decayHalfLifeDays,
       llmFilterEnabled: alg.retrieval.llmFilterEnabled,
       llmFilterMaxKeep: alg.retrieval.llmFilterMaxKeep,
       llmFilterMinCandidates: alg.retrieval.llmFilterMinCandidates,
+      llmFilterCandidateBodyChars: alg.retrieval.llmFilterCandidateBodyChars,
     },
     session: {
       followUpMode: alg.session.followUpMode,
diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts
index bcb599898..2d26d5670 100644
--- a/apps/memos-local-plugin/core/pipeline/memory-core.ts
+++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts
@@ -824,6 +824,22 @@ export function createMemoryCore(
       snippet: string;
     }> = [];
     let filtered: typeof candidates = [];
+    let retrievalStats: {
+      raw?: number;
+      ranked?: number;
+      droppedByThreshold?: number;
+      thresholdFloor?: number;
+      topRelevance?: number;
+      llmFilter?: {
+        outcome?: string;
+        kept?: number;
+        dropped?: number;
+        sufficient?: boolean | null;
+      };
+      channelHits?: Record<string, number>;
+      queryTokens?: number;
+      queryTags?: string[];
+    } | undefined;
     try {
       const result = await turnStartRetrieve(deps, {
         reason: "turn_start",
@@ -857,6 +873,29 @@
         snippet: h.snippet,
       }));
       filtered = candidates; // post-filter is what we return → same list.
+
+      // Three-stage observability — surfaced verbatim so the viewer's
+      // Logs page can render "raw → threshold → ranked → LLM filter"
+      // funnels. All fields are optional on the producer side so older
+      // consumers keep working.
+      const s = result.stats;
+      retrievalStats = {
+        raw: s.rawCandidateCount,
+        ranked: s.rankedCount,
+        droppedByThreshold: s.droppedByThresholdCount,
+        thresholdFloor: s.thresholdFloor,
+        topRelevance: s.topRelevance,
+        llmFilter: {
+          outcome: s.llmFilterOutcome,
+          kept: s.llmFilterKept,
+          dropped: s.llmFilterDropped,
+          sufficient: s.llmFilterSufficient ??
null,
+        },
+        channelHits: s.channelHits as Record<string, number> | undefined,
+        queryTokens: s.queryTokens,
+        queryTags: s.queryTags,
+      };
+
       return {
         query,
         hits,
@@ -883,6 +922,7 @@
             candidates,
             hubCandidates: [] as unknown[],
             filtered,
+            stats: retrievalStats,
           }
         : { error: "retrieval_failed" },
       durationMs: Date.now() - startedAt,
diff --git a/apps/memos-local-plugin/core/pipeline/orchestrator.ts b/apps/memos-local-plugin/core/pipeline/orchestrator.ts
index 672f1cc9b..8c68571f9 100644
--- a/apps/memos-local-plugin/core/pipeline/orchestrator.ts
+++ b/apps/memos-local-plugin/core/pipeline/orchestrator.ts
@@ -687,6 +687,11 @@ export function createPipeline(deps: PipelineDeps): PipelineHandle {
           errorCode: tc.errorCode,
           startedAt: tc.startedAt,
           endedAt: tc.endedAt,
+          // V7 §0.1: preserve the model's "Thought for X" narration that
+          // precedes this call so `step-extractor` can re-attach it to
+          // the captured ToolCallDTO. Without this, chained tool calls
+          // lose the natural-language bridge between steps.
+          thinkingBefore: tc.thinkingBefore,
         },
       });
     }
diff --git a/apps/memos-local-plugin/core/retrieval/llm-filter.ts b/apps/memos-local-plugin/core/retrieval/llm-filter.ts
index a42771908..f103c240d 100644
--- a/apps/memos-local-plugin/core/retrieval/llm-filter.ts
+++ b/apps/memos-local-plugin/core/retrieval/llm-filter.ts
@@ -2,29 +2,31 @@
 * LLM-based relevance filter — post-processing step after `rank()`.
 *
 * Motivation (ported from legacy `memos-local-openclaw::unifiedLLMFilter`):
- * cosine retrieval is greedy — any Python prompt pulls back every
+ * mechanical retrieval is greedy — any Python prompt pulls back every
 * Python-tagged trace even when the sub-problem doesn't match. A small
 * LLM call ("given this query, pick the truly relevant candidates")
 * removes most of the noise with a single round-trip.
 *
 * Design constraints:
- *  - One LLM call per turn, bounded output (just the index list).
+ *  - One LLM call per turn, bounded output (index list + `sufficient`).
 *  - Totally opt-in: if the LLM is null, or the config flag is off,
- *    or the candidate list is small enough, we pass through the
- *    ranked list unchanged.
+ *    or the candidate list is empty, we pass through unchanged.
- *  - On ANY failure (network, schema, timeout) we fall back to the
- *    ranked list. A missing filter must never crash retrieval.
+ *  - On ANY failure (network, schema, timeout) we fall back to a
+ *    mechanical cutoff. A broken filter must never crash retrieval.
 *  - Returns both kept and dropped candidates so callers can log
 *    exactly what the LLM pruned (feeds the Logs page).
+ *  - Rich candidate labels — we include role/time/tags/channels/score
+ *    because openclaw's filter runs on those fields and loses precision
+ *    without them.
 */
import type { LlmClient } from "../llm/index.js";
import type { Logger } from "../logger/types.js";
import { RETRIEVAL_FILTER_PROMPT } from "../llm/prompts/index.js";
import type { RankedCandidate } from "./ranker.js";
-import type { RetrievalConfig } from "./types.js";
+import type { RetrievalConfig, TierCandidate } from "./types.js";
 
-const MAX_CANDIDATE_CONTENT_CHARS = 240;
+const DEFAULT_CANDIDATE_BODY_CHARS = 500;
 
 export interface FilterInput {
   query: string;
@@ -36,7 +38,10 @@ export interface FilterDeps {
   log: Logger;
   config: Pick<
     RetrievalConfig,
-    "llmFilterEnabled" | "llmFilterMaxKeep" | "llmFilterMinCandidates"
+    | "llmFilterEnabled"
+    | "llmFilterMaxKeep"
+    | "llmFilterMinCandidates"
+    | "llmFilterCandidateBodyChars"
   >;
 }
@@ -59,6 +64,13 @@ export interface FilterResult {
   //   `relativeThresholdFloor · topRelevance`) instead of dumping the
   //   entire ranked list into the prompt.
   | "llm_failed_safe_cutoff";
+  /**
+   * The LLM's self-report on whether the *kept* candidates are enough
+   * to answer `query`, or whether the caller should widen recall /
+   * run a follow-up `memory_search`. `null` when the filter didn't
+   * run (disabled / passthrough / failure paths).
+   */
+  sufficient: boolean | null;
 }
 
 export async function llmFilterCandidates(
@@ -69,12 +81,17 @@
   if (!deps.config.llmFilterEnabled) {
     return passthrough(ranked, "disabled");
   }
+  if (ranked.length === 0) {
+    return passthrough(ranked, "below_threshold");
+  }
-  // `llmFilterMinCandidates` is "minimum candidates required to RUN the
-  // filter". `<` so a packet with exactly the threshold count still gets
-  // a precision pass (the most useful case — small but noisy packets).
+  // `llmFilterMinCandidates` is the *minimum* list length required to
+  // RUN the filter. Default is 1, meaning even a single candidate gets
+  // a precision pass — openclaw behaviour, and matches the user
+  // reports that "a single off-topic memory sneaks through when the
+  // filter skips the check".
   if (ranked.length < deps.config.llmFilterMinCandidates) {
     return passthrough(ranked, "below_threshold");
   }
   if (!query || !query.trim()) {
     return passthrough(ranked, "empty_query");
   }
   if (!deps.llm) {
     return passthrough(ranked, "no_llm");
   }
 
+  const bodyChars =
+    deps.config.llmFilterCandidateBodyChars ?? DEFAULT_CANDIDATE_BODY_CHARS;
   const items = ranked.map((r, i) => ({
     index: i,
-    label: describeCandidate(r),
+    label: describeCandidate(r, bodyChars),
   }));
-  const list = items
-    .map((x) => `${x.index + 1}. ${x.label}`)
-    .join("\n");
+  const list = items.map((x) => `${x.index + 1}. ${x.label}`).join("\n");
 
   try {
-    const rsp = await deps.llm.completeJson<{ selected?: unknown }>(
+    const rsp = await deps.llm.completeJson<{
+      selected?: unknown;
+      sufficient?: unknown;
+    }>(
       [
         { role: "system", content: RETRIEVAL_FILTER_PROMPT.system },
         {
@@ -105,22 +125,18 @@
${list}`,
       {
         op: `retrieval.${RETRIEVAL_FILTER_PROMPT.id}.v${RETRIEVAL_FILTER_PROMPT.version}`,
         temperature: 0,
-        // Short output — we only need an array of integers. Keep the
-        // token cap tight so a misbehaving model can't blow budgets.
-        maxTokens: 120,
+        // Short output — indices + one bool. Kept tight so a misbehaving
+        // model can't blow budgets.
+        maxTokens: 160,
         malformedRetries: 1,
       },
     );
     const raw = (rsp.value?.selected ??
[]) as unknown;
+    const sufficient = coerceBool(rsp.value?.sufficient);
     if (!Array.isArray(raw)) {
-      deps.log.debug("llm_filter.malformed", {
-        got: typeof raw,
-      });
-      // Same fallback policy as throw — we'd rather lean conservative
-      // than dump the whole ranked list into the prompt.
+      deps.log.debug("llm_filter.malformed", { got: typeof raw });
       return safeCutoff(ranked, deps);
     }
-    // Convert 1-based indices → 0-based, drop duplicates and out-of-range.
     const keepIndices = new Set<number>();
     for (const v of raw) {
       const n = typeof v === "number" ? v : Number(v);
@@ -131,14 +147,14 @@
       if (keepIndices.size >= deps.config.llmFilterMaxKeep) break;
     }
     if (keepIndices.size === 0) {
-      // Model asked us to drop everything — we honour it even when the
-      // ranked list was non-empty. Surface this explicitly so the Logs
-      // page can show "LLM found nothing relevant" instead of silently
-      // injecting a partial packet.
+      // Model asked us to drop everything — honoured. Surface this
+      // explicitly so the Logs page can show "LLM found nothing
+      // relevant" instead of silently injecting a partial packet.
       return {
         kept: [],
         dropped: [...ranked],
         outcome: "llm_filtered",
+        sufficient: sufficient ?? false,
       };
     }
     const kept: RankedCandidate[] = [];
@@ -151,6 +167,7 @@
       dropped,
       outcome:
         kept.length === ranked.length ? "llm_kept_all" : "llm_filtered",
+      sufficient,
     };
   } catch (err) {
     deps.log.warn("llm_filter.failed", {
@@ -165,32 +182,39 @@
 function passthrough(
   ranked: readonly RankedCandidate[],
   outcome: FilterResult["outcome"],
 ): FilterResult {
-  return { kept: [...ranked], dropped: [], outcome };
+  return { kept: [...ranked], dropped: [], outcome, sufficient: null };
 }
 
 /**
  * Mechanical fail-closed: when the LLM is unavailable / errored,
  * apply a relative-relevance cutoff so we don't dump the entire ranked
  * list into the prompt. Keeps:
- *   1. items whose score ≥ `topScore · relativeThresholdFloor`
+ *   1. items whose score ≥ `topScore · 0.7`
 *   2. capped at `llmFilterMaxKeep` so the prompt stays small.
 *
- * The ranker already applied an initial cutoff with the same floor,
- * but the LLM is expected to prune further (because cosine + RRF still
- * over-includes); this fallback uses a slightly tighter ratio so the
- * "fail" path doesn't ship as much noise as the success path.
+ * The ranker already applied an initial cutoff with the same family of
+ * floors, but the LLM is expected to prune further (because the
+ * ranker is tuned for recall). This fallback uses a slightly tighter
+ * ratio so the "fail" path doesn't ship as much noise as the success
+ * path.
 */
function safeCutoff(
  ranked: readonly RankedCandidate[],
  deps: FilterDeps,
): FilterResult {
  if (ranked.length === 0) {
-    return { kept: [], dropped: [], outcome: "llm_failed_safe_cutoff" };
+    return {
+      kept: [],
+      dropped: [],
+      outcome: "llm_failed_safe_cutoff",
+      sufficient: null,
+    };
  }
-  // Tighter than the ranker's relativeThresholdFloor — when LLM has
-  // failed, lean conservative.
  const ratio = 0.7;
-  const topScore = ranked.reduce((m, c) => Math.max(m, c.score ?? c.relevance), 0);
+  const topScore = ranked.reduce(
+    (m, c) => Math.max(m, c.score ?? c.relevance),
+    0,
+  );
  const cutoff = topScore > 0 ? topScore * ratio : 0;
  const keepCap = Math.max(1, deps.config.llmFilterMaxKeep);
  const kept: RankedCandidate[] = [];
@@ -201,29 +225,52 @@
    else dropped.push(c);
  }
  // If the cutoff would have dropped everything, keep the single best
-  // candidate so the agent at least sees one option. Better than 0.
+ // candidate so the agent at least sees one option. if (kept.length === 0 && ranked.length > 0) { kept.push(ranked[0]!); dropped.shift(); } - return { kept, dropped, outcome: "llm_failed_safe_cutoff" }; + return { + kept, + dropped, + outcome: "llm_failed_safe_cutoff", + sufficient: null, + }; +} + +function coerceBool(v: unknown): boolean | null { + if (typeof v === "boolean") return v; + if (v === "true" || v === "yes" || v === 1) return true; + if (v === "false" || v === "no" || v === 0) return false; + return null; } -function describeCandidate(r: RankedCandidate): string { +/** + * Render a ranked candidate into a single labelled string for the LLM. + * Much richer than the old 240-char summary — now includes time, role, + * tags, which channels surfaced the row, and the ranker's score. This + * mirrors what openclaw's `filterRelevant` receives and lets the model + * reason over "fresh vs stale", "skill vs memory", "keyword vs vector + * hit" without guessing. + */ +function describeCandidate(r: RankedCandidate, bodyChars: number): string { const c = r.candidate; + const meta = metaOf(r, c); switch (c.tier) { case "tier1": { const skill = c as { skillName?: string; invocationGuide?: string; eta?: number; + status?: string; }; - const name = skill.skillName ?? "(skill)"; - const hint = (skill.invocationGuide ?? "") - .replace(/\s+/g, " ") - .trim() - .slice(0, MAX_CANDIDATE_CONTENT_CHARS); - return `[SKILL] ${name} — ${hint}`; + const head = `${skill.skillName ?? "(skill)"}${ + typeof skill.eta === "number" + ? ` · η=${skill.eta.toFixed(2)}` + : "" + }${skill.status ? ` · ${skill.status}` : ""}`; + const hint = squashBody(skill.invocationGuide ?? "", bodyChars); + return `[SKILL ${meta}] ${head}${hint ? `\n ${hint}` : ""}`; } case "tier2": { if (c.refKind === "trace") { @@ -233,29 +280,62 @@ function describeCandidate(r: RankedCandidate): string { agentText?: string; reflection?: string | null; }; - const body = (tr.summary || tr.userText || tr.agentText || "") - .replace(/\s+/g, " ") - .trim() - .slice(0, MAX_CANDIDATE_CONTENT_CHARS); - return `[TRACE] ${body}`; + const parts: string[] = []; + if (tr.summary?.trim()) parts.push(tr.summary.trim()); + if (tr.userText?.trim()) parts.push(`[user] ${tr.userText.trim()}`); + if (tr.agentText?.trim()) + parts.push(`[assistant] ${tr.agentText.trim()}`); + if (tr.reflection?.trim()) + parts.push(`[note] ${tr.reflection.trim()}`); + const body = squashBody(parts.join(" "), bodyChars); + return `[TRACE ${meta}] ${body}`; } const ep = c as { summary?: string }; - const body = (ep.summary ?? "") - .replace(/\s+/g, " ") - .trim() - .slice(0, MAX_CANDIDATE_CONTENT_CHARS); - return `[EPISODE] ${body}`; + const body = squashBody(ep.summary ?? "", bodyChars); + return `[EPISODE ${meta}] ${body}`; } case "tier3": { const wm = c as { title?: string; body?: string }; const head = wm.title ?? "(world-model)"; - const hint = (wm.body ?? "") - .replace(/\s+/g, " ") - .trim() - .slice(0, MAX_CANDIDATE_CONTENT_CHARS); - return `[WORLD-MODEL] ${head} — ${hint}`; + const body = squashBody(wm.body ?? "", bodyChars); + return `[WORLD-MODEL ${meta}] ${head}${body ? `\n ${body}` : ""}`; } default: - return "[UNKNOWN]"; + return `[UNKNOWN ${meta}]`; + } +} + +function metaOf(r: RankedCandidate, c: TierCandidate): string { + const bits: string[] = []; + if (typeof c.ts === "number" && c.ts > 0) { + bits.push(`time=${formatTime(c.ts)}`); + } + if (Array.isArray((c as { tags?: readonly string[] }).tags)) { + const tags = ((c as { tags?: readonly string[] }).tags ?? 
[]) + .filter(Boolean) + .slice(0, 6); + if (tags.length) bits.push(`tags=[${tags.join(",")}]`); + } + const channels = (c.channels ?? []) + .map((ch) => ch.channel) + .filter(Boolean) + .slice(0, 4); + if (channels.length) bits.push(`via=${channels.join("+")}`); + const score = r.score ?? r.relevance; + if (Number.isFinite(score)) bits.push(`score=${score.toFixed(3)}`); + return bits.join(" "); +} + +function squashBody(s: string, max: number): string { + const cleaned = s.replace(/\s+/g, " ").trim(); + if (cleaned.length <= max) return cleaned; + return cleaned.slice(0, Math.max(0, max - 1)) + "…"; +} + +function formatTime(ts: number): string { + try { + return new Date(ts).toISOString().slice(0, 16).replace("T", " "); + } catch { + return String(ts); } } diff --git a/apps/memos-local-plugin/core/retrieval/ranker.ts b/apps/memos-local-plugin/core/retrieval/ranker.ts index 083d10fe1..453fdbeb1 100644 --- a/apps/memos-local-plugin/core/retrieval/ranker.ts +++ b/apps/memos-local-plugin/core/retrieval/ranker.ts @@ -1,28 +1,35 @@ /** * Ranker — fuses candidates across tiers and enforces diversity. * - * Three passes: + * Design (2026 overhaul, aligned with `memos-local-openclaw::recall/engine`): * - * 1. **Per-channel RRF.** Each `RankedCandidate` carries one - * `ChannelRank` per channel that contributed it (vec_summary, - * vec_action, fts, pattern, structural). The fused score is - * `Σ 1 / (k + rank_i + 1)` over those ranks. A row that surfaces - * in 3 channels gets a much bigger lift than a vector-only hit. - * This is what plugs the "single-channel false positive" hole. + * 1. **Base = best channel score.** A candidate's base evidence is the + * strongest single-channel hit it has — cosine for vector, `1/(rank+1)` + * for FTS / pattern, `0.9` synthetic for structural error-signature. + * This puts all channels on a comparable (0, 1] footing without the + * "cosine=0 for keyword hits" trap the old formula had. * - * 2. **Relative threshold drop.** After computing per-candidate - * `relevance`, drop everyone whose `relevance < topRelevance · - * relativeThresholdFloor`. Adaptive: a strong query (top score 0.9) - * keeps only items ≥ 0.36; a weak query (top 0.4) keeps items ≥ 0.16. + * 2. **RRF bonus across channels.** Multi-channel matches add + * `rrfWeight · Σ 1/(k + rank_i + 1)`. A row confirmed by 2+ channels + * gets a clear lift over single-channel false-positives. * - * 3. **MMR with smart per-tier seed.** Seed at most one candidate per - * non-empty tier (so a packet is never a single-tier monoculture) - * — but only seed a tier if its best candidate clears the relative - * threshold. This kills the "irrelevant skill / world-model gets - * force-injected" failure mode. + * 3. **Tier-specific additive boosts.** V·decay (Tier-2) and η + * (Tier-1) are add-ons that differentiate rows *within* the same + * base-score band — not a dominant term that washes out the RRF + * signal. * - * This module is pure and framework-agnostic — no storage, no embedder, - * no side effects. Unit testable by passing in plain arrays. + * 4. **Multi-channel bypass.** Any candidate surfaced by ≥ 2 channels + * is exempt from the relative-threshold drop (it can still lose in + * MMR on redundancy). This is the backstop that guarantees a + * keyword-only hit confirmed by vector can never be silently + * dropped because a noisy topRelevance dragged the floor up. + * + * 5. 
**Smart-seed MMR.** Phase A seeds at most one candidate per tier,
+ *    and only if its relevance is within `smartSeedRatio` of the pool
+ *    top. Prevents "force-inject an irrelevant Tier-1 / Tier-3 just
+ *    because the tier had a candidate".
+ *
+ * The module stays pure — no storage, no embedder, no side effects.
 */
 
 import { cosinePrenormed, norm2 } from "../storage/vector.js";
@@ -31,6 +38,7 @@
 import { priorityFor } from "../reward/backprop.js";
 import type {
   ChannelRank,
   EpisodeCandidate,
+  RetrievalChannel,
   RetrievalConfig,
   SkillCandidate,
   TierCandidate,
@@ -53,18 +61,20 @@ export interface RankerInput {
 export interface RankedCandidate {
   candidate: TierCandidate;
   /**
-   * Base relevance used by MMR. Blends:
-   *   - cosine + priority (vector-aware tiers)
-   *   - small η nudge for Tier-1
-   *   - per-channel RRF lift (so multi-channel matches surface)
+   * Base relevance used by MMR.
+   *   relevance = bestChannelScore + rrfWeight · Σ 1/(k+rank+1)
+   *               + priorityBoost (tier2) + etaBoost (tier1)
   */
  relevance: number;
-  /** Fused RRF score across channels. */
+  /** Fused RRF score across channels (pre-weighting). */
  rrf: number;
  /** Final MMR-adjusted score. */
  score: number;
  /** `||vec||²`, cached for MMR. `null` means "no vec → treat as fully diverse". */
  normSq: number | null;
+  /** True when this candidate was allowed past the threshold via the
+   * multi-channel bypass (useful for logs / "why did this survive?"). */
+  bypassedThreshold?: boolean;
 }
 
 export interface RankerResult {
@@ -77,10 +87,23 @@
   topRelevance: number;
   /** Number of candidates the relative-threshold cut. */
   droppedByThreshold: number;
+  /** Absolute floor applied (`topRelevance · floor`). */
+  thresholdFloor: number;
+  /** Channel hit counts aggregated across all candidates. */
+  channelHits: Partial<Record<RetrievalChannel, number>>;
 }
 
-const DEFAULT_RELATIVE_THRESHOLD = 0.4;
+const DEFAULT_RELATIVE_THRESHOLD = 0.2;
+const DEFAULT_SMART_SEED_RATIO = 0.7;
 const DEFAULT_SKILL_ETA_BLEND = 0.15;
+/**
+ * How much each channel's RRF contribution is scaled by in the base
+ * relevance formula. Kept small so that "best-channel-score" dominates
+ * per-candidate but multi-channel agreement still gets a clear lift.
+ */
+const RRF_WEIGHT = 0.4;
+/** Default priority blend — V·decay contributes this much at V=1. */
+const DEFAULT_PRIORITY_BLEND = 0.3;
 
 export function rank(input: RankerInput): RankerResult {
   const tierSizes: Record<TierKind, number> = {
@@ -89,6 +112,7 @@
     tier2: input.tier2.length,
     tier3: input.tier3.length,
   };
   const kept: Record<TierKind, number> = { tier1: 0, tier2: 0, tier3: 0 };
+  const channelHits: Partial<Record<RetrievalChannel, number>> = {};
 
   // ─── 1. Bag every candidate with relevance + RRF ──────────────────────────
   const bag: RankedCandidate[] = [];
@@ -97,6 +121,13 @@
   pushAll(bag, input.tier2Episodes, (c) => relevanceFor(c, input));
   pushAll(bag, input.tier3, (c) => relevanceFor(c, input));
 
+  // Tally channel hits for observability.
+  for (const c of bag) {
+    for (const ch of c.candidate.channels ?? []) {
+      channelHits[ch.channel] = (channelHits[ch.channel] ?? 0) + 1;
+    }
+  }
+
   if (bag.length === 0) {
     return {
       ranked: [],
@@ -104,24 +135,44 @@
       kept,
       topRelevance: 0,
       droppedByThreshold: 0,
+      thresholdFloor: 0,
+      channelHits,
     };
   }
 
   assignChannelRrf(bag, input.config.rrfConstant);
-  // Fold the channel-RRF into relevance so MMR + threshold both honour it.
- for (const c of bag) c.relevance += c.rrf; + for (const c of bag) c.relevance += RRF_WEIGHT * c.rrf; - // ─── 2. Relative threshold cut ──────────────────────────────────────────── + // ─── 2. Relative threshold cut (with multi-channel bypass) ──────────────── const topRelevance = bag.reduce((m, c) => Math.max(m, c.relevance), 0); const floorRatio = input.config.relativeThresholdFloor ?? DEFAULT_RELATIVE_THRESHOLD; const cutoff = topRelevance > 0 ? topRelevance * floorRatio : 0; - const droppedByThreshold = bag.filter((c) => c.relevance < cutoff).length; - const survivors = - cutoff > 0 ? bag.filter((c) => c.relevance >= cutoff) : [...bag]; + const bypassEnabled = input.config.multiChannelBypass !== false; + + let droppedByThreshold = 0; + const survivors: RankedCandidate[] = []; + for (const c of bag) { + const channels = c.candidate.channels ?? []; + const multiChannel = bypassEnabled && channels.length >= 2; + if (multiChannel) c.bypassedThreshold = true; + if (cutoff > 0 && c.relevance < cutoff && !multiChannel) { + droppedByThreshold += 1; + continue; + } + survivors.push(c); + } if (survivors.length === 0) { - return { ranked: [], tierSizes, kept, topRelevance, droppedByThreshold }; + return { + ranked: [], + tierSizes, + kept, + topRelevance, + droppedByThreshold, + thresholdFloor: cutoff, + channelHits, + }; } // ─── 3. MMR-style greedy pick ───────────────────────────────────────────── @@ -132,13 +183,18 @@ export function rank(input: RankerInput): RankerResult { const pool = [...survivors]; const limit = Math.min(input.limit, survivors.length); const smartSeed = input.config.smartSeed !== false; - // Smart-seed cutoff: only seed a tier if its best candidate beats this. - // Falls back to plain `cutoff` so we never seed an item we'd otherwise - // drop. Setting `smartSeed = false` reverts to the legacy "seed best - // of every non-empty tier". - const seedCutoff = smartSeed ? cutoff : 0; + const seedRatio = smartSeed + ? input.config.smartSeedRatio ?? DEFAULT_SMART_SEED_RATIO + : 0; + const poolTop = pool.reduce((m, c) => Math.max(m, c.relevance), 0); + const seedCutoff = smartSeed ? poolTop * seedRatio : 0; // Phase A — seeded picks per tier (preserves cross-tier diversity). + // V7 §2.6: each tier answers a different question — we keep at most + // one seed per tier so a packet is never a monoculture, but we only + // seed if the tier's best candidate is within `smartSeedRatio` of the + // pool top. Irrelevant Tier-1 / Tier-3 candidates no longer slip in + // just because the tier was non-empty. const seedTiers: TierKind[] = ["tier1", "tier2", "tier3"]; for (const tk of seedTiers) { if (out.length >= limit) break; @@ -185,35 +241,100 @@ export function rank(input: RankerInput): RankerResult { // Sort the final list by score desc (MMR scores are not guaranteed // monotone during the loop because Phase A seeds get their raw relevance). out.sort((a, b) => b.score - a.score || b.rrf - a.rrf); - return { ranked: out, tierSizes, kept, topRelevance, droppedByThreshold }; + return { + ranked: out, + tierSizes, + kept, + topRelevance, + droppedByThreshold, + thresholdFloor: cutoff, + channelHits, + }; } // ─── Helpers ──────────────────────────────────────────────────────────────── +/** + * Per-candidate base relevance. New design: + * + * relevance = bestChannelScore + * + priorityBlend · priorityForLive (trace / episode) + * + skillEtaBlend · η (skill) + * + * RRF across channels is added *after* this function runs (so we have + * access to `rrfConstant`). 
We start from `bestChannelScore` — which for
+ * vec hits is cosine, for fts/pattern is `1/(rank+1)`, for structural is
+ * the synthetic 0.9 — meaning an exact keyword hit at rank 0 starts at
+ * the same base (1.0) as a cosine-1.0 hit. Without this, pure-keyword
+ * hits with cosine=0 would score essentially zero and get guillotined
+ * by the relative threshold.
+ */
 function relevanceFor(c: TierCandidate, input: RankerInput): number {
-  const cosW = input.config.weightCosine;
-  const priW = input.config.weightPriority;
-  const cos = clamp(c.cosine, -1, 1);
+  const base = bestChannelScore(c);
   if (c.tier === "tier1") {
     const sk = c as SkillCandidate;
-    const etaBlend =
-      input.config.skillEtaBlend ?? DEFAULT_SKILL_ETA_BLEND;
-    // Cosine still dominates; η is a small reliability nudge.
-    return cosW * cos + etaBlend * clamp(sk.eta, 0, 1);
+    const etaBlend = input.config.skillEtaBlend ?? DEFAULT_SKILL_ETA_BLEND;
+    return base + etaBlend * clamp(sk.eta, 0, 1);
   }
   if (c.refKind === "trace") {
     const tc = c as TraceCandidate;
-    const live = priorityFor(tc.value, tc.ts, input.config.decayHalfLifeDays, input.now);
-    return cosW * cos + priW * live;
+    const live = priorityFor(
+      tc.value,
+      tc.ts,
+      input.config.decayHalfLifeDays,
+      input.now,
+    );
+    const blend = priorityBlendFor(input.config);
+    return base + blend * live;
   }
   if (c.refKind === "episode") {
     const ep = c as EpisodeCandidate;
-    const live = priorityFor(ep.maxValue, ep.ts, input.config.decayHalfLifeDays, input.now);
-    return cosW * cos + priW * live;
+    const live = priorityFor(
+      ep.maxValue,
+      ep.ts,
+      input.config.decayHalfLifeDays,
+      input.now,
+    );
+    const blend = priorityBlendFor(input.config);
+    return base + blend * live;
+  }
+  // Tier 3 world-model — no V signal; rely on base + RRF.
+  return base;
+}
+
+/**
+ * `weightPriority` is kept in config for backwards-compat, but the new
+ * default semantics is: "how much priority lifts relevance at V=1".
+ * Historically this was used as a linear weight on a `cos + priority`
+ * blend where `cos` was already in 0~1; now `base` already carries a
+ * 0~1 signal so we scale priority to a non-dominating floor (default
+ * 0.3). Configs that set `weightPriority` below that cap are honoured
+ * as-is; anything above it is clamped, so priority stays a tie-breaker
+ * rather than a dominant term.
+ */
+function priorityBlendFor(config: RetrievalConfig): number {
+  const w = config.weightPriority;
+  if (w == null || w <= 0) return 0;
+  // Cap the effective blend so priority can't single-handedly push a
+  // V=1 trace above a channel-confirmed keyword hit — priority is a
+  // tie-breaker, not a dominant term.
+  return Math.min(w, DEFAULT_PRIORITY_BLEND);
+}
+
+function bestChannelScore(c: TierCandidate): number {
+  const channels = c.channels ?? [];
+  if (channels.length === 0) {
+    // Legacy path — callers that build candidates without `channels`
+    // (unit tests, older fixtures) fall back to the raw cosine.
+    return clamp(c.cosine, 0, 1);
+  }
+  let best = 0;
+  for (const ch of channels) {
+    if (ch.score > best) best = ch.score;
+  }
-  // Tier 3 — cosine only; world-models have no V.
-  return cosW * cos;
+  // If the candidate also carries a cosine (e.g. structural bumped),
+  // honour it as a floor — structural hits set cosine=0.9 synthetically.
+  return Math.max(best, clamp(c.cosine, 0, 1));
+}
 
 function pushAll(
diff --git a/apps/memos-local-plugin/core/retrieval/retrieve.ts b/apps/memos-local-plugin/core/retrieval/retrieve.ts
index 2377359f9..f621d7209 100644
--- a/apps/memos-local-plugin/core/retrieval/retrieve.ts
+++ b/apps/memos-local-plugin/core/retrieval/retrieve.ts
@@ -264,6 +264,8 @@ async function runAll(
   const tier3LatencyMs = plan.wantTier3 ? Date.now() - tier3Start : 0;
 
   const fuseStart = Date.now();
+  const rawCandidateCount =
+    tier1.length + tier2.traces.length + tier2.episodes.length + tier3.length;
   const ranked = rank({
     tier1,
     tier2Traces: tier2.traces,
@@ -281,7 +283,8 @@
   // items that share surface keywords with the query but aren't
   // actually relevant. Fails open — on any error we keep the
   // mechanical ranking.
-  const queryText = (ctx as { userText?: string }).userText ?? compiled.text ?? "";
+  const queryText =
+    (ctx as { userText?: string }).userText ?? compiled.text ?? "";
   const filtered = await llmFilterCandidates(
     { query: queryText, ranked: ranked.ranked },
     {
@@ -292,9 +295,15 @@
   );
   log.debug("llm_filter.done", {
     outcome: filtered.outcome,
-    before: ranked.ranked.length,
+    sufficient: filtered.sufficient,
+    raw: rawCandidateCount,
+    afterThreshold: ranked.ranked.length,
+    droppedByThreshold: ranked.droppedByThreshold,
+    thresholdFloor: round(ranked.thresholdFloor, 3),
+    topRelevance: round(ranked.topRelevance, 3),
     kept: filtered.kept.length,
     dropped: filtered.dropped.length,
+    channels: ranked.channelHits,
   });
 
   const { packet } = toPacket({
@@ -342,6 +351,16 @@
     queryTokens: approxTokens(compiled.text),
     queryTags: compiled.tags,
     emptyPacket: packet.snippets.length === 0,
+    rawCandidateCount,
+    droppedByThresholdCount: ranked.droppedByThreshold,
+    thresholdFloor: ranked.thresholdFloor,
+    topRelevance: ranked.topRelevance,
+    rankedCount: ranked.ranked.length,
+    llmFilterOutcome: filtered.outcome,
+    llmFilterSufficient: filtered.sufficient ?? undefined,
+    llmFilterKept: filtered.kept.length,
+    llmFilterDropped: filtered.dropped.length,
+    channelHits: ranked.channelHits,
   };
 
   log.info("done", {
@@ -432,6 +451,12 @@
   return Math.ceil(s.length / 4);
 }
 
+function round(n: number, d: number): number {
+  if (!Number.isFinite(n)) return n;
+  const f = 10 ** d;
+  return Math.round(n * f) / f;
+}
+
 /** Thin façade so pipelines can `new Retriever(deps)` if they prefer OO. */
 export class Retriever {
   constructor(private readonly deps: RetrievalDeps) {}
diff --git a/apps/memos-local-plugin/core/retrieval/types.ts b/apps/memos-local-plugin/core/retrieval/types.ts
index 2b72c697a..5f68fb3ea 100644
--- a/apps/memos-local-plugin/core/retrieval/types.ts
+++ b/apps/memos-local-plugin/core/retrieval/types.ts
@@ -213,9 +213,26 @@ export interface RetrievalConfig {
   skillEtaBlend?: number;
   /**
    * Smart MMR seeding — only seed a tier if its best candidate clears
-   * `topRelevance · relativeThresholdFloor`. Default true.
+   * `poolTopRelevance · smartSeedRatio` (see below). Default true.
+   * `smartSeed: false` restores the legacy "seed best of every non-empty
+   * tier" behaviour regardless of relevance.
   */
  smartSeed?: boolean;
+  /**
+   * When `smartSeed` is on, only seed a tier whose best candidate's
+   * relevance is ≥ `poolTopRelevance · smartSeedRatio`. Default 0.7.
+   * Independent of `relativeThresholdFloor` so the seed gate can be
+   * stricter than the generic drop floor.
+   */
+  smartSeedRatio?: number;
+  /**
+   * If a candidate is surfaced by ≥ 2 channels, bypass the relative
+   * threshold (it still participates in MMR). This compensates for
+   * the ranker's base formula being "max channel score + additive
+   * boosts" — a two-channel agreement is a strong signal even when
+   * the absolute score falls below the drop floor. Default true.
+   */
+  multiChannelBypass?: boolean;

   /**
    * V7 §2.6 Tier-1 rendering mode.
@@ -254,8 +271,16 @@ export interface RetrievalConfig {
   llmFilterEnabled: boolean;
   /** Keep at most N candidates after the LLM filter. */
   llmFilterMaxKeep: number;
-  /** Skip the filter entirely when the ranked list has ≤ this many items. */
+  /** Skip the filter entirely when the ranked list has fewer than this many items. */
   llmFilterMinCandidates: number;
+  /**
+   * Max chars of body text to show to the LLM filter for each candidate.
+   * Higher = more context for precise judgement, at the cost of more
+   * tokens per round-trip. Default 500 (openclaw uses 300 without
+   * tags/channels; we include richer metadata so a slightly bigger
+   * window pays for itself).
+   */
+  llmFilterCandidateBodyChars?: number;
 }

 /**
@@ -514,6 +539,44 @@ export interface RetrievalStats {
   queryTokens: number;
   queryTags: string[];
   emptyPacket: boolean;
+  /**
+   * Observability breakdown — populated so the Logs page (and
+   * api_logs) can show "how many candidates survived each stage" and
+   * operators can spot "this stage is the lossy one" at a glance.
+   * All fields are optional so legacy callers / older RetrievalStats
+   * consumers keep compiling.
+   */
+  rawCandidateCount?: number;
+  droppedByThresholdCount?: number;
+  thresholdFloor?: number;
+  topRelevance?: number;
+  rankedCount?: number;
+  llmFilterOutcome?:
+    | "disabled"
+    | "no_llm"
+    | "below_threshold"
+    | "empty_query"
+    | "llm_kept_all"
+    | "llm_filtered"
+    | "llm_failed_safe_cutoff";
+  llmFilterSufficient?: boolean;
+  llmFilterKept?: number;
+  llmFilterDropped?: number;
+  /**
+   * Channel hit counts across all tiers, e.g.
+   * `{ vec_summary: 12, fts: 7, pattern: 3, structural: 0 }`. Helps
+   * identify queries that got hits only through one channel (likely
+   * fragile).
+   */
+  channelHits?: Partial<Record<string, number>>;
 }

 /** Discriminated context union — one per entry point in `retrieve.ts`. */
diff --git a/apps/memos-local-plugin/tests/unit/adapters/openclaw-bridge.test.ts b/apps/memos-local-plugin/tests/unit/adapters/openclaw-bridge.test.ts
index 267d56899..94298d7be 100644
--- a/apps/memos-local-plugin/tests/unit/adapters/openclaw-bridge.test.ts
+++ b/apps/memos-local-plugin/tests/unit/adapters/openclaw-bridge.test.ts
@@ -180,6 +180,41 @@ describe("flattenMessages", () => {
     expect(flat[3].content).toBe("file.txt");
   });

+  it("does NOT double-emit tool calls when content[] and top-level tool_calls coexist (pi-ai + OpenAI bundle)", () => {
+    // Regression for the "tool call rows duplicated 2x" bug. OpenAI
+    // messages plumbed through pi-ai carry the canonical pi-ai
+    // `content[{type:"toolCall"}]` shape AND the legacy OpenAI
+    // `tool_calls` top-level array. Pre-fix, flattenMessages emitted
+    // BOTH, which made extractTurn's `pendingCalls.set(key, …)`
+    // overwrite the first stub (with its `thinkingBefore`) with an
+    // empty second stub — so `thinkingBefore` silently went missing
+    // AND the trace ended up with 2× rows per tool.
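+    // The fixture below intentionally carries the SAME call in both
+    // shapes — `content: [{ type: "toolCall", id: "call_X", … }]`
+    // (pi-ai) and `tool_calls: [{ id: "call_X", function: { … } }]`
+    // (OpenAI legacy). Exactly one flattened tool_call row must survive.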
+ const flat = flattenMessages([ + { role: "user", content: "deploy" }, + { + role: "assistant", + content: [ + { type: "text", text: "running" }, + { type: "toolCall", id: "call_X", name: "sh", arguments: { cmd: "deploy" } }, + ], + tool_calls: [ + { + id: "call_X", + function: { name: "sh", arguments: JSON.stringify({ cmd: "deploy" }) }, + }, + ], + }, + ]); + const toolCallEntries = flat.filter((m) => m.role === "tool_call"); + expect(toolCallEntries).toHaveLength(1); + expect(toolCallEntries[0].toolName).toBe("sh"); + expect(toolCallEntries[0].toolCallId).toBe("call_X"); + // Ensure the assistant text emitted for the SAME message is + // preserved — it's the `thinkingBefore` source for this call. + const assistantText = flat.find((m) => m.role === "assistant"); + expect(assistantText?.content).toBe("running"); + }); + it("does NOT coerce unknown roles into 'user' (the bug that captured tool stdout as user input)", () => { const flat = flattenMessages([ { role: "user", content: "real user input" }, @@ -257,18 +292,15 @@ describe("extractTurn", () => { const turn = extractTurn(flat, 1_700_000_000_000); expect(turn).not.toBeNull(); expect(turn!.userText).toBe("how many files?"); - expect(turn!.agentText).toContain("2 files"); + expect(turn!.agentText).toBe("2 files"); expect(turn!.toolCalls).toHaveLength(1); expect(turn!.toolCalls[0].name).toBe("sh"); expect(turn!.toolCalls[0].input).toEqual({ cmd: "ls" }); expect(turn!.toolCalls[0].output).toContain("a.txt"); + expect(turn!.toolCalls[0].thinkingBefore).toBe("running ls"); }); it("captures sysctl-style exec invocation: tool stdout lands in tool output, NOT in userText", () => { - // Regression for the user's bug: an exec tool with a complex - // command + multi-line stdout used to be parsed as a fresh user - // turn whose content was the stdout. Lock down that pi-ai's - // toolResult shape now keeps the boundaries straight. const flat = flattenMessages([ { role: "user", content: "帮我看下当前运行的系统是几个核心多少内存" }, { @@ -304,12 +336,11 @@ describe("extractTurn", () => { expect(turn!.userText).toBe("帮我看下当前运行的系统是几个核心多少内存"); expect(turn!.userText).not.toContain("17179869184"); expect(turn!.userText).not.toContain("Hardware:"); - // Both assistant texts are kept in chronological order — the - // lead-in ("I'll check the system.") and the final answer - // ("10 核 / 16 GB"). What we explicitly forbid is tool stdout - // leaking back into agentText. - expect(turn!.agentText).toContain("I'll check the system."); - expect(turn!.agentText).toContain("10 核 / 16 GB"); + // "I'll check the system." is the model's pre-tool reasoning and + // is captured in the tool's thinkingBefore. The final reply after + // the tool result is agentText. + expect(turn!.toolCalls[0].thinkingBefore).toBe("I'll check the system."); + expect(turn!.agentText).toBe("10 核 / 16 GB"); expect(turn!.agentText).not.toContain("17179869184"); expect(turn!.agentText).not.toContain("Hardware:"); expect(turn!.toolCalls).toHaveLength(1); @@ -338,6 +369,173 @@ describe("extractTurn", () => { expect(turn!.agentThinking).toBe("Let me read the issue first."); }); + it("assigns interleaved thinking to each tool call's thinkingBefore", () => { + // OpenClaw's PI agent alternates: think → tool → result → think → tool. + // Both thinking blocks and regular text before a tool call are + // captured in thinkingBefore. + const flat = flattenMessages([ + { role: "user", content: "fix the build" }, + { + role: "assistant", + content: [ + { type: "thinking", thinking: "Let me check the error log first." 
}, + { type: "text", text: "checking" }, + { type: "toolCall", id: "c1", name: "sh", arguments: { cmd: "cat error.log" } }, + ], + }, + { + role: "toolResult", + toolCallId: "c1", + toolName: "sh", + content: "pg_config not found", + isError: false, + }, + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "The error says pg_config is missing. I need to install libpq-dev.", + }, + { type: "toolCall", id: "c2", name: "sh", arguments: { cmd: "apt-get install libpq-dev" } }, + ], + }, + { + role: "toolResult", + toolCallId: "c2", + toolName: "sh", + content: "ok", + isError: false, + }, + { + role: "assistant", + content: [ + { type: "thinking", thinking: "Good, now let me retry the build." }, + { type: "toolCall", id: "c3", name: "sh", arguments: { cmd: "make build" } }, + ], + }, + { + role: "toolResult", + toolCallId: "c3", + toolName: "sh", + content: "BUILD SUCCESSFUL", + isError: false, + }, + { + role: "assistant", + content: [{ type: "text", text: "Fixed — the build passes now." }], + }, + ]); + const turn = extractTurn(flat, 0); + expect(turn).not.toBeNull(); + expect(turn!.toolCalls).toHaveLength(3); + // First tool: thinking + text merged into thinkingBefore + expect(turn!.toolCalls[0].thinkingBefore).toBe( + "Let me check the error log first.\n\nchecking", + ); + expect(turn!.toolCalls[1].thinkingBefore).toBe( + "The error says pg_config is missing. I need to install libpq-dev.", + ); + expect(turn!.toolCalls[2].thinkingBefore).toBe("Good, now let me retry the build."); + // All thinking was flushed into tool calls; none left over + expect(turn!.agentThinking).toBeUndefined(); + expect(turn!.agentText).toBe("Fixed — the build passes now."); + }); + + it("tool call has no thinkingBefore when model goes directly to the tool", () => { + const flat = flattenMessages([ + { role: "user", content: "list files" }, + { + role: "assistant", + content: [ + { type: "toolCall", id: "c1", name: "sh", arguments: { cmd: "ls" } }, + ], + }, + { + role: "toolResult", + toolCallId: "c1", + toolName: "sh", + content: "a.txt", + isError: false, + }, + { + role: "assistant", + content: [{ type: "text", text: "found a.txt" }], + }, + ]); + const turn = extractTurn(flat, 0); + expect(turn!.toolCalls[0].thinkingBefore).toBeUndefined(); + expect(turn!.agentText).toBe("found a.txt"); + }); + + it("captures regular assistant text between tool calls as thinkingBefore (most models)", () => { + // Most models (non-Claude, or Claude without extended thinking) + // produce regular text between tool calls, not ThinkingContent. + // This text is the model's reasoning and must be captured. + const flat = flattenMessages([ + { role: "user", content: "帮我查下当前系统有几个cpu有多少g内存" }, + { + role: "assistant", + content: [ + { type: "text", text: "Let me check the CPU count first." }, + { type: "toolCall", id: "c1", name: "exec", arguments: { command: "sysctl -n hw.ncpu" } }, + ], + }, + { + role: "toolResult", + toolCallId: "c1", + toolName: "exec", + content: "10", + isError: false, + }, + { + role: "assistant", + content: [ + { type: "text", text: "OK, 10 CPUs. Now let me check the memory." }, + { type: "toolCall", id: "c2", name: "exec", arguments: { command: "sysctl -n hw.memsize" } }, + ], + }, + { + role: "toolResult", + toolCallId: "c2", + toolName: "exec", + content: "17179869184", + isError: false, + }, + { + role: "assistant", + content: [ + { type: "text", text: "Now let me check disk space." 
}, + { type: "toolCall", id: "c3", name: "exec", arguments: { command: "df -h /" } }, + ], + }, + { + role: "toolResult", + toolCallId: "c3", + toolName: "exec", + content: "/dev/disk1s1 466Gi 200Gi 266Gi 43% /", + isError: false, + }, + { + role: "assistant", + content: [ + { type: "text", text: "Your system has 10 CPUs, 16 GB RAM, and 266 GB free disk space." }, + ], + }, + ]); + const turn = extractTurn(flat, 0); + expect(turn).not.toBeNull(); + expect(turn!.toolCalls).toHaveLength(3); + expect(turn!.toolCalls[0].thinkingBefore).toBe("Let me check the CPU count first."); + expect(turn!.toolCalls[1].thinkingBefore).toBe("OK, 10 CPUs. Now let me check the memory."); + expect(turn!.toolCalls[2].thinkingBefore).toBe("Now let me check disk space."); + expect(turn!.agentText).toBe( + "Your system has 10 CPUs, 16 GB RAM, and 266 GB free disk space.", + ); + // No thinking blocks used, so agentThinking is empty + expect(turn!.agentThinking).toBeUndefined(); + }); + it("falls back gracefully when assistant.toolCall has no matching toolResult", () => { const flat = flattenMessages([ { role: "user", content: "do x" }, diff --git a/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts index b0676e911..73e6c2e77 100644 --- a/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts +++ b/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts @@ -148,6 +148,9 @@ function makeDeps(handle: TmpDbHandle): RetrievalDeps { minTraceSim: 0.3, tagFilter: "auto", decayHalfLifeDays: 30, + llmFilterEnabled: false, + llmFilterMaxKeep: 4, + llmFilterMinCandidates: 1, }, now: () => NOW as never, }; diff --git a/apps/memos-local-plugin/tests/unit/retrieval/llm-filter.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/llm-filter.test.ts index a8b05bee9..4097c5f06 100644 --- a/apps/memos-local-plugin/tests/unit/retrieval/llm-filter.test.ts +++ b/apps/memos-local-plugin/tests/unit/retrieval/llm-filter.test.ts @@ -9,16 +9,17 @@ import type { const cfg: Pick< RetrievalConfig, - "llmFilterEnabled" | "llmFilterMaxKeep" | "llmFilterMinCandidates" + | "llmFilterEnabled" + | "llmFilterMaxKeep" + | "llmFilterMinCandidates" + | "llmFilterCandidateBodyChars" > = { llmFilterEnabled: true, llmFilterMaxKeep: 4, - llmFilterMinCandidates: 2, + llmFilterMinCandidates: 1, + llmFilterCandidateBodyChars: 500, }; -// Minimal Logger stub — `llm-filter` only calls `.warn`, `.debug`, `.info`. -// We use `as any` rather than implementing the full `Logger` interface, -// since the missing methods are never invoked in this filter path. 
const log = { trace: vi.fn(), debug: vi.fn(), @@ -33,18 +34,19 @@ function trace(id: string, score: number): RankedCandidate { refKind: "trace", refId: id as never, cosine: score, - ts: 1 as never, + ts: 1_700_000_000_000 as never, vec: null, value: 0.5 as never, priority: 0.5 as never, episodeId: "e1" as never, sessionId: "s1" as never, vecKind: "summary", - userText: "u", - agentText: "a", - summary: "summary text", + userText: `user ${id}`, + agentText: `agent ${id}`, + summary: `summary ${id}`, reflection: null, - tags: [], + tags: ["sample"], + channels: [{ channel: "vec_summary", rank: 0, score }], }; return { candidate: cand, @@ -56,28 +58,46 @@ function trace(id: string, score: number): RankedCandidate { } describe("retrieval/llm-filter", () => { - it("disabled → passthrough", async () => { + it("disabled → passthrough with null sufficient", async () => { const result = await llmFilterCandidates( { query: "anything", ranked: [trace("a", 0.9), trace("b", 0.5)] }, { llm: null, log, config: { ...cfg, llmFilterEnabled: false } }, ); expect(result.outcome).toBe("disabled"); expect(result.kept.length).toBe(2); + expect(result.sufficient).toBeNull(); }); - it("below threshold → passthrough", async () => { + it("below threshold → passthrough (minCandidates can lift the gate)", async () => { const result = await llmFilterCandidates( { query: "x", ranked: [trace("only", 0.9)] }, - { llm: null, log, config: cfg }, + { llm: null, log, config: { ...cfg, llmFilterMinCandidates: 5 } }, ); expect(result.outcome).toBe("below_threshold"); expect(result.kept.length).toBe(1); + expect(result.sufficient).toBeNull(); + }); + + it("single candidate → filter still runs at minCandidates=1 default", async () => { + const llm: any = { + completeJson: vi.fn().mockResolvedValue({ + value: { selected: [1], sufficient: true }, + servedBy: "fake", + }), + }; + const result = await llmFilterCandidates( + { query: "q", ranked: [trace("solo", 0.9)] }, + { llm, log, config: cfg }, + ); + expect(result.outcome).toBe("llm_kept_all"); + expect(result.kept.map((r) => String(r.candidate.refId))).toEqual(["solo"]); + expect(result.sufficient).toBe(true); }); - it("LLM returns selected indices → filters precisely", async () => { + it("LLM returns selected indices → filters precisely and surfaces sufficient", async () => { const llm: any = { completeJson: vi.fn().mockResolvedValue({ - value: { selected: [1, 3] }, + value: { selected: [1, 3], sufficient: false }, servedBy: "fake", }), }; @@ -89,12 +109,13 @@ describe("retrieval/llm-filter", () => { expect(result.outcome).toBe("llm_filtered"); expect(result.kept.map((r) => String(r.candidate.refId))).toEqual(["a", "c"]); expect(result.dropped.map((r) => String(r.candidate.refId))).toEqual(["b"]); + expect(result.sufficient).toBe(false); }); - it("LLM returns empty selection → keeps nothing (drops the whole packet)", async () => { + it("LLM returns empty selection → drops everything and marks insufficient", async () => { const llm: any = { completeJson: vi.fn().mockResolvedValue({ - value: { selected: [] }, + value: { selected: [], sufficient: false }, servedBy: "fake", }), }; @@ -106,6 +127,21 @@ describe("retrieval/llm-filter", () => { expect(result.outcome).toBe("llm_filtered"); expect(result.kept.length).toBe(0); expect(result.dropped.length).toBe(2); + expect(result.sufficient).toBe(false); + }); + + it("coerces string / number `sufficient` fields sent by lax models", async () => { + const llm: any = { + completeJson: vi.fn().mockResolvedValue({ + value: { selected: [1], 
sufficient: "yes" }, + servedBy: "fake", + }), + }; + const result = await llmFilterCandidates( + { query: "q", ranked: [trace("a", 0.9)] }, + { llm, log, config: cfg }, + ); + expect(result.sufficient).toBe(true); }); it("LLM throws → mechanical safe cutoff (NOT passthrough)", async () => { @@ -115,16 +151,16 @@ describe("retrieval/llm-filter", () => { const ranked = [ trace("strong", 0.9), trace("middle", 0.6), - trace("weak", 0.05), // far below 0.7·top → cut by safeCutoff + trace("weak", 0.05), ]; const result = await llmFilterCandidates( { query: "q", ranked }, { llm, log, config: cfg }, ); expect(result.outcome).toBe("llm_failed_safe_cutoff"); + expect(result.sufficient).toBeNull(); const ids = result.kept.map((r) => String(r.candidate.refId)); expect(ids).toContain("strong"); - // weak is far below the relative cutoff → dropped expect(ids).not.toContain("weak"); }); @@ -132,16 +168,11 @@ describe("retrieval/llm-filter", () => { const llm: any = { completeJson: vi.fn().mockRejectedValue(new Error("boom")), }; - const ranked = [trace("only", 0.05)]; - // Below threshold gates the LLM call entirely, so this exercises - // the safeCutoff path indirectly by raising the cutoff via cfg - // override: const result = await llmFilterCandidates( { query: "q", ranked: [trace("a", 0.5), trace("b", 0.49)] }, { llm, log, config: cfg }, ); expect(result.outcome).toBe("llm_failed_safe_cutoff"); - // both are above 0.7 · 0.5 = 0.35, so both kept expect(result.kept.length).toBeGreaterThanOrEqual(1); }); @@ -149,14 +180,13 @@ describe("retrieval/llm-filter", () => { const llm: any = { completeJson: vi.fn().mockRejectedValue(new Error("boom")), }; - // 6 candidates all above threshold, llmFilterMaxKeep=2 → kept ≤ 2. const ranked = [ trace("a", 0.95), trace("b", 0.94), trace("c", 0.93), trace("d", 0.92), trace("e", 0.91), - trace("f", 0.90), + trace("f", 0.9), ]; const result = await llmFilterCandidates( { query: "q", ranked }, @@ -168,10 +198,32 @@ describe("retrieval/llm-filter", () => { it("no LLM at all → passthrough (not safe-cutoff, since the call never happens)", async () => { const result = await llmFilterCandidates( - { query: "q", ranked: [trace("a", 0.9), trace("b", 0.8), trace("c", 0.7)] }, + { + query: "q", + ranked: [trace("a", 0.9), trace("b", 0.8), trace("c", 0.7)], + }, { llm: null, log, config: cfg }, ); expect(result.outcome).toBe("no_llm"); expect(result.kept.length).toBe(3); + expect(result.sufficient).toBeNull(); + }); + + it("candidate description includes time / tags / channels / score metadata", async () => { + const seen: string[] = []; + const llm: any = { + completeJson: vi.fn().mockImplementation(async (messages: any[]) => { + seen.push(messages[1].content); + return { value: { selected: [1], sufficient: true }, servedBy: "fake" }; + }), + }; + await llmFilterCandidates( + { query: "q", ranked: [trace("a", 0.9)] }, + { llm, log, config: cfg }, + ); + expect(seen[0]).toContain("time="); + expect(seen[0]).toContain("tags=[sample]"); + expect(seen[0]).toContain("via=vec_summary"); + expect(seen[0]).toContain("score="); }); }); diff --git a/apps/memos-local-plugin/tests/unit/retrieval/ranker.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/ranker.test.ts index da4f53d34..ec7efc6c2 100644 --- a/apps/memos-local-plugin/tests/unit/retrieval/ranker.test.ts +++ b/apps/memos-local-plugin/tests/unit/retrieval/ranker.test.ts @@ -23,6 +23,9 @@ const cfg: RetrievalConfig = { minTraceSim: 0.35, tagFilter: "auto", decayHalfLifeDays: 30, + llmFilterEnabled: false, + llmFilterMaxKeep: 4, 
+ llmFilterMinCandidates: 1, }; const NOW = 1_700_000_000_000; @@ -110,34 +113,37 @@ describe("retrieval/ranker", () => { expect(out.ranked.length).toBe(0); }); - it("seeds at least one pick per non-empty tier", () => { + it("smart-seed picks every tier when all tier-bests are close to pool top", () => { const out = rank({ tier1: [skill("sk1", 0.9, 0.9)], - tier2Traces: [trace("t1", 0.8, 0.5)], + tier2Traces: [trace("t1", 0.85, 0.5)], tier2Episodes: [], - tier3: [world("w1", 0.7)], + tier3: [world("w1", 0.8)], limit: 3, - config: cfg, + config: { ...cfg, relativeThresholdFloor: 0, smartSeedRatio: 0.7 }, now: NOW, }); - expect(out.ranked.map((r) => r.candidate.tier).sort()).toEqual(["tier1", "tier2", "tier3"]); + expect(out.ranked.map((r) => r.candidate.tier).sort()).toEqual([ + "tier1", + "tier2", + "tier3", + ]); }); - it("tier-2 V-aware order beats pure cosine when weights favor priority", () => { - const highCosLowV = trace("t1", 0.95, 0.0); // high sim, worthless - const highV = trace("t2", 0.4, 0.9); // mediocre sim, high V + it("priority breaks ties within the same base-score band", () => { + // Same cosine → same base. Higher V adds a priority lift. + const lowV = trace("t1", 0.5, 0.0); + const highV = trace("t2", 0.5, 0.9); const out = rank({ tier1: [], - tier2Traces: [highCosLowV, highV], + tier2Traces: [lowV, highV], tier2Episodes: [], tier3: [], limit: 2, - config: { ...cfg, weightCosine: 0.2, weightPriority: 0.8 }, + config: { ...cfg, relativeThresholdFloor: 0 }, now: NOW, }); - // t2 should rank ahead of t1 under priority-heavy weights - const first = out.ranked[0]!.candidate.refId; - expect(String(first)).toBe("t2"); + expect(String(out.ranked[0]!.candidate.refId)).toBe("t2"); }); it("MMR suppresses near-duplicate vectors", () => { @@ -151,7 +157,7 @@ describe("retrieval/ranker", () => { tier2Episodes: [], tier3: [], limit: 2, - config: { ...cfg, mmrLambda: 0 }, // pure diversity + config: { ...cfg, mmrLambda: 0, relativeThresholdFloor: 0 }, // pure diversity now: NOW, }); const picked = out.ranked.map((r) => String(r.candidate.refId)); @@ -179,10 +185,10 @@ describe("retrieval/ranker", () => { tier2Episodes: [episode("ep1", 0.5, 0.9)], tier3: [world("w1", 0.4)], limit: 5, - config: cfg, + config: { ...cfg, relativeThresholdFloor: 0, smartSeedRatio: 0.3 }, now: NOW, }); - // Both tiers are seeded; ep1 should outrank w1 due to its high maxValue. + // ep1 has higher base AND a priority lift from maxValue → should lead. expect(out.ranked[0]!.candidate.refId).toBe("ep1"); }); @@ -192,9 +198,9 @@ describe("retrieval/ranker", () => { const out = rank({ tier1: [], tier2Traces: [ - trace("strong", 0.9, 0.8), // topRelevance ≈ 0.86 + trace("strong", 0.9, 0.8), trace("middle", 0.5, 0.4), - trace("weak", 0.05, 0.0), // ≈ 0.03 → far below floor + trace("weak", 0.05, 0.0), ], tier2Episodes: [], tier3: [], @@ -208,18 +214,23 @@ describe("retrieval/ranker", () => { expect(out.droppedByThreshold).toBeGreaterThanOrEqual(1); }); - it("smart-seed refuses to seed a tier when its best candidate is irrelevant", () => { + it("smart-seed refuses to seed a tier when its best candidate is far from pool top", () => { // Tier-1 + Tier-3 only have weak candidates; Tier-2 has a strong - // signal. With smartSeed=true, the ranker should ship just the - // tier-2 hit and skip the noisy seeds — the previous behaviour - // would have force-injected a marginal Tier-1 + Tier-3 each. + // signal. 
With smartSeedRatio=0.7 AND the relative threshold on, + // the irrelevant tiers should be cut by threshold — smart-seed is + // the Phase-A gate, threshold is the pool-wide gate. const out = rank({ tier1: [skill("sk_irrelevant", 0.05, 0.9)], tier2Traces: [trace("t_strong", 0.9, 0.8)], tier2Episodes: [], tier3: [world("w_irrelevant", 0.05)], limit: 5, - config: { ...cfg, relativeThresholdFloor: 0.4, smartSeed: true }, + config: { + ...cfg, + relativeThresholdFloor: 0.4, + smartSeed: true, + smartSeedRatio: 0.7, + }, now: NOW, }); const ids = out.ranked.map((r) => String(r.candidate.refId)); @@ -228,6 +239,29 @@ describe("retrieval/ranker", () => { expect(ids).not.toContain("w_irrelevant"); }); + it("smart-seed blocks Phase-A tier seeding even when threshold is disabled", () => { + // When threshold=0 the pool keeps everyone, but Phase-A must still + // skip seeding weak tiers. We verify t_strong is seeded first + // (proving Phase-A ran) and that sk_irrelevant / w_irrelevant can + // only appear via Phase-B MMR, not as forced tier seeds. + const out = rank({ + tier1: [skill("sk_irrelevant", 0.05, 0.9)], + tier2Traces: [trace("t_strong", 0.9, 0.8)], + tier2Episodes: [], + tier3: [world("w_irrelevant", 0.05)], + limit: 1, + config: { + ...cfg, + relativeThresholdFloor: 0, + smartSeed: true, + smartSeedRatio: 0.7, + }, + now: NOW, + }); + expect(out.ranked.length).toBe(1); + expect(String(out.ranked[0]!.candidate.refId)).toBe("t_strong"); + }); + it("smartSeed=false restores legacy behaviour (force-seed every tier)", () => { const out = rank({ tier1: [skill("sk_irrelevant", 0.05, 0.9)], @@ -247,14 +281,14 @@ describe("retrieval/ranker", () => { expect(ids).toContain("w_irrelevant"); }); - it("multi-channel hits get an RRF lift over single-channel hits at same cosine", () => { + it("multi-channel hits get an RRF lift over single-channel hits at same base", () => { const single = trace("single_ch", 0.6, 0.0); single.channels = [{ channel: "vec_summary", rank: 0, score: 0.6 }]; const multi = trace("multi_ch", 0.6, 0.0); multi.channels = [ { channel: "vec_summary", rank: 0, score: 0.6 }, - { channel: "fts", rank: 0, score: 1 / 61 }, - { channel: "pattern", rank: 1, score: 1 / 62 }, + { channel: "fts", rank: 0, score: 1 }, + { channel: "pattern", rank: 1, score: 0.5 }, ]; const out = rank({ tier1: [], @@ -268,10 +302,58 @@ describe("retrieval/ranker", () => { expect(String(out.ranked[0]!.candidate.refId)).toBe("multi_ch"); }); + it("multi-channel bypass lets low-relevance keyword hits survive threshold", () => { + // Strong candidate pulls topRelevance up; keyword-only single-channel + // hit would be guillotined by the relative floor, BUT a multi-channel + // hit with the same base should survive via the bypass. 
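+    // Arithmetic for the fixture below: topRelevance tracks `strong`
+    // (base 0.9 plus its priority lift), so the floor is roughly
+    // 0.4 · 0.9 ≈ 0.36 or slightly above. fts_only's base of 0.25 is a
+    // lone channel below the floor → guillotined; confirmed's base of
+    // 0.33 is also below it, but two agreeing channels trigger the bypass.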
+ const strong = trace("strong", 0.9, 0.9); + strong.channels = [{ channel: "vec_summary", rank: 0, score: 0.9 }]; + const ftsOnly = trace("fts_only", 0.1, 0.0); + ftsOnly.channels = [{ channel: "fts", rank: 3, score: 0.25 }]; + const confirmed = trace("confirmed", 0.12, 0.0); + confirmed.channels = [ + { channel: "fts", rank: 3, score: 0.25 }, + { channel: "pattern", rank: 2, score: 0.33 }, + ]; + const out = rank({ + tier1: [], + tier2Traces: [strong, ftsOnly, confirmed], + tier2Episodes: [], + tier3: [], + limit: 5, + config: { ...cfg, relativeThresholdFloor: 0.4, multiChannelBypass: true }, + now: NOW, + }); + const ids = out.ranked.map((r) => String(r.candidate.refId)); + expect(ids).toContain("strong"); + expect(ids).toContain("confirmed"); + // The single-channel weak FTS hit should still get cut. + expect(ids).not.toContain("fts_only"); + }); + + it("multiChannelBypass=false restores strict threshold for multi-channel hits", () => { + const strong = trace("strong", 0.9, 0.9); + strong.channels = [{ channel: "vec_summary", rank: 0, score: 0.9 }]; + const confirmed = trace("confirmed", 0.12, 0.0); + confirmed.channels = [ + { channel: "fts", rank: 3, score: 0.25 }, + { channel: "pattern", rank: 2, score: 0.33 }, + ]; + const out = rank({ + tier1: [], + tier2Traces: [strong, confirmed], + tier2Episodes: [], + tier3: [], + limit: 5, + config: { ...cfg, relativeThresholdFloor: 0.5, multiChannelBypass: false }, + now: NOW, + }); + const ids = out.ranked.map((r) => String(r.candidate.refId)); + expect(ids).toContain("strong"); + expect(ids).not.toContain("confirmed"); + }); + it("skill η no longer dominates cosine — the more-relevant skill wins", () => { - // Old behaviour blended `0.4·η`, so a high-η stale skill could - // outrank a fresh, query-aligned one. With the new default - // `skillEtaBlend=0.15`, cosine dominates. 
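+    // With the default skillEtaBlend of 0.15 (and assuming stale's base
+    // falls back to its 0.2 cosine), the intended ordering is:
+    //   fresh_match:    0.85 + 0.15 · 0.50 ≈ 0.925
+    //   stale_high_eta: 0.20 + 0.15 · 0.95 ≈ 0.343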
const fresh = skill("fresh_match", 0.85, 0.5); fresh.channels = [{ channel: "vec", rank: 0, score: 0.85 }]; const stale = skill("stale_high_eta", 0.2, 0.95); @@ -287,4 +369,28 @@ describe("retrieval/ranker", () => { }); expect(String(out.ranked[0]!.candidate.refId)).toBe("fresh_match"); }); + + it("tallies channel hits for observability", () => { + const a = trace("a", 0.8, 0.5); + a.channels = [ + { channel: "vec_summary", rank: 0, score: 0.8 }, + { channel: "fts", rank: 1, score: 0.5 }, + ]; + const b = trace("b", 0.6, 0.5); + b.channels = [{ channel: "pattern", rank: 0, score: 0.9 }]; + const out = rank({ + tier1: [], + tier2Traces: [a, b], + tier2Episodes: [], + tier3: [], + limit: 5, + config: { ...cfg, relativeThresholdFloor: 0 }, + now: NOW, + }); + expect(out.channelHits.vec_summary).toBe(1); + expect(out.channelHits.fts).toBe(1); + expect(out.channelHits.pattern).toBe(1); + expect(out.topRelevance).toBeGreaterThan(0); + expect(out.thresholdFloor).toBe(0); + }); }); diff --git a/apps/memos-local-plugin/tests/unit/web/tasks-chat.test.ts b/apps/memos-local-plugin/tests/unit/web/tasks-chat.test.ts index 7cb81b876..a1c3a23ee 100644 --- a/apps/memos-local-plugin/tests/unit/web/tasks-chat.test.ts +++ b/apps/memos-local-plugin/tests/unit/web/tasks-chat.test.ts @@ -31,14 +31,11 @@ function trace(part: Partial): TimelineTrace { } describe("flattenChat", () => { - it("emits user → thinking → tools → assistant in that order; reflection is dropped", () => { + it("emits user → [thinking+tool pairs] → assistant; reflection is dropped", () => { const t = trace({ id: "tr1", userText: "go fix the deploy", agentText: "done — see PR #42", - // LLM-native thinking — must surface as a chat bubble. - agentThinking: "Looking at the error chain, pg_config is missing.", - // Plugin-internal reflection — must NEVER appear in the chat log. reflection: "INTERNAL: scoring note — α should be high because this step pinpointed the root cause.", toolCalls: [ @@ -49,6 +46,7 @@ describe("flattenChat", () => { startedAt: T0 + 10, endedAt: T0 + 200, errorCode: "EXIT_1", + thinkingBefore: "Looking at the error chain, pg_config is missing.", }, { name: "bash", @@ -67,20 +65,15 @@ describe("flattenChat", () => { "tool", "assistant", ]); - // The thinking bubble is the model's NATIVE reasoning, NOT - // reflection (which is the plugin's scoring scratchpad). expect(msgs[1]!.text).toContain("pg_config is missing"); expect(msgs[1]!.text).not.toContain("INTERNAL: scoring note"); - // Both tools point back at the same trace and carry their full payload. expect(msgs[2]!.traceId).toBe("tr1"); expect(msgs[2]!.toolName).toBe("bash"); expect(msgs[2]!.toolInput).toContain("pip install psycopg2"); expect(msgs[2]!.toolOutput).toContain("pg_config not found"); expect(msgs[2]!.errorCode).toBe("EXIT_1"); expect(msgs[2]!.toolDurationMs).toBe(190); - // Assistant is the agent text. expect(msgs[4]!.text).toBe("done — see PR #42"); - // No bubble's text leaks the reflection content anywhere. 
for (const m of msgs) { expect(m.text).not.toContain("INTERNAL: scoring note"); } @@ -210,6 +203,107 @@ describe("flattenChat", () => { ]); }); + it("interleaves per-tool thinking when thinkingBefore is present", () => { + const t = trace({ + id: "tr_interleave", + userText: "fix the build", + agentText: "Fixed — build passes now.", + agentThinking: "Check error log.\n\nNeed libpq-dev.\n\nRetry the build.", + toolCalls: [ + { + name: "sh", + input: "cat error.log", + output: "pg_config not found", + startedAt: T0 + 10, + endedAt: T0 + 200, + thinkingBefore: "Check error log.", + }, + { + name: "sh", + input: "apt-get install libpq-dev", + output: "ok", + startedAt: T0 + 300, + endedAt: T0 + 800, + thinkingBefore: "Need libpq-dev.", + }, + { + name: "sh", + input: "make build", + output: "BUILD SUCCESSFUL", + startedAt: T0 + 900, + endedAt: T0 + 1500, + thinkingBefore: "Retry the build.", + }, + ], + }); + const msgs = flattenChat([t]); + expect(msgs.map((m) => m.role)).toEqual([ + "user", + "thinking", // before tool 0 + "tool", + "thinking", // before tool 1 + "tool", + "thinking", // before tool 2 + "tool", + "assistant", + ]); + expect(msgs[1]!.text).toBe("Check error log."); + expect(msgs[3]!.text).toBe("Need libpq-dev."); + expect(msgs[5]!.text).toBe("Retry the build."); + }); + + it("no thinking bubbles when tools lack thinkingBefore (agentThinking only shown for no-tool turns)", () => { + const t = trace({ + id: "tr_no_tb", + userText: "go", + agentText: "done", + agentThinking: "Some thinking.", + toolCalls: [ + { name: "tool_a", startedAt: T0 + 10, endedAt: T0 + 100 }, + { name: "tool_b", startedAt: T0 + 200, endedAt: T0 + 300 }, + ], + }); + const msgs = flattenChat([t]); + expect(msgs.map((m) => m.role)).toEqual([ + "user", + "tool", + "tool", + "assistant", + ]); + }); + + it("only some tools have thinkingBefore — those without get no bubble", () => { + const t = trace({ + id: "tr_partial", + userText: "go", + agentText: "done", + agentThinking: "initial\n\nsecond thought", + toolCalls: [ + { + name: "tool_a", + startedAt: T0 + 10, + endedAt: T0 + 100, + thinkingBefore: "initial", + }, + { + name: "tool_b", + startedAt: T0 + 200, + endedAt: T0 + 300, + // no thinkingBefore — model went straight to the next tool + }, + ], + }); + const msgs = flattenChat([t]); + expect(msgs.map((m) => m.role)).toEqual([ + "user", + "thinking", // before tool_a + "tool", + "tool", // no thinking before tool_b + "assistant", + ]); + expect(msgs[1]!.text).toBe("initial"); + }); + it("returns empty array for empty input", () => { expect(flattenChat([])).toEqual([]); }); diff --git a/apps/memos-local-plugin/web/src/stores/i18n.ts b/apps/memos-local-plugin/web/src/stores/i18n.ts index 47c1a306f..c5192b082 100644 --- a/apps/memos-local-plugin/web/src/stores/i18n.ts +++ b/apps/memos-local-plugin/web/src/stores/i18n.ts @@ -524,6 +524,7 @@ const en = { "logs.search.droppedByLlm": "Dropped by LLM", "logs.search.noCandidates": "No candidates.", "logs.search.noneRelevant": "Candidates were returned but the LLM dropped them all.", + "logs.search.funnel": "Retrieval funnel", "logs.add.warnings": "Warnings", "logs.add.details": "Per-turn items", "pager.pageN": "Page {n} / {total}", @@ -1067,6 +1068,7 @@ const zh: Record = { "logs.search.droppedByLlm": "LLM 剔除", "logs.search.noCandidates": "没有候选。", "logs.search.noneRelevant": "有候选但被 LLM 全部剔除。", + "logs.search.funnel": "召回漏斗", "logs.add.warnings": "警告", "logs.add.details": "每轮条目", "pager.pageN": "第 {n} 页 / 共 {total} 页", diff --git 
a/apps/memos-local-plugin/web/src/views/LogsView.tsx b/apps/memos-local-plugin/web/src/views/LogsView.tsx
index a090fb38f..828ddb5b8 100644
--- a/apps/memos-local-plugin/web/src/views/LogsView.tsx
+++ b/apps/memos-local-plugin/web/src/views/LogsView.tsx
@@ -354,8 +354,25 @@ interface SearchOutput {
   hubCandidates?: SearchCandidate[];
   filtered?: SearchCandidate[];
   droppedByLlm?: SearchCandidate[];
+  stats?: RetrievalStatsPayload;
   error?: string;
 }
+interface RetrievalStatsPayload {
+  raw?: number;
+  ranked?: number;
+  droppedByThreshold?: number;
+  thresholdFloor?: number;
+  topRelevance?: number;
+  llmFilter?: {
+    outcome?: string;
+    kept?: number;
+    dropped?: number;
+    sufficient?: boolean | null;
+  };
+  channelHits?: Record<string, number>;
+  queryTokens?: number;
+  queryTags?: string[];
+}
 interface SearchCandidate {
   tier?: number;
   refKind?: string;
@@ -404,6 +421,7 @@ function MemorySearchDetail({
       ) : (
         <>
+          {out.stats && <RetrievalFunnel stats={out.stats} />}
@@ … @@
+/**
+ * Stage-by-stage funnel readout for a memory_search log entry. (The
+ * component name, element nesting and class names below are a
+ * best-effort sketch; the fields shown match RetrievalStatsPayload.)
+ */
+function RetrievalFunnel({ stats }: { stats: RetrievalStatsPayload }) {
+  const lf = stats.llmFilter ?? {};
+  const raw = stats.raw ?? 0;
+  const ranked = stats.ranked ?? 0;
+  const dropped = stats.droppedByThreshold ?? 0;
+  const kept = lf.kept;
+  const outcome = lf.outcome ?? "—";
+  const fmtNum = (n?: number, digits = 3) =>
+    typeof n === "number" && Number.isFinite(n) ? n.toFixed(digits) : "—";
+  const channelEntries = Object.entries(stats.channelHits ?? {}).filter(
+    ([, v]) => typeof v === "number" && v > 0,
+  );
+  return (
+    <div className="search-funnel">
+      <div className="search-funnel-title">
+        <span>{t("logs.search.funnel")}</span>
+      </div>
+      <div className="search-funnel-badges">
+        <span>raw {raw}</span>
+        <span>ranked {ranked}</span>
+        {dropped > 0 && (
+          <span>dropped≥floor {dropped}</span>
+        )}
+        {typeof kept === "number" && (
+          <span>llm kept {kept}</span>
+        )}
+        <span>outcome {outcome}</span>
+        {lf.sufficient !== null && lf.sufficient !== undefined && (
+          <span>
+            sufficient {String(lf.sufficient)}
+          </span>
+        )}
+        <span>
+          floor {fmtNum(stats.thresholdFloor)} · top {fmtNum(stats.topRelevance)}
+        </span>
+      </div>
+      {channelEntries.length > 0 && (
+        <div className="search-funnel-channels">
+          {channelEntries.map(([ch, n]) => (
+            <span key={ch}>
+              {ch} · {n}
+            </span>
+          ))}
+        </div>
+      )}
+    </div>
+ ); +} + function CandidateSection({ title, count, diff --git a/apps/memos-local-plugin/web/src/views/tasks-chat-data.ts b/apps/memos-local-plugin/web/src/views/tasks-chat-data.ts index 7dff8502a..04fd297be 100644 --- a/apps/memos-local-plugin/web/src/views/tasks-chat-data.ts +++ b/apps/memos-local-plugin/web/src/views/tasks-chat-data.ts @@ -15,6 +15,7 @@ export interface TimelineToolCall { errorCode?: string; startedAt?: number; endedAt?: number; + thinkingBefore?: string | null; } export interface TimelineTrace { @@ -71,13 +72,10 @@ const TOOL_OUTPUT_PREVIEW_CHARS = 1_600; * recognise, in pi-ai's natural emission order: * * 1. `user` — the user query that opened the step (if non-empty). - * 2. `thinking` — LLM-native thinking blocks the model emitted - * before its visible reply (Claude extended, - * pi-ai `ThinkingContent`). Sourced from - * `trace.agentThinking`. Never from `reflection`. - * 3. `tool` × N — every tool call the assistant made, sorted by - * `startedAt` so the chain reads chronologically. - * 4. `assistant` — the assistant's final text reply (if non-empty). + * 2. Interleaved `thinking` + `tool` blocks — each tool call's + * `thinkingBefore` is rendered as a thinking bubble directly + * before its tool, faithfully mirroring the model's think→act loop. + * 3. `assistant` — the assistant's final text reply (if non-empty). * * `trace.reflection` is **deliberately not** turned into a chat bubble. * Reflection is the MemOS plugin's own post-hoc note used to compute @@ -104,21 +102,38 @@ export function flattenChat(traces: readonly TimelineTrace[]): ChatMsg[] { }); } - const thinking = (tr.agentThinking ?? "").trim(); - if (thinking) { - out.push({ - role: "thinking", - text: thinking, - ts: tr.ts, - key: `${tr.id}:thinking`, - traceId: tr.id, - }); - } - const tools = [...(tr.toolCalls ?? [])].sort( (a, b) => (a.startedAt ?? tr.ts) - (b.startedAt ?? tr.ts), ); + + // When there are no tool calls, agentThinking (if present) appears + // as a standalone thinking bubble. When tools exist, the per-tool + // `thinkingBefore` fields carry the interleaved reasoning instead. + if (tools.length === 0) { + const thinking = (tr.agentThinking ?? "").trim(); + if (thinking) { + out.push({ + role: "thinking", + text: thinking, + ts: tr.ts, + key: `${tr.id}:thinking`, + traceId: tr.id, + }); + } + } + tools.forEach((tc, idx) => { + const tb = (tc.thinkingBefore ?? "").trim(); + if (tb) { + out.push({ + role: "thinking", + text: tb, + ts: tc.startedAt ?? tr.ts, + key: `${tr.id}:thinking:${idx}`, + traceId: tr.id, + }); + } + const inputStr = serializeToolPayload(tc.input); const outputStr = serializeToolPayload(tc.output); const dur =