Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 39 additions & 19 deletions apps/memos-local-plugin/adapters/openclaw/bridge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,15 @@ export function flattenMessages(input: unknown[] | undefined): FlatMessage[] {
}
for (const tc of inlineToolCalls) out.push(tc);

// OpenAI legacy: assistant has a top-level `tool_calls` array
// (separate from content). Fold these in after pi-ai inline
// tool calls so order is preserved when both shapes coexist.
if (Array.isArray(m.tool_calls)) {
// OpenAI-legacy fallback only: when the message has NO pi-ai
// inline tool calls but does have a top-level `tool_calls` array
// (pure OpenAI Function-Calling shape). When both shapes coexist
// (as OpenClaw's pi-ai bundled OpenAI adapter does), pi-ai
// already populated `content[].toolCall`, so re-reading the
// top-level field would emit each call twice — which in turn
// causes `extractTurn`'s `pendingCalls.set(key, …)` to clobber
// the first stub's `thinkingBefore` with an empty second stub.
if (inlineToolCalls.length === 0 && Array.isArray(m.tool_calls)) {
for (const tc of m.tool_calls as Array<Record<string, unknown>>) {
const fn = tc.function as Record<string, unknown> | undefined;
if (!fn) continue;
Expand Down Expand Up @@ -476,35 +481,51 @@ export function extractTurn(messages: FlatMessage[], now: number): CapturedTurn
const userText = messages[lastUserIdx].content.trim();
const tail = messages.slice(lastUserIdx + 1);

const assistantParts: string[] = [];
const thinkingParts: string[] = [];
const pendingCalls = new Map<string, Partial<ToolCallDTO> & { _id?: string }>();
const toolCalls: ToolCallDTO[] = [];

// Two separate buffers accumulate content not yet assigned to a tool.
//
// `pendingThinking`: Claude extended-thinking blocks (`ThinkingContent`)
// `pendingAssistant`: regular model text (`TextContent`)
//
// When a `tool_call` arrives, BOTH buffers are flushed together into
// that tool's `thinkingBefore` — this is the reasoning (structured OR
// natural language) the model did before deciding to invoke the tool.
//
// After all messages are processed, whatever remains in the buffers
// forms the final output: `pendingAssistant` → `agentText` (the
// reply) and `pendingThinking` → `agentThinking` (model reasoning
// shown in a dedicated bubble for non-tool turns).
let pendingThinking: string[] = [];
let pendingAssistant: string[] = [];

for (const m of tail) {
if (m.role === "assistant") {
if (m.content) assistantParts.push(m.content);
if (m.content) pendingAssistant.push(m.content);
continue;
}
if (m.role === "thinking") {
if (m.content) thinkingParts.push(m.content);
if (m.content) pendingThinking.push(m.content);
continue;
}
if (m.role === "tool_call" && m.toolName) {
// Assistant decided to call a tool. Stash until the matching
// tool_result lands so we can stitch the full ToolCallDTO.
const parts = [...pendingThinking, ...pendingAssistant];
const thinkingBefore = parts.join("\n\n").trim() || undefined;
pendingThinking = [];
pendingAssistant = [];

const key = m.toolCallId ?? m.toolName;
pendingCalls.set(key, {
_id: m.toolCallId,
name: m.toolName,
input: m.toolInput,
startedAt: m.ts ?? now,
thinkingBefore,
});
continue;
}
if (m.role === "tool_result") {
// Pair by id (preferred — works even when two parallel calls hit
// the same tool name) or fall back to toolName.
const key = m.toolCallId ?? m.toolName ?? "";
const stub = pendingCalls.get(key);
const errorCode = stub
Expand All @@ -517,16 +538,13 @@ export function extractTurn(messages: FlatMessage[], now: number): CapturedTurn
errorCode,
startedAt: stub?.startedAt ?? (m.ts ?? now),
endedAt: m.ts ?? now,
thinkingBefore: stub?.thinkingBefore,
});
if (key) pendingCalls.delete(key);
continue;
}
// system / unknown: ignore for the purpose of extractTurn.
}

// Any tool call that never received a paired tool_result still lands
// in the trace (with `output: undefined`) so the viewer can show
// "tool was invoked but produced no result".
for (const stub of pendingCalls.values()) {
if (!stub.name) continue;
toolCalls.push({
Expand All @@ -535,14 +553,15 @@ export function extractTurn(messages: FlatMessage[], now: number): CapturedTurn
output: undefined,
startedAt: stub.startedAt ?? now,
endedAt: now,
thinkingBefore: stub.thinkingBefore,
});
}

const agentThinking = thinkingParts.join("\n\n").trim();
const agentThinking = pendingThinking.join("\n\n").trim();
return {
userText,
agentText: assistantParts.join("\n\n").trim(),
agentThinking: agentThinking ? agentThinking : undefined,
agentText: pendingAssistant.join("\n\n").trim(),
agentThinking: agentThinking || undefined,
toolCalls,
};
}
Expand Down Expand Up @@ -796,6 +815,7 @@ export function createOpenClawBridge(opts: BridgeOptions): BridgeHandle {
hasError: !!event.error,
});


try {
// Legacy adapter parity: even when `success === false` we still
// enqueue the user's message (and whatever the assistant managed
Expand Down
10 changes: 10 additions & 0 deletions apps/memos-local-plugin/agent-contract/dto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@ export interface ToolCallDTO {
errorCode?: string;
startedAt: EpochMs;
endedAt: EpochMs;
/**
* LLM-native thinking emitted *before* the model decided to invoke this
* tool — e.g. "I got an error from tool_1, let me try a different
* approach". Populated by the adapter when the model interleaves
* thinking blocks between tool calls. `undefined` for legacy data or
* when no thinking preceded this particular call.
*
* Stored inside `tool_calls_json` (no schema migration needed).
*/
thinkingBefore?: string;
}

export interface TurnInputDTO {
Expand Down
28 changes: 19 additions & 9 deletions apps/memos-local-plugin/core/capture/normalizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,25 @@ export function normalizeSteps(
continue;
}

const last = out[out.length - 1];
if (
last &&
last.agentText === agentText &&
last.userText === userText &&
sameToolCalls(last.toolCalls, toolCalls)
) {
log.debug("normalize.skip_duplicate", { key: step.key });
continue;
// Sub-steps produced by the per-tool-call extractor (V7 §0.1) have
// intentionally-identical userText="" / agentText="" and carry only
// a single tool call each — but two different tools can still share
// a short input fingerprint, which the generic dedup path below
// would incorrectly collapse. Skip dedup for sub-steps; the key
// uniqueness guarantees they can't be genuine duplicates.
const isSubStep = (step.meta as Record<string, unknown> | undefined)?.subStep === true;

if (!isSubStep) {
const last = out[out.length - 1];
if (
last &&
last.agentText === agentText &&
last.userText === userText &&
sameToolCalls(last.toolCalls, toolCalls)
) {
log.debug("normalize.skip_duplicate", { key: step.key });
continue;
}
}

out.push({
Expand Down
11 changes: 9 additions & 2 deletions apps/memos-local-plugin/core/capture/step-extractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,11 @@ function segmentToSteps(
out.push({
key: `${episode.id}:${ts}:tool:${i}`,
ts,
userText,
// Only the first sub-step carries the user query; subsequent
// sub-steps leave `userText` empty so the viewer's flattenChat
// doesn't render the same user bubble N times. The turn's
// provenance (episodeId) still links them together.
userText: i === 0 ? userText : "",
agentText: "",
agentThinking: i === 0 ? fullThinking : null,
toolCalls: [tc],
Expand Down Expand Up @@ -232,13 +236,15 @@ function toolCallFromTurn(turn: EpisodeTurn): ToolCallDTO | null {
const endedAt = typeof meta.endedAt === "number" ? meta.endedAt : turn.ts;
const input = meta.input ?? meta.args ?? undefined;
const errorCode = typeof meta.errorCode === "string" ? meta.errorCode : undefined;
const thinkingBefore = typeof meta.thinkingBefore === "string" ? meta.thinkingBefore : undefined;
return {
name,
input,
output: turn.content,
errorCode,
startedAt,
endedAt,
thinkingBefore,
};
}

Expand All @@ -264,7 +270,8 @@ function coerceToolCall(raw: unknown): ToolCallDTO | null {
const startedAt =
typeof r.startedAt === "number" ? r.startedAt : Date.now();
const endedAt = typeof r.endedAt === "number" ? r.endedAt : startedAt;
return { name, input, output, errorCode, startedAt, endedAt };
const thinkingBefore = typeof r.thinkingBefore === "string" ? r.thinkingBefore : undefined;
return { name, input, output, errorCode, startedAt, endedAt, thinkingBefore };
}

function depthFromMeta(meta: Record<string, unknown>): number {
Expand Down
22 changes: 14 additions & 8 deletions apps/memos-local-plugin/core/config/defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,21 +157,27 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
episodeGoalMinSim: 0.45,
tagFilter: "auto",
keywordTopK: 20,
relativeThresholdFloor: 0.4,
// Lowered from 0.4 → 0.2 with the 2026 ranker overhaul: the new
// base relevance already uses channel rank as a first-class
// signal, so the old 0.4 floor was over-pruning keyword hits
// with modest V·decay.
relativeThresholdFloor: 0.2,
skillEtaBlend: 0.15,
smartSeed: true,
smartSeedRatio: 0.7,
multiChannelBypass: true,
skillInjectionMode: "summary",
skillSummaryChars: 200,
llmFilterEnabled: true,
// Tighter than the legacy default (5) so the LLM filter has a
// budget that forces "drop, don't pad". Combined with the
// few-shot prompt this dramatically improves precision.
// small budget; combined with the richer prompt (v3) this keeps
// packets concise without over-dropping.
llmFilterMaxKeep: 4,
// Lowered from 32: small packets (e.g. just a Tier-1 skill +
// a Tier-2 trace) used to skip the LLM filter entirely and ship
// both items even when one was tangential. Now anything > 1
// candidate gets a precision pass.
llmFilterMinCandidates: 2,
// Lowered from 2 → 1: even a single candidate gets a precision
// pass. Mirrors `memos-local-openclaw`'s tool-level filter and
// prevents a lone off-topic memory from sneaking through unchecked.
llmFilterMinCandidates: 1,
llmFilterCandidateBodyChars: 500,
},
},
hub: {
Expand Down
49 changes: 41 additions & 8 deletions apps/memos-local-plugin/core/config/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,14 @@ const AlgorithmSchema = Type.Object({
* `minTraceSim` — when the best hit is weak, we keep more (lower
* absolute floor); when there's a clear winner, we drop noise.
* Set to 0 to disable the relative cutoff entirely.
*
* Default lowered to 0.2 with the 2026 ranker overhaul: the new
* base formula already weighs channel-rank evidence (so a raw
* FTS-only hit lands in a comparable range to a cosine-0.8 hit),
* and the old 0.4 floor was over-pruning keyword matches with
* modest V·decay.
*/
relativeThresholdFloor: NumberInRange(0.4, 0, 1),
relativeThresholdFloor: NumberInRange(0.2, 0, 1),
/**
* Tier-1 skill relevance blend weight for `η` (skill reliability).
* Old default `0.4` made well-trodden skills outrank obviously-more-
Expand All @@ -333,12 +339,28 @@ const AlgorithmSchema = Type.Object({
skillEtaBlend: NumberInRange(0.15, 0, 1),
/**
* MMR Phase-A seed-by-tier policy. When `true`, only seed a tier
* if its best candidate's relevance ≥ `relativeThresholdFloor *
* topRelevance`. This prevents the ranker from force-injecting a
* stale Tier-1 skill / Tier-3 world-model just because it cleared
* the absolute floors.
* if its best candidate's relevance ≥ `poolTopRelevance *
* smartSeedRatio` (see below). This prevents the ranker from
* force-injecting a stale Tier-1 skill / Tier-3 world-model just
* because it cleared the absolute floors.
*/
smartSeed: Bool(true),
/**
* Seed cutoff for smart-seed MMR — tier is seeded iff its best
* candidate's relevance ≥ `poolTopRelevance * smartSeedRatio`.
* Independent of `relativeThresholdFloor` so the seed gate can be
* stricter than the generic drop floor (0.7 is "within 30% of the
* best available candidate anywhere in the pool").
*/
smartSeedRatio: NumberInRange(0.7, 0, 1),
/**
* When a candidate is surfaced by ≥ 2 retrieval channels (e.g.
* both vec and fts hit the same trace), bypass the relative
* threshold. Multi-channel agreement is a strong signal, and
* without this keyword-only matches with modest V·decay often
* get dropped by a noisy `topRelevance`.
*/
multiChannelBypass: Bool(true),
/**
* How Tier-1 skills are surfaced in the injected prompt:
* - "summary" (default): inject only `name + η + 1-line summary +
Expand Down Expand Up @@ -368,10 +390,21 @@ const AlgorithmSchema = Type.Object({
/** Keep at most this many candidates after the LLM filter. */
llmFilterMaxKeep: NumberInRange(5, 1, 30),
/**
* Skip the filter when the ranked list already has ≤ this many
* items — no point paying an LLM round-trip to prune 3 candidates.
* Skip the filter when the ranked list has fewer than this many
* items. Default 1 — even a single candidate gets a precision
* pass, matching `memos-local-openclaw`'s tool-level filter and
* preventing a lone off-topic memory from sneaking through
* unchecked.
*/
llmFilterMinCandidates: NumberInRange(1, 1, 50),
/**
* Body-text budget per candidate when building the LLM filter
* prompt. Higher = more context for precise judgement, at the
* cost of more tokens per round-trip. Default 500 (openclaw uses
* 300 without tags/channels; we include richer metadata, so a
* slightly larger window pays for itself).
*/
llmFilterMinCandidates: NumberInRange(4, 1, 50),
llmFilterCandidateBodyChars: NumberInRange(500, 120, 2000),
}, { default: {} }),
}, { default: {} });

Expand Down
Loading
Loading