Merged
11 changes: 11 additions & 0 deletions apps/memos-local-plugin/agent-contract/dto.ts
@@ -143,6 +143,17 @@ export interface TraceDTO {
rHuman?: Reward;
/** Cached priority used for L2 candidate selection. */
priority: number;
/**
* Stable group key shared by every L1 trace produced from the same
* user message. Equal to the user turn's `ts` (epoch ms). The
* viewer collapses rows with identical `(episodeId, turnId)` into
* a single "one round = one memory" card; algorithm-side machinery
* (V/α/L2/Tier 2/Decision Repair) ignores the field.
*
* Optional because rows written before migration 013 have NULL
* `turn_id`; the viewer falls back to per-row rendering for them.
*/
turnId?: EpochMs | null;
}

/**
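For illustration only (not part of the diff): a minimal sketch of the viewer-side grouping that the `turnId` doc comment describes. The helper name is hypothetical, and it assumes `TraceDTO` also carries `id` and `episodeId` fields that this hunk does not show.

```ts
import type { TraceDTO } from "./dto.js";

// Hypothetical viewer helper: collapse traces into "one round = one
// memory" cards keyed by (episodeId, turnId).
function groupIntoCards(
  traces: ReadonlyArray<TraceDTO & { id: string; episodeId: string }>,
): Map<string, TraceDTO[]> {
  const cards = new Map<string, TraceDTO[]>();
  for (const t of traces) {
    // Rows written before migration 013 have a NULL turnId; per the doc
    // comment the viewer falls back to per-row rendering, so each such
    // row becomes its own singleton card.
    const key =
      t.turnId == null ? `${t.episodeId}:row:${t.id}` : `${t.episodeId}:${t.turnId}`;
    const bucket = cards.get(key);
    if (bucket) bucket.push(t);
    else cards.set(key, [t]);
  }
  return cards;
}
```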
14 changes: 14 additions & 0 deletions apps/memos-local-plugin/core/capture/alpha-scorer.ts
@@ -17,6 +17,10 @@

import { ERROR_CODES, MemosError } from "../../agent-contract/errors.js";
import type { LlmClient } from "../llm/index.js";
import {
detectDominantLanguage,
languageSteeringLine,
} from "../llm/prompts/index.js";
import { REFLECTION_SCORE_PROMPT } from "../llm/prompts/reflection.js";
import { rootLogger } from "../logger/index.js";
import type { NormalizedStep, ReflectionScore } from "./types.js";
@@ -69,13 +73,23 @@ export async function scoreReflection(
.filter(Boolean)
.join("\n");

  // Match the `reason` string's language to the step's own language so
  // the Memories viewer doesn't mix Chinese + English per row.
const stepLang = detectDominantLanguage([
input.step.userText,
input.step.agentText,
input.step.agentThinking,
input.reflectionText,
]);

const rsp = await llm.completeJson<{
alpha: unknown;
usable: unknown;
reason?: unknown;
}>(
[
{ role: "system", content: REFLECTION_SCORE_PROMPT.system },
{ role: "system", content: languageSteeringLine(stepLang) },
{ role: "user", content: userPayload },
],
{
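A usage sketch of the steering wiring above (sample text hypothetical; only the "auto" branch of `languageSteeringLine` is visible in this PR):

```ts
// 7 CJK chars vs 3 ASCII letters → CJK share 0.7 ≥ 0.2, so the detector
// picks "zh" and the extra system message asks the model to reply in Chinese.
const lang = detectDominantLanguage(["帮我修一下这个 bug"]); // "zh"
const steer = languageSteeringLine(lang); // zh/en wording not shown in this diff
```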
18 changes: 18 additions & 0 deletions apps/memos-local-plugin/core/capture/batch-scorer.ts
@@ -30,6 +30,10 @@

import { ERROR_CODES, MemosError } from "../../agent-contract/errors.js";
import type { LlmClient } from "../llm/index.js";
import {
detectDominantLanguage,
languageSteeringLine,
} from "../llm/prompts/index.js";
import { BATCH_REFLECTION_PROMPT } from "../llm/prompts/reflection.js";
import { rootLogger } from "../logger/index.js";
import type { NormalizedStep, ReflectionScore } from "./types.js";
@@ -131,9 +135,23 @@ export async function batchScoreReflections(
})),
};

// Reflections are first-person narrations — written in the same
// language the user + agent were speaking so the Memories panel
// stays coherent. Detect once per batch from the aggregate turn
// texts; all steps in one episode share a language in practice.
const reflectionLang = detectDominantLanguage(
inputs.flatMap((i) => [
i.step.userText,
i.step.agentText,
i.step.agentThinking,
i.existingReflection,
]),
);

const rsp = await llm.completeJson<BatchPayload>(
[
{ role: "system", content: BATCH_REFLECTION_PROMPT.system },
{ role: "system", content: languageSteeringLine(reflectionLang) },
{ role: "user", content: JSON.stringify(payload) },
],
{
17 changes: 17 additions & 0 deletions apps/memos-local-plugin/core/capture/capture.ts
@@ -534,6 +534,12 @@ export function createCaptureRunner(deps: CaptureDeps): CaptureRunner {
}),
vecSummary: t.vecSummary,
vecAction: t.vecAction,
// step-extractor stamps every sub-step that came from the same
// user message with a stable `turnId` (= the user turn's ts).
// The viewer collapses rows with identical (episodeId, turnId)
// into a single "one round = one memory" card; algorithm-side
// machinery ignores the field.
turnId: pickTurnId(t.meta, t.ts),
schemaVersion: 1,
}));
}
@@ -798,3 +804,14 @@ function errDetail(err: unknown): Record<string, unknown> {
if (err instanceof Error) return { name: err.name, message: err.message };
return { value: String(err) };
}

/**
* Pull the `turnId` stamped by `step-extractor` out of the
* `StepCandidate.meta` blob. Falls back to the trace's own `ts` so
* old fixtures that pre-date the field still group as a singleton
* (one row → one card). Always returns a finite number.
*/
function pickTurnId(meta: Record<string, unknown> | undefined, fallbackTs: number): number {
  const raw = meta?.turnId;
  return typeof raw === "number" && Number.isFinite(raw) ? raw : fallbackTs;
}
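Behavior sketch for `pickTurnId` (timestamps made up):

```ts
pickTurnId({ turnId: 1712000000000 }, 1712000009999); // → 1712000000000 (stamp from step-extractor wins)
pickTurnId({ turnId: "bad" }, 1712000009999);         // → 1712000009999 (non-numeric value ignored)
pickTurnId(undefined, 1712000009999);                 // → 1712000009999 (old fixture → singleton card)
```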
35 changes: 31 additions & 4 deletions apps/memos-local-plugin/core/capture/step-extractor.ts
@@ -17,6 +17,11 @@
* - `toolCalls` = [single ToolCallDTO with input + output]
* - `agentThinking` = model thinking (first sub-step only, since
* the host provides thinking as a single blob)
* - `meta.turnId` = the user turn's `ts`. Stable identifier shared
* by every sub-step that came from the same user message — the
* viewer uses it to collapse the row of sub-steps back into a
* single "one round = one memory" card while the algorithm pipe-
* line keeps operating on the step-level traces.
*
* This matches the algorithm spec `f(1)_{k,t} = (s, a, o, ρ, r)` where
* each tool invocation is an independent action `a` with its own
@@ -68,7 +73,7 @@ export function extractSteps(episode: EpisodeSnapshot): StepCandidate[] {
rawReflection: null,
depth: depthFromMeta(episode.meta),
isSubagent: Boolean(episode.meta.isSubagent),
meta: { synthetic: true },
meta: { synthetic: true, turnId: firstUser.ts },
});
}
}
@@ -96,11 +101,17 @@ function segmentToSteps(
const thinkingParts: string[] = [];
let rawReflection: string | null = null;
let segMeta: Record<string, unknown> = {};
// Stable id shared by every sub-step of the same user message.
// Defaults to the first user turn's `ts`; falls back to the first
// turn's `ts` for assistant-only segments (rare, but the synthetic
// step path also relies on this).
let turnId: EpochMs | null = null;

for (const turn of turns) {
switch (turn.role) {
case "user":
userTexts.push(turn.content);
if (turnId === null) turnId = turn.ts;
break;
case "tool":
toolTurns.push(turn);
@@ -127,6 +138,10 @@
const depth = depthFromMeta({ ...episode.meta, ...segMeta });
const isSubagent = Boolean(segMeta.isSubagent ?? episode.meta.isSubagent);
const fullThinking = thinkingParts.join("\n\n").trim() || null;
// Fallback if the segment had no user turn (assistant-only segment
// produced by some adapters): anchor turnId on the first turn we
// ever saw so downstream group_by still has something stable.
const segTurnId: EpochMs = (turnId ?? turns[0]!.ts);

// ─── No tool calls → single step (backward compatible) ────────
if (toolTurns.length === 0) {
@@ -146,7 +161,7 @@
rawReflection,
depth,
isSubagent,
meta: segMeta,
meta: { ...segMeta, turnId: segTurnId },
}];
}

@@ -186,7 +201,13 @@
rawReflection: null,
depth,
isSubagent,
meta: { ...segMeta, subStep: true, subStepIdx: i, subStepTotal: total },
meta: {
...segMeta,
subStep: true,
subStepIdx: i,
subStepTotal: total,
turnId: segTurnId,
},
});
}

@@ -202,7 +223,13 @@
rawReflection,
depth,
isSubagent,
meta: { ...segMeta, subStep: true, subStepIdx: toolTurns.length, subStepTotal: total },
meta: {
...segMeta,
subStep: true,
subStepIdx: toolTurns.length,
subStepTotal: total,
turnId: segTurnId,
},
});
}

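A worked example of the stamping (timestamps hypothetical): one user message followed by two tool calls and a trailing answer yields three step candidates, all sharing the user turn's `ts` as `turnId`:

```ts
// user @ ts = t0, then tool#1, tool#2, assistant answer:
//   step A (tool#1): meta { subStep: true, subStepIdx: 0, subStepTotal: 3, turnId: t0 }
//   step B (tool#2): meta { subStep: true, subStepIdx: 1, subStepTotal: 3, turnId: t0 }
//   step C (answer): meta { subStep: true, subStepIdx: 2, subStepTotal: 3, turnId: t0 }
// The viewer groups A/B/C by (episodeId, turnId = t0) into one card.
```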
45 changes: 45 additions & 0 deletions apps/memos-local-plugin/core/llm/prompts/index.ts
@@ -37,3 +37,48 @@ export function languageSteeringLine(lang: "auto" | "zh" | "en"): string {
return "Answer in the same natural language the user used. Do not mix languages.";
}
}

/**
* Detect the dominant natural language of a set of text samples.
*
* Used by knowledge-generation callers (skill crystallization, L2
* induction, L3 abstraction, reflection synthesis) to decide whether to
* emit the generated knowledge in Chinese or English, matching the
* user's original query/evidence language.
*
* Heuristic:
* - Count CJK Unified Ideographs (U+4E00..U+9FFF) as `zh`.
* - Count ASCII letters A-Z/a-z as `en`.
* - If total signal is too small (< `minSignal`), fall back to
* "auto" — caller will emit a neutral "match user language"
* directive.
* - Otherwise if ≥ 20% of the counted signal is CJK, pick "zh"
* (Chinese is very information-dense per character and tends to
* be interleaved with ASCII tokens like filenames/commands).
* - Else if ≥ 70% is ASCII letters, pick "en".
* - Otherwise "auto".
*
* Deliberately small and allocation-free — this runs on every
* knowledge-generation LLM call.
*/
export function detectDominantLanguage(
samples: ReadonlyArray<string | null | undefined>,
opts: { minSignal?: number } = {},
): "auto" | "zh" | "en" {
const minSignal = opts.minSignal ?? 8;
let zh = 0;
let en = 0;
for (const s of samples) {
if (!s) continue;
for (let i = 0; i < s.length; i++) {
const code = s.charCodeAt(i);
if (code >= 0x4e00 && code <= 0x9fff) zh++;
else if ((code >= 0x41 && code <= 0x5a) || (code >= 0x61 && code <= 0x7a)) en++;
}
}
const total = zh + en;
if (total < minSignal) return "auto";
if (zh / total >= 0.2) return "zh";
if (en / total >= 0.7) return "en";
return "auto";
}
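A quick check of the thresholds (samples hypothetical):

```ts
detectDominantLanguage(["Fix the build", "run npm test"]); // "en": 21 ASCII letters, 0 CJK → en share 1.0 ≥ 0.7
detectDominantLanguage(["修复构建失败的问题,重试 npm test"]); // "zh": 11 CJK vs 7 ASCII → zh share ≈ 0.61 ≥ 0.2
detectDominantLanguage(["ok"]);                            // "auto": 2 letters < minSignal (8)
```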
88 changes: 75 additions & 13 deletions apps/memos-local-plugin/core/llm/prompts/l2-induction.ts
@@ -7,30 +7,92 @@ import type { PromptDef } from "./index.js";
* state + similar action), distill a candidate L2 policy that describes
* "when you see X, prefer Y because Z". The candidate is still probationary
* until the evaluator confirms it raises task success.
*
* Boundary contract (see `docs/GRANULARITY-AND-MEMORY-LAYERS.md` §6):
* an L2 policy is **procedural** ("how to do it") — it MUST contain an
* action template. Anything declarative ("the environment looks like X")
* belongs to the L3 world model, not here. The system prompt explicitly
* rejects environment-fact drift to keep the two layers semantically
* orthogonal. Bumping the version to v2 captures that change.
*/
export const L2_INDUCTION_PROMPT: PromptDef = {
id: "l2.induction",
version: 1,
description: "Distill an L2 policy from a cluster of similar L1 traces.",
system: `You induce reusable policies from agent experience.
version: 2,
description:
"Distill an L2 policy (procedural sub-task strategy) from a cluster of similar L1 traces, with explicit boundaries against L3 world-model drift.",
system: `You induce reusable **procedural policies** from agent experience.

A policy is a "how-to": "when you see condition X in the agent's state,
do action Y, verify with Z, watch out for caveat W." It is **NOT** a
description of the environment.

Input TRACES: a list of { state_summary, action, outcome, utility } records
that all share a similar state signature.

Produce ONE policy describing the pattern, ready to be referenced later by
future turns. The policy must:
- Name a TRIGGER condition recognizable from state alone.
- Prescribe an ACTION template (not a single exact command).
- Note at least one CAVEAT or failure mode observed in the traces.
- Not restate a single example — generalize.
Produce ONE policy describing the action pattern. The policy must:
- Name a TRIGGER recognizable from the agent's STATE — a condition the
agent can detect at the moment of decision (an error code, a missing
file, a request shape). NOT a fact about the environment in general.
- Prescribe an ACTION template — a parameterized step or short step
sequence. Templates over single exact commands. NOT a single example.
- Note at least one CAVEAT or failure mode observed in the traces — a
step-level pitfall, NOT a generic environment taboo.
- Generalize across the input traces, not restate one of them.

──────────────────── Boundaries — what NOT to write ────────────────────

This output is a **procedural policy**, not an environment world model.
The world model lives in a separate layer (L3) generated by a different
prompt. Cross-contamination on either side dilutes both.

Do NOT write any of these — they belong to L3 (env world model), not here:
- Topology facts: "Alpine containers ship musl libc"
"Python deps form a 3-layer stack"
"src/components/ holds React components"
- Environment behavioural rules (in pure declarative form):
"binary wheels are incompatible with musl"
"the service reads config only at startup"
- Environment taboos detached from a specific action choice:
"this directory is read-only"
"production tables shouldn't be DROPped lightly"

If a trace tells you the environment looks a certain way, FOLD that fact
INTO the trigger or caveat as a state-level CONDITION the agent can
check, not as a standalone description. Example:

Wrong (drifts into env-fact):
trigger: "Alpine ships musl libc"
caveats: ["Python deps have a 3-layer stack"]

Right (states it as actionable conditions):
trigger: "container is Alpine AND pip install fails with
'<lib> not found' or 'header not found'"
caveats: ["if first apk add still fails, also check musl-vs-glibc
wheel compatibility before retrying"]

──────────────────── Same fact, two framings ─────────────────────

If the underlying truth is "Alpine containers don't ship system dev
libs by default":

Express here (procedural):
"When pip install fails inside an Alpine container with a missing
system library, run apk add <pkg>-dev then retry pip."

Do NOT express here (declarative — that's L3's job):
"Alpine container images ship only the pure-Python tier of the
Python dependency stack."

──────────────────── Output ─────────────────────

Return JSON:
{
"title": "short imperative title",
"trigger": "when should this policy fire?",
"action": "what to do, templated",
"rationale": "why this works, grounded in the traces",
"caveats": ["caveat string", ...],
"trigger": "state-level condition the agent can detect",
"action": "templated step or step sequence",
"rationale": "why this action works ON THESE TRACES (not why the
environment behaves this way)",
"caveats": ["step-level pitfall string", ...],
"confidence": number in [0, 1],
"support_trace_ids": ["tr_...", ...]
}`,
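For reference (not part of the diff): the JSON this v2 prompt requests, written as a TypeScript shape. The interface name is hypothetical; the fields mirror the Output section of the system prompt above.

```ts
// Hypothetical shape for the model's reply to the l2.induction prompt.
interface L2PolicyCandidate {
  title: string;               // short imperative title
  trigger: string;             // state-level condition the agent can detect
  action: string;              // templated step or step sequence
  rationale: string;           // why the action works on these traces
  caveats: string[];           // step-level pitfalls
  confidence: number;          // in [0, 1]
  support_trace_ids: string[]; // ids like "tr_..."
}
```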