diff --git a/apps/memos-local-plugin/agent-contract/dto.ts b/apps/memos-local-plugin/agent-contract/dto.ts index 7b2353a71..c5f883aa6 100644 --- a/apps/memos-local-plugin/agent-contract/dto.ts +++ b/apps/memos-local-plugin/agent-contract/dto.ts @@ -143,6 +143,17 @@ export interface TraceDTO { rHuman?: Reward; /** Cached priority used for L2 candidate selection. */ priority: number; + /** + * Stable group key shared by every L1 trace produced from the same + * user message. Equal to the user turn's `ts` (epoch ms). The + * viewer collapses rows with identical `(episodeId, turnId)` into + * a single "one round = one memory" card; algorithm-side machinery + * (V/α/L2/Tier 2/Decision Repair) ignores the field. + * + * Optional because rows written before migration 013 have NULL + * `turn_id`; the viewer falls back to per-row rendering for them. + */ + turnId?: EpochMs | null; } /** diff --git a/apps/memos-local-plugin/core/capture/alpha-scorer.ts b/apps/memos-local-plugin/core/capture/alpha-scorer.ts index e165b9fa6..c4346396c 100644 --- a/apps/memos-local-plugin/core/capture/alpha-scorer.ts +++ b/apps/memos-local-plugin/core/capture/alpha-scorer.ts @@ -17,6 +17,10 @@ import { ERROR_CODES, MemosError } from "../../agent-contract/errors.js"; import type { LlmClient } from "../llm/index.js"; +import { + detectDominantLanguage, + languageSteeringLine, +} from "../llm/prompts/index.js"; import { REFLECTION_SCORE_PROMPT } from "../llm/prompts/reflection.js"; import { rootLogger } from "../logger/index.js"; import type { NormalizedStep, ReflectionScore } from "./types.js"; @@ -69,6 +73,15 @@ export async function scoreReflection( .filter(Boolean) .join("\n"); + // Match the `reason` string's language to the step's own language so + // the Memories viewer doesn't mix 中文 + English per row. 
+ const stepLang = detectDominantLanguage([ + input.step.userText, + input.step.agentText, + input.step.agentThinking, + input.reflectionText, + ]); + const rsp = await llm.completeJson<{ alpha: unknown; usable: unknown; @@ -76,6 +89,7 @@ export async function scoreReflection( }>( [ { role: "system", content: REFLECTION_SCORE_PROMPT.system }, + { role: "system", content: languageSteeringLine(stepLang) }, { role: "user", content: userPayload }, ], { diff --git a/apps/memos-local-plugin/core/capture/batch-scorer.ts b/apps/memos-local-plugin/core/capture/batch-scorer.ts index 1dcf0f9e5..6a35c5806 100644 --- a/apps/memos-local-plugin/core/capture/batch-scorer.ts +++ b/apps/memos-local-plugin/core/capture/batch-scorer.ts @@ -30,6 +30,10 @@ import { ERROR_CODES, MemosError } from "../../agent-contract/errors.js"; import type { LlmClient } from "../llm/index.js"; +import { + detectDominantLanguage, + languageSteeringLine, +} from "../llm/prompts/index.js"; import { BATCH_REFLECTION_PROMPT } from "../llm/prompts/reflection.js"; import { rootLogger } from "../logger/index.js"; import type { NormalizedStep, ReflectionScore } from "./types.js"; @@ -131,9 +135,23 @@ export async function batchScoreReflections( })), }; + // Reflections are first-person narrations — written in the same + // language the user + agent were speaking so the Memories panel + // stays coherent. Detect once per batch from the aggregate turn + // texts; all steps in one episode share a language in practice. 
+ const reflectionLang = detectDominantLanguage( + inputs.flatMap((i) => [ + i.step.userText, + i.step.agentText, + i.step.agentThinking, + i.existingReflection, + ]), + ); + const rsp = await llm.completeJson( [ { role: "system", content: BATCH_REFLECTION_PROMPT.system }, + { role: "system", content: languageSteeringLine(reflectionLang) }, { role: "user", content: JSON.stringify(payload) }, ], { diff --git a/apps/memos-local-plugin/core/capture/capture.ts b/apps/memos-local-plugin/core/capture/capture.ts index 8bae80169..fd1f55168 100644 --- a/apps/memos-local-plugin/core/capture/capture.ts +++ b/apps/memos-local-plugin/core/capture/capture.ts @@ -534,6 +534,12 @@ export function createCaptureRunner(deps: CaptureDeps): CaptureRunner { }), vecSummary: t.vecSummary, vecAction: t.vecAction, + // step-extractor stamps every sub-step that came from the same + // user message with a stable `turnId` (= the user turn's ts). + // The viewer collapses rows with identical (episodeId, turnId) + // into a single "one round = one memory" card; algorithm-side + // machinery ignores the field. + turnId: pickTurnId(t.meta, t.ts), schemaVersion: 1, })); } @@ -798,3 +804,14 @@ function errDetail(err: unknown): Record { if (err instanceof Error) return { name: err.name, message: err.message }; return { value: String(err) }; } + +/** + * Pull the `turnId` stamped by `step-extractor` out of the + * `StepCandidate.meta` blob. Falls back to the trace's own `ts` so + * old fixtures that pre-date the field still group as a singleton + * (one row → one card). Always returns a finite number. + */ +function pickTurnId(meta: Record | undefined, fallbackTs: number): number { + const raw = (meta as Record | undefined)?.turnId; + return typeof raw === "number" && Number.isFinite(raw) ? 
raw : fallbackTs; +} diff --git a/apps/memos-local-plugin/core/capture/step-extractor.ts b/apps/memos-local-plugin/core/capture/step-extractor.ts index 2e566930f..fd0137b60 100644 --- a/apps/memos-local-plugin/core/capture/step-extractor.ts +++ b/apps/memos-local-plugin/core/capture/step-extractor.ts @@ -17,6 +17,11 @@ * - `toolCalls` = [single ToolCallDTO with input + output] * - `agentThinking` = model thinking (first sub-step only, since * the host provides thinking as a single blob) + * - `meta.turnId` = the user turn's `ts`. Stable identifier shared + * by every sub-step that came from the same user message — the + * viewer uses it to collapse the row of sub-steps back into a + * single "one round = one memory" card while the algorithm pipe- + * line keeps operating on the step-level traces. * * This matches the algorithm spec `f(1)_{k,t} = (s, a, o, ρ, r)` where * each tool invocation is an independent action `a` with its own @@ -68,7 +73,7 @@ export function extractSteps(episode: EpisodeSnapshot): StepCandidate[] { rawReflection: null, depth: depthFromMeta(episode.meta), isSubagent: Boolean(episode.meta.isSubagent), - meta: { synthetic: true }, + meta: { synthetic: true, turnId: firstUser.ts }, }); } } @@ -96,11 +101,17 @@ function segmentToSteps( const thinkingParts: string[] = []; let rawReflection: string | null = null; let segMeta: Record = {}; + // Stable id shared by every sub-step of the same user message. + // Defaults to the first user turn's `ts`; falls back to the first + // turn's `ts` for assistant-only segments (rare, but the synthetic + // step path also relies on this). + let turnId: EpochMs | null = null; for (const turn of turns) { switch (turn.role) { case "user": userTexts.push(turn.content); + if (turnId === null) turnId = turn.ts; break; case "tool": toolTurns.push(turn); @@ -127,6 +138,10 @@ function segmentToSteps( const depth = depthFromMeta({ ...episode.meta, ...segMeta }); const isSubagent = Boolean(segMeta.isSubagent ?? 
episode.meta.isSubagent); const fullThinking = thinkingParts.join("\n\n").trim() || null; + // Fallback if the segment had no user turn (assistant-only segment + // produced by some adapters): anchor turnId on the first turn we + // ever saw so downstream group_by still has something stable. + const segTurnId: EpochMs = (turnId ?? turns[0]!.ts); // ─── No tool calls → single step (backward compatible) ──────── if (toolTurns.length === 0) { @@ -146,7 +161,7 @@ function segmentToSteps( rawReflection, depth, isSubagent, - meta: segMeta, + meta: { ...segMeta, turnId: segTurnId }, }]; } @@ -186,7 +201,13 @@ function segmentToSteps( rawReflection: null, depth, isSubagent, - meta: { ...segMeta, subStep: true, subStepIdx: i, subStepTotal: total }, + meta: { + ...segMeta, + subStep: true, + subStepIdx: i, + subStepTotal: total, + turnId: segTurnId, + }, }); } @@ -202,7 +223,13 @@ function segmentToSteps( rawReflection, depth, isSubagent, - meta: { ...segMeta, subStep: true, subStepIdx: toolTurns.length, subStepTotal: total }, + meta: { + ...segMeta, + subStep: true, + subStepIdx: toolTurns.length, + subStepTotal: total, + turnId: segTurnId, + }, }); } diff --git a/apps/memos-local-plugin/core/llm/prompts/index.ts b/apps/memos-local-plugin/core/llm/prompts/index.ts index ee03247ed..a809c8800 100644 --- a/apps/memos-local-plugin/core/llm/prompts/index.ts +++ b/apps/memos-local-plugin/core/llm/prompts/index.ts @@ -37,3 +37,48 @@ export function languageSteeringLine(lang: "auto" | "zh" | "en"): string { return "Answer in the same natural language the user used. Do not mix languages."; } } + +/** + * Detect the dominant natural language of a set of text samples. + * + * Used by knowledge-generation callers (skill crystallization, L2 + * induction, L3 abstraction, reflection synthesis) to decide whether to + * emit the generated knowledge in Chinese or English, matching the + * user's original query/evidence language. 
+ * + * Heuristic: + * - Count CJK Unified Ideographs (U+4E00..U+9FFF) as `zh`. + * - Count ASCII letters A-Z/a-z as `en`. + * - If total signal is too small (< `minSignal`), fall back to + * "auto" — caller will emit a neutral "match user language" + * directive. + * - Otherwise if ≥ 20% of the counted signal is CJK, pick "zh" + * (Chinese is very information-dense per character and tends to + * be interleaved with ASCII tokens like filenames/commands). + * - Else if ≥ 70% is ASCII letters, pick "en". + * - Otherwise "auto". + * + * Deliberately small and allocation-free — this runs on every + * knowledge-generation LLM call. + */ +export function detectDominantLanguage( + samples: ReadonlyArray, + opts: { minSignal?: number } = {}, +): "auto" | "zh" | "en" { + const minSignal = opts.minSignal ?? 8; + let zh = 0; + let en = 0; + for (const s of samples) { + if (!s) continue; + for (let i = 0; i < s.length; i++) { + const code = s.charCodeAt(i); + if (code >= 0x4e00 && code <= 0x9fff) zh++; + else if ((code >= 0x41 && code <= 0x5a) || (code >= 0x61 && code <= 0x7a)) en++; + } + } + const total = zh + en; + if (total < minSignal) return "auto"; + if (zh / total >= 0.2) return "zh"; + if (en / total >= 0.7) return "en"; + return "auto"; +} diff --git a/apps/memos-local-plugin/core/llm/prompts/l2-induction.ts b/apps/memos-local-plugin/core/llm/prompts/l2-induction.ts index fa8c924d6..beed29fa5 100644 --- a/apps/memos-local-plugin/core/llm/prompts/l2-induction.ts +++ b/apps/memos-local-plugin/core/llm/prompts/l2-induction.ts @@ -7,30 +7,92 @@ import type { PromptDef } from "./index.js"; * state + similar action), distill a candidate L2 policy that describes * "when you see X, prefer Y because Z". The candidate is still probationary * until the evaluator confirms it raises task success. + * + * Boundary contract (see `docs/GRANULARITY-AND-MEMORY-LAYERS.md` §6): + * an L2 policy is **procedural** ("how to do it") — it MUST contain an + * action template. 
Anything declarative ("the environment looks like X") + * belongs to the L3 world model, not here. The system prompt explicitly + * rejects environment-fact drift to keep the two layers semantically + * orthogonal. Bumping the version to v2 captures that change. */ export const L2_INDUCTION_PROMPT: PromptDef = { id: "l2.induction", - version: 1, - description: "Distill an L2 policy from a cluster of similar L1 traces.", - system: `You induce reusable policies from agent experience. + version: 2, + description: + "Distill an L2 policy (procedural sub-task strategy) from a cluster of similar L1 traces, with explicit boundaries against L3 world-model drift.", + system: `You induce reusable **procedural policies** from agent experience. + +A policy is a "how-to": "when you see condition X in the agent's state, +do action Y, verify with Z, watch out for caveat W." It is **NOT** a +description of the environment. Input TRACES: a list of { state_summary, action, outcome, utility } records that all share a similar state signature. -Produce ONE policy describing the pattern, ready to be referenced later by -future turns. The policy must: -- Name a TRIGGER condition recognizable from state alone. -- Prescribe an ACTION template (not a single exact command). -- Note at least one CAVEAT or failure mode observed in the traces. -- Not restate a single example — generalize. +Produce ONE policy describing the action pattern. The policy must: +- Name a TRIGGER recognizable from the agent's STATE — a condition the + agent can detect at the moment of decision (an error code, a missing + file, a request shape). NOT a fact about the environment in general. +- Prescribe an ACTION template — a parameterized step or short step + sequence. Templates over single exact commands. NOT a single example. +- Note at least one CAVEAT or failure mode observed in the traces — a + step-level pitfall, NOT a generic environment taboo. +- Generalize across the input traces, not restate one of them. 
+ +──────────────────── Boundaries — what NOT to write ──────────────────── + +This output is a **procedural policy**, not an environment world model. +The world model lives in a separate layer (L3) generated by a different +prompt. Cross-contamination on either side dilutes both. + +Do NOT write any of these — they belong to L3 (env world model), not here: + - Topology facts: "Alpine containers ship musl libc" + "Python deps form a 3-layer stack" + "src/components/ holds React components" + - Environment behavioural rules (in pure declarative form): + "binary wheels are incompatible with musl" + "the service reads config only at startup" + - Environment taboos detached from a specific action choice: + "this directory is read-only" + "production tables shouldn't be DROPped lightly" + +If a trace tells you the environment looks a certain way, FOLD that fact +INTO the trigger or caveat as a state-level CONDITION the agent can +check, not as a standalone description. Example: + + Wrong (drifts into env-fact): + trigger: "Alpine ships musl libc" + caveats: ["Python deps have a 3-layer stack"] + + Right (states it as actionable conditions): + trigger: "container is Alpine AND pip install fails with + ' not found' or 'header not found'" + caveats: ["if first apk add still fails, also check musl-vs-glibc + wheel compatibility before retrying"] + +──────────────────── Same fact, two framings ───────────────────── + +If the underlying truth is "Alpine containers don't ship system dev +libs by default": + + Express here (procedural): + "When pip install fails inside an Alpine container with a missing + system library, run apk add -dev then retry pip." + + Do NOT express here (declarative — that's L3's job): + "Alpine container images ship only the pure-Python tier of the + Python dependency stack." 
+ +──────────────────── Output ───────────────────── Return JSON: { "title": "short imperative title", - "trigger": "when should this policy fire?", - "action": "what to do, templated", - "rationale": "why this works, grounded in the traces", - "caveats": ["caveat string", ...], + "trigger": "state-level condition the agent can detect", + "action": "templated step or step sequence", + "rationale": "why this action works ON THESE TRACES (not why the + environment behaves this way)", + "caveats": ["step-level pitfall string", ...], "confidence": number in [0, 1], "support_trace_ids": ["tr_...", ...] }`, diff --git a/apps/memos-local-plugin/core/llm/prompts/l3-abstraction.ts b/apps/memos-local-plugin/core/llm/prompts/l3-abstraction.ts index 5e1aa5651..193927a89 100644 --- a/apps/memos-local-plugin/core/llm/prompts/l3-abstraction.ts +++ b/apps/memos-local-plugin/core/llm/prompts/l3-abstraction.ts @@ -9,43 +9,97 @@ import type { PromptDef } from "./index.js"; * * Output must follow the V7 triple (ℰ, ℐ, C): * - environment: topology facts ("src/ contains components/, utils/, …") - * - inference: behavioural rules ("pip fails in alpine → musl wheels") + * - inference: behavioural rules ("Alpine ships musl libc; binary + * wheels built against glibc fail to load") * - constraints: taboos ("don't edit node_modules/") * * The LLM also names up to 4 `domain_tags` — stable short strings * (`docker`, `node`, `npm`) we use for Tier-3 retrieval and for merging * future world models into the same row. * - * We deliberately do NOT include `procedure` / `action` fields here — - * that is L2's job. A good world model generalises above actions. + * Boundary contract (see `docs/GRANULARITY-AND-MEMORY-LAYERS.md` §6): + * A world model is **declarative** ("how the environment is"), not + * **procedural** ("what to do"). Procedural knowledge belongs to the + * L2 layer; this prompt explicitly rejects action-prescription drift to + * keep the two layers semantically orthogonal. 
Bumping to v2 captures + * that change. */ export const L3_ABSTRACTION_PROMPT: PromptDef = { id: "l3.abstraction", - version: 1, - description: "Distill an L3 world model from a cluster of L2 policies.", + version: 2, + description: + "Distill an L3 world model (declarative environment knowledge) from a cluster of L2 policies, with explicit boundaries against L2 procedural drift.", system: `You abstract environment world models from cross-task policy evidence. -Input POLICIES: a list of L2 policies (with trigger / procedure / verification / -boundary / support / gain), plus a short sample of the L1 traces that minted -each. Every policy shares a compatible domain (matched by primary tag / tool). +A world model is **declarative** knowledge about how the environment IS: +its topology, its causal/behavioural regularities, its taboos. It is +**NOT** a recipe for what to do — that lives in the L2 procedural +layer, generated by a separate prompt. Cross-contamination on either +side dilutes both. + +Input POLICIES: a list of L2 policies (with trigger / procedure / +verification / boundary / support / gain), plus a short sample of the L1 +traces that minted each. Every policy shares a compatible domain +(matched by primary tag / tool). Produce ONE world model describing the **environment** those policies operate in. It must answer: - - Environment topology (ℰ) — what lives where, what is the shape of - this environment? (e.g. "Alpine containers ship musl libc, no - pre-built binary wheels"; "Node repos group code under src/") - - Inference rules (ℐ) — how does the environment typically respond - to common actions? (e.g. "pip install fails → compile path needs - dev libs"; "npm publish rejects scope mismatch") - - Constraints (C) — what must you NOT do here? (e.g. "don't - edit node_modules/ directly"; "don't use binary wheels on musl") - -Do NOT: - - Prescribe a procedure — that belongs to L2. 
+ - Environment topology (ℰ) — what lives where, what is the shape of + this environment? Pure facts of existence and structure. + GOOD: "Alpine containers ship musl libc, no glibc" + "Node project repos group source under src/" + "macOS bundles BSD sed; Linux distros bundle GNU sed" + BAD (drifts into procedure): + "use apk add to install system libs" + "prefer Python scripts over sed on macOS" + + - Inference rules (ℐ) — how does the environment causally respond to + common stimuli? Phrase as cause→effect, NOT as guidance. + GOOD: "loading a glibc-linked binary wheel inside Alpine raises a + dynamic-link error" + "editing config.yaml does not propagate until the process + restarts (no in-process watcher)" + BAD (drifts into procedure): + "if pip install fails, install dev libs and retry" + ← that's an action plan, belongs to L2 + "always restart the service after editing config" + ← that's a recommendation, belongs to L2 + + - Constraints (C) — what facts of the environment make some actions + unsafe or invalid? State the FACT, not the avoidance behavior. + GOOD: "node_modules/ is rewritten by npm install; manual edits are + lost on the next sync" + "production database tables hold customer data; destructive + DDL is irreversible" + BAD (drifts into procedure): + "don't edit node_modules/ directly" + ← that's a behavioural rule, belongs to L2 / decision repair + "don't run DROP TABLE in production" + ← same — phrase the underlying environment fact instead + +Do NOT, under any section: + - Use imperative or recommendation verbs (do / don't / should / use / + prefer / avoid / try / install / run). The world model never tells + the agent what to do. - Restate a single trace — the model must generalise across policies. - Include advice tied to a single user or session. 
+──────────────────── Same fact, two framings ───────────────────── + +If the underlying truth is "Alpine containers don't ship system dev +libs by default": + + Express here (declarative): + inference: "Python C-extension packages fail to compile in Alpine + containers when the matching system header / library + package is not pre-installed in the image." + + Do NOT express here (procedural — that's L2's job): + "When pip fails in Alpine, apk add -dev and retry pip." + +──────────────────── Output ───────────────────── + Return JSON: { "title": "short noun phrase, e.g. 'Alpine python dependency model'", diff --git a/apps/memos-local-plugin/core/memory/l2/induce.ts b/apps/memos-local-plugin/core/memory/l2/induce.ts index 97d90c702..d5db18fd1 100644 --- a/apps/memos-local-plugin/core/memory/l2/induce.ts +++ b/apps/memos-local-plugin/core/memory/l2/induce.ts @@ -11,6 +11,10 @@ */ import { ERROR_CODES, MemosError } from "../../../agent-contract/errors.js"; +import { + detectDominantLanguage, + languageSteeringLine, +} from "../../llm/prompts/index.js"; import { L2_INDUCTION_PROMPT } from "../../llm/prompts/l2-induction.js"; import type { LlmClient } from "../../llm/index.js"; import type { Logger } from "../../logger/types.js"; @@ -58,6 +62,13 @@ export async function induceDraft( const userPayload = packTraces(input.evidenceTraces, input.charCap, input.signatureLabel); + // Match the induced policy's title/trigger/action/rationale to the + // dominant language of the evidence bucket — Chinese users expect + // their own L2 memories in 中文, English users expect English. 
+ const evidenceLang = detectDominantLanguage( + input.evidenceTraces.flatMap((t) => [t.userText, t.agentText, t.reflection]), + ); + try { const rsp = await llm.completeJson<{ title: unknown; @@ -72,6 +83,7 @@ export async function induceDraft( }>( [ { role: "system", content: L2_INDUCTION_PROMPT.system }, + { role: "system", content: languageSteeringLine(evidenceLang) }, { role: "user", content: userPayload }, ], { diff --git a/apps/memos-local-plugin/core/memory/l3/abstract.ts b/apps/memos-local-plugin/core/memory/l3/abstract.ts index d778ed709..a38cd38f1 100644 --- a/apps/memos-local-plugin/core/memory/l3/abstract.ts +++ b/apps/memos-local-plugin/core/memory/l3/abstract.ts @@ -7,6 +7,10 @@ */ import { ERROR_CODES, MemosError } from "../../../agent-contract/errors.js"; +import { + detectDominantLanguage, + languageSteeringLine, +} from "../../llm/prompts/index.js"; import { L3_ABSTRACTION_PROMPT } from "../../llm/prompts/l3-abstraction.js"; import type { LlmClient } from "../../llm/index.js"; import type { Logger } from "../../logger/types.js"; @@ -55,10 +59,24 @@ export async function abstractDraft( const userPayload = packPrompt(input, config); + // Pick the world-model's rendering language from the underlying + // policies + trace evidence. A Chinese user generating "docker alpine + // 依赖" policies should see the environment/inference/constraint bullets + // written in Chinese; an English user should see them in English. 
+ const langSamples: Array = []; + for (const p of input.cluster.policies) { + langSamples.push(p.title, p.trigger, p.procedure, p.boundary, p.verification); + } + for (const traces of input.evidenceByPolicy.values()) { + for (const t of traces) langSamples.push(t.userText, t.agentText, t.reflection); + } + const evidenceLang = detectDominantLanguage(langSamples); + try { const rsp = await llm.completeJson>( [ { role: "system", content: L3_ABSTRACTION_PROMPT.system }, + { role: "system", content: languageSteeringLine(evidenceLang) }, { role: "user", content: userPayload }, ], { diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts index 2d26d5670..8f5c1d60b 100644 --- a/apps/memos-local-plugin/core/pipeline/memory-core.ts +++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts @@ -1708,6 +1708,7 @@ export function createMemoryCore( tags: [], vecSummary: null, vecAction: null, + turnId: dto.turnId ?? null, schemaVersion: 1, } as TraceRow); imported++; @@ -2083,6 +2084,7 @@ function traceRowToDTO(row: TraceRow): TraceDTO { alpha: row.alpha, rHuman: row.rHuman ?? undefined, priority: row.priority, + turnId: row.turnId ?? null, }; } diff --git a/apps/memos-local-plugin/core/retrieval/injector.ts b/apps/memos-local-plugin/core/retrieval/injector.ts index 9f6dd2f85..01e6432b2 100644 --- a/apps/memos-local-plugin/core/retrieval/injector.ts +++ b/apps/memos-local-plugin/core/retrieval/injector.ts @@ -256,19 +256,23 @@ function renderWorldModel(c: WorldModelCandidate): InjectionSnippet { * adapter so downstream prompts see the same shape): * * ``` - * ## User's conversation history (from memory system) + * # User's conversation history (from memory system) * * IMPORTANT: The following are facts from previous conversations with * this user. You MUST treat these as established knowledge and use them * directly when answering. Do NOT say you don't know if the answer is * in these memories. 
* + * ## Memories + * * 1. [Trace · 2026-03-05 10:12] * [user] 我喜欢的运动是游泳 * [assistant] 记住了。 * refId="trace_xyz" * - * 2. [Skill · Python dependency fix] (η=0.82) + * ## Skills + * + * 1. [Skill · Python dependency fix] (η=0.82) * When container pip fails, install -dev OS lib first … * refId="skill_abc" * @@ -302,10 +306,10 @@ function renderWholePacket( // call". The bodies already carry the per-skill `skill_get(...)` // hint, so the agent knows how to expand them on demand. parts.push( - "# Candidate skills (call `skill_get` to load any you decide to use)\n", + "## Candidate skills (call `skill_get` to load any you decide to use)\n", ); } else { - parts.push("# Skills\n"); + parts.push("## Skills\n"); } skills.forEach((s, i) => { parts.push(renderNumberedSnippet(s, i + 1)); @@ -313,14 +317,14 @@ function renderWholePacket( } if (traces.length > 0) { - parts.push("# Memories\n"); + parts.push("## Memories\n"); traces.forEach((s, i) => { parts.push(renderNumberedSnippet(s, i + 1)); }); } if (worlds.length > 0) { - parts.push("# Environment Knowledge\n"); + parts.push("## Environment Knowledge\n"); worlds.forEach((s, i) => { parts.push(renderNumberedSnippet(s, i + 1)); }); @@ -340,21 +344,21 @@ function renderNumberedSnippet(s: InjectionSnippet, n: number): string { const HEADER_BY_REASON: Record = { turn_start: - "## User's conversation history (from memory system)\n\n" + + "# User's conversation history (from memory system)\n\n" + "IMPORTANT: The following are facts from previous conversations with this user.\n" + "You MUST treat these as established knowledge and use them directly when answering.\n" + "Do NOT say you don't know or don't have information if the answer is in these memories.", tool_driven: - "## Memory search results\n\n" + + "# Memory search results\n\n" + "The memory tool returned the following hits. 
They are ranked by relevance.", skill_invoke: - "## Invoked skill\n\n" + + "# Invoked skill\n\n" + "Follow the procedure below; the verification step tells you when you're done.", sub_agent: - "## Parent-agent context\n\n" + + "# Parent-agent context\n\n" + "Relevant memory surfaced for this sub-agent's mission.", decision_repair: - "## Decision repair — please read before your next action\n\n" + + "# Decision repair — please read before your next action\n\n" + "You have failed this tool multiple times in a row. Below are preferred / avoided actions\n" + "distilled from similar past situations. Please adapt your plan accordingly.", }; diff --git a/apps/memos-local-plugin/core/skill/crystallize.ts b/apps/memos-local-plugin/core/skill/crystallize.ts index 3e67c2f83..8c954abb4 100644 --- a/apps/memos-local-plugin/core/skill/crystallize.ts +++ b/apps/memos-local-plugin/core/skill/crystallize.ts @@ -10,6 +10,10 @@ */ import type { LlmClient } from "../llm/types.js"; +import { + detectDominantLanguage, + languageSteeringLine, +} from "../llm/prompts/index.js"; import { SKILL_CRYSTALLIZE_PROMPT } from "../llm/prompts/skill-crystallize.js"; import type { Logger } from "../logger/types.js"; import type { PolicyRow, SkillRow, TraceRow } from "../types.js"; @@ -64,10 +68,22 @@ export async function crystallizeDraft( const userPayload = packPrompt(input, config); + // Detect the language of the evidence so the crystallised skill's + // human-facing fields (display_title, summary, preconditions, steps, + // examples) come out in the same language the user was using. The + // `name` slug stays snake_case regardless — enforced by `sanitiseName`. 
+ const evidenceLang = detectDominantLanguage([ + input.policy.title, + input.policy.trigger, + input.policy.procedure, + ...input.evidence.flatMap((t) => [t.userText, t.agentText, t.reflection]), + ]); + try { const rsp = await llm.completeJson>( [ { role: "system", content: SKILL_CRYSTALLIZE_PROMPT.system }, + { role: "system", content: languageSteeringLine(evidenceLang) }, { role: "user", content: userPayload }, ], { diff --git a/apps/memos-local-plugin/core/storage/migrations/012-status-unification.sql b/apps/memos-local-plugin/core/storage/migrations/012-status-unification.sql index b20965f9c..fb6544d44 100644 --- a/apps/memos-local-plugin/core/storage/migrations/012-status-unification.sql +++ b/apps/memos-local-plugin/core/storage/migrations/012-status-unification.sql @@ -49,11 +49,15 @@ UPDATE skills -- ─── 2. Swap CHECK constraints in sqlite_master ─────────────────────── PRAGMA writable_schema = 1; +-- Note: SQLite treats double-quoted strings as identifiers in strict / +-- modern builds (it's also what `better-sqlite3` ≥ v11 enforces). All +-- string literals below use single quotes with inner quotes doubled +-- (standard SQL escape). 
UPDATE sqlite_master SET sql = replace( sql, - "CHECK (status IN ('candidate','active','retired'))", - "CHECK (status IN ('candidate','active','archived'))" + 'CHECK (status IN (''candidate'',''active'',''retired''))', + 'CHECK (status IN (''candidate'',''active'',''archived''))' ) WHERE type = 'table' AND name = 'policies'; @@ -61,8 +65,8 @@ UPDATE sqlite_master UPDATE sqlite_master SET sql = replace( sql, - "CHECK (status IN ('probationary','active','retired'))", - "CHECK (status IN ('candidate','active','archived'))" + 'CHECK (status IN (''probationary'',''active'',''retired''))', + 'CHECK (status IN (''candidate'',''active'',''archived''))' ) WHERE type = 'table' AND name = 'skills'; @@ -70,7 +74,7 @@ UPDATE sqlite_master -- Update default values so future INSERTs without a `status` land on -- `candidate` (skills used to default to `'probationary'`). UPDATE sqlite_master - SET sql = replace(sql, "DEFAULT 'probationary'", "DEFAULT 'candidate'") + SET sql = replace(sql, 'DEFAULT ''probationary''', 'DEFAULT ''candidate''') WHERE type = 'table' AND name = 'skills'; diff --git a/apps/memos-local-plugin/core/storage/migrations/013-trace-turn-id.sql b/apps/memos-local-plugin/core/storage/migrations/013-trace-turn-id.sql new file mode 100644 index 000000000..cd1a7f468 --- /dev/null +++ b/apps/memos-local-plugin/core/storage/migrations/013-trace-turn-id.sql @@ -0,0 +1,43 @@ +-- Migration 013 — add `turn_id` column to `traces`. +-- +-- What +-- ──── +-- A new INTEGER column (nullable) on `traces` that carries a stable +-- identifier shared by every L1 trace produced from the same user +-- message. Defaults to the user turn's `ts` (epoch ms). +-- +-- Why +-- ─── +-- V7 §0.1 keeps L1 traces at the step level (one tool call → one +-- trace, plus one trace for the final reply). Algorithm machinery — +-- reflection-weighted backprop, L2 incremental association, Tier-2 +-- error-signature retrieval, Decision Repair — all need that step +-- granularity. 
+-- +-- The viewer, however, wants to surface a coherent "one round = one +-- memory" card so users aren't drowned in N rows per question. The +-- frontend collapses sibling sub-steps into a single card by grouping +-- on `(episode_id, turn_id)`; this column is the stable group key +-- that survives reorderings, late-arriving rows, and partial +-- captures. +-- +-- Shape +-- ───── +-- `INTEGER NULL`. Filled in by `step-extractor` (writes the user +-- turn's `ts` into every sub-step's `meta.turnId`, which capture.ts +-- threads through to the row). Older rows from before this migration +-- stay NULL and the viewer falls back to per-row rendering for them. +-- +-- Indexing +-- ──────── +-- Indexed by `(episode_id, turn_id)` so the timeline endpoint can +-- group rows in a single scan without sorting in JS. +-- +-- FTS integration +-- ─────────────── +-- N/A — this is a numeric grouping key, not searchable text. + +ALTER TABLE traces ADD COLUMN turn_id INTEGER; + +CREATE INDEX IF NOT EXISTS idx_traces_episode_turn + ON traces(episode_id, turn_id, ts); diff --git a/apps/memos-local-plugin/core/storage/migrator.ts b/apps/memos-local-plugin/core/storage/migrator.ts index 33d0c5017..a6eb0c0a0 100644 --- a/apps/memos-local-plugin/core/storage/migrator.ts +++ b/apps/memos-local-plugin/core/storage/migrator.ts @@ -89,27 +89,43 @@ export function runMigrations(db: StorageDb, dir: string = defaultMigrationsDir( const applied: MigrationsResult["applied"] = []; let skipped = 0; - for (const file of allFiles) { - if (appliedVersions.has(file.version)) { - skipped++; - continue; + // better-sqlite3 ≥ v11 enables SQLITE_DBCONFIG_DEFENSIVE by default, which + // blocks writes to `sqlite_master` even when `PRAGMA writable_schema=ON`. + // A handful of migrations need that (e.g. 012 swaps CHECK constraints + // in-place). Migration files are shipped with the plugin and never user + // input, so turning unsafe mode on for the migration phase is safe. 
+ // `.unsafeMode()` may not be toggled inside a transaction, so we flip it + // at the outer boundary. + const needsUnsafe = allFiles.some( + (f) => !appliedVersions.has(f.version) && migrationNeedsUnsafeMode(f.fullPath), + ); + if (needsUnsafe) db.raw.unsafeMode(true); + + try { + for (const file of allFiles) { + if (appliedVersions.has(file.version)) { + skipped++; + continue; + } + const sql = fs.readFileSync(file.fullPath, "utf8"); + const t0 = now(); + db.tx(() => { + db.exec(sql); + db.prepare( + `INSERT INTO schema_migrations (version, name, applied_at) VALUES (@version, @name, @applied_at)`, + ).run({ version: file.version, name: file.name, applied_at: now() }); + }); + const durationMs = now() - t0; + applied.push({ version: file.version, name: file.name, durationMs }); + log.info("migration.applied", { + version: file.version, + name: file.name, + durationMs, + file: path.basename(file.fullPath), + }); } - const sql = fs.readFileSync(file.fullPath, "utf8"); - const t0 = now(); - db.tx(() => { - db.exec(sql); - db.prepare( - `INSERT INTO schema_migrations (version, name, applied_at) VALUES (@version, @name, @applied_at)`, - ).run({ version: file.version, name: file.name, applied_at: now() }); - }); - const durationMs = now() - t0; - applied.push({ version: file.version, name: file.name, durationMs }); - log.info("migration.applied", { - version: file.version, - name: file.name, - durationMs, - file: path.basename(file.fullPath), - }); + } finally { + if (needsUnsafe) db.raw.unsafeMode(false); } markReady(db); @@ -123,6 +139,16 @@ export function runMigrations(db: StorageDb, dir: string = defaultMigrationsDir( return { applied, skipped, total: allFiles.length }; } +/** + * Detect migrations that need `SQLITE_DBCONFIG_DEFENSIVE` relaxed. We + * look for the `writable_schema` pragma (the only legitimate reason to + * poke `sqlite_master` from SQL). 
+ */ +function migrationNeedsUnsafeMode(fullPath: string): boolean { + const sql = fs.readFileSync(fullPath, "utf8"); + return /PRAGMA\s+writable_schema/i.test(sql); +} + function ensureSchemaMigrationsTable(db: StorageDb): void { db.exec( `CREATE TABLE IF NOT EXISTS schema_migrations ( diff --git a/apps/memos-local-plugin/core/storage/repos/traces.ts b/apps/memos-local-plugin/core/storage/repos/traces.ts index 45ed38071..309d4e843 100644 --- a/apps/memos-local-plugin/core/storage/repos/traces.ts +++ b/apps/memos-local-plugin/core/storage/repos/traces.ts @@ -35,6 +35,7 @@ const COLUMNS = [ "share_scope", "share_target", "shared_at", + "turn_id", "schema_version", ]; @@ -475,6 +476,7 @@ interface RawTraceRow { share_scope: string | null; share_target: string | null; shared_at: number | null; + turn_id: number | null; schema_version: number; } @@ -524,6 +526,7 @@ function rowToParams(row: TraceRow): Record { share_scope: row.share?.scope ?? null, share_target: row.share?.target ?? null, shared_at: row.share?.sharedAt ?? null, + turn_id: row.turnId ?? null, schema_version: row.schemaVersion, }; } @@ -556,6 +559,7 @@ function mapRow(r: RawTraceRow): TraceRow { sharedAt: r.shared_at, } : null, + turnId: r.turn_id, schemaVersion: r.schema_version, }; } diff --git a/apps/memos-local-plugin/core/types.ts b/apps/memos-local-plugin/core/types.ts index b126d4871..d7756cd71 100644 --- a/apps/memos-local-plugin/core/types.ts +++ b/apps/memos-local-plugin/core/types.ts @@ -125,6 +125,19 @@ export interface TraceRow { errorSignatures?: string[]; vecSummary: EmbeddingVector | null; vecAction: EmbeddingVector | null; + /** + * Stable group key shared by every L1 trace that came from the same + * user message. `step-extractor` fills it with the user turn's `ts` + * (epoch ms); the viewer collapses traces with identical + * `(episodeId, turnId)` into a single "one round = one memory" + * card. 
Algorithm-side machinery (V/α/L2/Tier 2) ignores this + * field — it is purely a UI grouping anchor. + * + * Optional on the read side: rows written before migration 013 + * (`013-trace-turn-id`) are NULL and the viewer falls back to + * per-row rendering for them. + */ + turnId?: EpochMs | null; /** Schema version that wrote this row (helps with migrations). */ schemaVersion: number; } diff --git a/apps/memos-local-plugin/docs/GRANULARITY-AND-MEMORY-LAYERS.md b/apps/memos-local-plugin/docs/GRANULARITY-AND-MEMORY-LAYERS.md new file mode 100644 index 000000000..8d03b9451 --- /dev/null +++ b/apps/memos-local-plugin/docs/GRANULARITY-AND-MEMORY-LAYERS.md @@ -0,0 +1,365 @@ +# 粒度与认知层级 — 术语对齐 + +> 解决"小步 / 轮 / 任务 / 经验 / 环境认知 / 技能"几个概念之间反复混淆的根因。 +> 任何对实现细节有疑问的同学**先看这一篇**,再去读对应模块的 `ALGORITHMS.md`。 +> +> 配套阅读: +> - 算法规范:`docs/Reflect2Skill_算法设计核心.md` +> - 算法 ↔ 实现对照:`docs/ALGORITHM_ALIGNMENT.md` +> - 捕获管线:`core/capture/ALGORITHMS.md` +> - 检索三层:`core/retrieval/ALGORITHMS.md` + +--- + +## 1. 三个交互粒度(必须先锁死) + +| 术语 | 内容 | 实现里的对应 | +|---|---|---| +| **小步 (step)** | 一次 `(think + 动作 + 观测)` 的完整迭代。
具体形态有三种:
① 一个工具调用
② 一句模型对外的回复
③ `` 没调工具的纯问答 | = 一条 `traces` 表行 = 文档的 `f^(1)_{k,t}` | +| **轮 (turn / round)** | ``
用户感知的"一次问答" | = 多条 `traces` 共享同一 `turn_id`
= 前端 `MemoryGroup` 一张卡
**算法层不存在这个概念** | +| **任务 (task / episode)** | 同一 topic 的多个轮串成的完整任务
`<轮_1 + 轮_2 + …>` | = `episodes` 表一行 = 同一 `episode_id` 的所有 `traces` | + +> **关键认知**:**算法所有操作的粒度只有"小步"和"任务"两种**。 +> "轮"是给用户看的视觉聚合,对算法**完全不可见**——不打分、不检索、不诱导、不结晶。 + +代码中保证这一点的几处关键设计: +- `core/capture/step-extractor.ts` 把一个用户消息触发的所有动作拆成多个小步 sub-step,全部带同一个 `turnId` 写入 `traces.turn_id`。 +- 前端 `web/src/views/MemoriesView.tsx::buildGroups` 按 `(episodeId, turnId)` 把多条小步聚合成一张卡片显示。 +- 检索路径 `core/retrieval/tier2-trace.ts` 永远以 `traces` 行为最小单位,从不读 `turn_id`。 + +--- + +## 2. 打分粒度 + +每个动作有自己的"分"吗?看下表: + +| 对象 | 有自己的分吗 | 字段 / 来源 | +|---|---|---| +| **小步** | **有**:反思权重 `α_t` + 步价值 `V_t` | `traces.alpha` / `traces.value`,由反思加权回溯算出 | +| **任务** | **有**:终局奖励 `R_human` | `episodes.r_task`,任务结束时 LLM 按 rubric 打分 | +| **轮** | **没有** | 前端展示时取轮内 V 平均值凑一个,仅作展示 | + +### 打分的实际触发流程 + +按算法文档 §0.6 + `core/capture/capture.ts::runReflect`: + +``` +任务结束 (episode.finalized 事件) + ↓ +1 次 LLM 批量调用 + ├─ 算 R_human (任务级,1 个标量) + └─ 给任务内每个小步打 α_t (N 个权重) + ↓ +反思加权回溯 (core/reward/backprop.ts) + ├─ V_T = R_human ← 任务最后一步 + └─ V_t = α_t · R_human + (1-α_t) · γ · V_{t+1} ← 往前回溯 + ↓ +写回每条 trace 的 reflection / alpha / value 字段 +``` + +**注意**: +- **不是每个小步发生时都跑 LLM 打分**,而是任务结束时一次性批量打。lite 阶段只写空骨架(`alpha=0, value=0, reflection=null`),等 `runReflect` 阶段填充。 +- **R_human 是任务级唯一信号**,回溯到每个小步去——不是每个小步都问一次用户"这一步好不好"。 +- **轮在数学公式里完全不出现**。 + +### 例子 — 一次性看清楚 + +``` +任务 ep_42 (= 同一 topic 的 3 轮) + 轮 t1: 查 cpu/内存/硬盘 → 4 个小步 + 轮 t2: 在 Alpine 装 lxml → 4 个小步 + 轮 t3: 用户回复"好的" → 1 个小步 +共 9 个小步 + +任务结束 → 用户最后说"好,记下来" → R_human = +0.85 + ↓ +对 9 个小步分别打 α (反思识别"关键发现"的高,闲聊的低) + ↓ +反思加权回溯 → 9 个小步各拿到自己的 V + ↓ +关键步 step2.1 (Alpine + pip lxml → xmlsec1 not found, α=0.7) + 最终拿到 V ≈ 0.78 (高) +闲聊步 step3.1 (用户回复"好的", α=0.2) + 最终拿到 V ≈ 0.15 (低) +``` + +`R_human = +0.85` 是任务级,整任务唯一一个;`V` 是小步级,9 个小步 9 个值;轮 t1 / t2 / t3 没有自己的"分"。 + +--- + +## 3. 
检索粒度 + +按算法文档 §2.6 三层。**没有任何一层是按"轮"召回的**——这是设计选择,不是疏漏。 + +### 3.1 第一层 · 技能召回(任务入口) + +| | | +|---|---| +| 触发时机 | 新任务到来(用户给出 query 那一刻) | +| 召回粒度 | **一个完整技能对象**(trigger / procedure / verification / scope / decision_guidance / domain_model) | +| 匹配方式 | 路由器看 user query 是否命中某个**激活技能**的 trigger | +| 实现 | `core/retrieval/tier1-skill.ts` | + +### 3.2 第二层 a · 单步小步召回(执行中按需) + +| | | +|---|---| +| 触发时机 | 当前小步执行失败(exit code≠0、错误模式命中、反思出现失败信号) | +| 召回粒度 | **1 条小步**(一行 `traces`) | +| 匹配方式 | 三路并行:error signature 结构匹配 / state embedding 语义匹配 / 标签过滤 → 按 V 排序取 Top-K | +| 实现 | `core/retrieval/tier2-trace.ts::runTier2`(trace 通道) | + +### 3.3 第二层 b · 子任务序列召回 + +| | | +|---|---| +| 触发时机 | 当前子任务的"目标"和某段历史子任务的"目标"语义相似 | +| 召回粒度 | **N 条连续小步**(按 episode 业务连续性拼,**不按 turn_id 拼**) | +| 匹配方式 | goal-to-goal 语义相似度 + 整段累计 V 为正 | +| 实现 | `core/retrieval/tier2-trace.ts::bucketByEpisode` + `EpisodeCandidate` | + +### 3.4 第三层 · 环境认知召回(深层推理时) + +| | | +|---|---| +| 触发时机 | 反思 ρ_t 里出现**结构性不确定**——不是操作性疑问 | +| 召回粒度 | **一个环境认知对象**(𝓔 空间结构 / 𝓘 行为规律 / 𝓒 约束禁忌) | +| 实现 | `core/retrieval/tier3-world.ts` | + +#### "结构性不确定" vs "操作性疑问" + +| 反思内容 | 类型 | 触发第三层吗 | +|---|---|---| +| "pip 装失败了,可能要换个命令" | 操作性 | ❌ 单步召回就够 | +| "命令找不到 pg_config,可能是 PATH 问题" | 操作性 | ❌ | +| **"装失败了,但我不确定是缺系统库还是 musl 不兼容——这俩走的是完全不同的修复路径"** | **结构性** | ✅ 召回"Python 三层依赖 + Alpine musl 边界" | +| **"改了配置后服务没生效,不知道这种配置是热加载还是要重启"** | **结构性** | ✅ 召回"该服务行为规律" | +| **"删了这个目录后另一组件挂了,不知道这两个之间什么依赖关系"** | **结构性** | ✅ 召回"项目空间结构" | +| **"用户让我 DROP TABLE,不确定这是不是生产环境"** | **结构性 (约束类)** | ✅ 召回"环境约束与禁忌" | + +判别信号大致是:反思里出现 *"不确定 / 哪一层 / 是不是因为 / 副作用 / 影响范围"* 等结构性词汇,或遇到 embedding 在历史所有小步里都找不到相似的全新状态。 + +### 3.5 为什么算法故意不设计"按轮召回" + +三个直接理由: + +1. **一轮 ≠ 一个完整子任务**。同一 user query 可能展开多个独立子任务("查 cpu / 装 lxml / 跑测试"塞在一句话里);同一子任务也可能横跨多轮(用户分 3 轮逐步引导装一个东西)。**轮和子任务正交**。 +2. **单步精准检索是必须的**。当前小步出 `'pg_config not found'` 错时,最想要的就是"上次哪一步遇到这个错、怎么救活的"——按轮召回会把当时整轮的所有小步连同汇报性回复一锅端,稀释精准信号。 +3. 
**上下文连续性该由"子任务"承担**。子任务边界是"这段连续小步是不是在做同一件事",按 episode 拼;轮边界是"这些小步是不是同一个 user query 引出的",没有归纳价值。 + +### 3.6 用例子对比 + +未来用户在 Debian 容器执行 `pip install cryptography` 失败: + +| 召回方式 | 实际拿到什么 | 评价 | +|---|---|---| +| **算法实际做的(单步)** | 仅 step2.1:Alpine + pip lxml → 'xmlsec1 not found' (V=0.78) | ✅ 精准对症 | +| **算法实际做的(子任务)** | step2.1 + step2.2 + step2.3 三条连续序列,整段动作链作为参考方案 | ✅ 给出完整救活流程 | +| 假想:按轮召回 | step2.1 + 2.2 + 2.3 + 2.4(含 "装好了" 汇报) | ❌ 多了无用汇报 | +| 假想:按轮召回 | 把不相关的轮 t1(lscpu / free / df)也一起召回 | ❌ 容易混入无关轮 | + +--- + +## 4. 经验 / 环境认知 / 技能 的层级关系 + +### 4.1 全景 + +``` +事实层 ┌─────────────┐ + │ 小步 ←── 用户每次 query 引出的每个动作 + │ (记忆) + └──────┬──────┘ + │ 跨任务模式归纳 + │ (每条小步写入即触发,轻量) + ▼ +认知层 ┌─────────────┐ + │ 经验 │ "这类子任务该怎么做" + │ (子任务策略) │ + └──────┬──────┘ + │ 多条同领域经验背后稳定原理抽象 + │ (异步触发) + ▼ + ┌─────────────┐ + │ 环境认知 │ "这个环境长什么样" + │ (世界模型) │ + └──────┬──────┘ + │ +能力层 经验 + 环境认知 ▼ + + 三阈值满足 → ┌─────────────┐ + + 双重检验 │ 技能 │ "路由器可调用的成熟能力" + └─────────────┘ + +"轮" 不在这张图里 ── 它只活在前端展示层 +``` + +### 4.2 一句话定义对照 + +| | 是什么 | 回答的问题 | 类比 | +|---|---|---|---| +| **小步 (记忆)** | 一次 `(think + 动作 + 观测)` 的 grounded 记录 | "这一步做了什么、发生了什么" | 病历的一条诊疗记录 | +| **经验** | 跨任务相似小步归纳出的子任务策略 `(trigger / procedure / verification / boundary)` | "在这种条件下该怎么做" | 临床路径文档 | +| **环境认知** | 多条同领域经验背后抽象的环境压缩认知 `(空间结构 / 行为规律 / 约束禁忌)` | "这个环境长什么样、有什么规律、什么不能做" | 解剖学 + 生理学 | +| **技能** | 经验 + 环境认知 打包成可调用对象 + 持续修订的可靠性 | "当前 query 该不该调用这条策略,调用后怎么走" | 一名熟练的内科医生 | + +### 4.3 两两关系 + +#### 经验 ↔ 环境认知 + +**单向抽象 + 反向增强**: +- 多条经验背后的稳定组织原理 → 抽象出 1 条环境认知(**素材关系**) +- 环境认知反过来给经验提供结构先验,让经验的 trigger 更精准(**先验关系**) + +数量关系:1 条环境认知背后通常有 N≥2 条经验;1 条经验**不一定**对应到环境认知(小众场景没积累出领域规律就没有)。 + +#### 经验 + 环境认知 → 技能 + +**结晶**:技能 = 1 条经验作为骨架 + 0 或 1 条环境认知作为先验 + 决策修复产出的偏好/反模式 + 用小步级 V 算出的可靠性。 + +``` +Skill { + trigger / procedure / verification / scope ← 主体来自经验 + evidence_anchors ← 来自小步证据 + domain_model ← 来自环境认知(可选) + decision_guidance ← 来自决策修复 + reliability η ← 用 source_steps 的 V 算出来 +} +``` + +**不是每条经验都能变技能**——必须满足三阈值(频率 / 增益 / 稳定性)+ 双重检验。 
+**不是每条技能都带环境认知**——小工具型技能可以没有,复杂技能往往携带。 + +#### 经验 / 环境认知 / 技能 ↔ 小步 + +| | 与小步的关系 | +|---|---| +| **经验** | 直接消费小步:写入即触发关联检查、用小步 V 加权更新 procedure / scope / η | +| **环境认知** | 间接消费小步:经过经验中转,但 source_policies 可追溯到底层小步 | +| **技能** | 直接挂证据:`evidence_anchors` 字段指向具体小步 id;调用后产生新小步反馈又会反向修订技能 | + +#### 经验 / 环境认知 / 技能 ↔ 轮 + +**全部无关**。算法的: +- 经验诱导:按"跨任务相似小步集合" +- 环境认知抽象:按"同领域多经验背后的原理" +- 技能结晶:按"独立任务数 + 小步级 V 增益 + 稳定性" + +这三个动作里**没有任何一个用到"轮"这个边界**。 + +--- + +## 5. 完整闭环(用例子从头串到尾) + +``` +1. 用户在任务 ep_42 内问 3 轮 → 写入 9 条小步记忆 (带 turn_id / episode_id) + ↓ +2. 每条小步写入时即时触发增量经验关联检查 (轻量,无 LLM) + - step2.1 写入时和历史 task_X 的 pg_config 小步配对 → 诱导新经验 + ↓ +3. 用户关闭任务 (说"好的,记下来") → 触发任务级评分 + - 1 次 LLM 调用:算 R_human=+0.85 + 给 9 个小步各打 α + - 反思加权回溯:算出 9 个小步各自的 V + - 写回每条记忆的 reflection / alpha / value 字段 + ↓ +4. 异步:发现"容器系统库缺失排障"经验已关联 ≥3 个独立任务 + - 满足三阈值 → 触发结晶 + - 收集证据集 (证据全是"小步",不是"轮"也不是"任务") + - LLM 诱导 → 双检验 → 激活 + ↓ +5. 未来某次用户说"在 Debian 装 cryptography": + - 任务入口 → 召回完整技能对象 + - 某步失败 → 召回 1 条小步 step2.1 给参考 + - 子任务相似 → 召回 step2.1+2.2+2.3 整段作为参考方案 + - 深层反思 → 召回环境认知"Python 三层结构" + ★ 这一系列召回,没有任何一次是按"轮"召回的 + ↓ +6. 这次任务结束 → 又是 1 次任务级评分 → 又一轮 V 回溯 + → 经验/技能可靠性更新 +``` + +--- + +## 6. 经验 vs 环境认知 — 边界裁剪(FAQ:要不要合并?) 
+ +回答开发者反复问到的那个问题:**"经验和环境认知能不能合并成一层?"** + +**结论:不合并**——这两层是两种不同种类的知识,合并会同时损伤两边。但你产生这个疑问通常源自两个真实痛点(**边界模糊** + **环境认知很少被结晶出来**),下面分别处理。 + +### 6.1 为什么不合并 — 7 条理由 + +| # | 理由 | 关键差异 | +|---|---|---| +| 1 | **回答的是不同种类的问题** | 经验 = 过程性 (procedural) "怎么做";环境认知 = 陈述性 (declarative) "环境长啥样" | +| 2 | **泛化粒度不同** | 经验跨任务迁移;环境认知跨**领域**迁移。合并 = 塌缩成单一泛化层 | +| 3 | **更新触发频率不同** | 经验高频(每条新小步触发);环境认知低频(多经验稳定后抽象)。合并 = 强行一种节奏 | +| 4 | **LLM 输出 schema 不同** | trigger/procedure/verification/boundary vs 𝓔/𝓘/𝒞。合并 = 8 字段膨胀,质量下降 | +| 5 | **在技能里的角色已经分清** | 技能里 `domain_model` 是**可选**字段——小工具型技能不需要环境认知 | +| 6 | **检索注入策略不同** | 经验作为技能骨架在任务入口必注入;环境认知仅在反思出现结构性不确定时才注入第三层 | +| 7 | **算法上是不同的数学对象** | $f^{(2)} = (\phi, \pi, \kappa, \Omega, \{f^{(1)}\})$ vs $f^{(3)} = (\mathcal{E}, \mathcal{I}, \mathcal{C}, \{f^{(2)}\})$ | + +### 6.2 你产生这个疑问的真实痛点 + +| 痛点 | 真相 | 处理方式 | +|---|---|---| +| **A. 实现里环境认知很少被生成出来,世界模型表常年空着** | 这是**冷启动期**正常现象——环境认知需要"多条同领域经验背后的稳定原理",单用户、单 agent 场景下经验本身就稀疏 | 不是"该不该合并",是"什么时候才该投入做"。算法 §0.4 明确说"每一条 L3 知识都省掉了未来的探索步骤和 Token"——价值在长期累积**之后**显现 | +| **B. 经验 vs 环境认知的边界在实践里有时模糊** | 这是文档/prompt 没把边界划清,**不是该合并的理由** | → 看下面 6.4 节的判别表 + 同事实多框架对照表 | + +### 6.3 折中方案对比(如果你确实想降低工程负担) + +| 方案 | 做法 | 优点 | 缺点 | +|---|---|---|---| +| **A · 共表不共类型** | 物理上合并 `policies` + `world_model` 进 `knowledge` 表,加 `kind` 列区分;代码层仍是两个 repo + 两套 prompt | 减少 30% 重复代码 | prompt + subscriber 还是两套,收益有限 | +| **B · 让环境认知可关闭** | 加配置 `algorithm.l3Abstraction.enabled = false`,小型部署直接关掉 | 不删功能、不改算法、降低冷启动期成本 | retrieval / capture 都要加分支;后续再开需清空旧库 | +| **C · 不动代码,加文档** | 只把"经验 vs 环境认知判别表"写进文档(即下面 6.4 节) | 零工程成本 | 解决不了"看着空表碍眼"的心理负担 | + +**推荐方案 C**——不要因为冷启动期数据稀疏就否定算法的分层结构。当前实现里环境认知模块代码量并不大(`core/memory/l3/` 200 行 + `tier3-world.ts` 100 行),维护成本可控。**真正的修复点是 prompt + 文档把边界划清**——一旦下游真实场景累积起来,分层带来的可解释性收益会远超合并节省的几百行代码。 + +### 6.4 经验 vs 环境认知 判别表(既给读者看,也给 LLM 看) + +#### A. 
判别速查 + +| 维度 | 经验 (procedural) | 环境认知 (declarative) | +|---|---|---| +| **回答** | 在条件 X 下应该做 Y | 这个环境长什么样 / 怎么响应 / 什么不能做 | +| **是否含动作** | 必含具体动作模板(apk add / pip retry) | **不含动作**——只描述结构、规律、约束 | +| **句式** | 命令式 / 条件式:"当 X 时,做 Y" | 陈述式:"X 是 Y" / "X 会导致 Y" / "禁止 Z" | +| **所属层** | 子任务策略 | 环境世界模型 | +| **变量化程度** | trigger 可参数化(容器类型 / 包名) | 不参数化——是环境本身的事实 | +| **可验证性** | "执行后 import 成功" 这种步骤级判据 | "拓扑事实是否被多个经验佐证" 这种存在性判据 | +| **失效条件** | 步骤跑失败 → 经验 boundary 收缩 | 环境实际变了 (升级/迁移) → 环境认知整条作废 | + +#### B. 同事实多框架对照(避免混淆) + +下面三组都是**同一个底层事实**用两种形态表达。生成时**不要把右边的框架塞进左边的对象**,反之亦然。 + +| 底层事实 | 经验形态(怎么做) | 环境认知形态(环境长啥样) | +|---|---|---| +| Alpine 容器装 Python C 扩展库会失败 | "当 Alpine 容器内 pip install 因系统库缺失编译失败时 → 解析缺失组件 → `apk add -dev` → 重试 pip" | "Python 包依赖分三层(系统库 / C 扩展 / 纯 Python);容器镜像默认只装第三层;C 扩展类需要补装系统库" | +| macOS 的 sed 和 GNU sed 不兼容 | "当需要在 macOS 上做正则替换时 → 优先用 Python 脚本,避免 `sed -i`" | "macOS 自带 BSD sed,与 Linux 上的 GNU sed 在 `-i` / 反向引用 / 扩展正则上语法不同" | +| 改了配置后服务没生效 | "当改了 `config.yaml` 后 → 必须 `service restart`,不能依赖热加载" | "该服务只在启动时读 config 一次,运行时无 watcher;约束:禁止假设配置是热加载的" | + +**判别口诀**: +- 看到"做什么 / 步骤 / 命令 / 顺序" → 写进**经验** +- 看到"是什么 / 在哪 / 怎么响应 / 不能怎样" → 写进**环境认知** +- 同一个事实可以同时存在于两边,**但表述形态必须分开** + +#### C. 反例:常见的越界写法(要在 prompt 里禁止) + +**经验越界写成环境认知形态**(应该被 prompt 拒绝): +- ❌ trigger: "Python 包依赖三层结构" ← 这是环境事实,不是触发条件 +- ❌ procedure 里写 "Alpine 用 musl libc" ← 这是环境事实,不是动作步骤 +- ❌ caveat 里写 "容器镜像通常只装纯 Python 层" ← 这是环境规律,不是步骤的注意事项 + +**环境认知越界写成经验形态**(应该被 prompt 拒绝): +- ❌ inference 里写 "pip 失败时应该装 -dev 库" ← "应该做"是动作,属于经验 +- ❌ constraint 里写 "不要用 sed,改用 Python" ← "改用"是动作偏好,属于经验/决策修复 +- ❌ environment 里写 "通过 `apk add` 装系统库" ← 描述了动作,属于经验 + +--- + +## 7. 三个问题的一句话答案(便于快速回顾) + +1. **打分**:每个**小步**有自己的 α 和 V;整个**任务**有一个 R_human;**轮**没有自己的分(前端展示时取轮内 V 平均凑一个)。 +2. **检索**:技能 = 任务入口召回;单步 = 当前小步失败时召回;子任务 = 多个连续小步序列召回(按 episode 拼,**非**按 turn_id 拼);环境认知 = 深层反思时召回。**没有任何一层是按"轮"召回**。 +3. 
**生成**:小步 → 跨任务模式诱导经验 → 多个经验抽象环境认知 → 经验 + 环境认知 + 三阈值满足 → 结晶技能。**整个生成链路也没有"轮"这个概念**。 + +> "轮"只活在前端那张卡片里,是给你看的;算法的所有数学公式、所有触发逻辑、所有粒度,要么是"小步"要么是"任务",从来没有"轮"。 diff --git a/apps/memos-local-plugin/docs/README.md b/apps/memos-local-plugin/docs/README.md index 999d2acc0..c6ce82adc 100644 --- a/apps/memos-local-plugin/docs/README.md +++ b/apps/memos-local-plugin/docs/README.md @@ -7,7 +7,9 @@ For *user-facing* docs (getting started, configuration, viewer tour), see | File | What it covers | |-------------------------------|---------------------------------------------------------| -| `ALGORITHM.md` | The Reflect2Evolve V7 spec, indexed against the code. | +| `Reflect2Skill_算法设计核心.md` | Reflect2Evolve V7 算法规范(中文原版)。 | +| `ALGORITHM_ALIGNMENT.md` | 算法 ↔ 实现的逐节对照表,标记 ✅/⚠️/❌。 | +| **`GRANULARITY-AND-MEMORY-LAYERS.md`** | **术语与粒度对齐:小步 / 轮 / 任务、经验 / 环境认知 / 技能 之间的关系,打分与检索的粒度选择。读其它文档前先看这一篇。** | | `DATA-MODEL.md` | Every SQLite table, column, and index. | | `EVENTS.md` | Every `CoreEventType`, when it fires, payload shape. | | `PROMPTS.md` | Prompt anatomy, evaluation samples, golden outputs. 
| diff --git "a/apps/memos-local-plugin/docs/Reflect2Skill_\347\256\227\346\263\225\350\256\276\350\256\241\346\240\270\345\277\203.md" "b/apps/memos-local-plugin/docs/Reflect2Skill_\347\256\227\346\263\225\350\256\276\350\256\241\346\240\270\345\277\203.md" index 7ffe84bb9..457b6fe84 100644 --- "a/apps/memos-local-plugin/docs/Reflect2Skill_\347\256\227\346\263\225\350\256\276\350\256\241\346\240\270\345\277\203.md" +++ "b/apps/memos-local-plugin/docs/Reflect2Skill_\347\256\227\346\263\225\350\256\276\350\256\241\346\240\270\345\277\203.md" @@ -2,6 +2,8 @@ > **核心立场**:智能体的能力提升不应依赖离线日志分析或人工编写的技能模板,而应在与环境和人类的持续交互中自主实现。Skill 沉淀只是自我进化过程的一个具体产出——更根本的是,智能体通过分层记忆的持续积累与修订,实现对环境的理解越来越深、对任务的处理越来越快、与人类偏好越来越对齐。 +> **阅读顺序提示**:第一次接触本文档的同学请先看 [`GRANULARITY-AND-MEMORY-LAYERS.md`](./GRANULARITY-AND-MEMORY-LAYERS.md),把"小步 / 轮 / 任务"和"经验 / 环境认知 / 技能"几个粒度概念锁死,再回来读本文。 + --- ## 双层反馈机制驱动的持续自我进化 diff --git a/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts b/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts index c2bcc2e98..5521daa0d 100644 --- a/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts +++ b/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts @@ -189,7 +189,7 @@ function buildFullChainLlm(): LlmClient { }), // L2 induction — distills a policy from ≥2 similar traces. - "l2.l2.induction.v1": (input) => { + "l2.l2.induction.v2": (input) => { const text = lastUserMessage(input); const isPython = /python|pip|\.py\b/i.test(text); return { @@ -211,7 +211,7 @@ function buildFullChainLlm(): LlmClient { }, // L3 abstraction — environment model across L2 policies. 
- "l3.abstraction.v1": () => ({ + "l3.abstraction.v2": () => ({ title: "Python 开发辅助环境认知", domain_tags: ["python", "coding-assist"], environment: [ diff --git a/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts b/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts index c5d9679a3..e057defe7 100644 --- a/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts +++ b/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts @@ -26,8 +26,8 @@ * - `capture.alpha.reflection.score.v1`— α scoring * - `capture.summarize` — trace-level summaries * - `reward.reward.r_human.v3` — R_human axis scoring - * - `l2.l2.induction.v1` — L2 policy induction - * - `l3.abstraction.v1` — L3 world-model abstraction + * - `l2.l2.induction.v2` — L2 policy induction + * - `l3.abstraction.v2` — L3 world-model abstraction * - `skill.crystallize` — skill draft * * Each scripted response looks only at the `NEW_USER_MESSAGE:` chunk @@ -234,7 +234,7 @@ function buildLlm(): LlmClient { reason: "concrete root-cause reflection", }), - "l2.l2.induction.v1": (input) => { + "l2.l2.induction.v2": (input) => { const evidence = (input as { evidenceTraces?: Array<{ id: string }> }) ?.evidenceTraces ?? 
[]; return { @@ -256,7 +256,7 @@ function buildLlm(): LlmClient { }; }, - "l3.abstraction.v1": () => ({ + "l3.abstraction.v2": () => ({ title: "Python 开发环境认知 (pip + 标准库)", domain_tags: ["python", "pip", "coding-assist"], environment: [ diff --git a/apps/memos-local-plugin/tests/unit/capture/step-extractor.test.ts b/apps/memos-local-plugin/tests/unit/capture/step-extractor.test.ts index b7cf65ee7..cef4c0c55 100644 --- a/apps/memos-local-plugin/tests/unit/capture/step-extractor.test.ts +++ b/apps/memos-local-plugin/tests/unit/capture/step-extractor.test.ts @@ -39,7 +39,7 @@ function episode(turns: EpisodeTurn[], metaOverride: Record = { describe("capture/step-extractor", () => { beforeAll(() => initTestLogger()); - it("single user → assistant → one step", () => { + it("single user → assistant → one step (no tools, single sub-step)", () => { const ep = episode([ turn("user", "write the readme", 1_000), turn("assistant", "here's the readme", 1_100), @@ -50,9 +50,12 @@ describe("capture/step-extractor", () => { expect(steps[0]!.agentText).toBe("here's the readme"); expect(steps[0]!.toolCalls).toEqual([]); expect(steps[0]!.ts).toBe(1_100); + // turnId anchors on the user turn's ts so the viewer can group + // every L1 trace produced from this message under one card. 
+ expect(steps[0]!.meta.turnId).toBe(1_000); }); - it("assistant + tool + assistant → one step with merged tool call", () => { + it("assistant + tool + assistant → tool sub-step + response sub-step (V7 §0.1)", () => { const ep = episode([ turn("user", "ls", 1_000), turn("assistant", "running ls", 1_050), @@ -65,16 +68,34 @@ describe("capture/step-extractor", () => { turn("assistant", "done", 1_070), ]); const steps = extractSteps(ep); - expect(steps).toHaveLength(1); - expect(steps[0]!.agentText).toBe("running ls\n\ndone"); - expect(steps[0]!.toolCalls).toHaveLength(1); - expect(steps[0]!.toolCalls[0]!.name).toBe("shell"); - expect(steps[0]!.toolCalls[0]!.output).toBe("/a\n/b\n"); - expect(steps[0]!.toolCalls[0]!.input).toEqual({ cmd: "ls" }); - expect(steps[0]!.ts).toBe(1_070); + // V7 §0.1 granularity: one step per agent decision point. + // 1 tool call → 1 sub-step; 1 final reply → 1 sub-step. + expect(steps).toHaveLength(2); + + const toolStep = steps[0]!; + expect(toolStep.userText).toBe("ls"); + expect(toolStep.agentText).toBe(""); + expect(toolStep.toolCalls).toHaveLength(1); + expect(toolStep.toolCalls[0]!.name).toBe("shell"); + expect(toolStep.toolCalls[0]!.output).toBe("/a\n/b\n"); + expect(toolStep.toolCalls[0]!.input).toEqual({ cmd: "ls" }); + expect(toolStep.meta.subStep).toBe(true); + expect(toolStep.meta.subStepIdx).toBe(0); + expect(toolStep.meta.subStepTotal).toBe(2); + + const replyStep = steps[1]!; + expect(replyStep.userText).toBe(""); // only first sub-step carries it + expect(replyStep.agentText).toBe("done"); + expect(replyStep.toolCalls).toEqual([]); + expect(replyStep.meta.subStepIdx).toBe(1); + + // Both sub-steps share the same turnId — the viewer collapses + // them back into one card via group_by(episodeId, turnId). 
+ expect(toolStep.meta.turnId).toBe(1_000); + expect(replyStep.meta.turnId).toBe(1_000); }); - it("two user turns split into two steps", () => { + it("two user turns split into two steps (each turn gets its own turnId)", () => { const ep = episode([ turn("user", "first", 1_000), turn("assistant", "a1", 1_010), @@ -85,8 +106,36 @@ describe("capture/step-extractor", () => { expect(steps).toHaveLength(2); expect(steps[0]!.userText).toBe("first"); expect(steps[0]!.agentText).toBe("a1"); + expect(steps[0]!.meta.turnId).toBe(1_000); expect(steps[1]!.userText).toBe("second"); expect(steps[1]!.agentText).toBe("a2"); + expect(steps[1]!.meta.turnId).toBe(1_020); + }); + + it("multi-tool turn → all sub-steps share the same turnId", () => { + // Two tools then a final reply collapse into 3 sub-steps that + // all carry the user turn's ts as their group key. + const ep = episode([ + turn("user", "查 cpu 内存 磁盘", 2_000), + turn( + "tool", + "8 cores", + 2_010, + { tool: "shell", input: { cmd: "nproc" }, startedAt: 2_005, endedAt: 2_010 }, + ), + turn( + "tool", + "16G", + 2_020, + { tool: "shell", input: { cmd: "free -h" }, startedAt: 2_015, endedAt: 2_020 }, + ), + turn("assistant", "8 核 16G", 2_030), + ]); + const steps = extractSteps(ep); + expect(steps).toHaveLength(3); + const turnIds = steps.map((s) => s.meta.turnId); + expect(new Set(turnIds).size).toBe(1); + expect(turnIds[0]).toBe(2_000); }); it("trailing user without assistant is dropped (incomplete)", () => { @@ -125,6 +174,7 @@ describe("capture/step-extractor", () => { expect(steps).toHaveLength(1); expect(steps[0]!.agentText).toBe(""); expect(steps[0]!.meta.synthetic).toBe(true); + expect(steps[0]!.meta.turnId).toBe(1_000); expect(steps[0]!.ts).toBe(1_000); }); diff --git a/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts b/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts index 9b3931c8a..fc56ae21d 100644 --- a/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts +++ 
b/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts @@ -43,7 +43,7 @@ describe("memory/l2/induce", () => { it("returns {ok:true, draft} and fills support_trace_ids when the LLM omits them", async () => { const llm = fakeLlm({ completeJson: { - "l2.l2.induction.v1": { + "l2.l2.induction.v2": { title: "install system libs first", trigger: "pip install fails in container with missing system library", procedure: "1. detect missing lib 2. apk/apt-get install 3. retry pip", @@ -105,7 +105,7 @@ describe("memory/l2/induce", () => { it("reason=llm_failed when the LLM draft is malformed (missing title)", async () => { const llm = fakeLlm({ completeJson: { - "l2.l2.induction.v1": { trigger: "no title", procedure: "..." }, + "l2.l2.induction.v2": { trigger: "no title", procedure: "..." }, }, }); const res = await induceDraft( diff --git a/apps/memos-local-plugin/tests/unit/memory/l2/l2.integration.test.ts b/apps/memos-local-plugin/tests/unit/memory/l2/l2.integration.test.ts index 786478baa..fca001220 100644 --- a/apps/memos-local-plugin/tests/unit/memory/l2/l2.integration.test.ts +++ b/apps/memos-local-plugin/tests/unit/memory/l2/l2.integration.test.ts @@ -109,7 +109,7 @@ describe("memory/l2/integration", () => { const llm = fakeLlm({ completeJson: { - "l2.l2.induction.v1": { + "l2.l2.induction.v2": { title: "install missing system libs in container", trigger: "pip install fails in container with MODULE_NOT_FOUND due to missing system lib", procedure: "1. detect lib 2. use distro pkg manager 3. 
retry pip", @@ -220,7 +220,7 @@ describe("memory/l2/integration", () => { const llm = fakeLlm({ completeJson: { - "l2.l2.induction.v1": { + "l2.l2.induction.v2": { title: "t", trigger: "tr", procedure: "pr", diff --git a/apps/memos-local-plugin/tests/unit/retrieval/injector.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/injector.test.ts index b9aed9b7d..1c74c243c 100644 --- a/apps/memos-local-plugin/tests/unit/retrieval/injector.test.ts +++ b/apps/memos-local-plugin/tests/unit/retrieval/injector.test.ts @@ -200,7 +200,9 @@ describe("retrieval/injector", () => { expect(skillSnippet.body).not.toContain("skill_get(id="); // The footer should not surface the skill call hints in full mode. expect(packet.rendered).not.toContain("`skill_get(id)`"); - expect(packet.rendered).toContain("# Skills"); + // Subsection headings are level-2 Markdown, nested under the packet's + // level-1 "User's conversation history" header. + expect(packet.rendered).toContain("## Skills"); }); it("empty ranked list produces empty rendered string", () => { diff --git a/apps/memos-local-plugin/web/src/stores/i18n.ts b/apps/memos-local-plugin/web/src/stores/i18n.ts index c5192b082..818a2bb42 100644 --- a/apps/memos-local-plugin/web/src/stores/i18n.ts +++ b/apps/memos-local-plugin/web/src/stores/i18n.ts @@ -260,6 +260,8 @@ const en = { "memories.field.support": "Support count", "memories.field.toolCalls": "Tool calls", "memories.field.episodeTimeline": "Steps in this task", + "memories.field.steps": "Steps in this turn ({n})", + "memories.card.steps": "{n} steps", // Tooltip helpers for memory metadata fields. Shown when the user // hovers the small "?" icon next to each label so they can find out // what the score means without leaving the drawer. 
@@ -833,6 +835,8 @@ const zh: Record = { "memories.field.support": "支撑任务数", "memories.field.toolCalls": "工具调用", "memories.field.episodeTimeline": "本任务的其他步骤", + "memories.field.steps": "本轮步骤(共 {n} 步)", + "memories.card.steps": "{n} 步", "memories.help.value": "记忆被捕获时的重要性评分(0–1)。值越高表示助手当时越觉得这条记忆值得保留。", "memories.help.alpha": diff --git a/apps/memos-local-plugin/web/src/views/MemoriesView.tsx b/apps/memos-local-plugin/web/src/views/MemoriesView.tsx index 786588d3c..b6b43a119 100644 --- a/apps/memos-local-plugin/web/src/views/MemoriesView.tsx +++ b/apps/memos-local-plugin/web/src/views/MemoriesView.tsx @@ -1,6 +1,24 @@ /** * Memories view — paginated (prev/next), drawer-driven detail. * + * Display granularity: **one user↔agent turn = one card**. + * + * The capture pipeline writes L1 traces at the step level (V7 §0.1 + * — one tool call → one trace, plus one trace for the final reply) + * because every algorithm consumer (R_human backprop, L2 incremental + * association, Tier-2 retrieval, Decision Repair) needs that step + * granularity. The viewer collapses sibling sub-steps back into a + * single card by grouping on `(episodeId, turnId)` — `turnId` is the + * stable group key `step-extractor` stamps onto every trace produced + * from the same user message. + * + * Bulk actions (select / delete / share / export) operate on whole + * cards: the card-level checkbox toggles the full set of member trace + * ids, the delete button removes every member, and so on. The drawer + * lays out each member step as its own collapsible section so users + * can still inspect per-tool value / reflection without leaving the + * "one round = one memory" mental model. 
+ * * Layout (matches TasksView so all three data browsers feel alike): * * ╭─ view-header ─────────────────────────────────────────╮ @@ -12,12 +30,12 @@ * ╭─ toolbar: filter chips (own row) ──────────────────────╮ * │ [All][User][Assistant][Tool] │ * ╰────────────────────────────────────────────────────────╯ - * ╭─ batch-bar (shows when any row is selected) ──────────╮ + * ╭─ batch-bar (shows when any card is selected) ─────────╮ * │ Selected N [Select page] [Copy] [Delete] [Deselect]│ * ╰────────────────────────────────────────────────────────╯ - * ┌─ row (clickable → opens drawer) ──────────────────────┐ + * ┌─ card (one turn; clickable → opens drawer) ───────────┐ * │ ☐ summary line … │ - * │ · role · [scope] · date · V/α · tools │ + * │ · role · [scope] · date · V/α · tools · steps │ * └──────────────────────────────────────────────────────────┘ * ╭─ pager ───────────────────────────────────────────────╮ * │ [prev] N / total [next] │ @@ -46,6 +64,30 @@ interface ListResponse { total?: number; } +/** + * One displayable card in the Memories list — a "user message + every + * sub-step it produced" unit. `traces` are the raw L1 rows the + * pipeline wrote (tool steps + final reply); `head` is the row that + * carries the user query. `turnKey` is what the page groups on: + * `${episodeId}:${turnId}` (or `${episodeId}:${trace.id}` for legacy + * rows that pre-date migration 013 and have NULL `turnId`). 
+ */ +interface MemoryGroup { + turnKey: string; + episodeId: string | null; + ts: number; + head: TraceDTO; + traces: TraceDTO[]; + ids: string[]; + toolCount: number; + toolNames: string[]; + aggValue: number; + aggAlpha: number; + hasReflection: boolean; + scope: "private" | "public" | "hub"; + shared: boolean; +} + const PAGE_SIZE = 25; export function MemoriesView() { @@ -58,7 +100,7 @@ export function MemoriesView() { const [traces, setTraces] = useState([]); const [hasMore, setHasMore] = useState(false); const [selected, setSelected] = useState>(new Set()); - const [detail, setDetail] = useState(null); + const [detail, setDetail] = useState(null); const [toast, setToast] = useState<{ msg: string; kind: "info" | "success" | "error" } | null>(null); const showToast = (msg: string, kind: "info" | "success" | "error" = "success") => { @@ -104,20 +146,38 @@ export function MemoriesView() { // eslint-disable-next-line react-hooks/exhaustive-deps }, [route.value.params.q]); - const filtered = useMemo(() => { - if (!role) return traces; - return traces.filter((tr) => detectRole(tr) === role); + /** + * Bucket the page's traces by `(episodeId, turnId)` so each "user + * message + every sub-step it produced" collapses into one card. + * Then drop groups whose role doesn't match the chip filter. + */ + const groups = useMemo(() => { + const all = buildGroups(traces); + if (!role) return all; + return all.filter((g) => detectGroupRole(g) === role); }, [traces, role]); - const toggleSel = (id: string) => { + /** + * A card is "selected" when every member trace id is in the + * `selected` set — the per-trace store keeps the existing + * bulk-action APIs (bulkDelete / bulkShare) unchanged. 
+ */ + const isGroupSelected = (g: MemoryGroup): boolean => + g.ids.length > 0 && g.ids.every((id) => selected.has(id)); + + const toggleGroupSel = (g: MemoryGroup) => { setSelected((prev) => { const next = new Set(prev); - if (next.has(id)) next.delete(id); - else next.add(id); + const allIn = g.ids.every((id) => next.has(id)); + for (const id of g.ids) { + if (allIn) next.delete(id); + else next.add(id); + } return next; }); }; - const selectPage = () => setSelected(new Set(filtered.map((t) => t.id))); + const selectPage = () => + setSelected(new Set(groups.flatMap((g) => g.ids))); const deselectAll = () => setSelected(new Set()); const bulkDelete = async () => { @@ -163,12 +223,15 @@ export function MemoriesView() { const bulkExport = () => { if (selected.size === 0) return; const lines: string[] = []; - for (const tr of filtered) { - if (!selected.has(tr.id)) continue; - const head = tr.summary || tr.userText || "(empty)"; + for (const g of groups) { + if (!isGroupSelected(g)) continue; + const head = pickSummary(g.head); lines.push(`# ${head}`); - if (tr.userText) lines.push(`[user] ${tr.userText}`); - if (tr.agentText) lines.push(`[assistant] ${tr.agentText}`); + for (const tr of g.traces) { + if (tr.userText) lines.push(`[user] ${tr.userText}`); + for (const tc of tr.toolCalls ?? []) lines.push(`[tool:${tc.name}] ${truncateForExport(tc)}`); + if (tr.agentText) lines.push(`[assistant] ${tr.agentText}`); + } lines.push(""); } const txt = lines.join("\n"); @@ -182,23 +245,39 @@ export function MemoriesView() { } }; - const deleteOne = async (tr: TraceDTO) => { + /** + * Delete a whole displayed card — i.e. every L1 trace produced by + * the same user message. We POST the full id list to the bulk + * endpoint so partial failures don't leave an orphan group on + * screen. 
+ */ + const deleteGroup = async (g: MemoryGroup) => { if (!confirm(t("memories.delete.confirm"))) return; try { - await api.del(`/api/v1/traces/${encodeURIComponent(tr.id)}`); + if (g.ids.length === 1) { + await api.del(`/api/v1/traces/${encodeURIComponent(g.ids[0]!)}`); + } else { + await api.post<{ deleted: number }>(`/api/v1/traces/delete`, { ids: g.ids }); + } await loadPage({ q: query.trim(), page }); setSelected((prev) => { const n = new Set(prev); - n.delete(tr.id); + for (const id of g.ids) n.delete(id); return n; }); - if (detail?.id === tr.id) setDetail(null); + if (detail?.turnKey === g.turnKey) setDetail(null); showToast(t("memories.delete.done")); } catch { showToast("Failed", "error"); } }; + /** + * The edit modal targets the **head trace** of the group — that's + * the only row that carries `userText` / `summary` / tags (sub-steps + * have empty user text by construction, see `step-extractor`). + * Tool inputs / outputs are immutable. + */ const saveEdit = async ( id: string, patch: { @@ -214,21 +293,41 @@ export function MemoriesView() { patch, ); setTraces((prev) => prev.map((x) => (x.id === id ? updated : x))); - setDetail(updated); + setDetail((prev) => + prev ? rebuildGroupAfterTracePatch(prev, updated) : prev, + ); showToast(t("memories.edit.saved")); } catch { showToast("Failed", "error"); } }; - const applyShare = async (id: string, scope: "private" | "public" | "hub" | null) => { + /** + * Share applies to every trace in the group — they belong to the + * same user turn and should always be public/private together. + */ + const applyShareGroup = async ( + g: MemoryGroup, + scope: "private" | "public" | "hub" | null, + ) => { try { - const updated = await api.post( - `/api/v1/traces/${encodeURIComponent(id)}/share`, - { scope }, + const updates = await Promise.all( + g.ids.map((id) => + api + .post(`/api/v1/traces/${encodeURIComponent(id)}/share`, { scope }) + .catch(() => null), + ), ); - setTraces((prev) => prev.map((x) => (x.id === id ? 
updated : x))); - setDetail(updated); + const next = traces.map((x) => { + const replacement = updates.find((u) => u && u.id === x.id); + return replacement ?? x; + }); + setTraces(next); + setDetail((prev) => { + if (!prev || prev.turnKey !== g.turnKey) return prev; + const fresh = buildGroups(next).find((x) => x.turnKey === g.turnKey); + return fresh ?? prev; + }); showToast(scope ? t("memories.share.done") : t("memories.share.removed")); } catch { showToast("Failed", "error"); @@ -332,7 +431,7 @@ export function MemoriesView() { )} - {loading && filtered.length === 0 && ( + {loading && groups.length === 0 && (
{[0, 1, 2, 3, 4].map((i) => (
@@ -340,7 +439,7 @@ export function MemoriesView() {
)} - {!loading && filtered.length === 0 && ( + {!loading && groups.length === 0 && (
@@ -350,24 +449,28 @@ export function MemoriesView() {
)} - {filtered.length > 0 && ( + {groups.length > 0 && (
- {filtered.map((trace) => { - const isSel = selected.has(trace.id); - const line = pickSummary(trace); - const roleKey = detectRole(trace); - const scope: "private" | "public" | "hub" = trace.share?.scope ?? "private"; + {groups.map((g) => { + const isSel = isGroupSelected(g); + const line = pickSummary(g.head); + const roleKey = detectGroupRole(g); + const scope = g.scope; + const stepLabel = + g.traces.length > 1 + ? t("memories.card.steps", { n: g.traces.length }) + : null; return (
setDetail(trace)} + onClick={() => setDetail(g)} onKeyDown={(e) => { if (e.key === "Enter" || e.key === " ") { e.preventDefault(); - setDetail(trace); + setDetail(g); } }} > @@ -379,7 +482,7 @@ export function MemoriesView() { type="checkbox" class="mem-card__check" checked={isSel} - onChange={() => toggleSel(trace.id)} + onChange={() => toggleGroupSel(g)} aria-label="select" /> @@ -394,23 +497,24 @@ export function MemoriesView() { {t(`memories.share.scope.${scope}` as never).split(" (")[0]} - {formatTs(trace.ts)} + {formatTs(g.ts)} - V {trace.value.toFixed(2)} · α {trace.alpha.toFixed(2)} + V {g.aggValue.toFixed(2)} · α {g.aggAlpha.toFixed(2)} - {(trace.toolCalls?.length ?? 0) > 0 && ( - tc.name) - .join(", ")} - > + {g.toolCount > 0 && ( + - {summarizeToolNames(trace.toolCalls)} + {summarizeToolNames(g.head.toolCalls?.length ? g.head.toolCalls : flattenToolCallList(g))} + + )} + {stepLabel && ( + + + {stepLabel} )} - {trace.reflection && ( - + {g.hasReflection && ( + {t("memories.card.reflection")} @@ -453,11 +557,11 @@ export function MemoriesView() { {detail && ( setDetail(null)} onSave={saveEdit} - onShare={applyShare} - onDelete={deleteOne} + onShare={(scope) => applyShareGroup(detail, scope)} + onDelete={() => deleteGroup(detail)} /> )} @@ -607,6 +711,103 @@ function detectRole(trace: TraceDTO): "user" | "assistant" | "tool" | "" { return ""; } +/** + * Bucket the page's traces by `(episodeId, turnId)`. Within each + * bucket, sort sub-steps by `ts ascending` and pick the first row + * with a non-empty `userText` as the head — the `step-extractor` + * guarantees this is the first sub-step (`subStepIdx === 0`), but we + * fall back to "earliest by ts" so legacy rows still group cleanly. + * + * Aggregates exposed on the card: + * - `aggValue` / `aggAlpha`: arithmetic mean across members. Plain + * mean keeps the card honest about "how was the whole turn?"; + * per-step values are still visible in the drawer. 
+ * - `toolCount` / `toolNames`: union of every member's `toolCalls`. + * - `scope`: take the head's share state (siblings always share the + * same scope thanks to `applyShareGroup`). + */ +function buildGroups(traces: readonly TraceDTO[]): MemoryGroup[] { + const buckets = new Map(); + const order: string[] = []; + for (const tr of traces) { + const key = groupKey(tr); + let bucket = buckets.get(key); + if (!bucket) { + bucket = []; + buckets.set(key, bucket); + order.push(key); + } + bucket.push(tr); + } + return order.map((key) => { + const bucket = buckets.get(key)!; + bucket.sort((a, b) => a.ts - b.ts); + const head = + bucket.find((t) => (t.userText ?? "").trim().length > 0) ?? bucket[0]!; + const tools = bucket.flatMap((t) => t.toolCalls ?? []); + const ids = bucket.map((t) => t.id); + const sumV = bucket.reduce((acc, t) => acc + (t.value ?? 0), 0); + const sumA = bucket.reduce((acc, t) => acc + (t.alpha ?? 0), 0); + const scope: "private" | "public" | "hub" = head.share?.scope ?? "private"; + return { + turnKey: key, + episodeId: head.episodeId ?? null, + ts: bucket[0]!.ts, + head, + traces: bucket, + ids, + toolCount: tools.length, + toolNames: Array.from(new Set(tools.map((tc) => tc.name))), + aggValue: bucket.length === 0 ? 0 : sumV / bucket.length, + aggAlpha: bucket.length === 0 ? 0 : sumA / bucket.length, + hasReflection: bucket.some((t) => Boolean((t.reflection ?? "").trim())), + scope, + shared: scope !== "private", + }; + }); +} + +function groupKey(tr: TraceDTO): string { + // `turnId` is the stable key stamped by `step-extractor`. Falls back + // to the trace id so legacy rows (NULL turn_id) stand on their own. + const turn = (tr as TraceDTO & { turnId?: number | null }).turnId; + if (typeof turn === "number") return `${tr.episodeId ?? "_"}:${turn}`; + return `${tr.episodeId ?? 
"_"}:${tr.id}`; +} + +function detectGroupRole(g: MemoryGroup): "user" | "assistant" | "tool" | "" { + if (g.toolCount > 0) return "tool"; + return detectRole(g.head); +} + +function flattenToolCallList(g: MemoryGroup): { name: string }[] { + return g.traces.flatMap((t) => t.toolCalls ?? []); +} + +function truncateForExport(tc: { input?: unknown; output?: unknown; errorCode?: string }): string { + if (tc.errorCode) return `ERROR[${tc.errorCode}]`; + const out = tc.output; + if (out == null) return "(no output)"; + if (typeof out === "string") return out.slice(0, 200); + try { + return JSON.stringify(out).slice(0, 200); + } catch { + return String(out).slice(0, 200); + } +} + +/** + * After the edit modal patches the head trace, rebuild the open + * group so the drawer reflects the new userText / summary / tags + * without a round-trip refetch. + */ +function rebuildGroupAfterTracePatch(prev: MemoryGroup, updated: TraceDTO): MemoryGroup { + const traces = prev.traces.map((t) => (t.id === updated.id ? updated : t)); + const head = + traces.find((t) => (t.userText ?? "").trim().length > 0) ?? traces[0]!; + return { ...prev, traces, head }; +} + function formatTs(ts: number): string { if (!ts) return "—"; try { @@ -618,23 +819,37 @@ function formatTs(ts: number): string { // ─── Right-side drawer ─────────────────────────────────────────────────── -interface TimelineRow { - id: string; - ts: number; - userText: string; - agentText: string; - summary?: string | null; - value: number; -} - +/** + * Right-side drawer for one **MemoryGroup** (= one user turn). + * + * The drawer's job is two-fold: + * 1. Show the user-facing meta the card already hinted at (timestamp, + * aggregate V/α, share state, optional tags) plus the head's + * summary + user query, so the row → detail transition feels + * continuous. + * 2. 
Surface the full step list — every L1 trace produced from this + * turn — as collapsible sections so users can drill into per-step + * value/α/reflection without leaving the "one round = one memory" + * mental model. The first step (head) is expanded by default. + * + * Edit and share intentionally diverge in scope: + * - **Edit** patches the head trace only — that's the row that + * carries `userText` / `summary` / `tags`. Sub-steps have empty + * user text by construction (`step-extractor` only stamps the + * query onto the first sub-step) and their tool inputs/outputs + * are immutable. + * - **Share** flips every member of the group to the same scope so + * "this turn is public" stays a coherent mental model. + * - **Delete** wipes every member id so the card never half-disappears. + */ function TraceDrawer({ - trace, + group, onClose, onSave, onShare, onDelete, }: { - trace: TraceDTO; + group: MemoryGroup; onClose: () => void; onSave: ( id: string, @@ -645,47 +860,31 @@ function TraceDrawer({ tags?: string[]; }, ) => Promise | void; - onShare: (id: string, scope: "private" | "public" | "hub" | null) => Promise | void; - onDelete: (tr: TraceDTO) => Promise | void; + onShare: (scope: "private" | "public" | "hub" | null) => Promise | void; + onDelete: () => Promise | void; }) { + const head = group.head; const [mode, setMode] = useState<"view" | "edit" | "share">("view"); - const [summary, setSummary] = useState(trace.summary ?? ""); - const [userText, setUserText] = useState(trace.userText ?? ""); - const [agentText, setAgentText] = useState(trace.agentText ?? ""); - const [tags, setTags] = useState((trace.tags ?? []).join(", ")); + const [summary, setSummary] = useState(head.summary ?? ""); + const [userText, setUserText] = useState(head.userText ?? ""); + const [agentText, setAgentText] = useState(head.agentText ?? ""); + const [tags, setTags] = useState((head.tags ?? 
[]).join(", ")); const [scope, setScope] = useState<"private" | "public" | "hub">( - trace.share?.scope ?? "public", + head.share?.scope ?? "public", ); - const [timeline, setTimeline] = useState(null); useEffect(() => { - setSummary(trace.summary ?? ""); - setUserText(trace.userText ?? ""); - setAgentText(trace.agentText ?? ""); - setTags((trace.tags ?? []).join(", ")); - setScope(trace.share?.scope ?? "public"); - }, [trace]); + setSummary(head.summary ?? ""); + setUserText(head.userText ?? ""); + setAgentText(head.agentText ?? ""); + setTags((head.tags ?? []).join(", ")); + setScope(head.share?.scope ?? "public"); + }, [head]); - useEffect(() => { - if (!trace.episodeId) { - setTimeline([]); - return; - } - const ctrl = new AbortController(); - api - .get<{ traces: TimelineRow[] }>( - `/api/v1/episodes/${encodeURIComponent(trace.episodeId)}/timeline`, - { signal: ctrl.signal }, - ) - .then((r) => setTimeline(r.traces ?? [])) - .catch(() => setTimeline([])); - return () => ctrl.abort(); - }, [trace.episodeId]); - - const title = pickSummary(trace).slice(0, 100) || t("memories.detail.fallbackTitle"); + const title = pickSummary(head).slice(0, 100) || t("memories.detail.fallbackTitle"); const submitEdit = () => { - void onSave(trace.id, { + void onSave(head.id, { summary: summary.trim() ? summary.trim() : null, userText, agentText, @@ -698,7 +897,7 @@ function TraceDrawer({ }; const submitShare = (s: "private" | "public" | "hub" | null) => { - void onShare(trace.id, s); + void onShare(s); setMode("view"); }; @@ -708,8 +907,8 @@ function TraceDrawer({
- {trace.episodeId - ? t("memories.detail.fromTask", { id: trace.episodeId.slice(0, 10) }) + {group.episodeId + ? t("memories.detail.fromTask", { id: group.episodeId.slice(0, 10) }) : t("memories.detail.oneMemory")}

{title}

@@ -728,30 +927,28 @@ function TraceDrawer({
{t("memories.field.ts")}
-
{trace.ts ? new Date(trace.ts).toLocaleString() : "—"}
+
{group.ts ? new Date(group.ts).toLocaleString() : "—"}
{t("memories.field.value")}
-
{trace.value.toFixed(3)}
+
{group.aggValue.toFixed(3)}
{t("memories.field.alpha")}
-
{trace.alpha.toFixed(3)}
- {trace.rHuman != null && ( +
{group.aggAlpha.toFixed(3)}
+ {head.rHuman != null && ( <>
{t("memories.field.rHuman")}
-
{trace.rHuman.toFixed(3)}
+
{head.rHuman.toFixed(3)}
)}
{t("memories.field.priority")}
-
{trace.priority.toFixed(3)}
+
{head.priority.toFixed(3)}
{t("memories.field.share")}
- - {trace.share?.scope ?? "private"} - + {group.scope}
- {trace.tags && trace.tags.length > 0 && ( + {head.tags && head.tags.length > 0 && ( <>
tags
- {trace.tags.map((tg) => ( + {head.tags.map((tg) => ( {tg} @@ -762,86 +959,27 @@ function TraceDrawer({
- {trace.summary && ( + {head.summary && (
{t("memories.field.summary")}
-
{trace.summary}
+
{head.summary}
)} - {trace.userText && ( + {head.userText && (
{t("memories.field.user")}
-                    {trace.userText}
+                    {head.userText}
                   
)} - {trace.agentText && ( -
-
- {t("memories.field.assistant")} -
-
-                    {trace.agentText}
-                  
-
- )} - - {trace.reflection && ( -
-
- {t("memories.field.takeaway")} -
-
-                    {trace.reflection}
-                  
-
- )} - - {trace.toolCalls && trace.toolCalls.length > 0 && ( -
-

- {t("memories.field.toolCalls")} -

-
- {trace.toolCalls.map((tc, i) => ( - - ))} -
-
- )} - - {timeline && timeline.length > 1 && ( -
-

- {t("memories.field.episodeTimeline")} ({timeline.length}) -

-
- {timeline.map((tr) => ( -
-
-
- {tr.summary?.slice(0, 100) || - tr.userText?.slice(0, 100) || - tr.agentText?.slice(0, 100) || - "(step)"} -
-
- V {tr.value.toFixed(2)} - {new Date(tr.ts).toLocaleTimeString()} -
-
-
- ))} -
-
- )} + )} @@ -923,14 +1061,14 @@ function TraceDrawer({