Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 39 additions & 19 deletions apps/memos-local-plugin/adapters/openclaw/bridge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,15 @@ export function flattenMessages(input: unknown[] | undefined): FlatMessage[] {
}
for (const tc of inlineToolCalls) out.push(tc);

// OpenAI legacy: assistant has a top-level `tool_calls` array
// (separate from content). Fold these in after pi-ai inline
// tool calls so order is preserved when both shapes coexist.
if (Array.isArray(m.tool_calls)) {
// OpenAI-legacy fallback only: when the message has NO pi-ai
// inline tool calls but does have a top-level `tool_calls` array
// (pure OpenAI Function-Calling shape). When both shapes coexist
// (as OpenClaw's pi-ai bundled OpenAI adapter does), pi-ai
// already populated `content[].toolCall`, so re-reading the
// top-level field would emit each call twice — which in turn
// causes `extractTurn`'s `pendingCalls.set(key, …)` to clobber
// the first stub's `thinkingBefore` with an empty second stub.
if (inlineToolCalls.length === 0 && Array.isArray(m.tool_calls)) {
for (const tc of m.tool_calls as Array<Record<string, unknown>>) {
const fn = tc.function as Record<string, unknown> | undefined;
if (!fn) continue;
Expand Down Expand Up @@ -476,35 +481,51 @@ export function extractTurn(messages: FlatMessage[], now: number): CapturedTurn
const userText = messages[lastUserIdx].content.trim();
const tail = messages.slice(lastUserIdx + 1);

const assistantParts: string[] = [];
const thinkingParts: string[] = [];
const pendingCalls = new Map<string, Partial<ToolCallDTO> & { _id?: string }>();
const toolCalls: ToolCallDTO[] = [];

// Two separate buffers accumulate content not yet assigned to a tool.
//
// `pendingThinking`: Claude extended-thinking blocks (`ThinkingContent`)
// `pendingAssistant`: regular model text (`TextContent`)
//
// When a `tool_call` arrives, BOTH buffers are flushed together into
// that tool's `thinkingBefore` — this is the reasoning (structured OR
// natural language) the model did before deciding to invoke the tool.
//
// After all messages are processed, whatever remains in the buffers
// forms the final output: `pendingAssistant` → `agentText` (the
// reply) and `pendingThinking` → `agentThinking` (model reasoning
// shown in a dedicated bubble for non-tool turns).
let pendingThinking: string[] = [];
let pendingAssistant: string[] = [];

for (const m of tail) {
if (m.role === "assistant") {
if (m.content) assistantParts.push(m.content);
if (m.content) pendingAssistant.push(m.content);
continue;
}
if (m.role === "thinking") {
if (m.content) thinkingParts.push(m.content);
if (m.content) pendingThinking.push(m.content);
continue;
}
if (m.role === "tool_call" && m.toolName) {
// Assistant decided to call a tool. Stash until the matching
// tool_result lands so we can stitch the full ToolCallDTO.
const parts = [...pendingThinking, ...pendingAssistant];
const thinkingBefore = parts.join("\n\n").trim() || undefined;
pendingThinking = [];
pendingAssistant = [];

const key = m.toolCallId ?? m.toolName;
pendingCalls.set(key, {
_id: m.toolCallId,
name: m.toolName,
input: m.toolInput,
startedAt: m.ts ?? now,
thinkingBefore,
});
continue;
}
if (m.role === "tool_result") {
// Pair by id (preferred — works even when two parallel calls hit
// the same tool name) or fall back to toolName.
const key = m.toolCallId ?? m.toolName ?? "";
const stub = pendingCalls.get(key);
const errorCode = stub
Expand All @@ -517,16 +538,13 @@ export function extractTurn(messages: FlatMessage[], now: number): CapturedTurn
errorCode,
startedAt: stub?.startedAt ?? (m.ts ?? now),
endedAt: m.ts ?? now,
thinkingBefore: stub?.thinkingBefore,
});
if (key) pendingCalls.delete(key);
continue;
}
// system / unknown: ignore for the purpose of extractTurn.
}

// Any tool call that never received a paired tool_result still lands
// in the trace (with `output: undefined`) so the viewer can show
// "tool was invoked but produced no result".
for (const stub of pendingCalls.values()) {
if (!stub.name) continue;
toolCalls.push({
Expand All @@ -535,14 +553,15 @@ export function extractTurn(messages: FlatMessage[], now: number): CapturedTurn
output: undefined,
startedAt: stub.startedAt ?? now,
endedAt: now,
thinkingBefore: stub.thinkingBefore,
});
}

const agentThinking = thinkingParts.join("\n\n").trim();
const agentThinking = pendingThinking.join("\n\n").trim();
return {
userText,
agentText: assistantParts.join("\n\n").trim(),
agentThinking: agentThinking ? agentThinking : undefined,
agentText: pendingAssistant.join("\n\n").trim(),
agentThinking: agentThinking || undefined,
toolCalls,
};
}
Expand Down Expand Up @@ -796,6 +815,7 @@ export function createOpenClawBridge(opts: BridgeOptions): BridgeHandle {
hasError: !!event.error,
});


try {
// Legacy adapter parity: even when `success === false` we still
// enqueue the user's message (and whatever the assistant managed
Expand Down
10 changes: 10 additions & 0 deletions apps/memos-local-plugin/agent-contract/dto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@ export interface ToolCallDTO {
errorCode?: string;
startedAt: EpochMs;
endedAt: EpochMs;
/**
* LLM-native thinking emitted *before* the model decided to invoke this
* tool — e.g. "I got an error from tool_1, let me try a different
* approach". Populated by the adapter when the model interleaves
* thinking blocks between tool calls. `undefined` for legacy data or
* when no thinking preceded this particular call.
*
* Stored inside `tool_calls_json` (no schema migration needed).
*/
thinkingBefore?: string;
}

export interface TurnInputDTO {
Expand Down
28 changes: 19 additions & 9 deletions apps/memos-local-plugin/core/capture/normalizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,25 @@ export function normalizeSteps(
continue;
}

const last = out[out.length - 1];
if (
last &&
last.agentText === agentText &&
last.userText === userText &&
sameToolCalls(last.toolCalls, toolCalls)
) {
log.debug("normalize.skip_duplicate", { key: step.key });
continue;
// Sub-steps produced by the per-tool-call extractor (V7 §0.1) have
// intentionally-identical userText="" / agentText="" and carry only
// a single tool call each — but two different tools can still share
// a short input fingerprint, which the generic dedup path below
// would incorrectly collapse. Skip dedup for sub-steps; the key
// uniqueness guarantees they can't be genuine duplicates.
const isSubStep = (step.meta as Record<string, unknown> | undefined)?.subStep === true;

if (!isSubStep) {
const last = out[out.length - 1];
if (
last &&
last.agentText === agentText &&
last.userText === userText &&
sameToolCalls(last.toolCalls, toolCalls)
) {
log.debug("normalize.skip_duplicate", { key: step.key });
continue;
}
}

out.push({
Expand Down
11 changes: 9 additions & 2 deletions apps/memos-local-plugin/core/capture/step-extractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,11 @@ function segmentToSteps(
out.push({
key: `${episode.id}:${ts}:tool:${i}`,
ts,
userText,
// Only the first sub-step carries the user query; subsequent
// sub-steps leave `userText` empty so the viewer's flattenChat
// doesn't render the same user bubble N times. The turn's
// provenance (episodeId) still links them together.
userText: i === 0 ? userText : "",
agentText: "",
agentThinking: i === 0 ? fullThinking : null,
toolCalls: [tc],
Expand Down Expand Up @@ -232,13 +236,15 @@ function toolCallFromTurn(turn: EpisodeTurn): ToolCallDTO | null {
const endedAt = typeof meta.endedAt === "number" ? meta.endedAt : turn.ts;
const input = meta.input ?? meta.args ?? undefined;
const errorCode = typeof meta.errorCode === "string" ? meta.errorCode : undefined;
const thinkingBefore = typeof meta.thinkingBefore === "string" ? meta.thinkingBefore : undefined;
return {
name,
input,
output: turn.content,
errorCode,
startedAt,
endedAt,
thinkingBefore,
};
}

Expand All @@ -264,7 +270,8 @@ function coerceToolCall(raw: unknown): ToolCallDTO | null {
const startedAt =
typeof r.startedAt === "number" ? r.startedAt : Date.now();
const endedAt = typeof r.endedAt === "number" ? r.endedAt : startedAt;
return { name, input, output, errorCode, startedAt, endedAt };
const thinkingBefore = typeof r.thinkingBefore === "string" ? r.thinkingBefore : undefined;
return { name, input, output, errorCode, startedAt, endedAt, thinkingBefore };
}

function depthFromMeta(meta: Record<string, unknown>): number {
Expand Down
22 changes: 14 additions & 8 deletions apps/memos-local-plugin/core/config/defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,21 +157,27 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
episodeGoalMinSim: 0.45,
tagFilter: "auto",
keywordTopK: 20,
relativeThresholdFloor: 0.4,
// Lowered from 0.4 → 0.2 with the 2026 ranker overhaul: the new
// base relevance already uses channel rank as a first-class
// signal, so the old 0.4 floor was over-pruning keyword hits
// with modest V·decay.
relativeThresholdFloor: 0.2,
skillEtaBlend: 0.15,
smartSeed: true,
smartSeedRatio: 0.7,
multiChannelBypass: true,
skillInjectionMode: "summary",
skillSummaryChars: 200,
llmFilterEnabled: true,
// Tighter than the legacy default (5) so the LLM filter has a
// budget that forces "drop, don't pad". Combined with the
// few-shot prompt this dramatically improves precision.
// small budget; combined with the richer prompt (v3) this keeps
// packets concise without over-dropping.
llmFilterMaxKeep: 4,
// Lowered from 32: small packets (e.g. just a Tier-1 skill +
// a Tier-2 trace) used to skip the LLM filter entirely and ship
// both items even when one was tangential. Now anything > 1
// candidate gets a precision pass.
llmFilterMinCandidates: 2,
// Lowered from 2 → 1: even a single candidate gets a precision
// pass. Mirrors `memos-local-openclaw`'s tool-level filter and
// prevents a lone off-topic memory from sneaking through unchecked.
llmFilterMinCandidates: 1,
llmFilterCandidateBodyChars: 500,
},
},
hub: {
Expand Down
49 changes: 41 additions & 8 deletions apps/memos-local-plugin/core/config/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,14 @@ const AlgorithmSchema = Type.Object({
* `minTraceSim` — when the best hit is weak, we keep more (lower
* absolute floor); when there's a clear winner, we drop noise.
* Set to 0 to disable the relative cutoff entirely.
*
* Default lowered to 0.2 with the 2026 ranker overhaul: the new
* base formula already weighs channel-rank evidence (so a raw
* FTS-only hit lands in a comparable range to a cosine-0.8 hit),
* and the old 0.4 floor was over-pruning keyword matches with
* modest V·decay.
*/
relativeThresholdFloor: NumberInRange(0.4, 0, 1),
relativeThresholdFloor: NumberInRange(0.2, 0, 1),
/**
* Tier-1 skill relevance blend weight for `η` (skill reliability).
* Old default `0.4` made well-trodden skills outrank obviously-more-
Expand All @@ -333,12 +339,28 @@ const AlgorithmSchema = Type.Object({
skillEtaBlend: NumberInRange(0.15, 0, 1),
/**
* MMR Phase-A seed-by-tier policy. When `true`, only seed a tier
* if its best candidate's relevance ≥ `relativeThresholdFloor *
* topRelevance`. This prevents the ranker from force-injecting a
* stale Tier-1 skill / Tier-3 world-model just because it cleared
* the absolute floors.
* if its best candidate's relevance ≥ `poolTopRelevance *
* smartSeedRatio` (see below). This prevents the ranker from
* force-injecting a stale Tier-1 skill / Tier-3 world-model just
* because it cleared the absolute floors.
*/
smartSeed: Bool(true),
/**
* Seed cutoff for smart-seed MMR — tier is seeded iff its best
* candidate's relevance ≥ `poolTopRelevance * smartSeedRatio`.
* Independent of `relativeThresholdFloor` so the seed gate can be
* stricter than the generic drop floor (0.7 is "within 30% of the
* best available candidate anywhere in the pool").
*/
smartSeedRatio: NumberInRange(0.7, 0, 1),
/**
* When a candidate is surfaced by ≥ 2 retrieval channels (e.g.
* both vec and fts hit the same trace), bypass the relative
* threshold. Multi-channel agreement is a strong signal, and
* without this keyword-only matches with modest V·decay often
* get dropped by a noisy `topRelevance`.
*/
multiChannelBypass: Bool(true),
/**
* How Tier-1 skills are surfaced in the injected prompt:
* - "summary" (default): inject only `name + η + 1-line summary +
Expand Down Expand Up @@ -368,10 +390,21 @@ const AlgorithmSchema = Type.Object({
/** Keep at most this many candidates after the LLM filter. */
llmFilterMaxKeep: NumberInRange(5, 1, 30),
/**
* Skip the filter when the ranked list already has ≤ this many
* items — no point paying an LLM round-trip to prune 3 candidates.
* Skip the filter when the ranked list has fewer than this many
* items. Default 1 — even a single candidate gets a precision
* pass, matching `memos-local-openclaw`'s tool-level filter and
* preventing a lone off-topic memory from sneaking through
* unchecked.
*/
llmFilterMinCandidates: NumberInRange(1, 1, 50),
/**
* Body-text budget per candidate when building the LLM filter
* prompt. Higher = more context for precise judgement, at the
* cost of more tokens per round-trip. Default 500 (openclaw uses
* 300 without tags/channels; we include richer metadata, so a
* slightly larger window pays for itself).
*/
llmFilterMinCandidates: NumberInRange(4, 1, 50),
llmFilterCandidateBodyChars: NumberInRange(500, 120, 2000),
}, { default: {} }),
}, { default: {} });

Expand Down
Loading
Loading