diff --git a/ITBench b/ITBench new file mode 160000 index 00000000..29544a54 --- /dev/null +++ b/ITBench @@ -0,0 +1 @@ +Subproject commit 29544a547e77e862dbc450e0f3c1ec8b3935674a diff --git a/package.json b/package.json index 95d8172d..53395d2f 100644 --- a/package.json +++ b/package.json @@ -94,7 +94,7 @@ "copy:web": "node -e \"const fs=require('fs');const p=require('path');const s=p.join('src','gateway','web','dist');const d=p.join('dist','gateway','web','dist');if(fs.existsSync(s)){fs.cpSync(s,d,{recursive:true});console.log('Copied web UI to '+d)}\"", "prepublishOnly": "npm run build:web && npm run build", "dev": "tsx src/cli-main.ts", - "dev:gateway": "npm run build:web && tsx src/gateway-main.ts", + "dev:gateway": "[ -f .siclaw/trace-env.sh ] && . .siclaw/trace-env.sh; npm run build:web && tsx src/gateway-main.ts", "dev:agentbox": "tsx src/agentbox-main.ts", "start": "node siclaw-tui.mjs", "start:gateway": "node siclaw-gateway.mjs", diff --git a/siclaw-trace-db-full.yaml b/siclaw-trace-db-full.yaml new file mode 100644 index 00000000..43fdd8f0 --- /dev/null +++ b/siclaw-trace-db-full.yaml @@ -0,0 +1,114 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: siclaw-trace-db-pvc +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: local-hostpath + volumeName: siclaw-trace-db-pv +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: siclaw-trace-db + labels: + app: siclaw-trace-db +spec: + replicas: 1 + selector: + matchLabels: + app: siclaw-trace-db + template: + metadata: + labels: + app: siclaw-trace-db + spec: + nodeName: cpu-10-208-55-85 + initContainers: + - name: init-mkdir + image: busybox + command: ["sh", "-c", "mkdir -p /data/siclaw-trace-db && chmod 777 /data/siclaw-trace-db"] + volumeMounts: + - name: host-data + mountPath: /data + securityContext: + privileged: true + containers: + - name: siclaw-trace-db + image: mysql:8.0 + ports: + - containerPort: 3306 + env: + - name: MYSQL_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: siclaw-trace-db-secret + key: mysql-root-password + - name: MYSQL_DATABASE + valueFrom: + secretKeyRef: + name: siclaw-trace-db-secret + key: mysql-database + - name: MYSQL_USER + valueFrom: + secretKeyRef: + name: siclaw-trace-db-secret + key: mysql-user + - name: MYSQL_PASSWORD + valueFrom: + secretKeyRef: + name: siclaw-trace-db-secret + key: mysql-password + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + volumeMounts: + - name: mysql-storage + mountPath: /var/lib/mysql + livenessProbe: + exec: + command: + - mysqladmin + - ping + - -h + - localhost + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + exec: + command: + - mysqladmin + - ping + - -h + - localhost + initialDelaySeconds: 15 + periodSeconds: 5 + volumes: + - name: mysql-storage + persistentVolumeClaim: + claimName: siclaw-trace-db-pvc + - name: host-data + hostPath: + path: /data + type: DirectoryOrCreate +--- +apiVersion: v1 +kind: Service +metadata: + name: siclaw-trace-db +spec: + selector: + app: siclaw-trace-db + ports: + - protocol: TCP + port: 3306 + targetPort: 3306 + type: ClusterIP diff --git a/siclaw-trace-db-pv.yaml b/siclaw-trace-db-pv.yaml new file mode 100644 index 00000000..11841ee3 --- /dev/null +++ b/siclaw-trace-db-pv.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: siclaw-trace-db-pv +spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + storageClassName: local-hostpath + local: + path: /data/siclaw-trace-db + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - cpu-10-208-55-85 diff --git a/src/agentbox/http-server.ts b/src/agentbox/http-server.ts index 22553c16..a37afd3c 100644 --- a/src/agentbox/http-server.ts +++ b/src/agentbox/http-server.ts @@ -241,7 +241,7 @@ export function createHttpServer(sessionManager: AgentBoxSessionManager): http.S * The message is sent to the Agent, and responses are returned via SSE stream. */ addRoute("POST", "/api/prompt", async (req, res) => { - const body = (await parseJsonBody(req)) as { sessionId?: string; text?: string; mode?: SessionMode; modelProvider?: string; modelId?: string; brainType?: BrainType; systemPromptTemplate?: string; modelConfig?: Record; credentials?: { manifest: Array>; files: Array<{ name: string; content: string; mode?: number }> } }; + const body = (await parseJsonBody(req)) as { sessionId?: string; text?: string; mode?: SessionMode; modelProvider?: string; modelId?: string; brainType?: BrainType; systemPromptTemplate?: string; modelConfig?: Record; credentials?: { manifest: Array>; files: Array<{ name: string; content: string; mode?: number }> }; username?: string }; if (!body.text) { sendJson(res, 400, { error: "Missing 'text' field" }); @@ -321,7 +321,11 @@ export function createHttpServer(sessionManager: AgentBoxSessionManager): http.S // Subscribe to buffer events so SSE can replay them even if it connects late const brainUnsub = managed.brain.subscribe((event) => { if (!managed._promptDone) { - managed._eventBuffer.push(event); + // Stamp with server time when emitted so replayed events have accurate timestamps + const tsEvent = typeof event === "object" && event !== null + ? { ...(event as object), ts: Date.now() } + : event; + managed._eventBuffer.push(tsEvent); } // Null dpState.checklist when deep_search completes — this is the exit signal // for the SDK brain's auto-continue loop in claude-sdk-brain.ts. @@ -343,6 +347,7 @@ export function createHttpServer(sessionManager: AgentBoxSessionManager): http.S type: "tool_progress", toolName: "deep_search", progress: event, + ts: Date.now(), }); } // Sync phase events to SDK brain's dpState so the auto-continue loop @@ -436,6 +441,19 @@ export function createHttpServer(sessionManager: AgentBoxSessionManager): http.S } catch { /* best-effort, don't block prompt */ } } + // Mark the explicit trace boundary: ONE user prompt = ONE trace file, even + // if pi-agent internally fires multiple agent_start/end cycles (retry or + // auto-compaction). beginPrompt is the start, endPrompt is called below in + // actuallyFinish() after the whole prompt (including any retries) settles. + // Also forward the displayable username so filenames use "admin" instead of + // the internal hex userId. + if (managed._traceRecorder) { + try { + if (typeof body.username === "string" && body.username) managed._traceRecorder.setUsername(body.username); + if (typeof body.text === "string") await managed._traceRecorder.beginPrompt(body.text); + } catch (err) { console.warn("[agentbox-http] trace-recorder beginPrompt failed:", err); } + } + // Execute prompt asynchronously; notify SSE to close on completion console.log(`[agentbox-http] Starting prompt for session ${managed.id} [lang=${detectedLang}]`); @@ -461,6 +479,18 @@ export function createHttpServer(sessionManager: AgentBoxSessionManager): http.S userId: sessionManager.userId, }); + // Flush the explicit trace — fires ONCE per user prompt, even if pi-agent + // internally executed multiple agent_start/end cycles (retry, compaction). + // actuallyFinish() is the definitive "prompt is truly done" point (it waits + // for auto_compaction_end / auto_retry_end before firing). + if (managed._traceRecorder) { + // Fire-and-forget: actuallyFinish() is a sync callback, and we don't + // want to block the SSE stream close on DB flush. Failures are warned. + managed._traceRecorder.endPrompt(promptOutcome).catch((err) => { + console.warn("[agentbox-http] trace-recorder endPrompt failed:", err); + }); + } + // Stop buffering if (managed._bufferUnsub) { managed._bufferUnsub(); @@ -551,7 +581,11 @@ export function createHttpServer(sessionManager: AgentBoxSessionManager): http.S if (closed || res.writableEnded) return; try { sseEventCount++; - const data = JSON.stringify(event); + // Add server timestamp if not already present (buffered events carry their original ts) + const out = typeof event === "object" && event !== null && !("ts" in (event as object)) + ? { ...(event as object), ts: Date.now() } + : event; + const data = JSON.stringify(out); res.write(`data: ${data}\n\n`); } catch (err) { console.warn(`[agentbox-http] SSE write error for session ${sessionId}:`, err); @@ -674,6 +708,14 @@ export function createHttpServer(sessionManager: AgentBoxSessionManager): http.S } console.log(`[agentbox-http] Steering session ${sessionId}: ${body.text.slice(0, 80)}`); + // Record a standalone trace row for this steer BEFORE the brain consumes + // it. Steer messages bypass the /api/prompt path that normally triggers + // beginPrompt(), so without this the DP button clicks ([DP_CONFIRM], + // [DP_ADJUST], [DP_SKIP], [DP_REINVESTIGATE]) leave zero audit trail. + if (managed._traceRecorder) { + try { await managed._traceRecorder.recordSteerEvent(body.text); } + catch (err) { console.warn("[agentbox-http] recordSteerEvent failed:", err); } + } try { await managed.brain.steer(body.text); sendJson(res, 200, { ok: true }); @@ -993,6 +1035,11 @@ export function createHttpServer(sessionManager: AgentBoxSessionManager): http.S } }); + // NOTE: /api/traces routes are intentionally NOT registered here. + // They live on the Gateway (src/gateway/server.ts) because querying the + // trace DB is a pure read that must not depend on the lazy-spawned + // AgentBox — you shouldn't have to send a prompt before you can query. + // ==================== Server ==================== /** Main request handler shared by HTTP and HTTPS servers */ diff --git a/src/agentbox/session.ts b/src/agentbox/session.ts index d4d939a4..3198a1e0 100644 --- a/src/agentbox/session.ts +++ b/src/agentbox/session.ts @@ -24,6 +24,7 @@ import { saveSessionKnowledge } from "../memory/session-summarizer.js"; import type { DpState } from "../tools/workflow/dp-tools.js"; import { loadConfig, getEmbeddingConfig } from "../core/config.js"; import { emitDiagnostic } from "../shared/diagnostic-events.js"; +import { maybeCreateTraceRecorder, type TraceRecorder } from "../core/trace-recorder.js"; // topic-consolidator import removed — consolidation disabled export interface ManagedSession { @@ -70,6 +71,10 @@ export interface ManagedSession { _lastSavedMessageCount: number; /** Pending release timer (cleared when a new prompt arrives before TTL expires) */ _releaseTimer: ReturnType | null; + /** Trace recorder — writes per-prompt JSON to .siclaw/traces. null when disabled. */ + _traceRecorder?: TraceRecorder | null; + /** Unsubscribe fn for trace recorder's brain subscription. */ + _traceUnsub?: (() => void) | null; } export interface PersistedDpStateSnapshot { @@ -262,6 +267,27 @@ export class AgentBoxSessionManager { this.sessions.set(id, managed); emitDiagnostic({ type: "session_created", sessionId: id }); + // Trace recorder — writes per-prompt JSON traces to .siclaw/traces for + // offline retrospective. Filesystem only, not exposed via HTTP/SSE/WS. + // Disable with SICLAW_TRACE_DISABLE=1; override path with SICLAW_TRACE_DIR. + try { + const recorder = await maybeCreateTraceRecorder({ + sessionId: id, + userId: this.userId, + mode: effectiveMode, + brainType: effectiveBrainType, + getSessionStats: () => managed!.brain.getSessionStats(), + getModel: () => managed!.brain.getModel(), + dpStateRef: result.dpStateRef, + }); + if (recorder) { + managed._traceRecorder = recorder; + managed._traceUnsub = recorder.attach(managed.brain); + } + } catch (err) { + console.warn(`[agentbox-session] Trace recorder setup failed for ${id}:`, err); + } + // Tool execution timing (for tool_call diagnostic events). // NOTE: tool_execution_start/end events depend on the brain implementation. // claude-sdk brain emits them reliably; pi-agent brain depends on the SDK's @@ -491,6 +517,18 @@ export class AgentBoxSessionManager { console.warn(`[agentbox-session] Memory auto-save failed for ${sessionId}:`, err); } + // 1b. Close trace recorder — flushes any in-flight trace to disk. + if (managed._traceUnsub) { + try { managed._traceUnsub(); } catch { /* ignore */ } + managed._traceUnsub = null; + } + if (managed._traceRecorder) { + try { await managed._traceRecorder.close(); } catch (err) { + console.warn(`[agentbox-session] Trace recorder close failed for ${sessionId}:`, err); + } + managed._traceRecorder = null; + } + // 2. Shutdown per-session MCP connections if (managed.mcpManager) { try { diff --git a/src/cli-main.ts b/src/cli-main.ts index d1f8f316..bd9b649f 100644 --- a/src/cli-main.ts +++ b/src/cli-main.ts @@ -1,4 +1,5 @@ import fs from "node:fs"; +import os from "node:os"; import path from "node:path"; import { InteractiveMode, @@ -13,6 +14,7 @@ import { saveSessionKnowledge } from "./memory/session-summarizer.js"; // topic-consolidator import removed — consolidation disabled import type { BrainType } from "./core/brain-session.js"; import { debugPodGC, debugPodCache } from "./tools/infra/debug-pod.js"; +import { maybeCreateTraceRecorder } from "./core/trace-recorder.js"; // Parse arguments @@ -116,6 +118,41 @@ if (memoryIndexer) { .catch(err => console.warn("[siclaw] Startup maintenance failed:", err)); } +// Trace recorder — writes per-prompt JSON traces to .siclaw/traces for offline +// retrospective. Not exposed via HTTP/SSE. Disable with SICLAW_TRACE_DISABLE=1. +const osUsername = (() => { try { return os.userInfo().username; } catch { return process.env.USER ?? "unknown"; } })(); +const traceRecorder = await maybeCreateTraceRecorder({ + sessionId: sessionManager.getSessionId?.() ?? `cli-${Date.now()}`, + userId: osUsername, + username: osUsername, + mode: "cli", + brainType: brain.brainType, + getSessionStats: () => brain.getSessionStats(), + getModel: () => brain.getModel(), +}); +if (traceRecorder) { + traceRecorder.attach(brain); + const traceDir = process.env.SICLAW_TRACE_DIR ?? path.join(process.cwd(), ".siclaw", "traces"); + console.log(`[siclaw] Trace recording → ${path.relative(process.cwd(), traceDir) || traceDir}`); + + // Wrap session.prompt (what InteractiveMode calls) so each user-initiated + // prompt yields exactly ONE trace file, even if pi-agent internally runs + // multiple agent_start/end cycles (empty-response retry, auto-compaction). + const origSessionPrompt = session.prompt.bind(session); + (session as unknown as { prompt: (text: string) => Promise }).prompt = async (text: string) => { + await traceRecorder.beginPrompt(text); + let outcome: "completed" | "error" = "completed"; + try { + await origSessionPrompt(text); + } catch (err) { + outcome = "error"; + throw err; + } finally { + await traceRecorder.endPrompt(outcome); + } + }; +} + // Debug: subscribe to all session events and write to log file if (debugMode) { const logFile = path.join(process.cwd(), "siclaw-debug.log"); @@ -228,6 +265,10 @@ if (session.sessionFile) { } } +// Close trace recorder — flushes any in-flight trace. +if (traceRecorder) { + try { await traceRecorder.close(); } catch { /* ignore */ } +} // Clean up cached debug pods try { await debugPodCache.evictAll(); } catch { /* ignore */ } // Shutdown MCP connections diff --git a/src/core/trace-recorder.test.ts b/src/core/trace-recorder.test.ts new file mode 100644 index 00000000..aff24c55 --- /dev/null +++ b/src/core/trace-recorder.test.ts @@ -0,0 +1,529 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import fs from "node:fs"; +import path from "node:path"; +import os from "node:os"; +import { TraceRecorder } from "./trace-recorder.js"; +import { emitDiagnostic } from "../shared/diagnostic-events.js"; + +describe("TraceRecorder", () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "trace-rec-")); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + function makeRecorder(sessionId = "sess-1") { + return new TraceRecorder({ + traceDir: tmpDir, + sessionId, + userId: "u1", + mode: "cli", + brainType: "pi-agent", + getSessionStats: () => ({ + tokens: { input: 100, output: 50, cacheRead: 10, cacheWrite: 5, total: 165 }, + cost: 0.01, + }), + getModel: () => ({ + id: "test-model", + name: "Test", + provider: "fake", + contextWindow: 8000, + maxTokens: 2000, + reasoning: false, + }), + }); + } + + function readTraces(): Array> { + return fs + .readdirSync(tmpDir) + .filter((f) => f.endsWith(".json")) + .map((f) => JSON.parse(fs.readFileSync(path.join(tmpDir, f), "utf-8"))); + } + + it("writes a JSON trace on agent_start → agent_end with tool calls", () => { + const rec = makeRecorder(); + const listeners: Array<(e: unknown) => void> = []; + const fakeBrain = { + brainType: "pi-agent" as const, + subscribe(fn: (e: unknown) => void) { + listeners.push(fn); + return () => { + const i = listeners.indexOf(fn); + if (i >= 0) listeners.splice(i, 1); + }; + }, + } as any; + + rec.attach(fakeBrain); + const emit = (e: unknown) => listeners.forEach((fn) => fn(e)); + + emit({ type: "message_end", message: { role: "user", content: "check pods" } }); + emit({ type: "agent_start" }); + emit({ type: "turn_start" }); + emit({ + type: "tool_execution_start", + toolName: "restricted_bash", + toolCallId: "tc1", + args: { command: "kubectl get pods" }, + }); + emit({ + type: "tool_execution_end", + toolName: "restricted_bash", + toolCallId: "tc1", + result: { content: [{ type: "text", text: "pod1 Running" }] }, + }); + emit({ + type: "message_end", + message: { + role: "assistant", + stopReason: "end_turn", + content: [{ type: "text", text: "All pods healthy." }], + }, + }); + emit({ type: "turn_end" }); + emit({ type: "agent_end" }); + + const traces = readTraces(); + expect(traces).toHaveLength(1); + const t = traces[0] as any; + expect(t.sessionId).toBe("sess-1"); + expect(t.userMessage).toBe("check pods"); + expect(t.outcome).toBe("completed"); + expect(t.mode).toBe("cli"); + expect(t.brainType).toBe("pi-agent"); + + const toolCallSteps = t.steps.filter((s: any) => s.kind === "tool_call"); + expect(toolCallSteps).toHaveLength(1); + expect(toolCallSteps[0].name).toBe("restricted_bash"); + expect(toolCallSteps[0].args).toEqual({ command: "kubectl get pods" }); + expect(toolCallSteps[0].output).toBe("pod1 Running"); + expect(toolCallSteps[0].isError).toBe(false); + expect(typeof toolCallSteps[0].durationMs).toBe("number"); + // Beijing-time strings: "YYYY-MM-DD HH:mm:ss.SSS" + expect(toolCallSteps[0].startedAt).toMatch(/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}$/); + expect(toolCallSteps[0].endedAt).toMatch(/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}$/); + + const msgSteps = t.steps.filter((s: any) => s.kind === "message"); + expect(msgSteps[0].text).toBe("All pods healthy."); + expect(msgSteps[0].role).toBe("assistant"); + expect(msgSteps[0].ts).toMatch(/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}$/); + + expect(t.startedAt).toMatch(/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}$/); + expect(t.endedAt).toMatch(/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}$/); + + expect(t.stats.tokensDelta).toBeDefined(); + // schemaVersion 1.2 since isInjectedPrompt + dpStatusEnd were added. + expect(t.schemaVersion).toBe("1.2"); + expect(typeof t.isInjectedPrompt).toBe("boolean"); + expect(t.dpStatusEnd).toBe("idle"); + // Redundant fields removed. + expect(t.traceId).toBeUndefined(); + expect(t.eventCount).toBeUndefined(); + expect(t.stats.before).toBeUndefined(); + expect(t.stats.after).toBeUndefined(); + }); + + it("records skill via local_script (Path B)", () => { + const rec = makeRecorder(); + const listeners: Array<(e: unknown) => void> = []; + const fakeBrain = { + brainType: "pi-agent" as const, + subscribe(fn: (e: unknown) => void) { listeners.push(fn); return () => {}; }, + } as any; + rec.attach(fakeBrain); + const emit = (e: unknown) => listeners.forEach((fn) => fn(e)); + + emit({ type: "agent_start" }); + emit({ + type: "tool_execution_start", + toolName: "local_script", + toolCallId: "tc1", + args: { skill: "pod-diagnosis", script: "check.sh" }, + }); + emit({ + type: "tool_execution_end", + toolName: "local_script", + toolCallId: "tc1", + result: { content: [{ type: "text", text: "ok" }] }, + }); + emit({ type: "agent_end" }); + + const t = readTraces()[0] as any; + const toolCall = t.steps.find((s: any) => s.kind === "tool_call"); + expect(toolCall.skill).toEqual({ skillName: "pod-diagnosis", scriptName: "check.sh", via: "local_script" }); + expect(t.skillsUsed).toHaveLength(1); + expect(t.skillsUsed[0]).toMatchObject({ skillName: "pod-diagnosis", via: "local_script" }); + }); + + it("records skill via read(SKILL.md) (Path A)", () => { + const rec = makeRecorder(); + const listeners: Array<(e: unknown) => void> = []; + const fakeBrain = { + brainType: "pi-agent" as const, + subscribe(fn: (e: unknown) => void) { listeners.push(fn); return () => {}; }, + } as any; + rec.attach(fakeBrain); + const emit = (e: unknown) => listeners.forEach((fn) => fn(e)); + + emit({ type: "agent_start" }); + emit({ + type: "tool_execution_start", + toolName: "read", + toolCallId: "r1", + args: { path: "/home/yye/siclaw/skills/core/cluster-events/SKILL.md" }, + }); + emit({ + type: "tool_execution_end", + toolName: "read", + toolCallId: "r1", + result: { content: [{ type: "text", text: "# Cluster Events\n..." }] }, + }); + emit({ + type: "tool_execution_start", + toolName: "read", + toolCallId: "r2", + args: { path: "/home/yye/siclaw/skills/user/yye/my-skill/SKILL.md" }, + }); + emit({ + type: "tool_execution_end", + toolName: "read", + toolCallId: "r2", + result: { content: [{ type: "text", text: "..." }] }, + }); + emit({ + type: "tool_execution_start", + toolName: "read", + toolCallId: "r3", + args: { path: "/home/yye/siclaw/src/core/agent-factory.ts" }, // non-skill, should be ignored + }); + emit({ + type: "tool_execution_end", + toolName: "read", + toolCallId: "r3", + result: { content: [{ type: "text", text: "..." }] }, + }); + emit({ type: "agent_end" }); + + const t = readTraces()[0] as any; + expect(t.skillsUsed).toHaveLength(2); + expect(t.skillsUsed[0]).toEqual(expect.objectContaining({ + skillName: "cluster-events", scope: "core", via: "read", + })); + expect(t.skillsUsed[1]).toEqual(expect.objectContaining({ + skillName: "my-skill", scope: "user", via: "read", + })); + // Non-SKILL.md read should have no skill field. + const tcs = t.steps.filter((s: any) => s.kind === "tool_call"); + expect(tcs[2].skill).toBeUndefined(); + }); + + it("drops redundant toolResult and user role messages from steps", () => { + const rec = makeRecorder(); + const listeners: Array<(e: unknown) => void> = []; + const fakeBrain = { + brainType: "pi-agent" as const, + subscribe(fn: (e: unknown) => void) { listeners.push(fn); return () => {}; }, + } as any; + rec.attach(fakeBrain); + const emit = (e: unknown) => listeners.forEach((fn) => fn(e)); + + emit({ type: "message_end", message: { role: "user", content: "hi" } }); + emit({ type: "agent_start" }); + emit({ type: "message_end", message: { role: "toolResult", content: "duplicate of tool output" } }); + emit({ type: "message_end", message: { role: "assistant", content: [{ type: "text", text: "reply" }] } }); + emit({ type: "agent_end" }); + + const t = readTraces()[0] as any; + const messages = t.steps.filter((s: any) => s.kind === "message"); + expect(messages).toHaveLength(1); + expect(messages[0].role).toBe("assistant"); + }); + + it("enriches local_script scope when diagnostic event fires BEFORE tool_execution_end", () => { + const rec = makeRecorder(); + const listeners: Array<(e: unknown) => void> = []; + const fakeBrain = { + brainType: "pi-agent" as const, + subscribe(fn: (e: unknown) => void) { listeners.push(fn); return () => {}; }, + } as any; + rec.attach(fakeBrain); + const emit = (e: unknown) => listeners.forEach((fn) => fn(e)); + + // Simulate the real order observed in local-script.ts: + // diagnostic fires *inside* the tool's execute(), before the brain dispatches tool_execution_end. + emit({ type: "agent_start" }); + emit({ type: "tool_execution_start", toolName: "local_script", toolCallId: "tc1", + args: { skill: "volcano-diagnose-pod", script: "diagnose-pod.sh" } }); + emitDiagnostic({ + type: "skill_call", + skillName: "volcano-diagnose-pod", + scriptName: "diagnose-pod.sh", + scope: "personal", + outcome: "success", + durationMs: 123, + sessionId: "sess-1", + }); + emit({ type: "tool_execution_end", toolName: "local_script", toolCallId: "tc1", + result: { content: [{ type: "text", text: "done" }] } }); + emit({ type: "agent_end" }); + + const t = readTraces()[0] as any; + const toolCall = t.steps.find((s: any) => s.kind === "tool_call"); + expect(toolCall.skill.scope).toBe("personal"); + }); + + it("marks outcome as error when assistant stopReason=error", () => { + const rec = makeRecorder(); + const listeners: Array<(e: unknown) => void> = []; + const fakeBrain = { + brainType: "pi-agent" as const, + subscribe(fn: (e: unknown) => void) { + listeners.push(fn); + return () => {}; + }, + } as any; + rec.attach(fakeBrain); + const emit = (e: unknown) => listeners.forEach((fn) => fn(e)); + + emit({ type: "agent_start" }); + emit({ + type: "message_end", + message: { + role: "assistant", + stopReason: "error", + errorMessage: "rate limit", + content: [], + }, + }); + emit({ type: "agent_end" }); + + const t = readTraces()[0] as any; + expect(t.outcome).toBe("error"); + }); + + it("writes filename as trace--