Skip to content

Commit 8dcebaf

Browse files
jerryliang64 and claude authored
feat(agent-tracing): collect assistant messages into root run outputs (#421)
## Summary

- Automatically collect assistant text responses during message processing into `outputs.messages`
- Aligns with LangGraph convention where root run `outputs` contains business data (messages)

## Changes

- `core/agent-tracing/src/ClaudeAgentTracer.ts`
  - Add `outputMessages` array to `Trace` class to accumulate assistant text
  - Collect text from assistant messages in `handleAssistant`
  - Include `messages` in root run `outputs` in `handleResult`
- `core/agent-tracing/test/ClaudeAgentTracer.test.ts`
  - New test: multi-turn assistant text collected into `outputs.messages`
  - New test: empty messages array when no assistant text

## Root Run outputs (before → after)

```diff
 outputs: {
+  messages: [{ role: 'assistant', content: '...' }, ...],
   result: '...',
   is_error: false,
   num_turns: 1,
   llmOutput: { promptTokens, completionTokens, ... },
 }
```

## Test plan

- [x] All 58 tests pass (56 existing + 2 new)
- [x] Verified locally in chair-sandbox-ai-use project

🤖 Generated with [Claude Code](https://claude.com/claude-code)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Assistant text content is now collected per trace and exposed in trace outputs as a messages array.
  * Mixed content blocks (text and tool-use) are preserved in the recorded messages.
* **Tests**
  * Added tests verifying message collection for multi-part assistant outputs and the empty messages case when no assistant text appears.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 5471bda commit 8dcebaf

File tree

2 files changed

+56
-0
lines changed

2 files changed

+56
-0
lines changed

core/agent-tracing/src/ClaudeAgentTracer.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class Trace {
3131
private startTime: number;
3232
private executionOrder = 2; // Start at 2, root is 1
3333
private pendingToolUses = new Map<string, Run>();
34+
private outputMessages: Array<{ role: string; content: ClaudeContentBlock[] }> = [];
3435
private tracer: ClaudeAgentTracer;
3536

3637
constructor(tracer: ClaudeAgentTracer, options?: CreateTraceOptions) {
@@ -87,6 +88,11 @@ class Trace {
8788
const hasToolUse = content.some(c => c.type === 'tool_use');
8889
const hasText = content.some(c => c.type === 'text');
8990

91+
// Collect assistant message for outputs.messages
92+
if (content.length > 0) {
93+
this.outputMessages.push({ role: 'assistant', content });
94+
}
95+
9096
if (hasToolUse) {
9197
const eventTime = Date.now();
9298
// Create LLM run that initiated tool calls
@@ -164,6 +170,7 @@ class Trace {
164170
// Update and log root run end
165171
this.rootRun.end_time = this.startTime + (message.duration_ms || 0);
166172
this.rootRun.outputs = {
173+
messages: this.outputMessages,
167174
result: message.result,
168175
is_error: message.is_error,
169176
num_turns: message.num_turns,

core/agent-tracing/test/ClaudeAgentTracer.test.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,55 @@ describe('test/ClaudeAgentTracer.test.ts', () => {
328328
});
329329
});
330330

331+
describe('Trace outputs.messages in root run', () => {
332+
it('should collect assistant text messages into outputs.messages', async () => {
333+
const { claudeTracer, capturedRuns } = createTestEnv();
334+
const trace = claudeTracer.createTrace();
335+
336+
const messages: SDKMessage[] = [
337+
createMockInit(),
338+
createMockAssistantWithTool(),
339+
createMockUserToolResult(),
340+
createMockAssistantTextOnly(),
341+
createMockResult(),
342+
];
343+
344+
for (const msg of messages) {
345+
await trace.processMessage(msg);
346+
}
347+
348+
const rootEnd = capturedRuns.find(e => !e.run.parent_run_id && e.status === RunStatus.END);
349+
assert(rootEnd, 'Should have root_run end');
350+
const outputMessages = (rootEnd.run.outputs as any)?.messages;
351+
assert(Array.isArray(outputMessages), 'outputs.messages should be an array');
352+
assert.strictEqual(outputMessages.length, 2);
353+
// First message has text + tool_use
354+
assert.strictEqual(outputMessages[0].role, 'assistant');
355+
assert.strictEqual(outputMessages[0].content.length, 2);
356+
assert.strictEqual(outputMessages[0].content[0].type, 'text');
357+
assert.strictEqual(outputMessages[0].content[0].text, 'Let me run that command for you.');
358+
assert.strictEqual(outputMessages[0].content[1].type, 'tool_use');
359+
assert.strictEqual(outputMessages[0].content[1].name, 'Bash');
360+
// Second message has text only
361+
assert.strictEqual(outputMessages[1].role, 'assistant');
362+
assert.deepStrictEqual(outputMessages[1].content, [{ type: 'text', text: 'The answer is 21.' }]);
363+
});
364+
365+
it('should have empty messages array when no assistant text', async () => {
366+
const { claudeTracer, capturedRuns } = createTestEnv();
367+
const trace = claudeTracer.createTrace();
368+
369+
await trace.processMessage(createMockInit());
370+
await trace.processMessage(createMockResult());
371+
372+
const rootEnd = capturedRuns.find(e => !e.run.parent_run_id && e.status === RunStatus.END);
373+
assert(rootEnd, 'Should have root_run end');
374+
const outputMessages = (rootEnd.run.outputs as any)?.messages;
375+
assert(Array.isArray(outputMessages), 'outputs.messages should be an array');
376+
assert.strictEqual(outputMessages.length, 0);
377+
});
378+
});
379+
331380
describe('Batch mode + text-only', () => {
332381
it('should trace a text-only response via processMessages', async () => {
333382
const { claudeTracer, capturedRuns } = createTestEnv();

0 commit comments

Comments
 (0)