Skip to content

Commit bb986b4

Browse files
authored
Minor simplifications (#98)
* add rationale to dedent.ts
* simplify claude logging
* simplify logging setup
* clean lock file
* add back system message types
* upgrade clack
1 parent 2c3e4df commit bb986b4

File tree

13 files changed

+73
-253
lines changed

13 files changed

+73
-253
lines changed

eval/evals/100-flight-booking-plain/hooks.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import type { Hooks } from '../../types.ts';
2+
import { log } from '@clack/prompts';
23

34
const hooks: Hooks = {
4-
postPrepareExperiment: async (experimentArgs, log) => {
5+
postPrepareExperiment: async (experimentArgs) => {
56
// Custom logic to run after preparing the experiment
67
log.success(
78
`Post-prepare hook executed for experiment at ${experimentArgs.experimentPath}`,

eval/evals/110-flight-booking-reshaped/hooks.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@ import * as path from 'node:path';
22
import * as fs from 'node:fs/promises';
33
import type { Hooks } from '../../types.ts';
44
import { addDependency } from 'nypm';
5+
import { log } from '@clack/prompts';
56

67
const hooks: Hooks = {
7-
postPrepareExperiment: async (experimentArgs, log) => {
8+
postPrepareExperiment: async (experimentArgs) => {
89
log.message('Installing the reshaped package');
910
await addDependency('reshaped@latest', {
1011
cwd: experimentArgs.projectPath,

eval/evals/120-flight-booking-radix/hooks.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import type { Hooks } from '../../types.ts';
22
import { addDependency } from 'nypm';
3+
import { log } from '@clack/prompts';
34

45
const hooks: Hooks = {
5-
postPrepareExperiment: async (experimentArgs, log) => {
6+
postPrepareExperiment: async (experimentArgs) => {
67
log.message('Installing the radix-ui package');
78
const options = {
89
cwd: experimentArgs.projectPath,

eval/evals/130-flight-booking-rsuite/hooks.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import type { Hooks } from '../../types.ts';
22
import { addDependency } from 'nypm';
3+
import { log } from '@clack/prompts';
34

45
const hooks: Hooks = {
5-
postPrepareExperiment: async (experimentArgs, log) => {
6+
postPrepareExperiment: async (experimentArgs) => {
67
log.message('Installing the rsuite package');
78
await addDependency('rsuite@latest', {
89
cwd: experimentArgs.projectPath,

eval/lib/agents/claude-code-cli.ts

Lines changed: 14 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
import { x } from 'tinyexec';
22
import * as fs from 'node:fs/promises';
33
import * as path from 'node:path';
4-
import { styleText } from 'node:util';
54
import type { Agent } from '../../types.ts';
6-
import { spinner, taskLog } from '@clack/prompts';
5+
import { spinner } from '@clack/prompts';
76
import Tokenizer, { models, type Model } from 'ai-tokenizer';
87
import { runHook } from '../run-hook.ts';
98

@@ -136,98 +135,6 @@ type ClaudeCodeStreamMessage =
136135
| UserMessage
137136
| ResultMessage;
138137

139-
function formatMessageForLog(
140-
message: ClaudeCodeStreamMessage,
141-
projectPath: string,
142-
deltaSeconds?: number,
143-
): string {
144-
const timePrefix =
145-
deltaSeconds !== undefined ? `[+${deltaSeconds.toFixed(1)}s] ` : '';
146-
const tokenSuffix =
147-
message.tokenCount !== undefined ? ` (${message.tokenCount} tokens)` : '';
148-
switch (message.type) {
149-
case 'system': {
150-
const mcpInfo =
151-
message.mcp_servers.length > 0
152-
? styleText(
153-
['cyan'],
154-
message.mcp_servers
155-
.map((s) => `${s.name}:${s.status}`)
156-
.join(', '),
157-
)
158-
: 'None';
159-
return `${timePrefix}[INIT] Model: ${message.model}, Tools: ${message.tools.length}, MCPs: ${mcpInfo}${tokenSuffix}`;
160-
}
161-
case 'assistant': {
162-
const content = message.message.content;
163-
const textContent = content.find(
164-
(c): c is TextContent => c.type === 'text',
165-
);
166-
const toolUses = content.filter(
167-
(c): c is ToolUseContent => c.type === 'tool_use',
168-
);
169-
170-
if (toolUses.length > 0) {
171-
const todoWrite = toolUses.find((t) => t.name === 'TodoWrite');
172-
if (todoWrite && todoWrite.input.todos) {
173-
const lines = [];
174-
lines.push(`${timePrefix}[ASSISTANT] Todo List:`);
175-
for (const todo of todoWrite.input.todos) {
176-
let checkbox: string;
177-
switch (todo.status) {
178-
case 'completed':
179-
checkbox = '[x]';
180-
break;
181-
case 'in_progress':
182-
checkbox = '[~]';
183-
break;
184-
default:
185-
checkbox = '[ ]';
186-
}
187-
lines.push(` ${checkbox} ${todo.content}`);
188-
}
189-
return lines.join('\n');
190-
} else {
191-
const toolDescriptions = toolUses.map((t) => {
192-
const isMcpTool = t.name.startsWith('mcp__');
193-
let toolDesc: string;
194-
195-
if (
196-
(t.name === 'Read' || t.name === 'Write' || t.name === 'Edit') &&
197-
t.input.file_path
198-
) {
199-
const relPath = path.relative(projectPath, t.input.file_path);
200-
toolDesc = `${t.name}(./${relPath})`;
201-
} else if (t.name === 'Bash' && t.input.command) {
202-
const cmd =
203-
t.input.command.length > 50
204-
? t.input.command.slice(0, 50) + '...'
205-
: t.input.command;
206-
toolDesc = `Bash(${cmd})`;
207-
} else {
208-
toolDesc = t.name;
209-
}
210-
211-
return isMcpTool ? styleText('cyan', toolDesc) : toolDesc;
212-
});
213-
return `${timePrefix}[ASSISTANT] Tools: ${toolDescriptions.join(', ')}${tokenSuffix}`;
214-
}
215-
} else if (textContent) {
216-
const preview = textContent.text.slice(0, 80).replace(/\n/g, ' ');
217-
return `${timePrefix}[ASSISTANT] ${preview}${textContent.text.length > 80 ? '...' : ''}${tokenSuffix}`;
218-
}
219-
return `${timePrefix}[ASSISTANT] (no content)${tokenSuffix}`;
220-
}
221-
case 'user': {
222-
return `${timePrefix}[USER] Tool results: ${message.message.content.length}${tokenSuffix}`;
223-
}
224-
case 'result':
225-
return `${timePrefix}[RESULT] ${message.subtype.toUpperCase()} - ${message.num_turns} turns, ${(message.duration_ms / 1000).toFixed(1)}s, $${message.total_cost_usd.toFixed(4)}${tokenSuffix}`;
226-
default:
227-
return `${timePrefix}[UNKNOWN MESSAGE TYPE]${tokenSuffix}`;
228-
}
229-
}
230-
231138
interface TodoProgress {
232139
current: number;
233140
total: number;
@@ -327,23 +234,16 @@ function getTodoProgress(
327234

328235
export const claudeCodeCli: Agent = {
329236
async execute(prompt, experimentArgs, mcpServerConfig) {
330-
const { projectPath, resultsPath, verbose } = experimentArgs;
237+
const { projectPath, resultsPath } = experimentArgs;
331238
if (mcpServerConfig) {
332239
await fs.writeFile(
333240
path.join(projectPath, '.mcp.json'),
334241
JSON.stringify({ mcpServers: mcpServerConfig }, null, 2),
335242
);
336243
}
337-
const verboseLog = (verbose &&
338-
taskLog({
339-
title: `Executing prompt with Claude Code CLI`,
340-
retainLog: verbose,
341-
})) as ReturnType<typeof taskLog>;
342-
const normalLog = (!verbose && spinner()) as ReturnType<typeof spinner>;
343-
if (!verbose) {
344-
normalLog.start('Agent is working');
345-
}
346-
await runHook('pre-execute-agent', experimentArgs, verboseLog ?? normalLog);
244+
const log = spinner();
245+
log.start('Executing prompt with Claude Code CLI');
246+
await runHook('pre-execute-agent', experimentArgs);
347247

348248
const claudeEncoding = await import('ai-tokenizer/encoding/claude');
349249
const model = models['anthropic/claude-sonnet-4.5'];
@@ -386,29 +286,19 @@ export const claudeCodeCli: Agent = {
386286
parsed.tokenCount = tokenData.tokens;
387287
parsed.costUSD = tokenData.cost;
388288
messages.push(parsed);
389-
if (verbose) {
390-
verboseLog.message(
391-
formatMessageForLog(parsed, projectPath, deltaMs / 1000),
392-
);
393-
} else {
394-
const todoProgress = getTodoProgress(messages);
395-
let progressMessage = `Agent is working, turn ${messages.filter((m) => m.type === 'assistant').length}`;
396-
if (todoProgress) {
397-
progressMessage += `, todo ${todoProgress.current} / ${todoProgress.total}: ${todoProgress.currentTitle}`;
398-
}
399-
normalLog.message(progressMessage);
289+
290+
const todoProgress = getTodoProgress(messages);
291+
let progressMessage = `Agent is working, turn ${messages.filter((m) => m.type === 'assistant').length}`;
292+
if (todoProgress) {
293+
progressMessage += `, todo ${todoProgress.current} / ${todoProgress.total}: ${todoProgress.currentTitle}`;
400294
}
295+
log.message(progressMessage);
401296
}
402297
const resultMessage = messages.find(
403298
(m): m is ResultMessage => m.type === 'result',
404299
);
405300
if (!resultMessage) {
406-
const errorMessage = 'No result message received from Claude Code CLI';
407-
if (verbose) {
408-
verboseLog.error(errorMessage);
409-
} else {
410-
normalLog.stop(errorMessage);
411-
}
301+
log.error('No result message received from Claude Code CLI');
412302
process.exit(1);
413303
}
414304
await claudeProcess;
@@ -431,16 +321,8 @@ export const claudeCodeCli: Agent = {
431321
turns: resultMessage.num_turns,
432322
};
433323
const successMessage = `Agent completed in ${result.turns} turns, ${result.duration} seconds, $${result.cost}`;
434-
await runHook(
435-
'post-execute-agent',
436-
experimentArgs,
437-
verboseLog ?? normalLog,
438-
);
439-
if (verbose) {
440-
verboseLog.success(successMessage);
441-
} else {
442-
normalLog.stop(successMessage);
443-
}
324+
await runHook('post-execute-agent', experimentArgs);
325+
log.stop(successMessage);
444326

445327
return result;
446328
},

0 commit comments

Comments
 (0)