diff --git a/cli/README.md b/cli/README.md index e4a2e199..47f86ec2 100644 --- a/cli/README.md +++ b/cli/README.md @@ -345,6 +345,7 @@ ralphy --parallel --sandbox | Gemini | `gemini` | `--yolo` | tokens + cost | When an engine exits non-zero, ralphy includes the last lines of CLI output in the error message to make debugging easier. +Authentication and command-failure messages are parsed more strictly to reduce false positives and make retry behavior more predictable. ## Links diff --git a/cli/src/cli/commands/run.ts b/cli/src/cli/commands/run.ts index e9fc95b7..6482f168 100644 --- a/cli/src/cli/commands/run.ts +++ b/cli/src/cli/commands/run.ts @@ -40,19 +40,19 @@ export async function runLoop(options: RuntimeOptions): Promise { if (!existsSync(options.prdFile)) { logError(`${options.prdFile} not found in current directory`); logInfo(`Create a ${options.prdFile} file with tasks`); - process.exit(1); + throw new Error(`PRD source not found: ${options.prdFile}`); } } else if (options.prdSource === "markdown-folder") { if (!existsSync(options.prdFile)) { logError(`PRD folder ${options.prdFile} not found`); logInfo(`Create a ${options.prdFile}/ folder with markdown files containing tasks`); - process.exit(1); + throw new Error(`PRD folder not found: ${options.prdFile}`); } } if (options.prdSource === "github" && !options.githubRepo) { logError("GitHub repository not specified. Use --github owner/repo"); - process.exit(1); + throw new Error("GitHub repository not specified"); } // Check engine availability @@ -61,7 +61,7 @@ export async function runLoop(options: RuntimeOptions): Promise { if (!available) { logError(`${engine.name} CLI not found. Make sure '${engine.cliCommand}' is in your PATH.`); - process.exit(1); + throw new Error(`${engine.name} CLI not available`); } // Create task source with caching for better performance @@ -91,7 +91,7 @@ export async function runLoop(options: RuntimeOptions): Promise { logError("Cannot run in parallel/branch mode: repository has no commits yet."); logInfo("Please make an initial commit first:"); logInfo(' git add . && git commit -m "Initial commit"'); - process.exit(1); + throw new Error("Repository has no commits yet"); } } @@ -195,6 +195,6 @@ export async function runLoop(options: RuntimeOptions): Promise { } if (result.tasksFailed > 0) { - process.exit(1); + throw new Error(`${result.tasksFailed} task(s) failed`); } } diff --git a/cli/src/cli/commands/task.ts b/cli/src/cli/commands/task.ts index 312977e7..f34d633c 100644 --- a/cli/src/cli/commands/task.ts +++ b/cli/src/cli/commands/task.ts @@ -28,7 +28,7 @@ export async function runTask(task: string, options: RuntimeOptions): Promise { - try { - const checkCommand = isWindows ? "where" : "which"; - if (isBun) { - const proc = Bun.spawn([checkCommand, command], { - stdout: "pipe", - stderr: "pipe", - }); - const exitCode = await proc.exited; - return exitCode === 0; - } - // Node.js fallback - where/which don't need shell - const result = spawnSync(checkCommand, [command], { stdio: "pipe" }); - return result.status === 0; - } catch { - return false; +// Re-export functions from new modules for backward compatibility +export { + commandExists, + execCommand, + execCommandStreaming, + execCommandStreamingNew, +} from "./executor.ts"; +export { + checkForErrors, + detectStepFromOutput, + extractAuthenticationError, + extractTokenCounts, + formatCommandError, + parseStreamJsonResult, +} from "./parsers.ts"; +export { validateArgs, validateCommand, validateCommandAndArgs } from "./validation.ts"; + +const DEBUG = process.env.RALPHY_DEBUG === "true"; + +function debugLog(...args: unknown[]): void { + if (DEBUG || (globalThis as { verboseMode?: boolean }).verboseMode === true) { + logDebug(args.map((a) => String(a)).join(" ")); } } /** - * Execute a command and return stdout - * @param stdinContent - Optional content to pass via stdin (useful for multi-line prompts on Windows) + * Base AI Engine implementation */ -export async function execCommand( - command: string, - args: string[], - workDir: string, - env?: Record, - stdinContent?: string, -): Promise<{ stdout: string; stderr: string; exitCode: number }> { - if (isBun) { - // On Windows, run through cmd.exe to handle .cmd wrappers (npm global packages) - const spawnArgs = isWindows ? ["cmd.exe", "/c", command, ...args] : [command, ...args]; - const proc = Bun.spawn(spawnArgs, { - cwd: workDir, - stdin: stdinContent ? "pipe" : "ignore", - stdout: "pipe", - stderr: "pipe", - env: { ...process.env, ...env }, - }); - - // Write stdin content if provided - if (stdinContent && proc.stdin) { - proc.stdin.write(stdinContent); - proc.stdin.end(); - } - - const [stdout, stderr, exitCode] = await Promise.all([ - new Response(proc.stdout).text(), - new Response(proc.stderr).text(), - proc.exited, - ]); +export abstract class BaseAIEngine implements AIEngine { + abstract name: string; + abstract cliCommand: string; - return { stdout, stderr, exitCode }; + /** + * Check if the CLI command is available + */ + async isAvailable(): Promise { + debugLog(`isAvailable: Checking if '${this.cliCommand}' (${this.name}) is available...`); + const result = await commandExists(this.cliCommand); + debugLog(`isAvailable: '${this.cliCommand}' (${this.name}) available = ${result}`); + return result; } - // Node.js fallback - use shell on Windows to execute .cmd wrappers - return new Promise((resolve) => { - const proc = spawn(command, args, { - cwd: workDir, - env: { ...process.env, ...env }, - stdio: [stdinContent ? "pipe" : "ignore", "pipe", "pipe"], - shell: isWindows, // Required on Windows for npm global commands (.cmd wrappers) - }); - - // Write stdin content if provided - if (stdinContent && proc.stdin) { - proc.stdin.write(stdinContent); - proc.stdin.end(); - } - - let stdout = ""; - let stderr = ""; - - proc.stdout?.on("data", (data) => { - stdout += data.toString(); - }); - - proc.stderr?.on("data", (data) => { - stderr += data.toString(); - }); - - proc.on("close", (exitCode) => { - resolve({ stdout, stderr, exitCode: exitCode ?? 1 }); - }); + /** + * Build CLI arguments for engine + */ + protected abstract buildArgs(prompt: string, workDir: string, options?: EngineOptions): string[]; - proc.on("error", (err) => { - // Maintain backward compatibility - don't reject, include error in stderr - stderr += `\nSpawn error: ${err.message}`; - resolve({ stdout, stderr, exitCode: 1 }); - }); - }); -} + /** + * Process CLI output into AIResult + */ + protected abstract processCliResult( + stdout: string, + stderr: string, + exitCode: number, + workDir: string, + ): AIResult; -/** - * Parse token counts from stream-json output (Claude/Qwen format) - */ -export function parseStreamJsonResult(output: string): { - response: string; - inputTokens: number; - outputTokens: number; -} { - const lines = output.split("\n").filter(Boolean); - let response = ""; - let inputTokens = 0; - let outputTokens = 0; - - for (const line of lines) { - try { - const parsed = JSON.parse(line); - if (parsed.type === "result") { - response = parsed.result || "Task completed"; - inputTokens = parsed.usage?.input_tokens || 0; - outputTokens = parsed.usage?.output_tokens || 0; - } - } catch { - // Ignore non-JSON lines - } + /** + * Get environment variables for engine + */ + protected getEnv(options?: EngineOptions): Record | undefined { + return options?.env; } - return { response: response || "Task completed", inputTokens, outputTokens }; -} - -/** - * Check for errors in stream-json output - */ -export function checkForErrors(output: string): string | null { - const lines = output.split("\n").filter(Boolean); - - for (const line of lines) { - try { - const parsed = JSON.parse(line); - if (parsed.type === "error") { - return parsed.error?.message || parsed.message || "Unknown error"; - } - } catch { - // Ignore non-JSON lines - } + /** + * Whether prompt should be passed through stdin. + * Engines that pass prompts via temp files should override this to false. + */ + protected useStdin(): boolean { + return true; } - return null; -} - -/** - * Extract authentication error message from stream-json output. - * Looks for error type messages or result messages with authentication-related keywords. - * Returns the clean error message if found, null otherwise. - */ -export function extractAuthenticationError(output: string): string | null { - const lines = output.split("\n").filter(Boolean); + /** + * Build args array with stdin handling + * Prompts are passed via stdin to avoid shell escaping issues and ensure + * cross-platform compatibility (Windows, Linux, macOS) + */ + protected buildArgsWithStdin( + baseArgs: string[], + prompt: string, + ): { args: string[]; stdinContent?: string } { + // Always use stdin for prompts - this is the most reliable cross-platform approach + // It avoids shell escaping issues and command line length limits on all platforms + return { args: baseArgs, stdinContent: prompt }; + } - for (const line of lines) { - try { - const parsed = JSON.parse(line); - - // Check if this is any kind of error response - if ( - parsed.type === "error" || - parsed.is_error === true || - parsed.error === "authentication_failed" - ) { - // Extract message from content array (assistant type) or standard fields - let message = ""; - const content = parsed.message?.content; - if (Array.isArray(content)) { - const textItem = content.find( - (item: { type?: string; text?: string }) => item.type === "text" && item.text, - ); - if (textItem) message = textItem.text; - } - if (!message) { - message = parsed.result || parsed.error?.message || parsed.message || ""; - } + /** + * Execute with streaming progress updates (optional implementation) + */ + async executeStreaming( + prompt: string, + workDir: string, + onProgress: ProgressCallback, + options?: EngineOptions, + ): Promise { + if (options?.dryRun) { + onProgress("Skipped (dry run)"); + return { success: true, response: "(dry run) Skipped", inputTokens: 0, outputTokens: 0 }; + } - if (message && isAuthenticationMessage(message.toLowerCase())) { - return message; + const args = this.buildArgs(prompt, workDir, options); + const env = this.getEnv(options); + + // Always use stdin for prompts - most reliable cross-platform approach + const stdinContent = this.useStdin() ? prompt : undefined; + + debugLog(`Starting ${this.name} engine with ${this.cliCommand}`); + debugLog(`WorkDir: ${workDir}`); + debugLog(`Args: ${args.join(" ")}`); + + const timeout = Number.parseInt( + process.env.RALPHY_EXECUTION_TIMEOUT || String(DEFAULT_AI_ENGINE_TIMEOUT_MS), + 10, + ); + debugLog(`Timeout set to: ${Math.floor(timeout / 1000)}s`); + + let timedOut = false; + let childProcess: import("./types.ts").ChildProcess | null = null; + const timeoutId = setTimeout(() => { + timedOut = true; + onProgress( + `[Warning: Process taking longer than ${Math.floor(timeout / 1000 / 60)} minutes...]`, + ); + debugLog(`Timeout reached after ${timeout}ms`); + + // BUG FIX: Check childProcess exists before attempting to kill + // This prevents errors when timeout fires before process is assigned (fallback case) + if (childProcess && typeof childProcess.kill === "function") { + debugLog("Killing child process due to timeout"); + try { + childProcess.kill(); + } catch (killErr) { + debugLog(`Failed to kill child process: ${killErr}`); } } - } catch { - // Ignore non-JSON lines - } - } + }, timeout); - return null; -} - -/** - * Check if a message contains authentication-related keywords - */ -function isAuthenticationMessage(messageLower: string): boolean { - return ( - messageLower.includes("invalid api key") || - messageLower.includes("authentication") || - messageLower.includes("not authenticated") || - messageLower.includes("unauthorized") || - messageLower.includes("/login") - ); -} - -/** - * Format a command failure with useful output context. - * If the output contains an authentication error, returns just that error message. - * Otherwise returns the full error with output context. - */ -export function formatCommandError(exitCode: number, output: string): string { - const trimmed = output.trim(); - if (!trimmed) { - return `Command failed with exit code ${exitCode}`; - } - - // Check for authentication errors first - return the clean message if found - const authError = extractAuthenticationError(output); - if (authError) { - return authError; - } + try { + const result = await execCommandStreamingNew( + this.cliCommand, + args, + workDir, + env, + stdinContent, + ); + + childProcess = result.process; + let stdout = ""; + let stderr = ""; + let exitCode = 0; + + if (result.stdout?.getReader && result.stderr?.getReader) { + const stdoutReader = result.stdout.getReader(); + const stderrReader = result.stderr.getReader(); + + const readStdout = async () => { + try { + while (true) { + const { done, value } = await stdoutReader.read(); + if (done) break; + const chunk = new TextDecoder().decode(value); + stdout += chunk; + + const lines = chunk.split("\n"); + for (const line of lines) { + if (line.trim()) { + const step = this.parseProgressLine(line, options?.logThoughts); + if (step) { + onProgress(step); + } + } + } + } + } catch (err) { + debugLog(`Error reading stdout: ${err}`); + } + }; + + const readStderr = async () => { + try { + while (true) { + const { done, value } = await stderrReader.read(); + if (done) break; + const chunk = new TextDecoder().decode(value); + stderr += chunk; + } + } catch (err) { + debugLog(`Error reading stderr: ${err}`); + } + }; + + const exitedPromise = childProcess?.exited + ? childProcess.exited + : new Promise((resolve) => { + const nodeProcess = childProcess as unknown as ChildProcess; + nodeProcess.once("close", (code) => resolve(code ?? 1)); + nodeProcess.once("error", () => resolve(1)); + }); + + const [resolvedExitCode] = await Promise.all([exitedPromise, readStdout(), readStderr()]); + exitCode = resolvedExitCode ?? 1; + } else { + // BUG FIX: Use stdinContent instead of undefined 'needsStdin' variable + const result = await execCommand(this.cliCommand, args, workDir, env, stdinContent); + stdout = result.stdout; + stderr = result.stderr; + exitCode = result.exitCode; + } - const lines = trimmed.split("\n").filter(Boolean); - const snippet = lines.slice(-12).join("\n"); - return `Command failed with exit code ${exitCode}. Output:\n${snippet}`; -} + clearTimeout(timeoutId); -/** - * Read a stream line by line, calling onLine for each non-empty line - */ -async function readStream( - stream: ReadableStream, - onLine: (line: string) => void, -): Promise { - const reader = stream.getReader(); - const decoder = new TextDecoder(); - let buffer = ""; - try { - while (true) { - const { done, value } = await reader.read(); - if (done) break; - buffer += decoder.decode(value, { stream: true }); - const lines = buffer.split("\n"); - buffer = lines.pop() || ""; - for (const line of lines) { - if (line.trim()) onLine(line); + if (timedOut) { + return { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error: `Execution timed out after ${Math.floor(timeout / 1000 / 60)} minutes`, + }; } - } - if (buffer.trim()) onLine(buffer); - } finally { - reader.releaseLock(); - } -} -/** - * Execute a command with streaming output, calling onLine for each line - * @param stdinContent - Optional content to pass via stdin (useful for multi-line prompts on Windows) - */ -export async function execCommandStreaming( - command: string, - args: string[], - workDir: string, - onLine: (line: string) => void, - env?: Record, - stdinContent?: string, -): Promise<{ exitCode: number }> { - if (isBun) { - // On Windows, run through cmd.exe to handle .cmd wrappers (npm global packages) - const spawnArgs = isWindows ? ["cmd.exe", "/c", command, ...args] : [command, ...args]; - const proc = Bun.spawn(spawnArgs, { - cwd: workDir, - stdin: stdinContent ? "pipe" : "ignore", - stdout: "pipe", - stderr: "pipe", - env: { ...process.env, ...env }, - }); - - // Write stdin content if provided - if (stdinContent && proc.stdin) { - proc.stdin.write(stdinContent); - proc.stdin.end(); + return this.processCliResult(stdout, stderr, exitCode, workDir); + } catch (error) { + clearTimeout(timeoutId); + debugLog(`Error in executeStreaming: ${error}`); + return { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error: error instanceof Error ? error.message : String(error), + }; } - - // Process both stdout and stderr in parallel - await Promise.all([readStream(proc.stdout, onLine), readStream(proc.stderr, onLine)]); - - const exitCode = await proc.exited; - return { exitCode }; } - // Node.js fallback - use shell on Windows to execute .cmd wrappers - return new Promise((resolve) => { - const proc = spawn(command, args, { - cwd: workDir, - env: { ...process.env, ...env }, - stdio: [stdinContent ? "pipe" : "ignore", "pipe", "pipe"], - shell: isWindows, // Required on Windows for npm global commands (.cmd wrappers) - }); - - // Write stdin content if provided - if (stdinContent && proc.stdin) { - proc.stdin.write(stdinContent); - proc.stdin.end(); + /** + * Execute the AI engine (non-streaming) + */ + async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + if (options?.dryRun) { + return { success: true, response: "(dry run) Skipped", inputTokens: 0, outputTokens: 0 }; } - let stdoutBuffer = ""; - let stderrBuffer = ""; - - const processBuffer = (buffer: string, isStderr = false) => { - const lines = buffer.split("\n"); - const remaining = lines.pop() || ""; - for (const line of lines) { - if (line.trim()) onLine(line); - } - return remaining; - }; - - proc.stdout?.on("data", (data) => { - stdoutBuffer += data.toString(); - stdoutBuffer = processBuffer(stdoutBuffer); - }); - - proc.stderr?.on("data", (data) => { - stderrBuffer += data.toString(); - stderrBuffer = processBuffer(stderrBuffer, true); - }); - - proc.on("close", (exitCode) => { - // Process any remaining data - if (stdoutBuffer.trim()) onLine(stdoutBuffer); - if (stderrBuffer.trim()) onLine(stderrBuffer); - resolve({ exitCode: exitCode ?? 1 }); - }); - - proc.on("error", (err) => { - // Maintain backward compatibility - don't reject, report error via onLine - onLine(`Spawn error: ${err.message}`); - resolve({ exitCode: 1 }); - }); - }); -} - -/** - * Check if a file path looks like a test file - */ -function isTestFile(filePath: string): boolean { - const lower = filePath.toLowerCase(); - return ( - lower.includes(".test.") || - lower.includes(".spec.") || - lower.includes("__tests__") || - lower.includes("_test.go") - ); -} - -/** - * Detect the current step from a JSON output line - * Returns step name like "Reading code", "Implementing", etc. - */ -export function detectStepFromOutput(line: string): string | null { - // Fast path: skip non-JSON lines - const trimmed = line.trim(); - if (!trimmed.startsWith("{")) { - return null; - } - - try { - const parsed = JSON.parse(trimmed); - - // Extract specific fields for pattern matching (avoid stringifying entire object) - const toolName = - parsed.tool?.toLowerCase() || - parsed.name?.toLowerCase() || - parsed.tool_name?.toLowerCase() || - ""; - const command = parsed.command?.toLowerCase() || ""; - const filePath = (parsed.file_path || parsed.filePath || parsed.path || "").toLowerCase(); - const description = (parsed.description || "").toLowerCase(); - - // Check tool name first to determine operation type - const isReadOperation = toolName === "read" || toolName === "glob" || toolName === "grep"; - const isWriteOperation = toolName === "write" || toolName === "edit"; - - // Reading code - check this early to avoid misclassifying reads of test files - if (isReadOperation) { - return "Reading code"; - } + const args = this.buildArgs(prompt, workDir, options); + const env = this.getEnv(options); - // Git commit - if (command.includes("git commit") || description.includes("git commit")) { - return "Committing"; - } + // Always use stdin for prompts - most reliable cross-platform approach + const stdinContent = this.useStdin() ? prompt : undefined; - // Git add/staging - if (command.includes("git add") || description.includes("git add")) { - return "Staging"; - } + debugLog(`Starting ${this.name} engine (non-streaming)`); + debugLog(`WorkDir: ${workDir}`); + debugLog(`Args: ${args.join(" ")}`); - // Linting - check command for lint tools - if ( - command.includes("lint") || - command.includes("eslint") || - command.includes("biome") || - command.includes("prettier") - ) { - return "Linting"; + try { + const result = await execCommand(this.cliCommand, args, workDir, env, stdinContent); + + return this.processCliResult(result.stdout, result.stderr, result.exitCode, workDir); + } catch (error) { + debugLog(`Error in execute: ${error}`); + return { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error: error instanceof Error ? error.message : String(error), + }; } + } - // Testing - check command for test runners - if ( - command.includes("vitest") || - command.includes("jest") || - command.includes("bun test") || - command.includes("npm test") || - command.includes("pytest") || - command.includes("go test") - ) { - return "Testing"; + /** + * Parse a line of output to extract progress information + */ + protected parseProgressLine(line: string, logThoughts?: boolean): string | null { + if (line.trim().startsWith("{")) { + try { + const parsed = parseJsonLine(line); + if (parsed) { + if (ErrorSchema.safeParse(parsed.event).success) { + return null; + } + + const event = parsed.event as Record; + if (event.type === "text" && event.part && typeof event.part === "object") { + const part = event.part as { text?: string }; + if (part.text) { + const step = detectStepFromOutput(part.text, logThoughts); + if (step) return step; + } + } + } + } catch { + // Not valid JSON, continue to plain text parsing + } } - // Writing tests - only for write operations to test files - if (isWriteOperation && isTestFile(filePath)) { - return "Writing tests"; - } + const step = detectStepFromOutput(line, logThoughts); + if (step) return step; - // Writing/Editing code - if (isWriteOperation) { - return "Implementing"; + const parsed = parseAIStep(line); + if (parsed && !line.includes("[ERROR")) { + return formatParsedStep(parsed); } return null; - } catch { - return null; - } -} - -/** - * Base implementation for AI engines - */ -export abstract class BaseAIEngine implements AIEngine { - abstract name: string; - abstract cliCommand: string; - - async isAvailable(): Promise { - return commandExists(this.cliCommand); } - - abstract execute(prompt: string, workDir: string, options?: EngineOptions): Promise; - - /** - * Execute with streaming progress updates (optional implementation) - */ - executeStreaming?( - prompt: string, - workDir: string, - onProgress: ProgressCallback, - options?: EngineOptions, - ): Promise; } diff --git a/cli/src/engines/claude.ts b/cli/src/engines/claude.ts index cba08f28..ed686c2c 100644 --- a/cli/src/engines/claude.ts +++ b/cli/src/engines/claude.ts @@ -1,15 +1,6 @@ -import { - BaseAIEngine, - checkForErrors, - detectStepFromOutput, - execCommand, - execCommandStreaming, - formatCommandError, - parseStreamJsonResult, -} from "./base.ts"; -import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts"; - -const isWindows = process.platform === "win32"; +import { BaseAIEngine, checkForErrors } from "./base.ts"; +import { createErrorResult, createSuccessResult, parseStreamJsonResult } from "./parsers.ts"; +import type { AIResult, EngineOptions } from "./types.ts"; /** * Claude Code AI Engine @@ -18,147 +9,33 @@ export class ClaudeEngine extends BaseAIEngine { name = "Claude Code"; cliCommand = "claude"; - async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + protected buildArgs(_prompt: string, _workDir: string, options?: EngineOptions): string[] { const args = ["--dangerously-skip-permissions", "--verbose", "--output-format", "stream-json"]; if (options?.modelOverride) { args.push("--model", options.modelOverride); } - // Add any additional engine-specific arguments - if (options?.engineArgs && options.engineArgs.length > 0) { + if (options?.engineArgs) { args.push(...options.engineArgs); } - - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content - // On other platforms, pass as argument for compatibility - let stdinContent: string | undefined; - if (isWindows) { - args.push("-p"); // Enable print mode, prompt comes from stdin - stdinContent = prompt; - } else { - args.push("-p", prompt); - } - - const { stdout, stderr, exitCode } = await execCommand( - this.cliCommand, - args, - workDir, - undefined, - stdinContent, - ); - - const output = stdout + stderr; - - // Check for errors - const error = checkForErrors(output); - if (error) { - return { - success: false, - response: "", - inputTokens: 0, - outputTokens: 0, - error, - }; - } - - // Parse result - const { response, inputTokens, outputTokens } = parseStreamJsonResult(output); - - // If command failed with non-zero exit code, provide a meaningful error - if (exitCode !== 0) { - return { - success: false, - response, - inputTokens, - outputTokens, - error: formatCommandError(exitCode, output), - }; - } - - return { - success: true, - response, - inputTokens, - outputTokens, - }; + // Note: The prompt is passed via stdin by the base engine for cross-platform compatibility + // The -p flag tells Claude to read from stdin + args.push("-p"); + return args; } - async executeStreaming( - prompt: string, - workDir: string, - onProgress: ProgressCallback, - options?: EngineOptions, - ): Promise { - const args = ["--dangerously-skip-permissions", "--verbose", "--output-format", "stream-json"]; - if (options?.modelOverride) { - args.push("--model", options.modelOverride); - } - // Add any additional engine-specific arguments - if (options?.engineArgs && options.engineArgs.length > 0) { - args.push(...options.engineArgs); - } - - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content - // On other platforms, pass as argument for compatibility - let stdinContent: string | undefined; - if (isWindows) { - args.push("-p"); // Enable print mode, prompt comes from stdin - stdinContent = prompt; - } else { - args.push("-p", prompt); - } - - const outputLines: string[] = []; - - const { exitCode } = await execCommandStreaming( - this.cliCommand, - args, - workDir, - (line) => { - outputLines.push(line); - - // Detect and report step changes - const step = detectStepFromOutput(line); - if (step) { - onProgress(step); - } - }, - undefined, - stdinContent, - ); - - const output = outputLines.join("\n"); - - // Check for errors + protected processCliResult(stdout: string, stderr: string, exitCode: number): AIResult { + const output = stdout + stderr; const error = checkForErrors(output); if (error) { - return { - success: false, - response: "", - inputTokens: 0, - outputTokens: 0, - error, - }; + return { success: false, response: "", inputTokens: 0, outputTokens: 0, error }; } - // Parse result const { response, inputTokens, outputTokens } = parseStreamJsonResult(output); - // If command failed with non-zero exit code, provide a meaningful error if (exitCode !== 0) { - return { - success: false, - response, - inputTokens, - outputTokens, - error: formatCommandError(exitCode, output), - }; + return createErrorResult(exitCode, output, response, inputTokens, outputTokens); } - return { - success: true, - response, - inputTokens, - outputTokens, - }; + return createSuccessResult(response, inputTokens, outputTokens); } } diff --git a/cli/src/engines/codex.ts b/cli/src/engines/codex.ts index 9674ea0f..5dc677d7 100644 --- a/cli/src/engines/codex.ts +++ b/cli/src/engines/codex.ts @@ -1,9 +1,8 @@ -import { existsSync, readFileSync, rmSync, unlinkSync } from "node:fs"; +import { randomUUID } from "node:crypto"; +import { existsSync, readFileSync, unlinkSync } from "node:fs"; import { join } from "node:path"; import { BaseAIEngine, execCommand, formatCommandError } from "./base.ts"; -import type { AIResult, EngineOptions } from "./types.ts"; - -const isWindows = process.platform === "win32"; +import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts"; /** * Codex AI Engine @@ -12,33 +11,47 @@ export class CodexEngine extends BaseAIEngine { name = "Codex"; cliCommand = "codex"; - async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + protected useStdin(): boolean { + return false; + } + + private buildArgsInternal( + prompt: string, + workDir: string, + options?: EngineOptions, + ): { args: string[]; stdinContent?: string; lastMessageFile: string } { // Codex uses a separate file for the last message - const lastMessageFile = join(workDir, `.codex-last-message-${Date.now()}-${process.pid}.txt`); + const lastMessageFile = join( + workDir, + `.codex-last-message-${Date.now()}-${process.pid}-${randomUUID()}.txt`, + ); - try { - const args = ["exec", "--full-auto", "--json", "--output-last-message", lastMessageFile]; - if (options?.modelOverride) { - args.push("--model", options.modelOverride); - } - // Add any additional engine-specific arguments - if (options?.engineArgs && options.engineArgs.length > 0) { - args.push(...options.engineArgs); - } + const baseArgs = ["exec", "--full-auto", "--json", "--output-last-message", lastMessageFile]; + if (options?.modelOverride) { + baseArgs.push("--model", options.modelOverride); + } + // Add any additional engine-specific arguments + if (options?.engineArgs && options.engineArgs.length > 0) { + baseArgs.push(...options.engineArgs); + } - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content - let stdinContent: string | undefined; - if (isWindows) { - stdinContent = prompt; - } else { - args.push(prompt); - } + const { args, stdinContent } = this.buildArgsWithStdin(baseArgs, prompt); + return { args, stdinContent, lastMessageFile }; + } + + async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + const { args, stdinContent, lastMessageFile } = this.buildArgsInternal( + prompt, + workDir, + options, + ); + try { const { stdout, stderr, exitCode } = await execCommand( this.cliCommand, args, workDir, - undefined, + this.getEnv(options), stdinContent, ); @@ -98,4 +111,53 @@ export class CodexEngine extends BaseAIEngine { } } } + + protected buildArgs(prompt: string, workDir: string, options?: EngineOptions): string[] { + const { args } = this.buildArgsInternal(prompt, workDir, options); + return args; + } + + protected processCliResult( + stdout: string, + stderr: string, + exitCode: number, + _workDir: string, + ): AIResult { + const output = stdout + stderr; + + if (output.includes('"type":"error"')) { + const errorMatch = output.match(/"message":"([^"]+)"/); + return { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error: errorMatch?.[1] || "Unknown error", + }; + } + + if (exitCode !== 0) { + return { + success: false, + response: "Task completed", + inputTokens: 0, + outputTokens: 0, + error: formatCommandError(exitCode, output), + }; + } + + return { success: true, response: "Task completed", inputTokens: 0, outputTokens: 0 }; + } + + async executeStreaming( + prompt: string, + workDir: string, + onProgress: ProgressCallback, + options?: EngineOptions, + ): Promise { + onProgress("Running Codex"); + const result = await this.execute(prompt, workDir, options); + onProgress(result.success ? "Completed" : "Failed"); + return result; + } } diff --git a/cli/src/engines/copilot.test.ts b/cli/src/engines/copilot.test.ts index 67e5ff0a..90a6e645 100644 --- a/cli/src/engines/copilot.test.ts +++ b/cli/src/engines/copilot.test.ts @@ -515,7 +515,7 @@ o1-preview 1.5m in, 0.5m out, 0.1m cached`, expect(result.success).toBe(false); expect(result.error).toContain("not authenticated"); - expect(result.error).toContain("/login"); + expect(result.error).toContain("gh auth login"); spy.mockRestore(); }); diff --git a/cli/src/engines/copilot.ts b/cli/src/engines/copilot.ts index 25d13962..638d50fc 100644 --- a/cli/src/engines/copilot.ts +++ b/cli/src/engines/copilot.ts @@ -1,92 +1,48 @@ import { randomUUID } from "node:crypto"; -import { mkdirSync, unlinkSync, writeFileSync } from "node:fs"; +import { existsSync, mkdirSync, readdirSync, rmSync, statSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { logDebug } from "../ui/logger.ts"; -import { BaseAIEngine, checkForErrors, execCommand, formatCommandError } from "./base.ts"; -import type { AIResult, EngineOptions } from "./types.ts"; - -/** Directory for temporary prompt files */ -const TEMP_DIR = join(tmpdir(), "ralphy-copilot"); +import { BaseAIEngine, checkForErrors, execCommand, execCommandStreaming } from "./base.ts"; +import { detectStepFromOutput, formatCommandError } from "./parsers.ts"; +import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts"; /** * GitHub Copilot CLI AI Engine - * - * Note: executeStreaming is intentionally not implemented for Copilot - * because the streaming function can hang on Windows due to how - * Bun handles cmd.exe stream completion. The non-streaming execute() - * method works reliably. - * - * Note: All engine output is captured internally for parsing and not displayed - * to the end user. This is by design - the spinner shows step progress while - * the actual CLI output is processed silently. - * - * Note: Prompts are passed via temporary files to preserve markdown formatting. - * The -p parameter accepts a file path, which avoids shell escaping issues and - * maintains the full structure of markdown (newlines, code blocks, etc.) that - * would be lost if passed as a command line string. */ export class CopilotEngine extends BaseAIEngine { name = "GitHub Copilot"; cliCommand = "copilot"; + private tempDir = join(tmpdir(), "ralphy-copilot"); /** - * Create a temporary file containing the prompt. - * Uses a unique filename to support parallel execution. - * @returns The path to the temporary prompt file + * Build command arguments for Copilot CLI + * Returns args array and optional stdin content for Windows */ - private createPromptFile(prompt: string): string { - // Ensure temp directory exists - wrapped in try-catch to handle - // potential race conditions when multiple processes create it simultaneously - try { - mkdirSync(TEMP_DIR, { recursive: true }); - } catch (err) { - // EEXIST is expected if another process created the directory first - if ((err as NodeJS.ErrnoException).code !== "EEXIST") { - throw err; - } - } - - // Generate unique filename using UUID for parallel safety - const filename = `prompt-${randomUUID()}.md`; - const filepath = join(TEMP_DIR, filename); - - // Write prompt to file preserving all formatting - writeFileSync(filepath, prompt, "utf-8"); - logDebug(`[Copilot] Created prompt file: ${filepath}`); - - return filepath; + protected buildArgs(prompt: string, _workDir: string, options?: EngineOptions): string[] { + const { args } = this.buildArgsInternal(prompt, options); + return args; } - /** - * Clean up a temporary prompt file - */ - private cleanupPromptFile(filepath: string): void { - try { - unlinkSync(filepath); - logDebug(`[Copilot] Cleaned up prompt file: ${filepath}`); - } catch (err) { - // Ignore cleanup errors - file may already be deleted - logDebug(`[Copilot] Failed to cleanup prompt file: ${filepath}`); - } + protected useStdin(): boolean { + return false; } - /** - * Build command arguments for Copilot CLI - * @param promptFilePath Path to the temporary file containing the prompt - */ - private buildArgs(promptFilePath: string, options?: EngineOptions): { args: string[] } { + private buildArgsInternal( + prompt: string, + options?: EngineOptions, + ): { args: string[]; stdinContent?: string } { const args: string[] = []; - // Use --yolo for non-interactive mode (allows all tools and paths) + // Add --yolo flag for non-interactive mode args.push("--yolo"); - // Pass prompt file path (Copilot CLI accepts file paths for -p) - // NOTE: This is an undocumented feature of Copilot CLI but works reliably - // since copilot is smart enough to detect file paths and read the content. - // Do NOT quote the path - arguments are passed directly without shell interpretation - // on non-Windows platforms, so quotes would become literal characters in the path. - args.push("-p", promptFilePath); + // Copilot uses -p flag for prompt file + args.push("-p"); + + // Create temp file with prompt content + const tempFile = this.createTempFile(prompt); + args.push(tempFile); if (options?.modelOverride) { args.push("--model", options.modelOverride); @@ -98,66 +54,121 @@ export class CopilotEngine extends BaseAIEngine { return { args }; } - async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { - // Create temporary prompt file to preserve markdown formatting - const promptFilePath = this.createPromptFile(prompt); + private createTempFile(prompt: string): string { + this.cleanupOldTempFiles(); + + // Ensure temp directory exists + if (!existsSync(this.tempDir)) { + mkdirSync(this.tempDir, { recursive: true }); + } + + // Create unique filename + const uuid = randomUUID(); + const tempFile = join(this.tempDir, `prompt-${uuid}.md`); + + // Write prompt to file + writeFileSync(tempFile, prompt, "utf-8"); + + return tempFile; + } + + private cleanupTempFile(filePath: string | undefined): void { + if (!filePath) return; + try { + if (existsSync(filePath)) { + rmSync(filePath); + } + } catch (err) { + logDebug(`Failed to cleanup temp file: ${err}`); + } + } + private getAuthenticationError(output: string): string | null { + const firstLine = output.split("\n")[0]?.trim().toLowerCase() || ""; + if (firstLine.startsWith("not authenticated") || firstLine.startsWith("no authentication")) { + return "GitHub Copilot is not authenticated. Please run `gh auth login` or check your Copilot subscription."; + } + return null; + } + + /** + * Cleanup temp files older than 1 hour to prevent disk space exhaustion + */ + private cleanupOldTempFiles(): void { try { - const { args } = this.buildArgs(promptFilePath, options); + if (!existsSync(this.tempDir)) return; + const files = readdirSync(this.tempDir); + const oneHourAgo = Date.now() - 60 * 60 * 1000; + for (const file of files) { + const filePath = join(this.tempDir, file); + try { + const stats = statSync(filePath); + if (stats.mtimeMs < oneHourAgo) { + rmSync(filePath); + } + } catch { + // File may have been deleted, skip + } + } + } catch (err) { + logDebug(`Failed to cleanup old temp files: ${err}`); + } + } - // Debug logging - logDebug(`[Copilot] Working directory: ${workDir}`); - logDebug(`[Copilot] Prompt length: ${prompt.length} chars`); - logDebug(`[Copilot] Prompt preview: ${prompt.substring(0, 200)}...`); - logDebug(`[Copilot] Prompt file: ${promptFilePath}`); - logDebug(`[Copilot] Command: ${this.cliCommand} ${args.join(" ")}`); + async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + let tempFile: string | undefined; - const startTime = Date.now(); - const { stdout, stderr, exitCode } = await execCommand(this.cliCommand, args, workDir); + const startTime = Date.now(); + try { + const { args } = this.buildArgsInternal(prompt, options); + const pIndex = args.indexOf("-p"); + tempFile = pIndex >= 0 && pIndex < args.length - 1 ? args[pIndex + 1] : undefined; + + const { stdout, stderr, exitCode } = await execCommand( + this.cliCommand, + args, + workDir, + this.getEnv(options), + ); const durationMs = Date.now() - startTime; const output = stdout + stderr; - // Debug logging - logDebug(`[Copilot] Exit code: ${exitCode}`); - logDebug(`[Copilot] Duration: ${durationMs}ms`); - logDebug(`[Copilot] Output length: ${output.length} chars`); - logDebug(`[Copilot] Output preview: ${output.substring(0, 500)}...`); - - // Check for JSON errors (from base) - const jsonError = checkForErrors(output); - if (jsonError) { + // Check for authentication errors first (check first line only) + const authError = this.getAuthenticationError(output); + if (authError) { return { success: false, response: "", inputTokens: 0, outputTokens: 0, - error: jsonError, + error: authError, }; } - // Check for Copilot-specific errors (plain text) - const copilotError = this.checkCopilotErrors(output); - if (copilotError) { + // Check for errors + const error = checkForErrors(output); + if (error) { return { success: false, response: "", inputTokens: 0, outputTokens: 0, - error: copilotError, + error, }; } - // Parse Copilot output - extract response and token counts - const { response, inputTokens, outputTokens } = this.parseOutput(output); + // Parse Copilot output - extract response from output + const response = this.parseOutput(output); + const tokenCounts = this.parseTokenCounts(output); // If command failed with non-zero exit code, provide a meaningful error if (exitCode !== 0) { return { success: false, response, - inputTokens, - outputTokens, + inputTokens: tokenCounts.input, + outputTokens: tokenCounts.output, error: formatCommandError(exitCode, output), }; } @@ -165,105 +176,46 @@ export class CopilotEngine extends BaseAIEngine { return { success: true, response, - inputTokens, - outputTokens, + inputTokens: tokenCounts.input, + outputTokens: tokenCounts.output, cost: durationMs > 0 ? `duration:${durationMs}` : undefined, }; } finally { - // Always clean up the temporary prompt file - this.cleanupPromptFile(promptFilePath); + // Always clean up temp file + this.cleanupTempFile(tempFile); } } - /** - * Check for Copilot-specific errors in output - * - * IMPORTANT: We are intentionally very conservative with error detection here. - * We don't have documentation on Copilot CLI's error response formats, exit codes, - * or error messages. The response content might contain strings like "network error", - * "error:", "rate limit", etc. as part of valid output (e.g., test results, error - * handling discussions, feedback about code). We only detect errors that we have - * actually observed in practice. - * - * Currently known error: Authentication errors (observed when not logged in) - */ - private checkCopilotErrors(output: string): string | null { - const trimmed = output.trim(); - const trimmedLower = trimmed.toLowerCase(); - - // Authentication errors - the only error format we've actually observed - // When not authenticated, Copilot CLI outputs a message starting with these phrases - if ( - trimmedLower.startsWith("no authentication") || - trimmedLower.startsWith("not authenticated") || - trimmedLower.startsWith("authentication required") || - trimmedLower.startsWith("please authenticate") - ) { - return "GitHub Copilot CLI is not authenticated. Run 'copilot' and use '/login' to authenticate, or set COPILOT_GITHUB_TOKEN environment variable."; - } - - // Note: We intentionally do NOT check for: - // - "rate limit" / "too many requests" - unknown format, could appear in response content - // - "network error" / "connection refused" - unknown format, could appear in response content - // - "error:" prefix - too generic, could appear in response content - // - Non-zero exit codes - we don't know if Copilot uses them for errors - // - // If we encounter other error patterns in practice, we can add them here. - - return null; - } - - /** - * Parse a token count string like "17.5k" or "73" into a number - */ - private parseTokenCount(str: string): number { - const trimmed = str.trim().toLowerCase(); - if (trimmed.endsWith("k")) { - const value = Number.parseFloat(trimmed.slice(0, -1)); - return Number.isNaN(value) ? 0 : Math.round(value * 1000); - } - if (trimmed.endsWith("m")) { - const value = Number.parseFloat(trimmed.slice(0, -1)); - return Number.isNaN(value) ? 0 : Math.round(value * 1000000); + private parseTokenCounts(output: string): { input: number; output: number } { + const lines = output.split("\n"); + for (const line of lines) { + // Match pattern: "model-name X in, Y out, Z cached" or variations + // Using atomic grouping to prevent ReDoS - \d+\.?\d* matches numbers without catastrophic backtracking + const match = line.match(/(\d+\.?\d*)([km]?)\s+in,\s+(\d+\.?\d*)([km]?)\s+out/i); + if (match) { + let input = Number.parseFloat(match[1]); + let output = Number.parseFloat(match[3]); + + // Handle k/m suffixes + if (match[2] === "k") input *= 1000; + if (match[2] === "m") input *= 1000000; + if (match[4] === "k") output *= 1000; + if (match[4] === "m") output *= 1000000; + + return { input: Math.round(input), output: Math.round(output) }; + } } - const value = Number.parseFloat(trimmed); - return Number.isNaN(value) ? 0 : Math.round(value); + return { input: 0, output: 0 }; } - /** - * Extract token counts from Copilot CLI output - * Format: "model-name 17.5k in, 73 out, 11.8k cached (Est. 1 Premium request)" - */ - private parseTokenCounts(output: string): { inputTokens: number; outputTokens: number } { - // Look for the token count line in the "Breakdown by AI model" section - // Pattern: number followed by "in," and number followed by "out," - const tokenMatch = output.match(/(\d+(?:\.\d+)?[km]?)\s+in,\s+(\d+(?:\.\d+)?[km]?)\s+out/i); - - if (tokenMatch) { - const inputTokens = this.parseTokenCount(tokenMatch[1]); - const outputTokens = this.parseTokenCount(tokenMatch[2]); - logDebug(`[Copilot] Parsed tokens: ${inputTokens} in, ${outputTokens} out`); - return { inputTokens, outputTokens }; - } - - return { inputTokens: 0, outputTokens: 0 }; - } - - private parseOutput(output: string): { - response: string; - inputTokens: number; - outputTokens: number; - } { - // Extract token counts first - const { inputTokens, outputTokens } = this.parseTokenCounts(output); - + private parseOutput(output: string): string { // Copilot CLI may output text responses // Extract the meaningful response, filtering out control characters and prompts // Note: These filter patterns are specific to current Copilot CLI behavior // and may need updates if the CLI output format changes const lines = output.split("\n").filter(Boolean); - // Filter out empty lines, CLI artifacts, and stats section + // Filter out empty lines and common CLI artifacts const meaningfulLines = lines.filter((line) => { const trimmed = line.trim(); return ( @@ -272,18 +224,149 @@ export class CopilotEngine extends BaseAIEngine { !trimmed.startsWith("❯") && // Command prompts !trimmed.includes("Thinking...") && // Status messages !trimmed.includes("Working on it...") && // Status messages - !trimmed.startsWith("Total usage") && // Stats section - !trimmed.startsWith("API time") && // Stats section - !trimmed.startsWith("Total session") && // Stats section - !trimmed.startsWith("Total code") && // Stats section - !trimmed.startsWith("Breakdown by") && // Stats section header - !trimmed.match( - /^\s*\S+\s+\d+(?:\.\d+)?[km]?\s+in,\s+\d+(?:\.\d+)?[km]?\s+out,\s+\d+(?:\.\d+)?[km]?\s+cached/, - ) // Token count lines (model stats: "model-name 17.5k in, 73 out, 11.8k cached") + !trimmed.match(/^\S+\s+\d+(\.\d+)?[km]?\s+in,\s+\d+(\.\d+)?[km]?\s+out/i) && // Token count lines + !trimmed.match(/^Total usage:\s*\d+\s*tokens/i) // Total usage lines ); }); - const response = meaningfulLines.join("\n").trim() || "Task completed"; - return { response, inputTokens, outputTokens }; + return meaningfulLines.join("\n") || "Task completed"; + } + + async executeStreaming( + prompt: string, + workDir: string, + onProgress: ProgressCallback, + options?: EngineOptions, + ): Promise { + let tempFile: string | undefined; + + const outputLines: string[] = []; + const startTime = Date.now(); + + try { + const { args } = this.buildArgsInternal(prompt, options); + const pIndex = args.indexOf("-p"); + tempFile = pIndex >= 0 && pIndex < args.length - 1 ? args[pIndex + 1] : undefined; + + const { exitCode } = await execCommandStreaming( + this.cliCommand, + args, + workDir, + (line) => { + outputLines.push(line); + + // Detect and report step changes + const step = detectStepFromOutput(line); + if (step) { + onProgress(step); + } + }, + this.getEnv(options), + ); + + const durationMs = Date.now() - startTime; + const output = outputLines.join("\n"); + + const authError = this.getAuthenticationError(output); + if (authError) { + return { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error: authError, + }; + } + + // Check for errors + const error = checkForErrors(output); + if (error) { + return { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error, + }; + } + + // Parse Copilot output + const response = this.parseOutput(output); + const tokenCounts = this.parseTokenCounts(output); + + // If command failed with non-zero exit code, provide a meaningful error + if (exitCode !== 0) { + return { + success: false, + response, + inputTokens: tokenCounts.input, + outputTokens: tokenCounts.output, + error: formatCommandError(exitCode, output), + }; + } + + return { + success: true, + response, + inputTokens: tokenCounts.input, + outputTokens: tokenCounts.output, + cost: durationMs > 0 ? `duration:${durationMs}` : undefined, + }; + } finally { + // Always clean up temp file + this.cleanupTempFile(tempFile); + } + } + + protected processCliResult( + stdout: string, + stderr: string, + exitCode: number, + _workDir: string, + ): AIResult { + const output = stdout + stderr; + const response = this.parseOutput(output); + const tokenCounts = this.parseTokenCounts(output); + + // Check for auth errors first (check first line only) + const authError = this.getAuthenticationError(output); + if (authError) { + return { + success: false, + response, + inputTokens: tokenCounts.input, + outputTokens: tokenCounts.output, + error: authError, + }; + } + + // Check for CLI errors + const error = checkForErrors(output); + if (error) { + return { + success: false, + response, + inputTokens: tokenCounts.input, + outputTokens: tokenCounts.output, + error, + }; + } + + if (exitCode !== 0) { + return { + success: false, + response, + inputTokens: tokenCounts.input, + outputTokens: tokenCounts.output, + error: formatCommandError(exitCode, output), + }; + } + + return { + success: true, + response, + inputTokens: tokenCounts.input, + outputTokens: tokenCounts.output, + }; } } diff --git a/cli/src/engines/cursor.ts b/cli/src/engines/cursor.ts index 58133a51..e525fe0e 100644 --- a/cli/src/engines/cursor.ts +++ b/cli/src/engines/cursor.ts @@ -1,15 +1,8 @@ -import { - BaseAIEngine, - checkForErrors, - detectStepFromOutput, - execCommand, - execCommandStreaming, - formatCommandError, -} from "./base.ts"; +import { logDebug } from "../ui/logger.ts"; +import { BaseAIEngine, checkForErrors, execCommand, execCommandStreaming } from "./base.ts"; +import { detectStepFromOutput, formatCommandError } from "./parsers.ts"; import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts"; -const isWindows = process.platform === "win32"; - /** * Cursor Agent AI Engine */ @@ -17,7 +10,15 @@ export class CursorEngine extends BaseAIEngine { name = "Cursor Agent"; cliCommand = "agent"; - async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + protected buildArgs(prompt: string, _workDir: string, options?: EngineOptions): string[] { + const { args } = this.buildArgsInternal(prompt, options); + return args; + } + + private buildArgsInternal( + prompt: string, + options?: EngineOptions, + ): { args: string[]; stdinContent?: string } { const args = ["--print", "--force", "--output-format", "stream-json"]; if (options?.modelOverride) { args.push("--model", options.modelOverride); @@ -27,19 +28,17 @@ export class CursorEngine extends BaseAIEngine { args.push(...options.engineArgs); } - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues - let stdinContent: string | undefined; - if (isWindows) { - stdinContent = prompt; - } else { - args.push(prompt); - } + return this.buildArgsWithStdin(args, prompt); + } + + async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + const { args, stdinContent } = this.buildArgsInternal(prompt, options); const { stdout, stderr, exitCode } = await execCommand( this.cliCommand, args, workDir, - undefined, + this.getEnv(options), stdinContent, ); @@ -106,8 +105,8 @@ export class CursorEngine extends BaseAIEngine { response = content; } } - } catch { - // Ignore non-JSON lines + } catch (_err) { + logDebug(`Cursor: Failed to parse JSON line: ${_err}`); } } @@ -120,22 +119,7 @@ export class CursorEngine extends BaseAIEngine { onProgress: ProgressCallback, options?: EngineOptions, ): Promise { - const args = ["--print", "--force", "--output-format", "stream-json"]; - if (options?.modelOverride) { - args.push("--model", options.modelOverride); - } - // Add any additional engine-specific arguments - if (options?.engineArgs && options.engineArgs.length > 0) { - args.push(...options.engineArgs); - } - - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues - let stdinContent: string | undefined; - if (isWindows) { - stdinContent = prompt; - } else { - args.push(prompt); - } + const { args, stdinContent } = this.buildArgsInternal(prompt, options); const outputLines: string[] = []; @@ -152,7 +136,7 @@ export class CursorEngine extends BaseAIEngine { onProgress(step); } }, - undefined, + this.getEnv(options), stdinContent, ); @@ -192,4 +176,34 @@ export class CursorEngine extends BaseAIEngine { cost: durationMs > 0 ? `duration:${durationMs}` : undefined, }; } + + protected processCliResult( + stdout: string, + stderr: string, + exitCode: number, + _workDir: string, + ): AIResult { + const output = stdout + stderr; + const error = checkForErrors(output); + if (error) { + return { success: false, response: "", inputTokens: 0, outputTokens: 0, error }; + } + const { response, durationMs } = this.parseOutput(output); + if (exitCode !== 0) { + return { + success: false, + response, + inputTokens: 0, + outputTokens: 0, + error: formatCommandError(exitCode, output), + }; + } + return { + success: true, + response, + inputTokens: 0, + outputTokens: 0, + cost: durationMs > 0 ? `duration:${durationMs}` : undefined, + }; + } } diff --git a/cli/src/engines/droid.ts b/cli/src/engines/droid.ts index 0247e098..d4db4882 100644 --- a/cli/src/engines/droid.ts +++ b/cli/src/engines/droid.ts @@ -1,15 +1,8 @@ -import { - BaseAIEngine, - checkForErrors, - detectStepFromOutput, - execCommand, - execCommandStreaming, - formatCommandError, -} from "./base.ts"; +import { logDebug } from "../ui/logger.ts"; +import { BaseAIEngine, checkForErrors, execCommand, execCommandStreaming } from "./base.ts"; +import { detectStepFromOutput, formatCommandError } from "./parsers.ts"; import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts"; -const isWindows = process.platform === "win32"; - /** * Factory Droid AI Engine */ @@ -17,7 +10,15 @@ export class DroidEngine extends BaseAIEngine { name = "Factory Droid"; cliCommand = "droid"; - async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + protected buildArgs(_prompt: string, _workDir: string, options?: EngineOptions): string[] { + const { args } = this.buildArgsInternal(_prompt, options); + return args; + } + + private buildArgsInternal( + prompt: string, + options?: EngineOptions, + ): { args: string[]; stdinContent?: string } { const args = ["exec", "--output-format", "stream-json", "--auto", "medium"]; if (options?.modelOverride) { args.push("--model", options.modelOverride); @@ -27,19 +28,17 @@ export class DroidEngine extends BaseAIEngine { args.push(...options.engineArgs); } - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues - let stdinContent: string | undefined; - if (isWindows) { - stdinContent = prompt; - } else { - args.push(prompt); - } + return this.buildArgsWithStdin(args, prompt); + } + + async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + const { args, stdinContent } = this.buildArgsInternal(prompt, options); const { stdout, stderr, exitCode } = await execCommand( this.cliCommand, args, workDir, - undefined, + this.getEnv(options), stdinContent, ); @@ -96,8 +95,8 @@ export class DroidEngine extends BaseAIEngine { durationMs = parsed.durationMs; } } - } catch { - // Ignore non-JSON lines + } catch (_err) { + logDebug(`Droid: Failed to parse JSON line: ${_err}`); } } @@ -110,22 +109,7 @@ export class DroidEngine extends BaseAIEngine { onProgress: ProgressCallback, options?: EngineOptions, ): Promise { - const args = ["exec", "--output-format", "stream-json", "--auto", "medium"]; - if (options?.modelOverride) { - args.push("--model", options.modelOverride); - } - // Add any additional engine-specific arguments - if (options?.engineArgs && options.engineArgs.length > 0) { - args.push(...options.engineArgs); - } - - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues - let stdinContent: string | undefined; - if (isWindows) { - stdinContent = prompt; - } else { - args.push(prompt); - } + const { args, stdinContent } = this.buildArgsInternal(prompt, options); const outputLines: string[] = []; @@ -142,7 +126,7 @@ export class DroidEngine extends BaseAIEngine { onProgress(step); } }, - undefined, + this.getEnv(options), stdinContent, ); @@ -182,4 +166,43 @@ export class DroidEngine extends BaseAIEngine { cost: durationMs > 0 ? `duration:${durationMs}` : undefined, }; } + + protected processCliResult( + stdout: string, + stderr: string, + exitCode: number, + _workDir: string, + ): AIResult { + const output = stdout + stderr; + const error = checkForErrors(output); + if (error) { + return { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error, + }; + } + + const { response, durationMs } = this.parseOutput(output); + + if (exitCode !== 0) { + return { + success: false, + response, + inputTokens: 0, + outputTokens: 0, + error: formatCommandError(exitCode, output), + }; + } + + return { + success: true, + response, + inputTokens: 0, + outputTokens: 0, + cost: durationMs > 0 ? `duration:${durationMs}` : undefined, + }; + } } diff --git a/cli/src/engines/executor.ts b/cli/src/engines/executor.ts new file mode 100644 index 00000000..07f3a804 --- /dev/null +++ b/cli/src/engines/executor.ts @@ -0,0 +1,385 @@ +import { spawn, spawnSync } from "node:child_process"; +import { Readable } from "node:stream"; +import { logDebug } from "../ui/logger.ts"; +import { registerProcess } from "../utils/cleanup.ts"; +import { validateCommandAndArgs } from "./validation.ts"; + +// Check if running in Bun +const isBun = typeof Bun !== "undefined"; +const isWindows = process.platform === "win32"; +const DEBUG = process.env.RALPHY_DEBUG === "true"; + +function debugLog(...args: unknown[]): void { + if (DEBUG || (globalThis as { verboseMode?: boolean }).verboseMode === true) { + logDebug(args.map((a) => String(a)).join(" ")); + } +} + +/** + * Command execution result + */ +export interface ExecutionResult { + stdout: string; + stderr: string; + exitCode: number; +} + +/** + * Check if a command is available in PATH + */ +export async function commandExists(command: string): Promise { + debugLog(`commandExists: Checking for '${command}'...`); + try { + const checkCommand = isWindows ? "where" : "which"; + debugLog(`commandExists: Using checkCommand='${checkCommand}', isBun=${isBun}`); + + if (isBun) { + debugLog("commandExists: Using Bun.spawn for check"); + const proc = Bun.spawn([checkCommand, command], { + stdout: "pipe", + stderr: "pipe", + }); + const exitCode = await proc.exited; + debugLog(`commandExists: Bun.spawn exited with code ${exitCode}`); + return exitCode === 0; + } + + // Node.js fallback + debugLog("commandExists: Using Node.js spawnSync"); + const result = spawnSync(checkCommand, [command], { stdio: "pipe" }); + debugLog(`commandExists: spawnSync status=${result.status}`); + return result.status === 0; + } catch (err) { + debugLog(`commandExists: Exception - ${err}`); + return false; + } +} + +/** + * Execute a command and return stdout + * @param stdinContent - Optional content to pass via stdin + */ +export async function execCommand( + command: string, + args: string[], + workDir: string, + env?: Record, + stdinContent?: string, +): Promise { + debugLog(`execCommand: ${command} ${args.join(" ")}`); + debugLog(`execCommand: workDir=${workDir}, hasEnv=${!!env}, hasStdin=${!!stdinContent}`); + + // Validate command and arguments for security (applies to both Bun and Node.js) + const validation = validateCommandAndArgs(command, args); + if (!validation.valid || !validation.command || !validation.args) { + return Promise.resolve({ + stdout: "", + stderr: `Error: ${validation.error}`, + exitCode: 1, + }); + } + + // Use validated values + const validatedCommand = validation.command; + const validatedArgs = validation.args; + + if (isBun) { + return execWithBun(validatedCommand, validatedArgs, workDir, env, stdinContent); + } + + return execWithNode(validatedCommand, validatedArgs, workDir, env, stdinContent); +} + +/** + * Execute command using Bun runtime + */ +async function execWithBun( + command: string, + args: string[], + workDir: string, + env?: Record, + stdinContent?: string, +): Promise { + const spawnArgs = [command, ...args]; + debugLog(`execCommand: spawning with Bun, spawnArgs=${spawnArgs.join(" ")}`); + + const proc = Bun.spawn(spawnArgs, { + cwd: workDir, + stdin: stdinContent ? "pipe" : "ignore", + stdout: "pipe", + stderr: "pipe", + env: { ...process.env, ...(env || {}) }, + }); + + debugLog( + `execCommand: process spawned, PID=${proc.pid}, stdinContent length=${stdinContent?.length || 0}`, + ); + + // Write stdin content if provided + if (stdinContent && proc.stdin) { + proc.stdin.write(stdinContent); + proc.stdin.end(); + debugLog("execCommand: stdin written and closed"); + } + + const [stdout, stderr, exitCode] = await Promise.all([ + new Response(proc.stdout).text(), + new Response(proc.stderr).text(), + proc.exited, + ]); + + debugLog( + `execCommand: process exited, exitCode=${exitCode}, stdout=${stdout.length} chars, stderr=${stderr.length} chars`, + ); + + return { stdout, stderr, exitCode }; +} + +/** + * Execute command using Node.js runtime + */ +function execWithNode( + command: string, + args: string[], + workDir: string, + env?: Record, + stdinContent?: string, +): Promise { + return new Promise((resolve) => { + const proc = spawn(command, args, { + cwd: workDir, + env: { ...process.env, ...env }, + stdio: [stdinContent ? "pipe" : "ignore", "pipe", "pipe"], + shell: false, // Disable shell to prevent command injection + }); + + // Track process for cleanup + const unregister = registerProcess(proc); + + // Write stdin content if provided + if (stdinContent && proc.stdin) { + proc.stdin.write(stdinContent); + proc.stdin.end(); + } + + let stdout = ""; + let stderr = ""; + + proc.stdout?.on("data", (data) => { + stdout += data.toString(); + }); + + proc.stderr?.on("data", (data) => { + stderr += data.toString(); + }); + + proc.on("close", (code) => { + unregister(); + resolve({ + stdout, + stderr, + exitCode: code ?? 1, + }); + }); + + proc.on("error", (err) => { + unregister(); + const errorMessage = err.message || String(err); + const mergedStderr = stderr ? `${stderr}\n${errorMessage}` : errorMessage; + resolve({ + stdout, + stderr: mergedStderr, + exitCode: 1, + }); + }); + }); +} + +/** + * Streaming execution result + */ +export interface StreamingExecutionResult { + process: import("./types.ts").ChildProcess; + stdout: ReadableStream | null; + stderr: ReadableStream | null; +} + +/** + * Execute a command with streaming output and callback + * Legacy API for backward compatibility with existing engines + */ +export async function execCommandStreaming( + command: string, + args: string[], + workDir: string, + onLine: (line: string) => void, + env?: Record, + stdinContent?: string, +): Promise<{ exitCode: number }> { + debugLog(`execCommandStreaming (legacy): ${command} ${args.join(" ")}`); + + const { + process: childProcess, + stdout, + stderr, + } = await execCommandStreamingNew(command, args, workDir, env, stdinContent); + + const timeout = Number(globalThis.process.env.RALPHY_EXECUTION_TIMEOUT || 30 * 60 * 1000); + let timedOut = false; + + const timeoutId = setTimeout(() => { + timedOut = true; + try { + childProcess.kill("SIGTERM"); + } catch { + // no-op + } + + setTimeout(() => { + if (!timedOut) return; + try { + childProcess.kill("SIGKILL"); + } catch { + // no-op + } + }, 3000); + }, timeout); + + const readStreamLines = async (stream: ReadableStream | null): Promise => { + if (!stream) return; + const reader = stream.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop() ?? ""; + for (const line of lines) { + if (line.trim()) { + onLine(line); + } + } + } + + buffer += decoder.decode(); + if (buffer.trim()) { + onLine(buffer); + } + } finally { + reader.releaseLock(); + } + }; + + const exitPromise = childProcess.exited + ? childProcess.exited + : new Promise((resolve) => { + (childProcess as import("node:child_process").ChildProcess).once("close", (code) => { + resolve(code ?? 1); + }); + (childProcess as import("node:child_process").ChildProcess).once("error", () => { + resolve(1); + }); + }); + + const [exitCode] = await Promise.all([ + exitPromise, + readStreamLines(stdout), + readStreamLines(stderr), + ]); + clearTimeout(timeoutId); + const didTimeOut = timedOut; + timedOut = false; + + return { exitCode: didTimeOut ? 1 : (exitCode ?? 1) }; +} + +/** + * Execute a command with streaming output (returns streams) + * New API for use with BaseAIEngine streaming + */ +export async function execCommandStreamingNew( + command: string, + args: string[], + workDir: string, + env?: Record, + stdinContent?: string, +): Promise { + debugLog(`execCommandStreamingNew: ${command} ${args.join(" ")}`); + + const validation = validateCommandAndArgs(command, args); + if (!validation.valid || !validation.command || !validation.args) { + throw new Error(validation.error || "Command validation failed"); + } + + const validatedCommand = validation.command; + const validatedArgs = validation.args; + + if (isBun) { + const spawnArgs = [validatedCommand, ...validatedArgs]; + const proc = Bun.spawn(spawnArgs, { + cwd: workDir, + stdin: stdinContent ? "pipe" : "ignore", + stdout: "pipe", + stderr: "pipe", + env: { ...process.env, ...(env || {}) }, + }); + + if (stdinContent && proc.stdin) { + proc.stdin.write(stdinContent); + proc.stdin.end(); + } + + return { + process: proc as unknown as import("./types.ts").ChildProcess, + stdout: proc.stdout, + stderr: proc.stderr, + }; + } + + // Node.js fallback + + const proc = spawn(validatedCommand, validatedArgs, { + cwd: workDir, + env: { ...process.env, ...env }, + stdio: [stdinContent ? "pipe" : "ignore", "pipe", "pipe"], + shell: false, + }); + const unregister = registerProcess(proc); + let cleaned = false; + const unregisterOnce = () => { + if (cleaned) return; + cleaned = true; + unregister(); + }; + proc.once("close", unregisterOnce); + proc.once("error", unregisterOnce); + + if (stdinContent && proc.stdin) { + proc.stdin.write(stdinContent); + proc.stdin.end(); + } + + const exited = new Promise((resolve) => { + proc.once("close", (code) => resolve(code ?? 1)); + proc.once("error", () => resolve(1)); + }); + + const processWithExit = Object.assign(proc, { exited }) as import("./types.ts").ChildProcess; + + const stdout = proc.stdout + ? (Readable.toWeb(proc.stdout) as unknown as ReadableStream) + : null; + const stderr = proc.stderr + ? (Readable.toWeb(proc.stderr) as unknown as ReadableStream) + : null; + + return { + process: processWithExit, + stdout, + stderr, + }; +} diff --git a/cli/src/engines/gemini.ts b/cli/src/engines/gemini.ts index c9b98cd8..b860673b 100644 --- a/cli/src/engines/gemini.ts +++ b/cli/src/engines/gemini.ts @@ -9,8 +9,6 @@ import { } from "./base.ts"; import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts"; -const isWindows = process.platform === "win32"; - /** * Gemini CLI AI Engine * https://github.com/google-gemini/gemini-cli @@ -19,7 +17,10 @@ export class GeminiEngine extends BaseAIEngine { name = "Gemini CLI"; cliCommand = "gemini"; - async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + /** + * Build CLI arguments for Gemini + */ + protected buildArgs(prompt: string, workDir: string, options?: EngineOptions): string[] { const args = ["--output-format", "stream-json", "--yolo"]; if (options?.modelOverride) { args.push("--model", options.modelOverride); @@ -28,21 +29,32 @@ export class GeminiEngine extends BaseAIEngine { if (options?.engineArgs && options.engineArgs.length > 0) { args.push(...options.engineArgs); } + // Pass prompt via stdin + args.push("-p"); + return args; + } - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content - let stdinContent: string | undefined; - if (isWindows) { - args.push("-p"); - stdinContent = prompt; - } else { - args.push("-p", prompt); - } + /** + * Process CLI output into AIResult + */ + protected processCliResult( + _stdout: string, + _stderr: string, + _exitCode: number, + _workDir: string, + ): AIResult { + throw new Error("GeminiEngine: use execute() or executeStreaming() directly"); + } + + async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + const args = this.buildArgs(prompt, workDir, options); + const stdinContent = prompt; const { stdout, stderr, exitCode } = await execCommand( this.cliCommand, args, workDir, - undefined, + this.getEnv(options), stdinContent, ); @@ -88,23 +100,8 @@ export class GeminiEngine extends BaseAIEngine { onProgress: ProgressCallback, options?: EngineOptions, ): Promise { - const args = ["--output-format", "stream-json", "--yolo"]; - if (options?.modelOverride) { - args.push("--model", options.modelOverride); - } - // Add any additional engine-specific arguments - if (options?.engineArgs && options.engineArgs.length > 0) { - args.push(...options.engineArgs); - } - - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content - let stdinContent: string | undefined; - if (isWindows) { - args.push("-p"); - stdinContent = prompt; - } else { - args.push("-p", prompt); - } + const args = this.buildArgs(prompt, workDir, options); + const stdinContent = prompt; const outputLines: string[] = []; @@ -121,7 +118,7 @@ export class GeminiEngine extends BaseAIEngine { onProgress(step); } }, - undefined, + this.getEnv(options), stdinContent, ); diff --git a/cli/src/engines/index.ts b/cli/src/engines/index.ts index 4b7fc70e..3bdb1e16 100644 --- a/cli/src/engines/index.ts +++ b/cli/src/engines/index.ts @@ -1,13 +1,13 @@ -export * from "./types.ts"; export * from "./base.ts"; export * from "./claude.ts"; -export * from "./opencode.ts"; -export * from "./cursor.ts"; export * from "./codex.ts"; -export * from "./qwen.ts"; -export * from "./droid.ts"; export * from "./copilot.ts"; +export * from "./cursor.ts"; +export * from "./droid.ts"; export * from "./gemini.ts"; +export * from "./opencode.ts"; +export * from "./qwen.ts"; +export * from "./types.ts"; import { ClaudeEngine } from "./claude.ts"; import { CodexEngine } from "./codex.ts"; @@ -56,5 +56,6 @@ export function getEngineName(name: AIEngineName): string { * Check if an engine is available */ export async function isEngineAvailable(name: AIEngineName): Promise { - return createEngine(name).isAvailable(); + const engine = createEngine(name); + return await engine.isAvailable(); } diff --git a/cli/src/engines/opencode.ts b/cli/src/engines/opencode.ts index 9315e4bb..570f8742 100644 --- a/cli/src/engines/opencode.ts +++ b/cli/src/engines/opencode.ts @@ -1,44 +1,303 @@ +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { logDebug as debugLog } from "../ui/logger.ts"; +import { + StepFinishSchema, + TextSchema, + extractSessionId, + parseJsonLine, +} from "../utils/json-validation.ts"; import { BaseAIEngine, checkForErrors, execCommand, formatCommandError } from "./base.ts"; +import { detectStepFromOutput as baseDetectStepFromOutput } from "./parsers.ts"; import type { AIResult, EngineOptions } from "./types.ts"; -const isWindows = process.platform === "win32"; - -/** - * OpenCode AI Engine - */ +/** OpenCode AI Engine */ export class OpenCodeEngine extends BaseAIEngine { name = "OpenCode"; cliCommand = "opencode"; + protected lastUsedModel?: string; - async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + /** Set up environment variables for OpenCode engine */ + protected getEnv(options?: EngineOptions): Record | undefined { + const env: Record = { + // Add rate limiting to prevent overwhelming the API + OPENCODE_REQUEST_DELAY: "1000", + ...(options?.env || {}), + }; + + if (options?.debugOpenCode) { + env.DEBUG_OPENCODE = "true"; + } + + // Allow OpenCode broad filesystem access only when explicitly opted in. + if (options?.allowOpenCodeSandboxAccess === true) { + env.OPENCODE_PERMISSION = '{"*":"allow"}'; + } + + return env; + } + + protected buildArgs(_prompt: string, _workDir: string, options?: EngineOptions): string[] { const args = ["run", "--format", "json"]; if (options?.modelOverride) { args.push("--model", options.modelOverride); + this.lastUsedModel = options.modelOverride; + } else { + this.lastUsedModel = ""; } - // Add any additional engine-specific arguments + if (options?.engineArgs && options.engineArgs.length > 0) { args.push(...options.engineArgs); } + // Prompt is passed via stdin by the execute() method for cross-platform compatibility + return args; + } - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content - let stdinContent: string | undefined; - if (isWindows) { - stdinContent = prompt; - } else { - args.push(prompt); - } + async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + const args = this.buildArgs(prompt, workDir, options); + + // Pass prompt via stdin for cross-platform compatibility + // This avoids shell escaping issues and argument length limits on all platforms + const stdinContent = prompt; const { stdout, stderr, exitCode } = await execCommand( this.cliCommand, args, workDir, - { OPENCODE_PERMISSION: '{"*":"allow"}' }, + this.getEnv(options), stdinContent, ); + const combinedOutput = stdout + stderr; + + // Diagnostics: capture session-related artifacts only when explicit debug is enabled + if (options?.debugOpenCode || process.env.RALPHY_DEBUG === "true") { + try { + const diagLogPath = path.join(os.tmpdir(), "ralphy-opencode_diag.log"); + let sessionId: string | undefined; + // Attempt to extract a sessionId from any JSON lines in the output + for (const line of combinedOutput.split(/\r?\n/)) { + if (!line?.trim()) continue; + try { + const obj = JSON.parse(line); + if (obj?.sessionID) { + sessionId = String(obj.sessionID); + } else if (obj?.sessionId) { + sessionId = String(obj.sessionId); + } else if (obj?.session_id) { + sessionId = String(obj.session_id); + } + } catch { + // Ignore non-JSON lines in diagnostic extraction. + } + } + + const diag = { + timestamp: new Date().toISOString(), + command: this.cliCommand, + argsCount: args.length, + workDir, + platform: process.platform, + exitCode, + sessionId, + stateDirHint: "[REDACTED]", + envSnapshot: { + HOME: "[REDACTED]", + USERPROFILE: "[REDACTED]", + XDG_STATE_HOME: "[REDACTED]", + }, + stdoutBytes: Buffer.byteLength(stdout, "utf8"), + stderrBytes: Buffer.byteLength(stderr, "utf8"), + hasOutput: stdout.length > 0 || stderr.length > 0, + }; + // Ensure the log directory exists and append the diagnostic entry + try { + fs.mkdirSync(path.dirname(diagLogPath), { recursive: true }); + // Check file size limit before appending + const MAX_DIAG_SIZE = 10 * 1024 * 1024; // 10MB limit + if (fs.existsSync(diagLogPath)) { + const stats = fs.statSync(diagLogPath); + if (stats.size > MAX_DIAG_SIZE) { + // Rotate log file + fs.renameSync(diagLogPath, `${diagLogPath}.old`); + } + } + fs.appendFileSync(diagLogPath, `${JSON.stringify(diag)}\n`); + } catch (err) { + // Log but don't crash on logging failures + debugLog(`Failed to write diagnostic log: ${err}`); + } + } catch (diagErr) { + // If diagnostics fail for any reason, do not crash the engine + debugLog(`OpenCode: Diagnostic error (non-critical): ${diagErr}`); + } + } + + return this.processCliResult(stdout, stderr, exitCode, workDir); + } + + private parseOutput(output: string): { + response: string; + inputTokens: number; + outputTokens: number; + cost?: string; + sessionId?: string; + } { + const lines = output.split("\n").filter(Boolean); + let response = ""; + let inputTokens = 0; + let outputTokens = 0; + let cost: string | undefined; + let sessionId: string | undefined; + + // Find step_finish and other events for token counts and session ID + for (const line of lines) { + const result = parseJsonLine(line); + if (!result) continue; + const { event } = result; + + // Extract session ID from any event that has it + const extractedSessionId = extractSessionId(event); + if (extractedSessionId) { + sessionId = extractedSessionId; + } + + const stepFinishResult = StepFinishSchema.safeParse(event); + if (stepFinishResult.success) { + const stepFinish = stepFinishResult.data; + const tokens = stepFinish.part?.tokens || stepFinish.tokens; + inputTokens = tokens?.input || 0; + outputTokens = tokens?.output || 0; + cost = String(stepFinish.cost || stepFinish.part?.cost || ""); + } + } + + // Get text response from text events and tool_use events with file content + const textParts: string[] = []; + for (const line of lines) { + const result = parseJsonLine(line); + if (!result) continue; + const { event } = result; + + const textResult = TextSchema.safeParse(event); + if (textResult.success) { + textParts.push(textResult.data.part.text); + } + } + + // If no text parts found, check if this is a raw tool_use response (planning phase issue) + // In this case, we return an empty response so the caller can detect this condition + if (textParts.length === 0) { + // Check for raw tool_use response + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed.startsWith('{"type":"tool_use"')) { + // Return empty response - this will be detected by planning.ts + return { response: "", inputTokens, outputTokens, cost, sessionId }; + } + } + } + + response = textParts.join("") || "Task completed"; + + return { response, inputTokens, outputTokens, cost, sessionId }; + } + + /** Detect step from output for progress tracking */ + detectStepFromOutput(line: string, logThoughts = false): string | null { + const trimmed = line.trim(); + const lowerLine = trimmed.toLowerCase(); + + // Handle JSON tool calls first (specific to OpenCode) + try { + const parsed = JSON.parse(trimmed); + if (parsed?.tool && parsed?.file_path) { + const fileName = parsed.file_path.split("/").pop() || parsed.file_path; + switch (parsed.tool) { + case "read": + return `Reading ${fileName}`; + case "write": + case "edit": + return `Implementing ${fileName}`; + default: + return `${parsed.tool.charAt(0).toUpperCase() + parsed.tool.slice(1)} ${fileName}`; + } + } + if (parsed?.type === "text" && parsed?.part?.text) { + const text = parsed.part.text; + // Truncate long text + if (text.length > 150) { + return text.substring(0, 150); + } + return text; + } + } catch (err) { + // Not JSON, continue with text processing + debugLog(`OpenCode: JSON parse error in step detection: ${err}`); + } + + // OpenCode-specific step detection before base implementation + if (lowerLine.includes("reading") || lowerLine.includes("loading")) { + // Check for "Reading file " pattern first + const readingFileMatch = trimmed.match(/Reading\s+file\s+["']?([^"']+)["']?/i); + if (readingFileMatch) { + const fileName = readingFileMatch[1].split("/").pop() || readingFileMatch[1]; + return `Reading ${fileName}`; + } + // Extract filename if present + const fileMatch = trimmed.match(/(?:file|reading)\s+(?:"?')?([^"'\s]+)/i); + if (fileMatch) { + const fileName = fileMatch[1].split("/").pop() || fileMatch[1]; + return `Reading ${fileName}`; + } + if (lowerLine.includes("file")) return "Reading code"; + } + // Handle cat command pattern + if (trimmed.startsWith("cat ")) { + const fileMatch = trimmed.match(/cat\s+(?:"?')?([^"'\s]+)/i); + if (fileMatch) { + const fileName = fileMatch[1].split("/").pop() || fileMatch[1]; + return `Reading ${fileName}`; + } + } + if ( + lowerLine.includes("writing") || + lowerLine.includes("editing") || + lowerLine.includes("implementing") + ) { + if (lowerLine.includes("test")) return "Writing tests"; + return "Implementing"; + } + + // Use base implementation for other cases + const baseResult = baseDetectStepFromOutput(line, logThoughts); + if (baseResult !== null && baseResult !== undefined) { + return baseResult; + } + + // OpenCode-specific step detection + if (lowerLine.includes("lint") || lowerLine.includes("formatting")) { + return "Linting"; + } + if (lowerLine.includes("commit")) return "Committing"; + if (lowerLine.includes("staging")) return "Staging"; + + return null; + } + + protected processCliResult( + stdout: string, + stderr: string, + exitCode: number, + _workDir: string, + ): AIResult { const output = stdout + stderr; - // Check for errors + // Parse OpenCode JSON format + const { response, inputTokens, outputTokens, cost, sessionId } = this.parseOutput(output); + + // Check for errors first const error = checkForErrors(output); if (error) { return { @@ -47,12 +306,10 @@ export class OpenCodeEngine extends BaseAIEngine { inputTokens: 0, outputTokens: 0, error, + sessionId, }; } - // Parse OpenCode JSON format - const { response, inputTokens, outputTokens, cost } = this.parseOutput(output); - // If command failed with non-zero exit code, provide a meaningful error if (exitCode !== 0) { return { @@ -61,6 +318,7 @@ export class OpenCodeEngine extends BaseAIEngine { inputTokens, outputTokens, error: formatCommandError(exitCode, output), + sessionId, }; } @@ -70,52 +328,7 @@ export class OpenCodeEngine extends BaseAIEngine { inputTokens, outputTokens, cost, + sessionId, }; } - - private parseOutput(output: string): { - response: string; - inputTokens: number; - outputTokens: number; - cost?: string; - } { - const lines = output.split("\n").filter(Boolean); - let response = ""; - let inputTokens = 0; - let outputTokens = 0; - let cost: string | undefined; - - // Find step_finish for token counts - for (const line of lines) { - try { - const parsed = JSON.parse(line); - if (parsed.type === "step_finish") { - inputTokens = parsed.part?.tokens?.input || 0; - outputTokens = parsed.part?.tokens?.output || 0; - if (parsed.part?.cost) { - cost = String(parsed.part.cost); - } - } - } catch { - // Ignore non-JSON lines - } - } - - // Get text response from text events - const textParts: string[] = []; - for (const line of lines) { - try { - const parsed = JSON.parse(line); - if (parsed.type === "text" && parsed.part?.text) { - textParts.push(parsed.part.text); - } - } catch { - // Ignore non-JSON lines - } - } - - response = textParts.join("") || "Task completed"; - - return { response, inputTokens, outputTokens, cost }; - } } diff --git a/cli/src/engines/parsers.ts b/cli/src/engines/parsers.ts new file mode 100644 index 00000000..db906d3b --- /dev/null +++ b/cli/src/engines/parsers.ts @@ -0,0 +1,360 @@ +import type { z } from "zod"; +import { ErrorSchema, StepFinishSchema, parseJsonLine } from "../utils/json-validation.ts"; +import type { AIResult } from "./types.ts"; + +/** + * Parsed result with token counts + */ +export interface ParsedResult { + response: string; + inputTokens: number; + outputTokens: number; +} + +/** + * Token counts + */ +export interface TokenCounts { + input: number; + output: number; +} + +function isAuthenticationMessage(message: string): boolean { + const lower = message.toLowerCase(); + return ( + lower.includes("invalid api key") || + lower.includes("authentication") || + lower.includes("not authenticated") || + lower.includes("unauthorized") || + lower.includes("/login") + ); +} + +export function extractAuthenticationError(output: string): string | null { + const lines = output + .split("\n") + .map((line) => line.trim()) + .filter(Boolean); + + for (const line of lines) { + let event: Record | null = null; + let rawEvent: Record | null = null; + + if (line.startsWith("{")) { + try { + const parsed = JSON.parse(line); + if (parsed && typeof parsed === "object") { + rawEvent = parsed as Record; + } + } catch { + // ignore malformed JSON lines + } + } + + const parsedLine = parseJsonLine(line); + if (parsedLine?.event && typeof parsedLine.event === "object") { + event = parsedLine.event as Record; + } else if (rawEvent) { + event = rawEvent; + } + + if (!event) continue; + + const type = typeof event.type === "string" ? event.type : ""; + + if (type === "error") { + const errorObj = event.error; + const errorMessage = + errorObj && + typeof errorObj === "object" && + typeof (errorObj as { message?: unknown }).message === "string" + ? (errorObj as { message: string }).message + : typeof event.message === "string" + ? event.message + : ""; + if (errorMessage && isAuthenticationMessage(errorMessage)) return errorMessage; + } + + const source = rawEvent || event; + if ( + type === "result" && + source.is_error === true && + typeof source.result === "string" && + isAuthenticationMessage(source.result) + ) { + return source.result; + } + + if (type === "assistant") { + const messageObj = event.message; + const hasAuthError = + event.error === "authentication_failed" || + (messageObj && + typeof messageObj === "object" && + (messageObj as { error?: unknown }).error === "authentication_failed"); + if (!hasAuthError) continue; + + if (messageObj && typeof messageObj === "object") { + const content = (messageObj as { content?: unknown }).content; + if (Array.isArray(content)) { + for (const item of content) { + if (!item || typeof item !== "object") continue; + const text = (item as { text?: unknown }).text; + if (typeof text === "string" && isAuthenticationMessage(text)) return text; + } + } + } + } + } + + return null; +} + +/** + * Check for errors in stream-json output or general CLI output. + */ +export function checkForErrors(output: string): string | null { + const lines = output.split("\n").filter(Boolean); + + for (const line of lines) { + const trimmed = line.trim(); + // Try JSON parsing with schema validation + if (trimmed.startsWith("{")) { + const parsed = parseJsonLine(line); + if (parsed && ErrorSchema.safeParse(parsed.event).success) { + const errorData = parsed.event as z.infer; + return errorData.error?.message || errorData.message || "Unknown error"; + } + } + + // Look for common error patterns in plain text (case-insensitive) + const lowerTrimmed = trimmed.toLowerCase(); + const hasExplicitErrorPrefix = lowerTrimmed.startsWith("fatal:"); + const hasKnownModelErrorToken = + lowerTrimmed.includes("providermodelnotfounderror") || + lowerTrimmed.includes("modelnotfounderror") || + (lowerTrimmed.includes("model not found") && lowerTrimmed.includes("error")) || + (lowerTrimmed.includes("invalid model") && lowerTrimmed.includes("error")); + + if (hasExplicitErrorPrefix || hasKnownModelErrorToken) { + // Improve specific error messages + if (lowerTrimmed.includes("rate limit")) { + return "OpenCode Rate Limit: Too many requests. Try: Wait 30-60s"; + } + if (lowerTrimmed.includes("quota")) { + return "OpenCode Quota Exceeded: You've reached your usage limit. Check your OpenCode plan"; + } + if (lowerTrimmed.includes("connection") || lowerTrimmed.includes("timeout")) { + return "OpenCode Connection Error: Unable to connect to the service. Check internet connection"; + } + return trimmed; + } + } + + // Secondary check for common fatal strings + const fatalTerms = ["Permission denied", "command not found"]; + const fatalLine = output.split("\n").find((line) => { + const trimmedLine = line.trim(); + const lowerLine = trimmedLine.toLowerCase(); + const hasFatalPrefix = /^(fatal:|error:|err:|\/bin\/sh:|sh:|bash:|zsh:)/i.test(trimmedLine); + return ( + (hasFatalPrefix && fatalTerms.some((term) => lowerLine.includes(term.toLowerCase()))) || + lowerLine.includes("providermodelnotfounderror") + ); + }); + if (fatalLine) { + return fatalLine.trim() || "Access or command error"; + } + + return null; +} + +/** + * Format a command failure with useful output context. + */ +export function formatCommandError(exitCode: number, output: string): string { + const trimmed = output.trim(); + if (!trimmed) { + return `Command failed with exit code ${exitCode}`; + } + + // Try to find a meaningful error message first + const authError = extractAuthenticationError(output); + if (authError) { + return authError; + } + + const extractedError = checkForErrors(output); + if (extractedError) { + return `Command failed with exit code ${exitCode}. Output:\n${extractedError}`; + } + + const lines = trimmed.split("\n").filter(Boolean); + const snippet = lines.slice(-12).join("\n"); + return `Command failed with exit code ${exitCode}. Output:\n${snippet}`; +} + +/** + * Parse JSON result from AI output + */ +export function parseStreamJsonResult(output: string): ParsedResult { + const lines = output.split("\n").filter(Boolean); + let response = ""; + let inputTokens = 0; + let outputTokens = 0; + + for (const line of lines) { + try { + const parsed = JSON.parse(line); + if (parsed.type === "result") { + response = parsed.result || "Task completed"; + inputTokens = parsed.usage?.input_tokens || 0; + outputTokens = parsed.usage?.output_tokens || 0; + } + } catch { + // Ignore non-JSON lines + } + } + + return { response: response || "Task completed", inputTokens, outputTokens }; +} + +/** + * Extract token counts from JSON response + */ +export function extractTokenCounts(output: string): TokenCounts | null { + const lines = output.split("\n").filter(Boolean); + for (const line of lines) { + if (line.trim().startsWith("{")) { + const parsed = parseJsonLine(line); + if (!parsed) continue; + + const stepFinishResult = StepFinishSchema.safeParse(parsed.event); + if (stepFinishResult.success) { + const stepFinish = stepFinishResult.data; + const tokens = stepFinish.part?.tokens || stepFinish.tokens; + if (tokens) { + return { + input: tokens.input || 0, + output: tokens.output || 0, + }; + } + } + } + } + return null; +} + +/** + * Detect step from AI output line + */ +export function detectStepFromOutput(line: string, logThoughts = true): string | null { + const trimmed = line.trim(); + + // Skip empty lines and obvious non-step lines + if (!trimmed || trimmed.startsWith("```") || trimmed.startsWith("$") || trimmed.startsWith("{")) { + return null; + } + + // Skip markdown headings and list bullets - they often produce noisy false positives + if (/^#{1,6}\s+/.test(trimmed) || /^[-*+]\s+/.test(trimmed)) { + return null; + } + + // Check for natural language patterns that indicate a step + const stepPatterns = [ + // Action-oriented phrases + /^(I will|I'll|Let me|Now I'll|Next I'll|First I'll|Then I'll|Finally I'll)\s+/i, + // Status indicators + /^(Step\s+\d+|Phase\s+\d+|Stage\s+\d+):?/i, + // Progress markers + /^(Working on|Starting|Proceeding with|Moving to|Switching to)\s+/i, + // Analysis phrases + /^(Analyzing|Examining|Reviewing|Checking|Investigating)\s+/i, + // Action verbs at start + /^(Reading|Writing|Editing|Creating|Deleting|Moving|Renaming|Running|Testing|Building|Searching|Finding|Getting|Setting|Updating|Modifying|Implementing|Adding|Removing|Fixing|Refactoring|Optimizing|Converting|Generating|Validating|Formatting|Organizing|Preparing|Executing|Completing)\s+/i, + // Special markers + /^\[(READ|WRITE|EDIT|CREATE|DELETE|RUN|TEST|BUILD|SEARCH|ANALYZE|FIX|REFACTOR)\]/i, + ]; + + for (const pattern of stepPatterns) { + const match = trimmed.match(pattern); + if (match) { + // Avoid generic assistant chatter being treated as an execution step + if (/^(i can|i think|i need|it looks like|we should)\b/i.test(trimmed)) { + return null; + } + + // Extract the full action description (first sentence or up to 100 chars) + let step = trimmed; + const sentenceEnd = step.match(/[.!?](?:\s|$)/); + if (sentenceEnd?.index && sentenceEnd.index < 100) { + step = step.slice(0, sentenceEnd.index + 1); + } else if (step.length > 100) { + step = `${step.slice(0, 97)}...`; + } + + // Don't log thoughts if disabled + if (!logThoughts && /^(?:(?:Let me|I'll|I will) think|thinking|analyzing the)/i.test(step)) { + return null; + } + + return step; + } + } + + return null; +} + +/** + * Extract the most meaningful line from output for display + */ +export function extractMeaningfulLine(output: string): string | null { + const lines = output.split("\n").filter((line) => line.trim()); + + for (const line of lines) { + const step = detectStepFromOutput(line, false); + if (step) { + return step; + } + } + + return lines[0] || null; +} + +/** + * Create error result from exit code and output + */ +export function createErrorResult( + exitCode: number, + output: string, + response = "", + inputTokens = 0, + outputTokens = 0, +): AIResult { + return { + success: false, + response, + inputTokens, + outputTokens, + error: formatCommandError(exitCode, output), + }; +} + +/** + * Create success result with response and token counts + */ +export function createSuccessResult( + response: string, + inputTokens: number, + outputTokens: number, + extra?: Record, +): AIResult { + return { + success: true, + response, + inputTokens, + outputTokens, + ...extra, + }; +} diff --git a/cli/src/engines/qwen.ts b/cli/src/engines/qwen.ts index a0ddeef8..4162e853 100644 --- a/cli/src/engines/qwen.ts +++ b/cli/src/engines/qwen.ts @@ -1,15 +1,6 @@ -import { - BaseAIEngine, - checkForErrors, - detectStepFromOutput, - execCommand, - execCommandStreaming, - formatCommandError, - parseStreamJsonResult, -} from "./base.ts"; -import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts"; - -const isWindows = process.platform === "win32"; +import { BaseAIEngine, checkForErrors } from "./base.ts"; +import { createErrorResult, createSuccessResult, parseStreamJsonResult } from "./parsers.ts"; +import type { AIResult, EngineOptions } from "./types.ts"; /** * Qwen-Code AI Engine @@ -18,145 +9,33 @@ export class QwenEngine extends BaseAIEngine { name = "Qwen-Code"; cliCommand = "qwen"; - async execute(prompt: string, workDir: string, options?: EngineOptions): Promise { + protected buildArgs(_prompt: string, _workDir: string, options?: EngineOptions): string[] { const args = ["--output-format", "stream-json", "--approval-mode", "yolo"]; if (options?.modelOverride) { args.push("--model", options.modelOverride); } - // Add any additional engine-specific arguments - if (options?.engineArgs && options.engineArgs.length > 0) { + if (options?.engineArgs) { args.push(...options.engineArgs); } - - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content - let stdinContent: string | undefined; - if (isWindows) { - args.push("-p"); - stdinContent = prompt; - } else { - args.push("-p", prompt); - } - - const { stdout, stderr, exitCode } = await execCommand( - this.cliCommand, - args, - workDir, - undefined, - stdinContent, - ); - - const output = stdout + stderr; - - // Check for errors - const error = checkForErrors(output); - if (error) { - return { - success: false, - response: "", - inputTokens: 0, - outputTokens: 0, - error, - }; - } - - // Parse result (same format as Claude) - const { response, inputTokens, outputTokens } = parseStreamJsonResult(output); - - // If command failed with non-zero exit code, provide a meaningful error - if (exitCode !== 0) { - return { - success: false, - response, - inputTokens, - outputTokens, - error: formatCommandError(exitCode, output), - }; - } - - return { - success: true, - response, - inputTokens, - outputTokens, - }; + // Note: The prompt is passed via stdin by the base engine for cross-platform compatibility + // The -p flag tells Qwen to read from stdin + args.push("-p"); + return args; } - async executeStreaming( - prompt: string, - workDir: string, - onProgress: ProgressCallback, - options?: EngineOptions, - ): Promise { - const args = ["--output-format", "stream-json", "--approval-mode", "yolo"]; - if (options?.modelOverride) { - args.push("--model", options.modelOverride); - } - // Add any additional engine-specific arguments - if (options?.engineArgs && options.engineArgs.length > 0) { - args.push(...options.engineArgs); - } - - // On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content - let stdinContent: string | undefined; - if (isWindows) { - args.push("-p"); - stdinContent = prompt; - } else { - args.push("-p", prompt); - } - - const outputLines: string[] = []; - - const { exitCode } = await execCommandStreaming( - this.cliCommand, - args, - workDir, - (line) => { - outputLines.push(line); - - // Detect and report step changes - const step = detectStepFromOutput(line); - if (step) { - onProgress(step); - } - }, - undefined, - stdinContent, - ); - - const output = outputLines.join("\n"); - - // Check for errors + protected processCliResult(stdout: string, stderr: string, exitCode: number): AIResult { + const output = stdout + stderr; const error = checkForErrors(output); if (error) { - return { - success: false, - response: "", - inputTokens: 0, - outputTokens: 0, - error, - }; + return { success: false, response: "", inputTokens: 0, outputTokens: 0, error }; } - // Parse result (same format as Claude) const { response, inputTokens, outputTokens } = parseStreamJsonResult(output); - // If command failed with non-zero exit code, provide a meaningful error if (exitCode !== 0) { - return { - success: false, - response, - inputTokens, - outputTokens, - error: formatCommandError(exitCode, output), - }; + return createErrorResult(exitCode, output, response, inputTokens, outputTokens); } - return { - success: true, - response, - inputTokens, - outputTokens, - }; + return createSuccessResult(response, inputTokens, outputTokens); } } diff --git a/cli/src/engines/types.ts b/cli/src/engines/types.ts index 6e9f9c8f..f3bb342f 100644 --- a/cli/src/engines/types.ts +++ b/cli/src/engines/types.ts @@ -9,6 +9,8 @@ export interface AIResult { /** Actual cost in dollars (if provided by engine) or duration in ms */ cost?: string; error?: string; + /** Session ID if the engine supports it (like OpenCode) */ + sessionId?: string; } /** @@ -19,6 +21,18 @@ export interface EngineOptions { modelOverride?: string; /** Additional arguments to pass to the engine CLI */ engineArgs?: string[]; + /** Additional environment variables for the engine CLI */ + env?: Record; + /** Enable comprehensive OpenCode debugging */ + debugOpenCode?: boolean; + /** Allow OpenCode to access sandbox directories without permission prompts */ + allowOpenCodeSandboxAccess?: boolean; + /** General debug flag */ + debug?: boolean; + /** Whether this is a dry run (no actual AI execution) */ + dryRun?: boolean; + /** Log AI thoughts/reasoning to console */ + logThoughts?: boolean; } /** @@ -26,6 +40,19 @@ export interface EngineOptions { */ export type ProgressCallback = (step: string) => void; +/** + * Process reference type for child processes + * Compatible with both Bun and Node.js child processes + * BUG FIX: Use proper union type instead of any for type safety + */ +export type ChildProcess = + | (Bun.Subprocess & { kill: (signal?: string) => void; pid: number; exited: Promise }) + | (import("node:child_process").ChildProcess & { + kill: (signal?: string) => void; + pid: number; + exited?: Promise; + }); + /** * AI Engine interface - one per AI tool */ diff --git a/cli/src/engines/validation.ts b/cli/src/engines/validation.ts new file mode 100644 index 00000000..6219ae96 --- /dev/null +++ b/cli/src/engines/validation.ts @@ -0,0 +1,178 @@ +import { logDebug } from "../ui/logger.ts"; + +// Check platform +const isWindows = process.platform === "win32"; +const DEBUG = process.env.RALPHY_DEBUG === "true"; + +/** + * Maximum lengths to prevent DoS attacks + */ +const MAX_COMMAND_LENGTH = 1000; +const MAX_ARG_LENGTH = 10000; +const MAX_TOTAL_ARGS_LENGTH = 100000; +const MAX_ARG_COUNT = 1000; + +function debugLog(...args: unknown[]): void { + if (DEBUG || (globalThis as { verboseMode?: boolean }).verboseMode === true) { + logDebug(args.map((a) => String(a)).join(" ")); + } +} + +/** + * Validate command name to prevent command injection + * Only allows alphanumeric characters, hyphens, underscores, and dots + * Also allows forward slashes for path-based commands (e.g., ./node_modules/.bin/cli) + */ +function tokenizeCommand(command: string): string[] { + const tokens: string[] = []; + const regex = /[^\s"']+|"([^"]*)"|'([^']*)'/g; + let match = regex.exec(command); + while (match !== null) { + tokens.push(match[1] ?? match[2] ?? match[0]); + match = regex.exec(command); + } + + return tokens; +} + +export function validateCommand(command: string): string | null { + const trimmedCommand = command.trim(); + if (!trimmedCommand) { + debugLog("Command validation failed: command is empty"); + return null; + } + + // Check command length to prevent DoS + if (trimmedCommand.length > MAX_COMMAND_LENGTH) { + debugLog( + `Command validation failed: command too long (${trimmedCommand.length} > ${MAX_COMMAND_LENGTH})`, + ); + return null; + } + + // Block shell metacharacters and dangerous patterns + const dangerousPatterns = [ + /[;&|`$]/, // Shell metacharacters + /\$\{/, // Variable expansion + /\$\(/, // Command substitution + /`/, // Backtick substitution + /\|\|/, // OR operator + /&&/, // AND operator + /[<>]/, // Redirection + ]; + + for (const pattern of dangerousPatterns) { + if (pattern.test(trimmedCommand)) { + debugLog(`Command validation failed: dangerous pattern detected in "${trimmedCommand}"`); + return null; + } + } + + const tokens = tokenizeCommand(trimmedCommand); + if (tokens.length === 0) { + debugLog("Command validation failed: no command token found"); + return null; + } + + const [commandToken, ...args] = tokens; + + // Allow executable characters: alphanumeric, hyphen, underscore, dot, slashes. + // Windows also needs drive-letter colon support (e.g., C:\tools\bun.exe). + const validCommandPattern = isWindows ? /^[a-zA-Z0-9._\-\\/:]+$/ : /^[a-zA-Z0-9._\-/]+$/; + + if (!validCommandPattern.test(commandToken)) { + debugLog(`Command validation failed: invalid command token "${commandToken}"`); + return null; + } + + if (args.length > 0 && !validateArgs(args)) { + debugLog(`Command validation failed: invalid args in "${trimmedCommand}"`); + return null; + } + + return trimmedCommand; +} + +/** + * Validate command arguments to prevent injection + * Returns null if any argument contains dangerous patterns + */ +export function validateArgs(args: string[]): string[] | null { + // Check argument count to prevent DoS + if (args.length > MAX_ARG_COUNT) { + debugLog(`Argument validation failed: too many arguments (${args.length} > ${MAX_ARG_COUNT})`); + return null; + } + + // Check total arguments length + const totalLength = args.reduce((sum, arg) => sum + arg.length, 0); + if (totalLength > MAX_TOTAL_ARGS_LENGTH) { + debugLog( + `Argument validation failed: total arguments too long (${totalLength} > ${MAX_TOTAL_ARGS_LENGTH})`, + ); + return null; + } + + const dangerousPatterns = [ + /[;&|`]/, // Shell metacharacters + /\$\{/, // Variable expansion + /\$\(/, // Command substitution + /`/, // Backtick substitution + /\|\|/, // OR operator + /&&/, // AND operator + ]; + + for (const arg of args) { + // Check individual argument length + if (arg.length > MAX_ARG_LENGTH) { + debugLog(`Argument validation failed: argument too long (${arg.length} > ${MAX_ARG_LENGTH})`); + return null; + } + + for (const pattern of dangerousPatterns) { + if (pattern.test(arg)) { + debugLog(`Argument validation failed: dangerous pattern in "${arg}"`); + return null; + } + } + } + + return args; +} + +/** + * Validation result type + */ +export interface ValidationResult { + valid: boolean; + command?: string; + args?: string[]; + error?: string; +} + +/** + * Validate both command and arguments in one call + */ +export function validateCommandAndArgs(command: string, args: string[]): ValidationResult { + const validatedCommand = validateCommand(command); + if (!validatedCommand) { + return { + valid: false, + error: "Invalid command - potential command injection detected", + }; + } + + const validatedArgs = validateArgs(args); + if (!validatedArgs) { + return { + valid: false, + error: "Invalid arguments - potential command injection detected", + }; + } + + return { + valid: true, + command: validatedCommand, + args: validatedArgs, + }; +} diff --git a/cli/src/execution/parallel.ts b/cli/src/execution/parallel.ts index 5318088e..765618a7 100644 --- a/cli/src/execution/parallel.ts +++ b/cli/src/execution/parallel.ts @@ -1,5 +1,5 @@ import { copyFileSync, cpSync, existsSync, mkdirSync } from "node:fs"; -import { join } from "node:path"; +import { dirname, join, resolve, sep } from "node:path"; import simpleGit from "simple-git"; import { PROGRESS_FILE, RALPHY_DIR } from "../config/loader.ts"; import { logTaskProgress } from "../config/writer.ts"; @@ -41,6 +41,23 @@ interface ParallelAgentResult { usedSandbox?: boolean; } +function resolveRelativePathInside(baseDir: string, relativePath: string): string | null { + if (!relativePath || relativePath.startsWith("/") || relativePath.startsWith("\\")) { + return null; + } + if (/^[a-zA-Z]:/.test(relativePath)) { + return null; + } + + const resolvedBase = resolve(baseDir); + const resolvedPath = resolve(baseDir, relativePath); + if (resolvedPath !== resolvedBase && !resolvedPath.startsWith(`${resolvedBase}${sep}`)) { + return null; + } + + return resolvedPath; +} + /** * Run a single agent in a worktree */ @@ -80,17 +97,28 @@ async function runAgentInWorktree( logDebug(`Agent ${agentNum}: Created worktree at ${worktreeDir}`); // Copy PRD file or folder to worktree - if (prdSource === "markdown" || prdSource === "yaml" || prdSource === "json") { - const srcPath = join(originalDir, prdFile); - const destPath = join(worktreeDir, prdFile); - if (existsSync(srcPath)) { + if ( + prdSource === "markdown" || + prdSource === "yaml" || + prdSource === "json" || + prdSource === "csv" + ) { + const srcPath = resolveRelativePathInside(originalDir, prdFile); + const destPath = resolveRelativePathInside(worktreeDir, prdFile); + if (srcPath && destPath && existsSync(srcPath)) { + mkdirSync(dirname(destPath), { recursive: true }); copyFileSync(srcPath, destPath); + } else if (!srcPath || !destPath) { + throw new Error(`Invalid PRD path: ${prdFile}`); } } else if (prdSource === "markdown-folder" && prdIsFolder) { - const srcPath = join(originalDir, prdFile); - const destPath = join(worktreeDir, prdFile); - if (existsSync(srcPath)) { + const srcPath = resolveRelativePathInside(originalDir, prdFile); + const destPath = resolveRelativePathInside(worktreeDir, prdFile); + if (srcPath && destPath && existsSync(srcPath)) { + mkdirSync(dirname(destPath), { recursive: true }); cpSync(srcPath, destPath, { recursive: true }); + } else if (!srcPath || !destPath) { + throw new Error(`Invalid PRD path: ${prdFile}`); } } @@ -173,17 +201,28 @@ async function runAgentInSandbox( ); // Copy PRD file or folder to sandbox (same as worktree mode) - if (prdSource === "markdown" || prdSource === "yaml" || prdSource === "json") { - const srcPath = join(originalDir, prdFile); - const destPath = join(sandboxDir, prdFile); - if (existsSync(srcPath)) { + if ( + prdSource === "markdown" || + prdSource === "yaml" || + prdSource === "json" || + prdSource === "csv" + ) { + const srcPath = resolveRelativePathInside(originalDir, prdFile); + const destPath = resolveRelativePathInside(sandboxDir, prdFile); + if (srcPath && destPath && existsSync(srcPath)) { + mkdirSync(dirname(destPath), { recursive: true }); copyFileSync(srcPath, destPath); + } else if (!srcPath || !destPath) { + throw new Error(`Invalid PRD path: ${prdFile}`); } } else if (prdSource === "markdown-folder" && prdIsFolder) { - const srcPath = join(originalDir, prdFile); - const destPath = join(sandboxDir, prdFile); - if (existsSync(srcPath)) { + const srcPath = resolveRelativePathInside(originalDir, prdFile); + const destPath = resolveRelativePathInside(sandboxDir, prdFile); + if (srcPath && destPath && existsSync(srcPath)) { + mkdirSync(dirname(destPath), { recursive: true }); cpSync(srcPath, destPath, { recursive: true }); + } else if (!srcPath || !destPath) { + throw new Error(`Invalid PRD path: ${prdFile}`); } } @@ -381,12 +420,13 @@ export async function runParallel( // Run agents in parallel (using sandbox or worktree mode) const promises = batch.map((task) => { globalAgentNum++; + const agentId = globalAgentNum; const runInSandbox = () => runAgentInSandbox( engine, task, - globalAgentNum, + agentId, getSandboxBase(workDir), workDir, prdSource, @@ -408,7 +448,7 @@ export async function runParallel( return runAgentInWorktree( engine, task, - globalAgentNum, + agentId, baseBranch, isolationBase, workDir, @@ -424,7 +464,7 @@ export async function runParallel( engineArgs, ).then((res) => { if (shouldFallbackToSandbox(res.error)) { - logWarn(`Agent ${globalAgentNum}: Worktree unavailable, retrying in sandbox mode.`); + logWarn(`Agent ${agentId}: Worktree unavailable, retrying in sandbox mode.`); if (res.worktreeDir) { cleanupAgentWorktree(res.worktreeDir, res.branchName, workDir).catch(() => { // Ignore cleanup failures during fallback diff --git a/cli/src/execution/retry.ts b/cli/src/execution/retry.ts index 9eb4f293..a9d7707c 100644 --- a/cli/src/execution/retry.ts +++ b/cli/src/execution/retry.ts @@ -34,7 +34,7 @@ export function calculateBackoffDelay( useJitter: boolean, ): number { // Exponential backoff: baseDelay * 2^(attempt-1) - let delay = baseDelayMs * Math.pow(2, attempt - 1); + let delay = baseDelayMs * 2 ** (attempt - 1); // Cap at maximum delay delay = Math.min(delay, maxDelayMs); diff --git a/cli/src/execution/sandbox-git.ts b/cli/src/execution/sandbox-git.ts index 45ceb1e9..b4ee4a85 100644 --- a/cli/src/execution/sandbox-git.ts +++ b/cli/src/execution/sandbox-git.ts @@ -12,7 +12,7 @@ class GitMutex { private queue: Promise = Promise.resolve(); async acquire(fn: () => Promise): Promise { - let release: () => void; + let release: (() => void) | undefined; const next = new Promise((resolve) => { release = resolve; }); @@ -22,7 +22,7 @@ class GitMutex { try { return await fn(); } finally { - release!(); + release?.(); } } } diff --git a/cli/src/execution/sandbox.ts b/cli/src/execution/sandbox.ts index d27e76c9..00a9252e 100644 --- a/cli/src/execution/sandbox.ts +++ b/cli/src/execution/sandbox.ts @@ -32,21 +32,27 @@ export async function rmRF(path: string): Promise { // Using force: true and recursive: true is standard rmSync(path, { recursive: true, force: true }); return; - } catch (err: any) { - const isLockError = err.code === "EBUSY" || err.code === "EPERM" || err.code === "ENOTEMPTY"; + } catch (err: unknown) { + const errorCode = + typeof err === "object" && err !== null && "code" in err + ? String((err as { code?: string }).code) + : ""; + const isLockError = + errorCode === "EBUSY" || errorCode === "EPERM" || errorCode === "ENOTEMPTY"; if (isLockError && i < retries - 1) { // Wait with exponential backoff: 500, 1000, 2000, 4000... - const delay = 500 * Math.pow(2, i); + const delay = 500 * 2 ** i; await new Promise((resolve) => setTimeout(resolve, delay)); continue; } // On final failure for lock errors, log warning and swallow. // For non-lock errors (any time), throw immediately. + const errorMessage = err instanceof Error ? err.message : String(err); if (isLockError && i === retries - 1) { logWarn( - `Failed to clean up ${path} after ${retries} attempts: ${err.message}. This may be due to a file lock. Proceeding anyway.`, + `Failed to clean up ${path} after ${retries} attempts: ${errorMessage}. This may be due to a file lock. Proceeding anyway.`, ); } else { throw err; @@ -252,7 +258,6 @@ export function verifySandboxIsolation(sandboxDir: string, symlinkDirs: string[] const stat = lstatSync(sandboxPath); if (stat.isSymbolicLink()) { // Good - it's a symlink - continue; } } catch { // Error checking - assume not isolated diff --git a/cli/src/telemetry/collector.ts b/cli/src/telemetry/collector.ts index 37b38fe9..1d33a037 100644 --- a/cli/src/telemetry/collector.ts +++ b/cli/src/telemetry/collector.ts @@ -18,6 +18,43 @@ import type { // Package version (loaded lazily) let cachedVersion: string | undefined; +function sanitizeSecrets(input: string): string { + const patterns = [ + { regex: /sk-[a-zA-Z0-9]{48}/g, replacement: "[API_KEY_REDACTED]" }, + { regex: /sk-ant-[a-zA-Z0-9_-]{16,256}/g, replacement: "[ANTHROPIC_KEY_REDACTED]" }, + { regex: /ghp_[a-zA-Z0-9]{36}/g, replacement: "[GITHUB_TOKEN_REDACTED]" }, + { regex: /gho_[a-zA-Z0-9]{52}/g, replacement: "[GITHUB_OAUTH_REDACTED]" }, + { regex: /AKIA[0-9A-Z]{16}/g, replacement: "[AWS_KEY_REDACTED]" }, + { regex: /\b[0-9a-f]{64}\b/g, replacement: "[HEX_SECRET_REDACTED]" }, + ]; + + let result = input; + for (const { regex, replacement } of patterns) { + result = result.replace(regex, replacement); + } + return result; +} + +function sanitizeTelemetryValue(value: unknown): unknown { + if (typeof value === "string") { + return sanitizeSecrets(value); + } + + if (Array.isArray(value)) { + return value.map((item) => sanitizeTelemetryValue(item)); + } + + if (value && typeof value === "object") { + const sanitized: Record = {}; + for (const [key, nested] of Object.entries(value)) { + sanitized[key] = sanitizeTelemetryValue(nested); + } + return sanitized; + } + + return value; +} + function getCliVersion(): string { if (cachedVersion) return cachedVersion; try { @@ -116,8 +153,8 @@ export class TelemetryCollector { // Store prompts/responses for full mode if (this.level === "full") { - if (prompt) this.prompts.push(prompt); - if (response) this.responses.push(response); + if (prompt) this.prompts.push(sanitizeSecrets(prompt)); + if (response) this.responses.push(sanitizeSecrets(response)); } } @@ -131,7 +168,10 @@ export class TelemetryCollector { startTime: Date.now(), toolName, parameterKeys: parameters ? Object.keys(parameters) : undefined, - parameters: this.level === "full" ? parameters : undefined, + parameters: + this.level === "full" + ? (sanitizeTelemetryValue(parameters) as Record | undefined) + : undefined, }; // Track file paths in full mode @@ -164,7 +204,7 @@ export class TelemetryCollector { // Add full mode data if (this.level === "full") { toolCall.parameters = this.activeToolCall.parameters; - if (result) toolCall.result = result; + if (result) toolCall.result = sanitizeSecrets(result); } this.toolCalls.push(toolCall); @@ -199,8 +239,10 @@ export class TelemetryCollector { }; if (this.level === "full") { - toolCall.parameters = options?.parameters; - toolCall.result = options?.result; + toolCall.parameters = sanitizeTelemetryValue(options?.parameters) as + | Record + | undefined; + toolCall.result = options?.result ? sanitizeSecrets(options.result) : undefined; // Track file paths if (options?.parameters) { @@ -303,7 +345,7 @@ export class TelemetryCollector { fullSession.response = this.responses.join("\n\n---\n\n"); } if (this.filePaths.size > 0) { - fullSession.filePaths = Array.from(this.filePaths); + fullSession.filePaths = Array.from(this.filePaths).map((path) => sanitizeSecrets(path)); } return { session: fullSession, toolCalls: this.toolCalls }; } diff --git a/cli/src/telemetry/exporter.ts b/cli/src/telemetry/exporter.ts index c4b4a2be..bef8331c 100644 --- a/cli/src/telemetry/exporter.ts +++ b/cli/src/telemetry/exporter.ts @@ -161,7 +161,7 @@ export class TelemetryExporter { await this.ensureExportsDir(); const filePath = outputPath || join(this.exportsDir, "openai-evals.jsonl"); - await writeFile(filePath, entries.join("\n") + "\n", "utf-8"); + await writeFile(filePath, `${entries.join("\n")}\n`, "utf-8"); return filePath; } @@ -194,7 +194,7 @@ export class TelemetryExporter { await this.ensureExportsDir(); const filePath = outputPath || join(this.exportsDir, "raw-telemetry.jsonl"); - const lines = entries.map((e) => JSON.stringify(e)).join("\n") + "\n"; + const lines = `${entries.map((e) => JSON.stringify(e)).join("\n")}\n`; await writeFile(filePath, lines, "utf-8"); return filePath; diff --git a/cli/src/telemetry/types.ts b/cli/src/telemetry/types.ts index 41650f3c..11424fa8 100644 --- a/cli/src/telemetry/types.ts +++ b/cli/src/telemetry/types.ts @@ -78,6 +78,51 @@ export interface ToolCall { */ export type TelemetryLevel = "anonymous" | "full"; +/** + * Full session data for webhook + */ +export interface WebhookSessionData { + sessionId: string; + engine: string; + mode: string; + cliVersion: string; + platform: string; + totalTokensIn: number; + totalTokensOut: number; + totalDurationMs: number; + taskCount: number; + successCount: number; + failedCount: number; + toolCalls: { + toolName: string; + callCount: number; + successCount: number; + failedCount: number; + avgDurationMs: number; + }[]; + tags?: string[]; +} + +/** + * Full session details for webhook (full privacy mode) + */ +export interface WebhookSessionDetails { + prompt?: string; + response?: string; + filePaths?: string[]; +} + +/** + * Telemetry webhook payload + */ +export interface TelemetryWebhookPayload { + event: string; + version: string; + timestamp: string; + session: WebhookSessionData; + details?: WebhookSessionDetails; +} + /** * Telemetry configuration */ diff --git a/cli/src/telemetry/webhook.ts b/cli/src/telemetry/webhook.ts index c305e0e0..6ce965d3 100644 --- a/cli/src/telemetry/webhook.ts +++ b/cli/src/telemetry/webhook.ts @@ -76,11 +76,18 @@ export async function sendTelemetryWebhook( } const payload = buildPayload(session, level); + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout + const safeWebhookTarget = (() => { + try { + const parsed = new URL(webhookUrl); + return `${parsed.protocol}//${parsed.host}`; + } catch { + return "[invalid-webhook-url]"; + } + })(); try { - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout - const response = await fetch(webhookUrl, { method: "POST", headers: { @@ -90,14 +97,12 @@ export async function sendTelemetryWebhook( signal: controller.signal, }); - clearTimeout(timeoutId); - if (!response.ok) { const text = await response.text().catch(() => ""); throw new Error(`HTTP ${response.status}${text ? `: ${text}` : ""}`); } - logDebug(`Telemetry webhook sent successfully to ${webhookUrl}`); + logDebug(`Telemetry webhook sent successfully to ${safeWebhookTarget}`); } catch (error) { if (error instanceof Error && error.name === "AbortError") { logError("Telemetry webhook timed out after 10 seconds"); @@ -107,5 +112,7 @@ export async function sendTelemetryWebhook( ); } // Don't throw - webhook failures shouldn't break the session + } finally { + clearTimeout(timeoutId); } } diff --git a/cli/src/telemetry/writer.ts b/cli/src/telemetry/writer.ts index ab64c48f..8b0d9569 100644 --- a/cli/src/telemetry/writer.ts +++ b/cli/src/telemetry/writer.ts @@ -7,6 +7,7 @@ import { existsSync } from "node:fs"; import { appendFile, mkdir, readFile, readdir } from "node:fs/promises"; import { dirname, join } from "node:path"; +import { logDebug } from "../ui/logger.ts"; import type { Session, SessionFull, ToolCall } from "./types.js"; const DEFAULT_OUTPUT_DIR = ".ralphy/telemetry"; @@ -56,7 +57,7 @@ export class TelemetryWriter { async writeSession(session: Session | SessionFull): Promise { await this.ensureDir(); const path = join(this.outputDir, SESSIONS_FILE); - const line = JSON.stringify(session) + "\n"; + const line = `${JSON.stringify(session)}\n`; await appendFile(path, line, "utf-8"); } @@ -68,7 +69,7 @@ export class TelemetryWriter { await this.ensureDir(); const path = join(this.outputDir, TOOL_CALLS_FILE); - const lines = toolCalls.map((call) => JSON.stringify(call)).join("\n") + "\n"; + const lines = `${toolCalls.map((call) => JSON.stringify(call)).join("\n")}\n`; await appendFile(path, lines, "utf-8"); } @@ -91,8 +92,16 @@ export class TelemetryWriter { const content = await readFile(path, "utf-8"); const lines = content.trim().split("\n").filter(Boolean); + const sessions: Array = []; + for (const line of lines) { + try { + sessions.push(JSON.parse(line) as Session | SessionFull); + } catch (error) { + logDebug(`Skipping invalid telemetry session line: ${error}`); + } + } - return lines.map((line) => JSON.parse(line) as Session | SessionFull); + return sessions; } /** @@ -107,8 +116,16 @@ export class TelemetryWriter { const content = await readFile(path, "utf-8"); const lines = content.trim().split("\n").filter(Boolean); + const toolCalls: ToolCall[] = []; + for (const line of lines) { + try { + toolCalls.push(JSON.parse(line) as ToolCall); + } catch (error) { + logDebug(`Skipping invalid telemetry tool-call line: ${error}`); + } + } - return lines.map((line) => JSON.parse(line) as ToolCall); + return toolCalls; } /** diff --git a/cli/src/utils/ai-output-parser.ts b/cli/src/utils/ai-output-parser.ts new file mode 100644 index 00000000..a5e6b5fc --- /dev/null +++ b/cli/src/utils/ai-output-parser.ts @@ -0,0 +1,155 @@ +import { parseJsonLine, TextSchema, ToolUseSchema } from "./json-validation.ts"; + +export interface ParsedStep { + thought?: string; + tool?: string; + toolArgs?: string; + reading?: string; + writing?: string; + running?: string; + executed?: string; + raw?: string; +} + +/** + * Parse AI output step and extract structured information + */ +export function parseAIStep(step: string): ParsedStep { + const parsed: ParsedStep = { raw: step }; + + // Try to parse as JSON first + const result = parseJsonLine(step); + if (result) { + const { event, remaining } = result; + + // If there's remaining text, treat it as a thought/progress message + if (remaining) { + parsed.thought = remaining; + } + + // Extract text/response content + const textResult = TextSchema.safeParse(event); + if (textResult.success) { + const textEvent = textResult.data; + if (textEvent.part?.text) { + const text = textEvent.part.text; + + // Try to categorize based on content patterns + if (text.startsWith("[thinking") || text.startsWith("Thinking")) { + parsed.thought = text; + } else if (text.startsWith("Running") || text.startsWith("Executing")) { + parsed.running = text; + } else if (text.startsWith("Reading") || text.startsWith("Examining")) { + parsed.reading = text; + } else if (text.startsWith("Writing") || text.startsWith("Creating") || text.startsWith("Updating")) { + parsed.writing = text; + } else if (text.startsWith("Executed") || text.startsWith("Finished")) { + parsed.executed = text; + } else { + // Default: if it's text content, treat as general progress + parsed.thought = text; + } + } + } + + // Extract tool use content + const toolUseResult = ToolUseSchema.safeParse(event); + if (toolUseResult.success) { + const toolUse = toolUseResult.data; + const toolName = toolUse.tool || toolUse.part?.tool || ""; + const toolInput = toolUse.part?.state?.input; + + if (toolName) { + parsed.tool = toolName; + + // Extract meaningful summary of arguments + let argSummary = ""; + if (toolInput) { + if (typeof toolInput === "string") { + argSummary = toolInput; + } else if (typeof toolInput === "object" && toolInput !== null && !Array.isArray(toolInput)) { + // Heuristics for common tools + const input = toolInput as Record; + argSummary = + String(input.pattern || "") || + String(input.path || "") || + String(input.filePath || "") || + String(input.command || "") || + JSON.stringify(toolInput); + } + } + + parsed.toolArgs = argSummary; + + // Map specific tools to display categories with refined messages + const lowerTool = toolName.toLowerCase(); + const shortArgs = argSummary; + + if (lowerTool === "read") { + parsed.reading = `Read: ${shortArgs} `; + } else if (lowerTool === "glob") { + parsed.reading = `Glob: ${shortArgs} `; + } else if (lowerTool === "grep") { + parsed.reading = `Grep: ${shortArgs} `; + } else if (lowerTool === "ls") { + parsed.reading = `List: ${shortArgs} `; + } else if (lowerTool === "write" || lowerTool === "edit" || lowerTool === "create") { + parsed.writing = `Write: ${shortArgs} `; + } else if (lowerTool === "run" || lowerTool === "execute" || lowerTool === "terminal") { + parsed.running = `Run: ${shortArgs} `; + } else { + // Catch-all for other tools + parsed.tool = `${toolName}: ${shortArgs} `; + } + } + } + } + + return parsed; +} + +/** + * Format parsed step for display + */ +export function formatParsedStep(step: ParsedStep, agentNum?: number): string | null { + const prefix = agentNum !== undefined ? `Agent ${agentNum}: ` : ""; + + // Prioritize concrete actions over generic thoughts + if (step.writing) { + return `${prefix}${step.writing} `; + } + + if (step.reading) { + return `${prefix}${step.reading} `; + } + + if (step.running) { + return `${prefix}${step.running} `; + } + + if (step.thought) { + // Clean up common "Thinking:" prefixes since we wrap in {} later + const cleanedThought = step.thought.replace(/^(Thinking|Analyzing|Considering|Warning|Waiting)[:\s]*/i, "").trim(); + return `${prefix}${cleanedThought} `; + } + + if (step.executed) { + return `${prefix} Done: ${step.executed.substring(0, 100)} `; + } + + if (step.tool) { + return `${prefix} Tool: ${step.tool} `; + } + + // If raw but couldn't parse, truncate and show + if (step.raw && step.raw.length > 0) { + const trimmed = step.raw.trim(); + if (trimmed.startsWith("{") || trimmed.startsWith('"')) { + // It's JSON we couldn't parse, skip it + return null; + } + return `${prefix}${trimmed.substring(0, 100)} `; + } + + return null; +} diff --git a/cli/src/utils/cleanup.ts b/cli/src/utils/cleanup.ts new file mode 100644 index 00000000..211458cb --- /dev/null +++ b/cli/src/utils/cleanup.ts @@ -0,0 +1,157 @@ +import type { ChildProcess } from "node:child_process"; +import { spawnSync } from "node:child_process"; +import { logDebug, logWarn } from "../ui/logger.ts"; + +type CleanupFn = () => Promise | void; + +const cleanupRegistry: Set = new Set(); +const trackedProcesses: Set = new Set(); +let isCleaningUp = false; + +function isProcessRunning(proc: ChildProcess): boolean { + return proc.exitCode === null && proc.signalCode === null; +} + +/** + * Register a function to be called on process exit or manual cleanup + */ +export function registerCleanup(fn: CleanupFn): () => void { + cleanupRegistry.add(fn); + return () => cleanupRegistry.delete(fn); +} + +/** + * Register a child process to be tracked and killed on exit + */ +export function registerProcess(proc: ChildProcess): () => void { + trackedProcesses.add(proc); + + const remove = () => trackedProcesses.delete(proc); + + proc.on("exit", remove); + proc.on("error", remove); + + return remove; +} + +/** + * Run all registered cleanup functions and kill tracked processes + */ +export async function runCleanup(): Promise { + if (isCleaningUp) return; + isCleaningUp = true; + + // 1. Kill all tracked child processes with verification + for (const proc of trackedProcesses) { + try { + if (proc.pid && isProcessRunning(proc)) { + const pid = proc.pid; + + if (process.platform === "win32") { + // Windows needs taskkill for robust child tree termination + const result = spawnSync("taskkill", ["/pid", String(pid), "/f", "/t"], { + stdio: "pipe", + }); + + // Verify the process was actually killed + // Status 128 = process already exited, which is fine + if (result.status !== 0 && result.status !== 128) { + logWarn(`taskkill may have failed for PID ${pid} (exit code: ${result.status})`); + if (result.stderr) { + logDebug(`taskkill stderr: ${result.stderr.toString()}`); + } + } + + await new Promise((resolve) => setTimeout(resolve, 500)); + if (isProcessRunning(proc)) { + logWarn(`Process ${pid} may still be running after taskkill`); + } + } else { + // Try graceful termination first + proc.kill("SIGTERM"); + + // Wait a bit and verify it's dead + await new Promise((resolve) => setTimeout(resolve, 1000)); + + // Check if process is still running + if (isProcessRunning(proc)) { + proc.kill("SIGKILL"); + + // Final verification + await new Promise((resolve) => setTimeout(resolve, 500)); + if (isProcessRunning(proc)) { + logWarn(`Failed to terminate process ${pid} after SIGKILL`); + } + } + } + } + } catch (err) { + // Process termination failed, continue cleanup + logDebug(`Failed to terminate process ${proc.pid}: ${err}`); + } + } + trackedProcesses.clear(); + + // 2. Run registered cleanup functions + const promises: Promise[] = []; + for (const fn of cleanupRegistry) { + try { + const result = fn(); + if (result instanceof Promise) { + promises.push(result); + } + } catch (err) { + // Log sync errors but continue with other cleanup functions + promises.push(Promise.reject(err)); + } + } + + const results = await Promise.allSettled(promises); + for (const result of results) { + if (result.status === "rejected") { + logWarn(`Cleanup task failed: ${result.reason}`); + } + } + cleanupRegistry.clear(); + isCleaningUp = false; +} + +let isShuttingDown = false; +let handlersRegistered = false; + +/** + * Setup process signal handlers for cleanup + */ +export function setupSignalHandlers(): void { + if (handlersRegistered) { + return; + } + handlersRegistered = true; + + const signals: NodeJS.Signals[] = ["SIGINT", "SIGTERM"]; + + for (const signal of signals) { + process.on(signal, async () => { + // Prevent duplicate cleanup runs + if (isShuttingDown) { + process.stdout.write(`\nReceived ${signal}, cleanup already in progress...\n`); + return; + } + isShuttingDown = true; + + // Use writeSync to avoid event loop issues during exit + process.stdout.write(`\nReceived ${signal}, cleaning up processes and files...\n`); + + try { + await runCleanup(); + process.exit(0); + } catch (error) { + process.stderr.write(`\nCleanup failed: ${error}\n`); + process.exit(1); + } + }); + } + + // Note: uncaughtException is handled in cli/src/index.ts for the main process + // This avoids duplicate handlers and ensures consistent error handling +} diff --git a/cli/src/utils/json-validation.ts b/cli/src/utils/json-validation.ts new file mode 100644 index 00000000..e5a069dc --- /dev/null +++ b/cli/src/utils/json-validation.ts @@ -0,0 +1,179 @@ +import { z } from "zod"; + +export const StepFinishSchema = z.object({ + type: z.literal("step_finish"), + part: z + .object({ + tokens: z + .object({ + input: z.number().optional(), + output: z.number().optional(), + }) + .optional(), + input: z.number().optional(), + output: z.number().optional(), + cost: z.number().optional(), + }) + .optional(), + tokens: z + .object({ + input: z.number().optional(), + output: z.number().optional(), + }) + .optional(), + cost: z.number().optional(), + // Session ID fields (various naming conventions) + sessionID: z.string().optional(), + sessionId: z.string().optional(), + session_id: z.string().optional(), +}); + +export const StepStartSchema = z.object({ + type: z.literal("step_start"), +}); + +export const TextSchema = z.object({ + type: z.literal("text"), + part: z.object({ + text: z.string(), + }), +}); + +export const ErrorSchema = z.object({ + type: z.literal("error"), + error: z + .object({ + message: z.string().optional(), + }) + .optional(), + message: z.string().optional(), +}); + +export const ResultSchema = z.object({ + type: z.literal("result"), + result: z.string().optional(), + usage: z + .object({ + input_tokens: z.number().optional(), + output_tokens: z.number().optional(), + }) + .optional(), +}); + +export const ToolUseSchema = z.object({ + type: z.literal("tool_use"), + part: z + .object({ + tool: z.string().optional(), + state: z + .object({ + input: z + .union([z.string(), z.number(), z.boolean(), z.object({}).passthrough(), z.array(z.unknown())]) + .optional(), + status: z.string().optional(), + }) + .optional(), + }) + .optional(), + tool: z.string().optional(), + callID: z.string().optional(), +}); + +export const StreamJsonEventSchema = z.union([ + StepFinishSchema, + StepStartSchema, + TextSchema, + ErrorSchema, + ResultSchema, + ToolUseSchema, +]); + +export type StreamJsonEvent = z.infer; +export type StepFinish = z.infer; +export type TextEvent = z.infer; +export type ToolUseEvent = z.infer; + +/** + * Safely parse a JSON line with schema validation + * Returns null if parsing fails or schema is invalid + * + * This function handles: + * - Complete JSON objects + * - JSON followed by additional text (splits at proper object boundary) + * - Truncated JSON (attempts to recover) + * - Special characters in strings (properly handles escape sequences) + */ +export function parseJsonLine(line: string): { event: StreamJsonEvent; remaining?: string } | null { + try { + const trimmed = line.trim(); + if (!trimmed) return null; + + // Handle cases where JSON is followed by text without a newline + // Search for the end of the JSON object + let jsonStr = trimmed; + let remaining: string | undefined; + + if (trimmed.startsWith("{")) { + let depth = 0; + let inString = false; + let isEscaped = false; + let jsonEndIndex = -1; + + for (let i = 0; i < trimmed.length; i++) { + const char = trimmed[i]; + if (isEscaped) { + isEscaped = false; + continue; + } + if (char === "\\") { + isEscaped = true; + continue; + } + if (char === '"' && !isEscaped) { + inString = !inString; + continue; + } + if (!inString) { + if (char === "{") depth++; + if (char === "}") { + depth--; + if (depth === 0) { + jsonEndIndex = i; + break; + } + } + } + } + + // If we found a complete JSON object, split it from any remaining text + if (jsonEndIndex >= 0) { + jsonStr = trimmed.substring(0, jsonEndIndex + 1); + remaining = trimmed.substring(jsonEndIndex + 1).trim(); + } + // If we didn't find a complete object but started with '{', + // it might be truncated - still try to parse what we have + } + + const parsed = JSON.parse(jsonStr); + const event = StreamJsonEventSchema.parse(parsed); + return { event, remaining: remaining || undefined }; + } catch { + return null; + } +} + +/** + * Extract session ID from a parsed JSON event + */ +export function extractSessionId(event: StreamJsonEvent): string | null { + if ("sessionID" in event && typeof event.sessionID === "string") { + return event.sessionID; + } + if ("sessionId" in event && typeof event.sessionId === "string") { + return event.sessionId; + } + if ("session_id" in event && typeof event.session_id === "string") { + return event.session_id; + } + return null; +}