diff --git a/cli/README.md b/cli/README.md
index e4a2e199..47f86ec2 100644
--- a/cli/README.md
+++ b/cli/README.md
@@ -345,6 +345,7 @@ ralphy --parallel --sandbox
 | Gemini | `gemini` | `--yolo` | tokens + cost |
 
 When an engine exits non-zero, ralphy includes the last lines of CLI output in the error message to make debugging easier.
+Authentication and command-failure messages are parsed more strictly to reduce false positives and make retry behavior more predictable.
 
 ## Links
 
diff --git a/cli/src/cli/commands/run.ts b/cli/src/cli/commands/run.ts
index e9fc95b7..6482f168 100644
--- a/cli/src/cli/commands/run.ts
+++ b/cli/src/cli/commands/run.ts
@@ -40,19 +40,19 @@ export async function runLoop(options: RuntimeOptions): Promise<void> {
 		if (!existsSync(options.prdFile)) {
 			logError(`${options.prdFile} not found in current directory`);
 			logInfo(`Create a ${options.prdFile} file with tasks`);
-			process.exit(1);
+			throw new Error(`PRD source not found: ${options.prdFile}`);
 		}
 	} else if (options.prdSource === "markdown-folder") {
 		if (!existsSync(options.prdFile)) {
 			logError(`PRD folder ${options.prdFile} not found`);
 			logInfo(`Create a ${options.prdFile}/ folder with markdown files containing tasks`);
-			process.exit(1);
+			throw new Error(`PRD folder not found: ${options.prdFile}`);
 		}
 	}
 
 	if (options.prdSource === "github" && !options.githubRepo) {
 		logError("GitHub repository not specified. Use --github owner/repo");
-		process.exit(1);
+		throw new Error("GitHub repository not specified");
 	}
 
 	// Check engine availability
@@ -61,7 +61,7 @@ export async function runLoop(options: RuntimeOptions): Promise<void> {
 
 	if (!available) {
 		logError(`${engine.name} CLI not found. Make sure '${engine.cliCommand}' is in your PATH.`);
-		process.exit(1);
+		throw new Error(`${engine.name} CLI not available`);
 	}
 
 	// Create task source with caching for better performance
@@ -91,7 +91,7 @@ export async function runLoop(options: RuntimeOptions): Promise<void> {
 			logError("Cannot run in parallel/branch mode: repository has no commits yet.");
 			logInfo("Please make an initial commit first:");
 			logInfo('  git add . && git commit -m "Initial commit"');
-			process.exit(1);
+			throw new Error("Repository has no commits yet");
 		}
 	}
 
@@ -195,6 +195,6 @@ export async function runLoop(options: RuntimeOptions): Promise<void> {
 	}
 
 	if (result.tasksFailed > 0) {
-		process.exit(1);
+		throw new Error(`${result.tasksFailed} task(s) failed`);
 	}
 }
diff --git a/cli/src/cli/commands/task.ts b/cli/src/cli/commands/task.ts
index 312977e7..f34d633c 100644
--- a/cli/src/cli/commands/task.ts
+++ b/cli/src/cli/commands/task.ts
@@ -28,7 +28,7 @@ export async function runTask(task: string, options: RuntimeOptions): Promise<vo
 
 	if (!available) {
 		logError(`${engine.name} CLI not found. Make sure '${engine.cliCommand}' is in your PATH.`);
-		process.exit(1);
+		throw new Error(`${engine.name} CLI not available`);
 	}
 
 	logInfo(`Running task with ${engine.name}...`);
@@ -129,7 +129,7 @@ export async function runTask(task: string, options: RuntimeOptions): Promise<vo
 				tasksFailed: 1,
 			});
 			notifyTaskFailed(task, result.error || "Unknown error");
-			process.exit(1);
+			throw new Error(result.error || "Unknown error");
 		}
 	} catch (error) {
 		const errorMsg = error instanceof Error ? error.message : String(error);
@@ -140,6 +140,6 @@ export async function runTask(task: string, options: RuntimeOptions): Promise<vo
 			tasksFailed: 1,
 		});
 		notifyTaskFailed(task, errorMsg);
-		process.exit(1);
+		throw error instanceof Error ? error : new Error(errorMsg);
 	}
 }
diff --git a/cli/src/config/constants.ts b/cli/src/config/constants.ts
new file mode 100644
index 00000000..04789fde
--- /dev/null
+++ b/cli/src/config/constants.ts
@@ -0,0 +1,80 @@
+export const PROGRESS_UPDATE_INTERVAL = 500;
+export const HEARTBEAT_INTERVAL = 5000;
+// Default retry count used by CLI/runtime options.
+export const DEFAULT_MAX_RETRIES = 3;
+// Legacy alias retained for older modules.
+export const MAX_RETRIES = DEFAULT_MAX_RETRIES;
+export const UI_LABELS = {
+	PLANNING: "[PLANNING]",
+	EXECUTION: "[EXECUTION]",
+	DONE: "[DONE]",
+	FAIL: "[FAIL]",
+	OK: "[OK]",
+};
+export const SPINNER_CHARS = ["|", "/", "-", "\\"];
+export const MAX_EXECUTION_TIME = 300000; // 5 minutes
+export const PROGRESS_POLL_INTERVAL = 2000;
+export const WATCHER_DEBOUNCE = 250;
+export const PLANNING_COOLDOWN = 2000;
+
+// CLI Defaults
+export const DEFAULT_RETRY_DELAY = 5;
+export const DEFAULT_MAX_PARALLEL = 3;
+export const DEFAULT_MAX_REPLANS = 3;
+export const DEFAULT_MAX_ITERATIONS = 0;
+
+// AI Engine Defaults
+export const DEFAULT_AI_ENGINE_TIMEOUT_MS = 80 * 60 * 1000; // 80 minutes
+export const STREAM_HEARTBEAT_INTERVAL_MS = 30000; // 30 seconds without output = potential hang
+
+// Parallel Execution
+export const CACHE_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
+export const INITIAL_POOL_SIZE_MULTIPLIER = 1;
+export const MAX_POOL_SIZE_MULTIPLIER = 5;
+export const PLANNING_CONCURRENCY = 5;
+export const POOL_INCREMENT = 2;
+
+// Progress Monitoring
+export const MAX_OPERATIONS_HISTORY = 10;
+export const RECENT_ACTIONS_COUNT = 3;
+export const FIND_WORKTREE_RETRIES = 20;
+export const MAX_DISPLAYED_ACTIONS = 3;
+
+// Sandbox Management
+export const DEFAULT_MAX_SANDBOX_AGE_MS = 60 * 60 * 1000; // 1 hour
+export const SANDBOX_STALE_THRESHOLD_MS = 24 * 60 * 60 * 1000; // 24 hours
+export const SANDBOX_BACKGROUND_CLEANUP_DELAY_MS = 5 * 60 * 1000; // 5 minutes
+export const MS_PER_MINUTE = 60000;
+export const CLEANUP_DELAY_MS = 5000;
+export const COPY_BACK_CONCURRENCY = 10;
+export const SANDBOX_DIR_PREFIX = "agent-";
+export const SANDBOX_SUFFIX = "";
+export const DEFAULT_IGNORE_PATTERNS = [
+	".git",
+	"node_modules",
+	".ralphy-sandboxes",
+	".ralphy-worktrees",
+	".ralphy",
+	"agent-*",
+	"sandbox-*",
+];
+
+// File Utilities
+export const MAX_FILE_SIZE_FOR_HASH = 2 * 1024 * 1024; // 2MB
+export const DEFAULT_RECURSION_DEPTH = 5;
+
+// Lock Management
+export const LOCK_TIMEOUT_MS = 30000; // 30 seconds
+export const LOCK_MAX_LOCKS = 5000; // Maximum number of locks
+export const LOCK_DIR = ".ralphy/locks"; // Lock directory
+export const LOCK_CLEANUP_INTERVAL_MS = 60000; // 1 minute between cleanups
+
+// Path Constants
+export const SANDBOX_DIR = ".ralphy-worktrees";
+export const PLANNING_CACHE_FILE = "planning-cache.json";
+
+// Hash Store Constants
+export const HASH_STORE_DIR = ".ralphy-hashes";
+export const HASH_STORE_MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24 hours
+export const HASH_REFERENCE_SUFFIX = ".hash-ref";
+export const ENABLE_HASH_STORE = true; // Feature flag
diff --git a/cli/src/config/types.ts b/cli/src/config/types.ts
index 6a292aa9..22c8f2b3 100644
--- a/cli/src/config/types.ts
+++ b/cli/src/config/types.ts
@@ -17,6 +17,7 @@ export const NotificationsSchema = z.object({
 	discord_webhook: z.string().default(""),
 	slack_webhook: z.string().default(""),
 	custom_webhook: z.string().default(""),
+	telemetry_webhook: z.string().default(""),
 });
 
 /**
@@ -109,6 +110,8 @@ export interface RuntimeOptions {
 	browserEnabled: "auto" | "true" | "false";
 	/** Override default model for the engine */
 	modelOverride?: string;
+	/** Enable comprehensive OpenCode debugging */
+	debugOpenCode?: boolean;
 	/** Skip automatic branch merging after parallel execution */
 	skipMerge?: boolean;
 	/** Use lightweight sandboxes instead of git worktrees for parallel execution */
@@ -142,4 +145,5 @@ export const DEFAULT_OPTIONS: RuntimeOptions = {
 	githubLabel: "",
 	autoCommit: true,
 	browserEnabled: "auto",
+	debugOpenCode: false,
 };
diff --git a/cli/src/engines/base.ts b/cli/src/engines/base.ts
index 9c558f7a..d7cc87af 100644
--- a/cli/src/engines/base.ts
+++ b/cli/src/engines/base.ts
@@ -1,463 +1,316 @@
-import { spawn, spawnSync } from "node:child_process";
+import type { ChildProcess } from "node:child_process";
+import { DEFAULT_AI_ENGINE_TIMEOUT_MS } from "../config/constants.ts";
+import { logDebug } from "../ui/logger.ts";
+
+import { formatParsedStep, parseAIStep } from "../utils/ai-output-parser.ts";
+import { ErrorSchema, parseJsonLine } from "../utils/json-validation.ts";
+import { commandExists, execCommand, execCommandStreamingNew } from "./executor.ts";
+import { detectStepFromOutput } from "./parsers.ts";
 import type { AIEngine, AIResult, EngineOptions, ProgressCallback } from "./types.ts";
 
-// Check if running in Bun
-const isBun = typeof Bun !== "undefined";
-const isWindows = process.platform === "win32";
-
-/**
- * Check if a command is available in PATH
- */
-export async function commandExists(command: string): Promise<boolean> {
-	try {
-		const checkCommand = isWindows ? "where" : "which";
-		if (isBun) {
-			const proc = Bun.spawn([checkCommand, command], {
-				stdout: "pipe",
-				stderr: "pipe",
-			});
-			const exitCode = await proc.exited;
-			return exitCode === 0;
-		}
-		// Node.js fallback - where/which don't need shell
-		const result = spawnSync(checkCommand, [command], { stdio: "pipe" });
-		return result.status === 0;
-	} catch {
-		return false;
+// Re-export functions from new modules for backward compatibility
+export {
+	commandExists,
+	execCommand,
+	execCommandStreaming,
+	execCommandStreamingNew,
+} from "./executor.ts";
+export {
+	checkForErrors,
+	detectStepFromOutput,
+	extractAuthenticationError,
+	extractTokenCounts,
+	formatCommandError,
+	parseStreamJsonResult,
+} from "./parsers.ts";
+export { validateArgs, validateCommand, validateCommandAndArgs } from "./validation.ts";
+
+const DEBUG = process.env.RALPHY_DEBUG === "true";
+
+function debugLog(...args: unknown[]): void {
+	if (DEBUG || (globalThis as { verboseMode?: boolean }).verboseMode === true) {
+		logDebug(args.map((a) => String(a)).join(" "));
 	}
 }
 
 /**
- * Execute a command and return stdout
- * @param stdinContent - Optional content to pass via stdin (useful for multi-line prompts on Windows)
+ * Base AI Engine implementation
  */
-export async function execCommand(
-	command: string,
-	args: string[],
-	workDir: string,
-	env?: Record<string, string>,
-	stdinContent?: string,
-): Promise<{ stdout: string; stderr: string; exitCode: number }> {
-	if (isBun) {
-		// On Windows, run through cmd.exe to handle .cmd wrappers (npm global packages)
-		const spawnArgs = isWindows ? ["cmd.exe", "/c", command, ...args] : [command, ...args];
-		const proc = Bun.spawn(spawnArgs, {
-			cwd: workDir,
-			stdin: stdinContent ? "pipe" : "ignore",
-			stdout: "pipe",
-			stderr: "pipe",
-			env: { ...process.env, ...env },
-		});
-
-		// Write stdin content if provided
-		if (stdinContent && proc.stdin) {
-			proc.stdin.write(stdinContent);
-			proc.stdin.end();
-		}
-
-		const [stdout, stderr, exitCode] = await Promise.all([
-			new Response(proc.stdout).text(),
-			new Response(proc.stderr).text(),
-			proc.exited,
-		]);
+export abstract class BaseAIEngine implements AIEngine {
+	abstract name: string;
+	abstract cliCommand: string;
 
-		return { stdout, stderr, exitCode };
+	/**
+	 * Check if the CLI command is available
+	 */
+	async isAvailable(): Promise<boolean> {
+		debugLog(`isAvailable: Checking if '${this.cliCommand}' (${this.name}) is available...`);
+		const result = await commandExists(this.cliCommand);
+		debugLog(`isAvailable: '${this.cliCommand}' (${this.name}) available = ${result}`);
+		return result;
 	}
 
-	// Node.js fallback - use shell on Windows to execute .cmd wrappers
-	return new Promise((resolve) => {
-		const proc = spawn(command, args, {
-			cwd: workDir,
-			env: { ...process.env, ...env },
-			stdio: [stdinContent ? "pipe" : "ignore", "pipe", "pipe"],
-			shell: isWindows, // Required on Windows for npm global commands (.cmd wrappers)
-		});
-
-		// Write stdin content if provided
-		if (stdinContent && proc.stdin) {
-			proc.stdin.write(stdinContent);
-			proc.stdin.end();
-		}
-
-		let stdout = "";
-		let stderr = "";
-
-		proc.stdout?.on("data", (data) => {
-			stdout += data.toString();
-		});
-
-		proc.stderr?.on("data", (data) => {
-			stderr += data.toString();
-		});
-
-		proc.on("close", (exitCode) => {
-			resolve({ stdout, stderr, exitCode: exitCode ?? 1 });
-		});
+	/**
+	 * Build CLI arguments for engine
+	 */
+	protected abstract buildArgs(prompt: string, workDir: string, options?: EngineOptions): string[];
 
-		proc.on("error", (err) => {
-			// Maintain backward compatibility - don't reject, include error in stderr
-			stderr += `\nSpawn error: ${err.message}`;
-			resolve({ stdout, stderr, exitCode: 1 });
-		});
-	});
-}
+	/**
+	 * Process CLI output into AIResult
+	 */
+	protected abstract processCliResult(
+		stdout: string,
+		stderr: string,
+		exitCode: number,
+		workDir: string,
+	): AIResult;
 
-/**
- * Parse token counts from stream-json output (Claude/Qwen format)
- */
-export function parseStreamJsonResult(output: string): {
-	response: string;
-	inputTokens: number;
-	outputTokens: number;
-} {
-	const lines = output.split("\n").filter(Boolean);
-	let response = "";
-	let inputTokens = 0;
-	let outputTokens = 0;
-
-	for (const line of lines) {
-		try {
-			const parsed = JSON.parse(line);
-			if (parsed.type === "result") {
-				response = parsed.result || "Task completed";
-				inputTokens = parsed.usage?.input_tokens || 0;
-				outputTokens = parsed.usage?.output_tokens || 0;
-			}
-		} catch {
-			// Ignore non-JSON lines
-		}
+	/**
+	 * Get environment variables for engine
+	 */
+	protected getEnv(options?: EngineOptions): Record<string, string> | undefined {
+		return options?.env;
 	}
 
-	return { response: response || "Task completed", inputTokens, outputTokens };
-}
-
-/**
- * Check for errors in stream-json output
- */
-export function checkForErrors(output: string): string | null {
-	const lines = output.split("\n").filter(Boolean);
-
-	for (const line of lines) {
-		try {
-			const parsed = JSON.parse(line);
-			if (parsed.type === "error") {
-				return parsed.error?.message || parsed.message || "Unknown error";
-			}
-		} catch {
-			// Ignore non-JSON lines
-		}
+	/**
+	 * Whether prompt should be passed through stdin.
+	 * Engines that pass prompts via temp files should override this to false.
+	 */
+	protected useStdin(): boolean {
+		return true;
 	}
 
-	return null;
-}
-
-/**
- * Extract authentication error message from stream-json output.
- * Looks for error type messages or result messages with authentication-related keywords.
- * Returns the clean error message if found, null otherwise.
- */
-export function extractAuthenticationError(output: string): string | null {
-	const lines = output.split("\n").filter(Boolean);
+	/**
+	 * Build args array with stdin handling
+	 * Prompts are passed via stdin to avoid shell escaping issues and ensure
+	 * cross-platform compatibility (Windows, Linux, macOS)
+	 */
+	protected buildArgsWithStdin(
+		baseArgs: string[],
+		prompt: string,
+	): { args: string[]; stdinContent?: string } {
+		// Always use stdin for prompts - this is the most reliable cross-platform approach
+		// It avoids shell escaping issues and command line length limits on all platforms
+		return { args: baseArgs, stdinContent: prompt };
+	}
 
-	for (const line of lines) {
-		try {
-			const parsed = JSON.parse(line);
-
-			// Check if this is any kind of error response
-			if (
-				parsed.type === "error" ||
-				parsed.is_error === true ||
-				parsed.error === "authentication_failed"
-			) {
-				// Extract message from content array (assistant type) or standard fields
-				let message = "";
-				const content = parsed.message?.content;
-				if (Array.isArray(content)) {
-					const textItem = content.find(
-						(item: { type?: string; text?: string }) => item.type === "text" && item.text,
-					);
-					if (textItem) message = textItem.text;
-				}
-				if (!message) {
-					message = parsed.result || parsed.error?.message || parsed.message || "";
-				}
+	/**
+	 * Execute with streaming progress updates (optional implementation)
+	 */
+	async executeStreaming(
+		prompt: string,
+		workDir: string,
+		onProgress: ProgressCallback,
+		options?: EngineOptions,
+	): Promise<AIResult> {
+		if (options?.dryRun) {
+			onProgress("Skipped (dry run)");
+			return { success: true, response: "(dry run) Skipped", inputTokens: 0, outputTokens: 0 };
+		}
 
-				if (message && isAuthenticationMessage(message.toLowerCase())) {
-					return message;
+		const args = this.buildArgs(prompt, workDir, options);
+		const env = this.getEnv(options);
+
+		// Always use stdin for prompts - most reliable cross-platform approach
+		const stdinContent = this.useStdin() ? prompt : undefined;
+
+		debugLog(`Starting ${this.name} engine with ${this.cliCommand}`);
+		debugLog(`WorkDir: ${workDir}`);
+		debugLog(`Args: ${args.join(" ")}`);
+
+		const timeout = Number.parseInt(
+			process.env.RALPHY_EXECUTION_TIMEOUT || String(DEFAULT_AI_ENGINE_TIMEOUT_MS),
+			10,
+		);
+		debugLog(`Timeout set to: ${Math.floor(timeout / 1000)}s`);
+
+		let timedOut = false;
+		let childProcess: import("./types.ts").ChildProcess | null = null;
+		const timeoutId = setTimeout(() => {
+			timedOut = true;
+			onProgress(
+				`[Warning: Process taking longer than ${Math.floor(timeout / 1000 / 60)} minutes...]`,
+			);
+			debugLog(`Timeout reached after ${timeout}ms`);
+
+			// BUG FIX: Check childProcess exists before attempting to kill
+			// This prevents errors when timeout fires before process is assigned (fallback case)
+			if (childProcess && typeof childProcess.kill === "function") {
+				debugLog("Killing child process due to timeout");
+				try {
+					childProcess.kill();
+				} catch (killErr) {
+					debugLog(`Failed to kill child process: ${killErr}`);
 				}
 			}
-		} catch {
-			// Ignore non-JSON lines
-		}
-	}
+		}, timeout);
 
-	return null;
-}
-
-/**
- * Check if a message contains authentication-related keywords
- */
-function isAuthenticationMessage(messageLower: string): boolean {
-	return (
-		messageLower.includes("invalid api key") ||
-		messageLower.includes("authentication") ||
-		messageLower.includes("not authenticated") ||
-		messageLower.includes("unauthorized") ||
-		messageLower.includes("/login")
-	);
-}
-
-/**
- * Format a command failure with useful output context.
- * If the output contains an authentication error, returns just that error message.
- * Otherwise returns the full error with output context.
- */
-export function formatCommandError(exitCode: number, output: string): string {
-	const trimmed = output.trim();
-	if (!trimmed) {
-		return `Command failed with exit code ${exitCode}`;
-	}
-
-	// Check for authentication errors first - return the clean message if found
-	const authError = extractAuthenticationError(output);
-	if (authError) {
-		return authError;
-	}
+		try {
+			const result = await execCommandStreamingNew(
+				this.cliCommand,
+				args,
+				workDir,
+				env,
+				stdinContent,
+			);
+
+			childProcess = result.process;
+			let stdout = "";
+			let stderr = "";
+			let exitCode = 0;
+
+			if (result.stdout?.getReader && result.stderr?.getReader) {
+				const stdoutReader = result.stdout.getReader();
+				const stderrReader = result.stderr.getReader();
+
+				const readStdout = async () => {
+					try {
+						while (true) {
+							const { done, value } = await stdoutReader.read();
+							if (done) break;
+							const chunk = new TextDecoder().decode(value);
+							stdout += chunk;
+
+							const lines = chunk.split("\n");
+							for (const line of lines) {
+								if (line.trim()) {
+									const step = this.parseProgressLine(line, options?.logThoughts);
+									if (step) {
+										onProgress(step);
+									}
+								}
+							}
+						}
+					} catch (err) {
+						debugLog(`Error reading stdout: ${err}`);
+					}
+				};
+
+				const readStderr = async () => {
+					try {
+						while (true) {
+							const { done, value } = await stderrReader.read();
+							if (done) break;
+							const chunk = new TextDecoder().decode(value);
+							stderr += chunk;
+						}
+					} catch (err) {
+						debugLog(`Error reading stderr: ${err}`);
+					}
+				};
+
+				const exitedPromise = childProcess?.exited
+					? childProcess.exited
+					: new Promise<number>((resolve) => {
+							const nodeProcess = childProcess as unknown as ChildProcess;
+							nodeProcess.once("close", (code) => resolve(code ?? 1));
+							nodeProcess.once("error", () => resolve(1));
+						});
+
+				const [resolvedExitCode] = await Promise.all([exitedPromise, readStdout(), readStderr()]);
+				exitCode = resolvedExitCode ?? 1;
+			} else {
+				// BUG FIX: Use stdinContent instead of undefined 'needsStdin' variable
+				const result = await execCommand(this.cliCommand, args, workDir, env, stdinContent);
+				stdout = result.stdout;
+				stderr = result.stderr;
+				exitCode = result.exitCode;
+			}
 
-	const lines = trimmed.split("\n").filter(Boolean);
-	const snippet = lines.slice(-12).join("\n");
-	return `Command failed with exit code ${exitCode}. Output:\n${snippet}`;
-}
+			clearTimeout(timeoutId);
 
-/**
- * Read a stream line by line, calling onLine for each non-empty line
- */
-async function readStream(
-	stream: ReadableStream<Uint8Array>,
-	onLine: (line: string) => void,
-): Promise<void> {
-	const reader = stream.getReader();
-	const decoder = new TextDecoder();
-	let buffer = "";
-	try {
-		while (true) {
-			const { done, value } = await reader.read();
-			if (done) break;
-			buffer += decoder.decode(value, { stream: true });
-			const lines = buffer.split("\n");
-			buffer = lines.pop() || "";
-			for (const line of lines) {
-				if (line.trim()) onLine(line);
+			if (timedOut) {
+				return {
+					success: false,
+					response: "",
+					inputTokens: 0,
+					outputTokens: 0,
+					error: `Execution timed out after ${Math.floor(timeout / 1000 / 60)} minutes`,
+				};
 			}
-		}
-		if (buffer.trim()) onLine(buffer);
-	} finally {
-		reader.releaseLock();
-	}
-}
 
-/**
- * Execute a command with streaming output, calling onLine for each line
- * @param stdinContent - Optional content to pass via stdin (useful for multi-line prompts on Windows)
- */
-export async function execCommandStreaming(
-	command: string,
-	args: string[],
-	workDir: string,
-	onLine: (line: string) => void,
-	env?: Record<string, string>,
-	stdinContent?: string,
-): Promise<{ exitCode: number }> {
-	if (isBun) {
-		// On Windows, run through cmd.exe to handle .cmd wrappers (npm global packages)
-		const spawnArgs = isWindows ? ["cmd.exe", "/c", command, ...args] : [command, ...args];
-		const proc = Bun.spawn(spawnArgs, {
-			cwd: workDir,
-			stdin: stdinContent ? "pipe" : "ignore",
-			stdout: "pipe",
-			stderr: "pipe",
-			env: { ...process.env, ...env },
-		});
-
-		// Write stdin content if provided
-		if (stdinContent && proc.stdin) {
-			proc.stdin.write(stdinContent);
-			proc.stdin.end();
+			return this.processCliResult(stdout, stderr, exitCode, workDir);
+		} catch (error) {
+			clearTimeout(timeoutId);
+			debugLog(`Error in executeStreaming: ${error}`);
+			return {
+				success: false,
+				response: "",
+				inputTokens: 0,
+				outputTokens: 0,
+				error: error instanceof Error ? error.message : String(error),
+			};
 		}
-
-		// Process both stdout and stderr in parallel
-		await Promise.all([readStream(proc.stdout, onLine), readStream(proc.stderr, onLine)]);
-
-		const exitCode = await proc.exited;
-		return { exitCode };
 	}
 
-	// Node.js fallback - use shell on Windows to execute .cmd wrappers
-	return new Promise((resolve) => {
-		const proc = spawn(command, args, {
-			cwd: workDir,
-			env: { ...process.env, ...env },
-			stdio: [stdinContent ? "pipe" : "ignore", "pipe", "pipe"],
-			shell: isWindows, // Required on Windows for npm global commands (.cmd wrappers)
-		});
-
-		// Write stdin content if provided
-		if (stdinContent && proc.stdin) {
-			proc.stdin.write(stdinContent);
-			proc.stdin.end();
+	/**
+	 * Execute the AI engine (non-streaming)
+	 */
+	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+		if (options?.dryRun) {
+			return { success: true, response: "(dry run) Skipped", inputTokens: 0, outputTokens: 0 };
 		}
 
-		let stdoutBuffer = "";
-		let stderrBuffer = "";
-
-		const processBuffer = (buffer: string, isStderr = false) => {
-			const lines = buffer.split("\n");
-			const remaining = lines.pop() || "";
-			for (const line of lines) {
-				if (line.trim()) onLine(line);
-			}
-			return remaining;
-		};
-
-		proc.stdout?.on("data", (data) => {
-			stdoutBuffer += data.toString();
-			stdoutBuffer = processBuffer(stdoutBuffer);
-		});
-
-		proc.stderr?.on("data", (data) => {
-			stderrBuffer += data.toString();
-			stderrBuffer = processBuffer(stderrBuffer, true);
-		});
-
-		proc.on("close", (exitCode) => {
-			// Process any remaining data
-			if (stdoutBuffer.trim()) onLine(stdoutBuffer);
-			if (stderrBuffer.trim()) onLine(stderrBuffer);
-			resolve({ exitCode: exitCode ?? 1 });
-		});
-
-		proc.on("error", (err) => {
-			// Maintain backward compatibility - don't reject, report error via onLine
-			onLine(`Spawn error: ${err.message}`);
-			resolve({ exitCode: 1 });
-		});
-	});
-}
-
-/**
- * Check if a file path looks like a test file
- */
-function isTestFile(filePath: string): boolean {
-	const lower = filePath.toLowerCase();
-	return (
-		lower.includes(".test.") ||
-		lower.includes(".spec.") ||
-		lower.includes("__tests__") ||
-		lower.includes("_test.go")
-	);
-}
-
-/**
- * Detect the current step from a JSON output line
- * Returns step name like "Reading code", "Implementing", etc.
- */
-export function detectStepFromOutput(line: string): string | null {
-	// Fast path: skip non-JSON lines
-	const trimmed = line.trim();
-	if (!trimmed.startsWith("{")) {
-		return null;
-	}
-
-	try {
-		const parsed = JSON.parse(trimmed);
-
-		// Extract specific fields for pattern matching (avoid stringifying entire object)
-		const toolName =
-			parsed.tool?.toLowerCase() ||
-			parsed.name?.toLowerCase() ||
-			parsed.tool_name?.toLowerCase() ||
-			"";
-		const command = parsed.command?.toLowerCase() || "";
-		const filePath = (parsed.file_path || parsed.filePath || parsed.path || "").toLowerCase();
-		const description = (parsed.description || "").toLowerCase();
-
-		// Check tool name first to determine operation type
-		const isReadOperation = toolName === "read" || toolName === "glob" || toolName === "grep";
-		const isWriteOperation = toolName === "write" || toolName === "edit";
-
-		// Reading code - check this early to avoid misclassifying reads of test files
-		if (isReadOperation) {
-			return "Reading code";
-		}
+		const args = this.buildArgs(prompt, workDir, options);
+		const env = this.getEnv(options);
 
-		// Git commit
-		if (command.includes("git commit") || description.includes("git commit")) {
-			return "Committing";
-		}
+		// Always use stdin for prompts - most reliable cross-platform approach
+		const stdinContent = this.useStdin() ? prompt : undefined;
 
-		// Git add/staging
-		if (command.includes("git add") || description.includes("git add")) {
-			return "Staging";
-		}
+		debugLog(`Starting ${this.name} engine (non-streaming)`);
+		debugLog(`WorkDir: ${workDir}`);
+		debugLog(`Args: ${args.join(" ")}`);
 
-		// Linting - check command for lint tools
-		if (
-			command.includes("lint") ||
-			command.includes("eslint") ||
-			command.includes("biome") ||
-			command.includes("prettier")
-		) {
-			return "Linting";
+		try {
+			const result = await execCommand(this.cliCommand, args, workDir, env, stdinContent);
+
+			return this.processCliResult(result.stdout, result.stderr, result.exitCode, workDir);
+		} catch (error) {
+			debugLog(`Error in execute: ${error}`);
+			return {
+				success: false,
+				response: "",
+				inputTokens: 0,
+				outputTokens: 0,
+				error: error instanceof Error ? error.message : String(error),
+			};
 		}
+	}
 
-		// Testing - check command for test runners
-		if (
-			command.includes("vitest") ||
-			command.includes("jest") ||
-			command.includes("bun test") ||
-			command.includes("npm test") ||
-			command.includes("pytest") ||
-			command.includes("go test")
-		) {
-			return "Testing";
+	/**
+	 * Parse a line of output to extract progress information
+	 */
+	protected parseProgressLine(line: string, logThoughts?: boolean): string | null {
+		if (line.trim().startsWith("{")) {
+			try {
+				const parsed = parseJsonLine(line);
+				if (parsed) {
+					if (ErrorSchema.safeParse(parsed.event).success) {
+						return null;
+					}
+
+					const event = parsed.event as Record<string, unknown>;
+					if (event.type === "text" && event.part && typeof event.part === "object") {
+						const part = event.part as { text?: string };
+						if (part.text) {
+							const step = detectStepFromOutput(part.text, logThoughts);
+							if (step) return step;
+						}
+					}
+				}
+			} catch {
+				// Not valid JSON, continue to plain text parsing
+			}
 		}
 
-		// Writing tests - only for write operations to test files
-		if (isWriteOperation && isTestFile(filePath)) {
-			return "Writing tests";
-		}
+		const step = detectStepFromOutput(line, logThoughts);
+		if (step) return step;
 
-		// Writing/Editing code
-		if (isWriteOperation) {
-			return "Implementing";
+		const parsed = parseAIStep(line);
+		if (parsed && !line.includes("[ERROR")) {
+			return formatParsedStep(parsed);
 		}
 
 		return null;
-	} catch {
-		return null;
-	}
-}
-
-/**
- * Base implementation for AI engines
- */
-export abstract class BaseAIEngine implements AIEngine {
-	abstract name: string;
-	abstract cliCommand: string;
-
-	async isAvailable(): Promise<boolean> {
-		return commandExists(this.cliCommand);
 	}
-
-	abstract execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult>;
-
-	/**
-	 * Execute with streaming progress updates (optional implementation)
-	 */
-	executeStreaming?(
-		prompt: string,
-		workDir: string,
-		onProgress: ProgressCallback,
-		options?: EngineOptions,
-	): Promise<AIResult>;
 }
diff --git a/cli/src/engines/claude.ts b/cli/src/engines/claude.ts
index cba08f28..ed686c2c 100644
--- a/cli/src/engines/claude.ts
+++ b/cli/src/engines/claude.ts
@@ -1,15 +1,6 @@
-import {
-	BaseAIEngine,
-	checkForErrors,
-	detectStepFromOutput,
-	execCommand,
-	execCommandStreaming,
-	formatCommandError,
-	parseStreamJsonResult,
-} from "./base.ts";
-import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts";
-
-const isWindows = process.platform === "win32";
+import { BaseAIEngine, checkForErrors } from "./base.ts";
+import { createErrorResult, createSuccessResult, parseStreamJsonResult } from "./parsers.ts";
+import type { AIResult, EngineOptions } from "./types.ts";
 
 /**
  * Claude Code AI Engine
@@ -18,147 +9,33 @@ export class ClaudeEngine extends BaseAIEngine {
 	name = "Claude Code";
 	cliCommand = "claude";
 
-	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+	protected buildArgs(_prompt: string, _workDir: string, options?: EngineOptions): string[] {
 		const args = ["--dangerously-skip-permissions", "--verbose", "--output-format", "stream-json"];
 		if (options?.modelOverride) {
 			args.push("--model", options.modelOverride);
 		}
-		// Add any additional engine-specific arguments
-		if (options?.engineArgs && options.engineArgs.length > 0) {
+		if (options?.engineArgs) {
 			args.push(...options.engineArgs);
 		}
-
-		// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content
-		// On other platforms, pass as argument for compatibility
-		let stdinContent: string | undefined;
-		if (isWindows) {
-			args.push("-p"); // Enable print mode, prompt comes from stdin
-			stdinContent = prompt;
-		} else {
-			args.push("-p", prompt);
-		}
-
-		const { stdout, stderr, exitCode } = await execCommand(
-			this.cliCommand,
-			args,
-			workDir,
-			undefined,
-			stdinContent,
-		);
-
-		const output = stdout + stderr;
-
-		// Check for errors
-		const error = checkForErrors(output);
-		if (error) {
-			return {
-				success: false,
-				response: "",
-				inputTokens: 0,
-				outputTokens: 0,
-				error,
-			};
-		}
-
-		// Parse result
-		const { response, inputTokens, outputTokens } = parseStreamJsonResult(output);
-
-		// If command failed with non-zero exit code, provide a meaningful error
-		if (exitCode !== 0) {
-			return {
-				success: false,
-				response,
-				inputTokens,
-				outputTokens,
-				error: formatCommandError(exitCode, output),
-			};
-		}
-
-		return {
-			success: true,
-			response,
-			inputTokens,
-			outputTokens,
-		};
+		// Note: The prompt is passed via stdin by the base engine for cross-platform compatibility
+		// The -p flag tells Claude to read from stdin
+		args.push("-p");
+		return args;
 	}
 
-	async executeStreaming(
-		prompt: string,
-		workDir: string,
-		onProgress: ProgressCallback,
-		options?: EngineOptions,
-	): Promise<AIResult> {
-		const args = ["--dangerously-skip-permissions", "--verbose", "--output-format", "stream-json"];
-		if (options?.modelOverride) {
-			args.push("--model", options.modelOverride);
-		}
-		// Add any additional engine-specific arguments
-		if (options?.engineArgs && options.engineArgs.length > 0) {
-			args.push(...options.engineArgs);
-		}
-
-		// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content
-		// On other platforms, pass as argument for compatibility
-		let stdinContent: string | undefined;
-		if (isWindows) {
-			args.push("-p"); // Enable print mode, prompt comes from stdin
-			stdinContent = prompt;
-		} else {
-			args.push("-p", prompt);
-		}
-
-		const outputLines: string[] = [];
-
-		const { exitCode } = await execCommandStreaming(
-			this.cliCommand,
-			args,
-			workDir,
-			(line) => {
-				outputLines.push(line);
-
-				// Detect and report step changes
-				const step = detectStepFromOutput(line);
-				if (step) {
-					onProgress(step);
-				}
-			},
-			undefined,
-			stdinContent,
-		);
-
-		const output = outputLines.join("\n");
-
-		// Check for errors
+	protected processCliResult(stdout: string, stderr: string, exitCode: number): AIResult {
+		const output = stdout + stderr;
 		const error = checkForErrors(output);
 		if (error) {
-			return {
-				success: false,
-				response: "",
-				inputTokens: 0,
-				outputTokens: 0,
-				error,
-			};
+			return { success: false, response: "", inputTokens: 0, outputTokens: 0, error };
 		}
 
-		// Parse result
 		const { response, inputTokens, outputTokens } = parseStreamJsonResult(output);
 
-		// If command failed with non-zero exit code, provide a meaningful error
 		if (exitCode !== 0) {
-			return {
-				success: false,
-				response,
-				inputTokens,
-				outputTokens,
-				error: formatCommandError(exitCode, output),
-			};
+			return createErrorResult(exitCode, output, response, inputTokens, outputTokens);
 		}
 
-		return {
-			success: true,
-			response,
-			inputTokens,
-			outputTokens,
-		};
+		return createSuccessResult(response, inputTokens, outputTokens);
 	}
 }
diff --git a/cli/src/engines/codex.ts b/cli/src/engines/codex.ts
index 9674ea0f..5dc677d7 100644
--- a/cli/src/engines/codex.ts
+++ b/cli/src/engines/codex.ts
@@ -1,9 +1,8 @@
-import { existsSync, readFileSync, rmSync, unlinkSync } from "node:fs";
+import { randomUUID } from "node:crypto";
+import { existsSync, readFileSync, unlinkSync } from "node:fs";
 import { join } from "node:path";
 import { BaseAIEngine, execCommand, formatCommandError } from "./base.ts";
-import type { AIResult, EngineOptions } from "./types.ts";
-
-const isWindows = process.platform === "win32";
+import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts";
 
 /**
  * Codex AI Engine
@@ -12,33 +11,47 @@ export class CodexEngine extends BaseAIEngine {
 	name = "Codex";
 	cliCommand = "codex";
 
-	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+	protected useStdin(): boolean {
+		return false;
+	}
+
+	private buildArgsInternal(
+		prompt: string,
+		workDir: string,
+		options?: EngineOptions,
+	): { args: string[]; stdinContent?: string; lastMessageFile: string } {
 		// Codex uses a separate file for the last message
-		const lastMessageFile = join(workDir, `.codex-last-message-${Date.now()}-${process.pid}.txt`);
+		const lastMessageFile = join(
+			workDir,
+			`.codex-last-message-${Date.now()}-${process.pid}-${randomUUID()}.txt`,
+		);
 
-		try {
-			const args = ["exec", "--full-auto", "--json", "--output-last-message", lastMessageFile];
-			if (options?.modelOverride) {
-				args.push("--model", options.modelOverride);
-			}
-			// Add any additional engine-specific arguments
-			if (options?.engineArgs && options.engineArgs.length > 0) {
-				args.push(...options.engineArgs);
-			}
+		const baseArgs = ["exec", "--full-auto", "--json", "--output-last-message", lastMessageFile];
+		if (options?.modelOverride) {
+			baseArgs.push("--model", options.modelOverride);
+		}
+		// Add any additional engine-specific arguments
+		if (options?.engineArgs && options.engineArgs.length > 0) {
+			baseArgs.push(...options.engineArgs);
+		}
 
-			// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content
-			let stdinContent: string | undefined;
-			if (isWindows) {
-				stdinContent = prompt;
-			} else {
-				args.push(prompt);
-			}
+		const { args, stdinContent } = this.buildArgsWithStdin(baseArgs, prompt);
+		return { args, stdinContent, lastMessageFile };
+	}
+
+	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+		const { args, stdinContent, lastMessageFile } = this.buildArgsInternal(
+			prompt,
+			workDir,
+			options,
+		);
 
+		try {
 			const { stdout, stderr, exitCode } = await execCommand(
 				this.cliCommand,
 				args,
 				workDir,
-				undefined,
+				this.getEnv(options),
 				stdinContent,
 			);
 
@@ -98,4 +111,53 @@ export class CodexEngine extends BaseAIEngine {
 			}
 		}
 	}
+
+	protected buildArgs(prompt: string, workDir: string, options?: EngineOptions): string[] {
+		const { args } = this.buildArgsInternal(prompt, workDir, options);
+		return args;
+	}
+
+	protected processCliResult(
+		stdout: string,
+		stderr: string,
+		exitCode: number,
+		_workDir: string,
+	): AIResult {
+		const output = stdout + stderr;
+
+		if (output.includes('"type":"error"')) {
+			const errorMatch = output.match(/"message":"([^"]+)"/);
+			return {
+				success: false,
+				response: "",
+				inputTokens: 0,
+				outputTokens: 0,
+				error: errorMatch?.[1] || "Unknown error",
+			};
+		}
+
+		if (exitCode !== 0) {
+			return {
+				success: false,
+				response: "Task completed",
+				inputTokens: 0,
+				outputTokens: 0,
+				error: formatCommandError(exitCode, output),
+			};
+		}
+
+		return { success: true, response: "Task completed", inputTokens: 0, outputTokens: 0 };
+	}
+
+	async executeStreaming(
+		prompt: string,
+		workDir: string,
+		onProgress: ProgressCallback,
+		options?: EngineOptions,
+	): Promise<AIResult> {
+		onProgress("Running Codex");
+		const result = await this.execute(prompt, workDir, options);
+		onProgress(result.success ? "Completed" : "Failed");
+		return result;
+	}
 }
diff --git a/cli/src/engines/copilot.test.ts b/cli/src/engines/copilot.test.ts
index 67e5ff0a..90a6e645 100644
--- a/cli/src/engines/copilot.test.ts
+++ b/cli/src/engines/copilot.test.ts
@@ -515,7 +515,7 @@ o1-preview 1.5m in, 0.5m out, 0.1m cached`,
 
 			expect(result.success).toBe(false);
 			expect(result.error).toContain("not authenticated");
-			expect(result.error).toContain("/login");
+			expect(result.error).toContain("gh auth login");
 
 			spy.mockRestore();
 		});
diff --git a/cli/src/engines/copilot.ts b/cli/src/engines/copilot.ts
index 25d13962..638d50fc 100644
--- a/cli/src/engines/copilot.ts
+++ b/cli/src/engines/copilot.ts
@@ -1,92 +1,48 @@
 import { randomUUID } from "node:crypto";
-import { mkdirSync, unlinkSync, writeFileSync } from "node:fs";
+import { existsSync, mkdirSync, readdirSync, rmSync, statSync, writeFileSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { logDebug } from "../ui/logger.ts";
-import { BaseAIEngine, checkForErrors, execCommand, formatCommandError } from "./base.ts";
-import type { AIResult, EngineOptions } from "./types.ts";
-
-/** Directory for temporary prompt files */
-const TEMP_DIR = join(tmpdir(), "ralphy-copilot");
+import { BaseAIEngine, checkForErrors, execCommand, execCommandStreaming } from "./base.ts";
+import { detectStepFromOutput, formatCommandError } from "./parsers.ts";
+import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts";
 
 /**
  * GitHub Copilot CLI AI Engine
- *
- * Note: executeStreaming is intentionally not implemented for Copilot
- * because the streaming function can hang on Windows due to how
- * Bun handles cmd.exe stream completion. The non-streaming execute()
- * method works reliably.
- *
- * Note: All engine output is captured internally for parsing and not displayed
- * to the end user. This is by design - the spinner shows step progress while
- * the actual CLI output is processed silently.
- *
- * Note: Prompts are passed via temporary files to preserve markdown formatting.
- * The -p parameter accepts a file path, which avoids shell escaping issues and
- * maintains the full structure of markdown (newlines, code blocks, etc.) that
- * would be lost if passed as a command line string.
  */
 export class CopilotEngine extends BaseAIEngine {
 	name = "GitHub Copilot";
 	cliCommand = "copilot";
+	private tempDir = join(tmpdir(), "ralphy-copilot");
 
 	/**
-	 * Create a temporary file containing the prompt.
-	 * Uses a unique filename to support parallel execution.
-	 * @returns The path to the temporary prompt file
+	 * Build command arguments for Copilot CLI
+	 * Returns args array and optional stdin content for Windows
 	 */
-	private createPromptFile(prompt: string): string {
-		// Ensure temp directory exists - wrapped in try-catch to handle
-		// potential race conditions when multiple processes create it simultaneously
-		try {
-			mkdirSync(TEMP_DIR, { recursive: true });
-		} catch (err) {
-			// EEXIST is expected if another process created the directory first
-			if ((err as NodeJS.ErrnoException).code !== "EEXIST") {
-				throw err;
-			}
-		}
-
-		// Generate unique filename using UUID for parallel safety
-		const filename = `prompt-${randomUUID()}.md`;
-		const filepath = join(TEMP_DIR, filename);
-
-		// Write prompt to file preserving all formatting
-		writeFileSync(filepath, prompt, "utf-8");
-		logDebug(`[Copilot] Created prompt file: ${filepath}`);
-
-		return filepath;
+	protected buildArgs(prompt: string, _workDir: string, options?: EngineOptions): string[] {
+		const { args } = this.buildArgsInternal(prompt, options);
+		return args;
 	}
 
-	/**
-	 * Clean up a temporary prompt file
-	 */
-	private cleanupPromptFile(filepath: string): void {
-		try {
-			unlinkSync(filepath);
-			logDebug(`[Copilot] Cleaned up prompt file: ${filepath}`);
-		} catch (err) {
-			// Ignore cleanup errors - file may already be deleted
-			logDebug(`[Copilot] Failed to cleanup prompt file: ${filepath}`);
-		}
+	protected useStdin(): boolean {
+		return false;
 	}
 
-	/**
-	 * Build command arguments for Copilot CLI
-	 * @param promptFilePath Path to the temporary file containing the prompt
-	 */
-	private buildArgs(promptFilePath: string, options?: EngineOptions): { args: string[] } {
+	private buildArgsInternal(
+		prompt: string,
+		options?: EngineOptions,
+	): { args: string[]; stdinContent?: string } {
 		const args: string[] = [];
 
-		// Use --yolo for non-interactive mode (allows all tools and paths)
+		// Add --yolo flag for non-interactive mode
 		args.push("--yolo");
 
-		// Pass prompt file path (Copilot CLI accepts file paths for -p)
-		// NOTE: This is an undocumented feature of Copilot CLI but works reliably
-		// since copilot is smart enough to detect file paths and read the content.
-		// Do NOT quote the path - arguments are passed directly without shell interpretation
-		// on non-Windows platforms, so quotes would become literal characters in the path.
-		args.push("-p", promptFilePath);
+		// Copilot uses -p flag for prompt file
+		args.push("-p");
+
+		// Create temp file with prompt content
+		const tempFile = this.createTempFile(prompt);
+		args.push(tempFile);
 
 		if (options?.modelOverride) {
 			args.push("--model", options.modelOverride);
@@ -98,66 +54,121 @@ export class CopilotEngine extends BaseAIEngine {
 		return { args };
 	}
 
-	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
-		// Create temporary prompt file to preserve markdown formatting
-		const promptFilePath = this.createPromptFile(prompt);
+	private createTempFile(prompt: string): string {
+		this.cleanupOldTempFiles();
+
+		// Ensure temp directory exists
+		if (!existsSync(this.tempDir)) {
+			mkdirSync(this.tempDir, { recursive: true });
+		}
+
+		// Create unique filename
+		const uuid = randomUUID();
+		const tempFile = join(this.tempDir, `prompt-${uuid}.md`);
+
+		// Write prompt to file
+		writeFileSync(tempFile, prompt, "utf-8");
+
+		return tempFile;
+	}
+
+	private cleanupTempFile(filePath: string | undefined): void {
+		if (!filePath) return;
+		try {
+			if (existsSync(filePath)) {
+				rmSync(filePath);
+			}
+		} catch (err) {
+			logDebug(`Failed to cleanup temp file: ${err}`);
+		}
+	}
 
+	private getAuthenticationError(output: string): string | null {
+		const firstLine = output.split("\n")[0]?.trim().toLowerCase() || "";
+		if (firstLine.startsWith("not authenticated") || firstLine.startsWith("no authentication")) {
+			return "GitHub Copilot is not authenticated. Please run `gh auth login` or check your Copilot subscription.";
+		}
+		return null;
+	}
+
+	/**
+	 * Cleanup temp files older than 1 hour to prevent disk space exhaustion
+	 */
+	private cleanupOldTempFiles(): void {
 		try {
-			const { args } = this.buildArgs(promptFilePath, options);
+			if (!existsSync(this.tempDir)) return;
+			const files = readdirSync(this.tempDir);
+			const oneHourAgo = Date.now() - 60 * 60 * 1000;
+			for (const file of files) {
+				const filePath = join(this.tempDir, file);
+				try {
+					const stats = statSync(filePath);
+					if (stats.mtimeMs < oneHourAgo) {
+						rmSync(filePath);
+					}
+				} catch {
+					// File may have been deleted, skip
+				}
+			}
+		} catch (err) {
+			logDebug(`Failed to cleanup old temp files: ${err}`);
+		}
+	}
 
-			// Debug logging
-			logDebug(`[Copilot] Working directory: ${workDir}`);
-			logDebug(`[Copilot] Prompt length: ${prompt.length} chars`);
-			logDebug(`[Copilot] Prompt preview: ${prompt.substring(0, 200)}...`);
-			logDebug(`[Copilot] Prompt file: ${promptFilePath}`);
-			logDebug(`[Copilot] Command: ${this.cliCommand} ${args.join(" ")}`);
+	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+		let tempFile: string | undefined;
 
-			const startTime = Date.now();
-			const { stdout, stderr, exitCode } = await execCommand(this.cliCommand, args, workDir);
+		const startTime = Date.now();
+		try {
+			const { args } = this.buildArgsInternal(prompt, options);
+			const pIndex = args.indexOf("-p");
+			tempFile = pIndex >= 0 && pIndex < args.length - 1 ? args[pIndex + 1] : undefined;
+
+			const { stdout, stderr, exitCode } = await execCommand(
+				this.cliCommand,
+				args,
+				workDir,
+				this.getEnv(options),
+			);
 			const durationMs = Date.now() - startTime;
 
 			const output = stdout + stderr;
 
-			// Debug logging
-			logDebug(`[Copilot] Exit code: ${exitCode}`);
-			logDebug(`[Copilot] Duration: ${durationMs}ms`);
-			logDebug(`[Copilot] Output length: ${output.length} chars`);
-			logDebug(`[Copilot] Output preview: ${output.substring(0, 500)}...`);
-
-			// Check for JSON errors (from base)
-			const jsonError = checkForErrors(output);
-			if (jsonError) {
+			// Check for authentication errors first (check first line only)
+			const authError = this.getAuthenticationError(output);
+			if (authError) {
 				return {
 					success: false,
 					response: "",
 					inputTokens: 0,
 					outputTokens: 0,
-					error: jsonError,
+					error: authError,
 				};
 			}
 
-			// Check for Copilot-specific errors (plain text)
-			const copilotError = this.checkCopilotErrors(output);
-			if (copilotError) {
+			// Check for errors
+			const error = checkForErrors(output);
+			if (error) {
 				return {
 					success: false,
 					response: "",
 					inputTokens: 0,
 					outputTokens: 0,
-					error: copilotError,
+					error,
 				};
 			}
 
-			// Parse Copilot output - extract response and token counts
-			const { response, inputTokens, outputTokens } = this.parseOutput(output);
+			// Parse Copilot output - extract response from output
+			const response = this.parseOutput(output);
+			const tokenCounts = this.parseTokenCounts(output);
 
 			// If command failed with non-zero exit code, provide a meaningful error
 			if (exitCode !== 0) {
 				return {
 					success: false,
 					response,
-					inputTokens,
-					outputTokens,
+					inputTokens: tokenCounts.input,
+					outputTokens: tokenCounts.output,
 					error: formatCommandError(exitCode, output),
 				};
 			}
@@ -165,105 +176,46 @@ export class CopilotEngine extends BaseAIEngine {
 			return {
 				success: true,
 				response,
-				inputTokens,
-				outputTokens,
+				inputTokens: tokenCounts.input,
+				outputTokens: tokenCounts.output,
 				cost: durationMs > 0 ? `duration:${durationMs}` : undefined,
 			};
 		} finally {
-			// Always clean up the temporary prompt file
-			this.cleanupPromptFile(promptFilePath);
+			// Always clean up temp file
+			this.cleanupTempFile(tempFile);
 		}
 	}
 
-	/**
-	 * Check for Copilot-specific errors in output
-	 *
-	 * IMPORTANT: We are intentionally very conservative with error detection here.
-	 * We don't have documentation on Copilot CLI's error response formats, exit codes,
-	 * or error messages. The response content might contain strings like "network error",
-	 * "error:", "rate limit", etc. as part of valid output (e.g., test results, error
-	 * handling discussions, feedback about code). We only detect errors that we have
-	 * actually observed in practice.
-	 *
-	 * Currently known error: Authentication errors (observed when not logged in)
-	 */
-	private checkCopilotErrors(output: string): string | null {
-		const trimmed = output.trim();
-		const trimmedLower = trimmed.toLowerCase();
-
-		// Authentication errors - the only error format we've actually observed
-		// When not authenticated, Copilot CLI outputs a message starting with these phrases
-		if (
-			trimmedLower.startsWith("no authentication") ||
-			trimmedLower.startsWith("not authenticated") ||
-			trimmedLower.startsWith("authentication required") ||
-			trimmedLower.startsWith("please authenticate")
-		) {
-			return "GitHub Copilot CLI is not authenticated. Run 'copilot' and use '/login' to authenticate, or set COPILOT_GITHUB_TOKEN environment variable.";
-		}
-
-		// Note: We intentionally do NOT check for:
-		// - "rate limit" / "too many requests" - unknown format, could appear in response content
-		// - "network error" / "connection refused" - unknown format, could appear in response content
-		// - "error:" prefix - too generic, could appear in response content
-		// - Non-zero exit codes - we don't know if Copilot uses them for errors
-		//
-		// If we encounter other error patterns in practice, we can add them here.
-
-		return null;
-	}
-
-	/**
-	 * Parse a token count string like "17.5k" or "73" into a number
-	 */
-	private parseTokenCount(str: string): number {
-		const trimmed = str.trim().toLowerCase();
-		if (trimmed.endsWith("k")) {
-			const value = Number.parseFloat(trimmed.slice(0, -1));
-			return Number.isNaN(value) ? 0 : Math.round(value * 1000);
-		}
-		if (trimmed.endsWith("m")) {
-			const value = Number.parseFloat(trimmed.slice(0, -1));
-			return Number.isNaN(value) ? 0 : Math.round(value * 1000000);
+	private parseTokenCounts(output: string): { input: number; output: number } {
+		const lines = output.split("\n");
+		for (const line of lines) {
+			// Match pattern: "model-name X in, Y out, Z cached" or variations
+			// Using atomic grouping to prevent ReDoS - \d+\.?\d* matches numbers without catastrophic backtracking
+			const match = line.match(/(\d+\.?\d*)([km]?)\s+in,\s+(\d+\.?\d*)([km]?)\s+out/i);
+			if (match) {
+				let input = Number.parseFloat(match[1]);
+				let output = Number.parseFloat(match[3]);
+
+				// Handle k/m suffixes
+				if (match[2] === "k") input *= 1000;
+				if (match[2] === "m") input *= 1000000;
+				if (match[4] === "k") output *= 1000;
+				if (match[4] === "m") output *= 1000000;
+
+				return { input: Math.round(input), output: Math.round(output) };
+			}
 		}
-		const value = Number.parseFloat(trimmed);
-		return Number.isNaN(value) ? 0 : Math.round(value);
+		return { input: 0, output: 0 };
 	}
 
-	/**
-	 * Extract token counts from Copilot CLI output
-	 * Format: "model-name       17.5k in, 73 out, 11.8k cached (Est. 1 Premium request)"
-	 */
-	private parseTokenCounts(output: string): { inputTokens: number; outputTokens: number } {
-		// Look for the token count line in the "Breakdown by AI model" section
-		// Pattern: number followed by "in," and number followed by "out,"
-		const tokenMatch = output.match(/(\d+(?:\.\d+)?[km]?)\s+in,\s+(\d+(?:\.\d+)?[km]?)\s+out/i);
-
-		if (tokenMatch) {
-			const inputTokens = this.parseTokenCount(tokenMatch[1]);
-			const outputTokens = this.parseTokenCount(tokenMatch[2]);
-			logDebug(`[Copilot] Parsed tokens: ${inputTokens} in, ${outputTokens} out`);
-			return { inputTokens, outputTokens };
-		}
-
-		return { inputTokens: 0, outputTokens: 0 };
-	}
-
-	private parseOutput(output: string): {
-		response: string;
-		inputTokens: number;
-		outputTokens: number;
-	} {
-		// Extract token counts first
-		const { inputTokens, outputTokens } = this.parseTokenCounts(output);
-
+	private parseOutput(output: string): string {
 		// Copilot CLI may output text responses
 		// Extract the meaningful response, filtering out control characters and prompts
 		// Note: These filter patterns are specific to current Copilot CLI behavior
 		// and may need updates if the CLI output format changes
 		const lines = output.split("\n").filter(Boolean);
 
-		// Filter out empty lines, CLI artifacts, and stats section
+		// Filter out empty lines and common CLI artifacts
 		const meaningfulLines = lines.filter((line) => {
 			const trimmed = line.trim();
 			return (
@@ -272,18 +224,149 @@ export class CopilotEngine extends BaseAIEngine {
 				!trimmed.startsWith("❯") && // Command prompts
 				!trimmed.includes("Thinking...") && // Status messages
 				!trimmed.includes("Working on it...") && // Status messages
-				!trimmed.startsWith("Total usage") && // Stats section
-				!trimmed.startsWith("API time") && // Stats section
-				!trimmed.startsWith("Total session") && // Stats section
-				!trimmed.startsWith("Total code") && // Stats section
-				!trimmed.startsWith("Breakdown by") && // Stats section header
-				!trimmed.match(
-					/^\s*\S+\s+\d+(?:\.\d+)?[km]?\s+in,\s+\d+(?:\.\d+)?[km]?\s+out,\s+\d+(?:\.\d+)?[km]?\s+cached/,
-				) // Token count lines (model stats: "model-name 17.5k in, 73 out, 11.8k cached")
+				!trimmed.match(/^\S+\s+\d+(\.\d+)?[km]?\s+in,\s+\d+(\.\d+)?[km]?\s+out/i) && // Token count lines
+				!trimmed.match(/^Total usage:\s*\d+\s*tokens/i) // Total usage lines
 			);
 		});
 
-		const response = meaningfulLines.join("\n").trim() || "Task completed";
-		return { response, inputTokens, outputTokens };
+		return meaningfulLines.join("\n") || "Task completed";
+	}
+
+	async executeStreaming(
+		prompt: string,
+		workDir: string,
+		onProgress: ProgressCallback,
+		options?: EngineOptions,
+	): Promise<AIResult> {
+		let tempFile: string | undefined;
+
+		const outputLines: string[] = [];
+		const startTime = Date.now();
+
+		try {
+			const { args } = this.buildArgsInternal(prompt, options);
+			const pIndex = args.indexOf("-p");
+			tempFile = pIndex >= 0 && pIndex < args.length - 1 ? args[pIndex + 1] : undefined;
+
+			const { exitCode } = await execCommandStreaming(
+				this.cliCommand,
+				args,
+				workDir,
+				(line) => {
+					outputLines.push(line);
+
+					// Detect and report step changes
+					const step = detectStepFromOutput(line);
+					if (step) {
+						onProgress(step);
+					}
+				},
+				this.getEnv(options),
+			);
+
+			const durationMs = Date.now() - startTime;
+			const output = outputLines.join("\n");
+
+			const authError = this.getAuthenticationError(output);
+			if (authError) {
+				return {
+					success: false,
+					response: "",
+					inputTokens: 0,
+					outputTokens: 0,
+					error: authError,
+				};
+			}
+
+			// Check for errors
+			const error = checkForErrors(output);
+			if (error) {
+				return {
+					success: false,
+					response: "",
+					inputTokens: 0,
+					outputTokens: 0,
+					error,
+				};
+			}
+
+			// Parse Copilot output
+			const response = this.parseOutput(output);
+			const tokenCounts = this.parseTokenCounts(output);
+
+			// If command failed with non-zero exit code, provide a meaningful error
+			if (exitCode !== 0) {
+				return {
+					success: false,
+					response,
+					inputTokens: tokenCounts.input,
+					outputTokens: tokenCounts.output,
+					error: formatCommandError(exitCode, output),
+				};
+			}
+
+			return {
+				success: true,
+				response,
+				inputTokens: tokenCounts.input,
+				outputTokens: tokenCounts.output,
+				cost: durationMs > 0 ? `duration:${durationMs}` : undefined,
+			};
+		} finally {
+			// Always clean up temp file
+			this.cleanupTempFile(tempFile);
+		}
+	}
+
+	protected processCliResult(
+		stdout: string,
+		stderr: string,
+		exitCode: number,
+		_workDir: string,
+	): AIResult {
+		const output = stdout + stderr;
+		const response = this.parseOutput(output);
+		const tokenCounts = this.parseTokenCounts(output);
+
+		// Check for auth errors first (check first line only)
+		const authError = this.getAuthenticationError(output);
+		if (authError) {
+			return {
+				success: false,
+				response,
+				inputTokens: tokenCounts.input,
+				outputTokens: tokenCounts.output,
+				error: authError,
+			};
+		}
+
+		// Check for CLI errors
+		const error = checkForErrors(output);
+		if (error) {
+			return {
+				success: false,
+				response,
+				inputTokens: tokenCounts.input,
+				outputTokens: tokenCounts.output,
+				error,
+			};
+		}
+
+		if (exitCode !== 0) {
+			return {
+				success: false,
+				response,
+				inputTokens: tokenCounts.input,
+				outputTokens: tokenCounts.output,
+				error: formatCommandError(exitCode, output),
+			};
+		}
+
+		return {
+			success: true,
+			response,
+			inputTokens: tokenCounts.input,
+			outputTokens: tokenCounts.output,
+		};
 	}
 }
diff --git a/cli/src/engines/cursor.ts b/cli/src/engines/cursor.ts
index 58133a51..e525fe0e 100644
--- a/cli/src/engines/cursor.ts
+++ b/cli/src/engines/cursor.ts
@@ -1,15 +1,8 @@
-import {
-	BaseAIEngine,
-	checkForErrors,
-	detectStepFromOutput,
-	execCommand,
-	execCommandStreaming,
-	formatCommandError,
-} from "./base.ts";
+import { logDebug } from "../ui/logger.ts";
+import { BaseAIEngine, checkForErrors, execCommand, execCommandStreaming } from "./base.ts";
+import { detectStepFromOutput, formatCommandError } from "./parsers.ts";
 import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts";
 
-const isWindows = process.platform === "win32";
-
 /**
  * Cursor Agent AI Engine
  */
@@ -17,7 +10,15 @@ export class CursorEngine extends BaseAIEngine {
 	name = "Cursor Agent";
 	cliCommand = "agent";
 
-	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+	protected buildArgs(prompt: string, _workDir: string, options?: EngineOptions): string[] {
+		const { args } = this.buildArgsInternal(prompt, options);
+		return args;
+	}
+
+	private buildArgsInternal(
+		prompt: string,
+		options?: EngineOptions,
+	): { args: string[]; stdinContent?: string } {
 		const args = ["--print", "--force", "--output-format", "stream-json"];
 		if (options?.modelOverride) {
 			args.push("--model", options.modelOverride);
@@ -27,19 +28,17 @@ export class CursorEngine extends BaseAIEngine {
 			args.push(...options.engineArgs);
 		}
 
-		// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues
-		let stdinContent: string | undefined;
-		if (isWindows) {
-			stdinContent = prompt;
-		} else {
-			args.push(prompt);
-		}
+		return this.buildArgsWithStdin(args, prompt);
+	}
+
+	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+		const { args, stdinContent } = this.buildArgsInternal(prompt, options);
 
 		const { stdout, stderr, exitCode } = await execCommand(
 			this.cliCommand,
 			args,
 			workDir,
-			undefined,
+			this.getEnv(options),
 			stdinContent,
 		);
 
@@ -106,8 +105,8 @@ export class CursorEngine extends BaseAIEngine {
 						response = content;
 					}
 				}
-			} catch {
-				// Ignore non-JSON lines
+			} catch (_err) {
+				logDebug(`Cursor: Failed to parse JSON line: ${_err}`);
 			}
 		}
 
@@ -120,22 +119,7 @@ export class CursorEngine extends BaseAIEngine {
 		onProgress: ProgressCallback,
 		options?: EngineOptions,
 	): Promise<AIResult> {
-		const args = ["--print", "--force", "--output-format", "stream-json"];
-		if (options?.modelOverride) {
-			args.push("--model", options.modelOverride);
-		}
-		// Add any additional engine-specific arguments
-		if (options?.engineArgs && options.engineArgs.length > 0) {
-			args.push(...options.engineArgs);
-		}
-
-		// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues
-		let stdinContent: string | undefined;
-		if (isWindows) {
-			stdinContent = prompt;
-		} else {
-			args.push(prompt);
-		}
+		const { args, stdinContent } = this.buildArgsInternal(prompt, options);
 
 		const outputLines: string[] = [];
 
@@ -152,7 +136,7 @@ export class CursorEngine extends BaseAIEngine {
 					onProgress(step);
 				}
 			},
-			undefined,
+			this.getEnv(options),
 			stdinContent,
 		);
 
@@ -192,4 +176,34 @@ export class CursorEngine extends BaseAIEngine {
 			cost: durationMs > 0 ? `duration:${durationMs}` : undefined,
 		};
 	}
+
+	protected processCliResult(
+		stdout: string,
+		stderr: string,
+		exitCode: number,
+		_workDir: string,
+	): AIResult {
+		const output = stdout + stderr;
+		const error = checkForErrors(output);
+		if (error) {
+			return { success: false, response: "", inputTokens: 0, outputTokens: 0, error };
+		}
+		const { response, durationMs } = this.parseOutput(output);
+		if (exitCode !== 0) {
+			return {
+				success: false,
+				response,
+				inputTokens: 0,
+				outputTokens: 0,
+				error: formatCommandError(exitCode, output),
+			};
+		}
+		return {
+			success: true,
+			response,
+			inputTokens: 0,
+			outputTokens: 0,
+			cost: durationMs > 0 ? `duration:${durationMs}` : undefined,
+		};
+	}
 }
diff --git a/cli/src/engines/droid.ts b/cli/src/engines/droid.ts
index 0247e098..d4db4882 100644
--- a/cli/src/engines/droid.ts
+++ b/cli/src/engines/droid.ts
@@ -1,15 +1,8 @@
-import {
-	BaseAIEngine,
-	checkForErrors,
-	detectStepFromOutput,
-	execCommand,
-	execCommandStreaming,
-	formatCommandError,
-} from "./base.ts";
+import { logDebug } from "../ui/logger.ts";
+import { BaseAIEngine, checkForErrors, execCommand, execCommandStreaming } from "./base.ts";
+import { detectStepFromOutput, formatCommandError } from "./parsers.ts";
 import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts";
 
-const isWindows = process.platform === "win32";
-
 /**
  * Factory Droid AI Engine
  */
@@ -17,7 +10,15 @@ export class DroidEngine extends BaseAIEngine {
 	name = "Factory Droid";
 	cliCommand = "droid";
 
-	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+	protected buildArgs(_prompt: string, _workDir: string, options?: EngineOptions): string[] {
+		const { args } = this.buildArgsInternal(_prompt, options);
+		return args;
+	}
+
+	private buildArgsInternal(
+		prompt: string,
+		options?: EngineOptions,
+	): { args: string[]; stdinContent?: string } {
 		const args = ["exec", "--output-format", "stream-json", "--auto", "medium"];
 		if (options?.modelOverride) {
 			args.push("--model", options.modelOverride);
@@ -27,19 +28,17 @@ export class DroidEngine extends BaseAIEngine {
 			args.push(...options.engineArgs);
 		}
 
-		// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues
-		let stdinContent: string | undefined;
-		if (isWindows) {
-			stdinContent = prompt;
-		} else {
-			args.push(prompt);
-		}
+		return this.buildArgsWithStdin(args, prompt);
+	}
+
+	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+		const { args, stdinContent } = this.buildArgsInternal(prompt, options);
 
 		const { stdout, stderr, exitCode } = await execCommand(
 			this.cliCommand,
 			args,
 			workDir,
-			undefined,
+			this.getEnv(options),
 			stdinContent,
 		);
 
@@ -96,8 +95,8 @@ export class DroidEngine extends BaseAIEngine {
 						durationMs = parsed.durationMs;
 					}
 				}
-			} catch {
-				// Ignore non-JSON lines
+			} catch (_err) {
+				logDebug(`Droid: Failed to parse JSON line: ${_err}`);
 			}
 		}
 
@@ -110,22 +109,7 @@ export class DroidEngine extends BaseAIEngine {
 		onProgress: ProgressCallback,
 		options?: EngineOptions,
 	): Promise<AIResult> {
-		const args = ["exec", "--output-format", "stream-json", "--auto", "medium"];
-		if (options?.modelOverride) {
-			args.push("--model", options.modelOverride);
-		}
-		// Add any additional engine-specific arguments
-		if (options?.engineArgs && options.engineArgs.length > 0) {
-			args.push(...options.engineArgs);
-		}
-
-		// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues
-		let stdinContent: string | undefined;
-		if (isWindows) {
-			stdinContent = prompt;
-		} else {
-			args.push(prompt);
-		}
+		const { args, stdinContent } = this.buildArgsInternal(prompt, options);
 
 		const outputLines: string[] = [];
 
@@ -142,7 +126,7 @@ export class DroidEngine extends BaseAIEngine {
 					onProgress(step);
 				}
 			},
-			undefined,
+			this.getEnv(options),
 			stdinContent,
 		);
 
@@ -182,4 +166,43 @@ export class DroidEngine extends BaseAIEngine {
 			cost: durationMs > 0 ? `duration:${durationMs}` : undefined,
 		};
 	}
+
+	protected processCliResult(
+		stdout: string,
+		stderr: string,
+		exitCode: number,
+		_workDir: string,
+	): AIResult {
+		const output = stdout + stderr;
+		const error = checkForErrors(output);
+		if (error) {
+			return {
+				success: false,
+				response: "",
+				inputTokens: 0,
+				outputTokens: 0,
+				error,
+			};
+		}
+
+		const { response, durationMs } = this.parseOutput(output);
+
+		if (exitCode !== 0) {
+			return {
+				success: false,
+				response,
+				inputTokens: 0,
+				outputTokens: 0,
+				error: formatCommandError(exitCode, output),
+			};
+		}
+
+		return {
+			success: true,
+			response,
+			inputTokens: 0,
+			outputTokens: 0,
+			cost: durationMs > 0 ? `duration:${durationMs}` : undefined,
+		};
+	}
 }
diff --git a/cli/src/engines/executor.ts b/cli/src/engines/executor.ts
new file mode 100644
index 00000000..07f3a804
--- /dev/null
+++ b/cli/src/engines/executor.ts
@@ -0,0 +1,385 @@
+import { spawn, spawnSync } from "node:child_process";
+import { Readable } from "node:stream";
+import { logDebug } from "../ui/logger.ts";
+import { registerProcess } from "../utils/cleanup.ts";
+import { validateCommandAndArgs } from "./validation.ts";
+
+// Check if running in Bun
+const isBun = typeof Bun !== "undefined";
+const isWindows = process.platform === "win32";
+const DEBUG = process.env.RALPHY_DEBUG === "true";
+
+function debugLog(...args: unknown[]): void {
+	if (DEBUG || (globalThis as { verboseMode?: boolean }).verboseMode === true) {
+		logDebug(args.map((a) => String(a)).join(" "));
+	}
+}
+
+/**
+ * Command execution result
+ */
+export interface ExecutionResult {
+	stdout: string;
+	stderr: string;
+	exitCode: number;
+}
+
+/**
+ * Check if a command is available in PATH
+ */
+export async function commandExists(command: string): Promise<boolean> {
+	debugLog(`commandExists: Checking for '${command}'...`);
+	try {
+		const checkCommand = isWindows ? "where" : "which";
+		debugLog(`commandExists: Using checkCommand='${checkCommand}', isBun=${isBun}`);
+
+		if (isBun) {
+			debugLog("commandExists: Using Bun.spawn for check");
+			const proc = Bun.spawn([checkCommand, command], {
+				stdout: "pipe",
+				stderr: "pipe",
+			});
+			const exitCode = await proc.exited;
+			debugLog(`commandExists: Bun.spawn exited with code ${exitCode}`);
+			return exitCode === 0;
+		}
+
+		// Node.js fallback
+		debugLog("commandExists: Using Node.js spawnSync");
+		const result = spawnSync(checkCommand, [command], { stdio: "pipe" });
+		debugLog(`commandExists: spawnSync status=${result.status}`);
+		return result.status === 0;
+	} catch (err) {
+		debugLog(`commandExists: Exception - ${err}`);
+		return false;
+	}
+}
+
+/**
+ * Execute a command and return stdout
+ * @param stdinContent - Optional content to pass via stdin
+ */
+export async function execCommand(
+	command: string,
+	args: string[],
+	workDir: string,
+	env?: Record<string, string>,
+	stdinContent?: string,
+): Promise<ExecutionResult> {
+	debugLog(`execCommand: ${command} ${args.join(" ")}`);
+	debugLog(`execCommand: workDir=${workDir}, hasEnv=${!!env}, hasStdin=${!!stdinContent}`);
+
+	// Validate command and arguments for security (applies to both Bun and Node.js)
+	const validation = validateCommandAndArgs(command, args);
+	if (!validation.valid || !validation.command || !validation.args) {
+		return Promise.resolve({
+			stdout: "",
+			stderr: `Error: ${validation.error}`,
+			exitCode: 1,
+		});
+	}
+
+	// Use validated values
+	const validatedCommand = validation.command;
+	const validatedArgs = validation.args;
+
+	if (isBun) {
+		return execWithBun(validatedCommand, validatedArgs, workDir, env, stdinContent);
+	}
+
+	return execWithNode(validatedCommand, validatedArgs, workDir, env, stdinContent);
+}
+
+/**
+ * Execute command using Bun runtime
+ */
+async function execWithBun(
+	command: string,
+	args: string[],
+	workDir: string,
+	env?: Record<string, string>,
+	stdinContent?: string,
+): Promise<ExecutionResult> {
+	const spawnArgs = [command, ...args];
+	debugLog(`execCommand: spawning with Bun, spawnArgs=${spawnArgs.join(" ")}`);
+
+	const proc = Bun.spawn(spawnArgs, {
+		cwd: workDir,
+		stdin: stdinContent ? "pipe" : "ignore",
+		stdout: "pipe",
+		stderr: "pipe",
+		env: { ...process.env, ...(env || {}) },
+	});
+
+	debugLog(
+		`execCommand: process spawned, PID=${proc.pid}, stdinContent length=${stdinContent?.length || 0}`,
+	);
+
+	// Write stdin content if provided
+	if (stdinContent && proc.stdin) {
+		proc.stdin.write(stdinContent);
+		proc.stdin.end();
+		debugLog("execCommand: stdin written and closed");
+	}
+
+	const [stdout, stderr, exitCode] = await Promise.all([
+		new Response(proc.stdout).text(),
+		new Response(proc.stderr).text(),
+		proc.exited,
+	]);
+
+	debugLog(
+		`execCommand: process exited, exitCode=${exitCode}, stdout=${stdout.length} chars, stderr=${stderr.length} chars`,
+	);
+
+	return { stdout, stderr, exitCode };
+}
+
+/**
+ * Execute command using Node.js runtime
+ */
+function execWithNode(
+	command: string,
+	args: string[],
+	workDir: string,
+	env?: Record<string, string>,
+	stdinContent?: string,
+): Promise<ExecutionResult> {
+	return new Promise((resolve) => {
+		const proc = spawn(command, args, {
+			cwd: workDir,
+			env: { ...process.env, ...env },
+			stdio: [stdinContent ? "pipe" : "ignore", "pipe", "pipe"],
+			shell: false, // Disable shell to prevent command injection
+		});
+
+		// Track process for cleanup
+		const unregister = registerProcess(proc);
+
+		// Write stdin content if provided
+		if (stdinContent && proc.stdin) {
+			proc.stdin.write(stdinContent);
+			proc.stdin.end();
+		}
+
+		let stdout = "";
+		let stderr = "";
+
+		proc.stdout?.on("data", (data) => {
+			stdout += data.toString();
+		});
+
+		proc.stderr?.on("data", (data) => {
+			stderr += data.toString();
+		});
+
+		proc.on("close", (code) => {
+			unregister();
+			resolve({
+				stdout,
+				stderr,
+				exitCode: code ?? 1,
+			});
+		});
+
+		proc.on("error", (err) => {
+			unregister();
+			const errorMessage = err.message || String(err);
+			const mergedStderr = stderr ? `${stderr}\n${errorMessage}` : errorMessage;
+			resolve({
+				stdout,
+				stderr: mergedStderr,
+				exitCode: 1,
+			});
+		});
+	});
+}
+
+/**
+ * Streaming execution result
+ */
+export interface StreamingExecutionResult {
+	process: import("./types.ts").ChildProcess;
+	stdout: ReadableStream<Uint8Array> | null;
+	stderr: ReadableStream<Uint8Array> | null;
+}
+
+/**
+ * Execute a command with streaming output and callback
+ * Legacy API for backward compatibility with existing engines
+ */
+export async function execCommandStreaming(
+	command: string,
+	args: string[],
+	workDir: string,
+	onLine: (line: string) => void,
+	env?: Record<string, string>,
+	stdinContent?: string,
+): Promise<{ exitCode: number }> {
+	debugLog(`execCommandStreaming (legacy): ${command} ${args.join(" ")}`);
+
+	const {
+		process: childProcess,
+		stdout,
+		stderr,
+	} = await execCommandStreamingNew(command, args, workDir, env, stdinContent);
+
+	const timeout = Number(globalThis.process.env.RALPHY_EXECUTION_TIMEOUT || 30 * 60 * 1000);
+	let timedOut = false;
+
+	const timeoutId = setTimeout(() => {
+		timedOut = true;
+		try {
+			childProcess.kill("SIGTERM");
+		} catch {
+			// no-op
+		}
+
+		setTimeout(() => {
+			if (!timedOut) return;
+			try {
+				childProcess.kill("SIGKILL");
+			} catch {
+				// no-op
+			}
+		}, 3000);
+	}, timeout);
+
+	const readStreamLines = async (stream: ReadableStream<Uint8Array> | null): Promise<void> => {
+		if (!stream) return;
+		const reader = stream.getReader();
+		const decoder = new TextDecoder();
+		let buffer = "";
+
+		try {
+			while (true) {
+				const { done, value } = await reader.read();
+				if (done) break;
+				buffer += decoder.decode(value, { stream: true });
+				const lines = buffer.split("\n");
+				buffer = lines.pop() ?? "";
+				for (const line of lines) {
+					if (line.trim()) {
+						onLine(line);
+					}
+				}
+			}
+
+			buffer += decoder.decode();
+			if (buffer.trim()) {
+				onLine(buffer);
+			}
+		} finally {
+			reader.releaseLock();
+		}
+	};
+
+	const exitPromise = childProcess.exited
+		? childProcess.exited
+		: new Promise<number>((resolve) => {
+				(childProcess as import("node:child_process").ChildProcess).once("close", (code) => {
+					resolve(code ?? 1);
+				});
+				(childProcess as import("node:child_process").ChildProcess).once("error", () => {
+					resolve(1);
+				});
+			});
+
+	const [exitCode] = await Promise.all([
+		exitPromise,
+		readStreamLines(stdout),
+		readStreamLines(stderr),
+	]);
+	clearTimeout(timeoutId);
+	const didTimeOut = timedOut;
+	timedOut = false;
+
+	return { exitCode: didTimeOut ? 1 : (exitCode ?? 1) };
+}
+
+/**
+ * Execute a command with streaming output (returns streams)
+ * New API for use with BaseAIEngine streaming
+ */
+export async function execCommandStreamingNew(
+	command: string,
+	args: string[],
+	workDir: string,
+	env?: Record<string, string>,
+	stdinContent?: string,
+): Promise<StreamingExecutionResult> {
+	debugLog(`execCommandStreamingNew: ${command} ${args.join(" ")}`);
+
+	const validation = validateCommandAndArgs(command, args);
+	if (!validation.valid || !validation.command || !validation.args) {
+		throw new Error(validation.error || "Command validation failed");
+	}
+
+	const validatedCommand = validation.command;
+	const validatedArgs = validation.args;
+
+	if (isBun) {
+		const spawnArgs = [validatedCommand, ...validatedArgs];
+		const proc = Bun.spawn(spawnArgs, {
+			cwd: workDir,
+			stdin: stdinContent ? "pipe" : "ignore",
+			stdout: "pipe",
+			stderr: "pipe",
+			env: { ...process.env, ...(env || {}) },
+		});
+
+		if (stdinContent && proc.stdin) {
+			proc.stdin.write(stdinContent);
+			proc.stdin.end();
+		}
+
+		return {
+			process: proc as unknown as import("./types.ts").ChildProcess,
+			stdout: proc.stdout,
+			stderr: proc.stderr,
+		};
+	}
+
+	// Node.js fallback
+
+	const proc = spawn(validatedCommand, validatedArgs, {
+		cwd: workDir,
+		env: { ...process.env, ...env },
+		stdio: [stdinContent ? "pipe" : "ignore", "pipe", "pipe"],
+		shell: false,
+	});
+	const unregister = registerProcess(proc);
+	let cleaned = false;
+	const unregisterOnce = () => {
+		if (cleaned) return;
+		cleaned = true;
+		unregister();
+	};
+	proc.once("close", unregisterOnce);
+	proc.once("error", unregisterOnce);
+
+	if (stdinContent && proc.stdin) {
+		proc.stdin.write(stdinContent);
+		proc.stdin.end();
+	}
+
+	const exited = new Promise<number>((resolve) => {
+		proc.once("close", (code) => resolve(code ?? 1));
+		proc.once("error", () => resolve(1));
+	});
+
+	const processWithExit = Object.assign(proc, { exited }) as import("./types.ts").ChildProcess;
+
+	const stdout = proc.stdout
+		? (Readable.toWeb(proc.stdout) as unknown as ReadableStream<Uint8Array>)
+		: null;
+	const stderr = proc.stderr
+		? (Readable.toWeb(proc.stderr) as unknown as ReadableStream<Uint8Array>)
+		: null;
+
+	return {
+		process: processWithExit,
+		stdout,
+		stderr,
+	};
+}
diff --git a/cli/src/engines/gemini.ts b/cli/src/engines/gemini.ts
index c9b98cd8..b860673b 100644
--- a/cli/src/engines/gemini.ts
+++ b/cli/src/engines/gemini.ts
@@ -9,8 +9,6 @@ import {
 } from "./base.ts";
 import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts";
 
-const isWindows = process.platform === "win32";
-
 /**
  * Gemini CLI AI Engine
  * https://github.com/google-gemini/gemini-cli
@@ -19,7 +17,10 @@ export class GeminiEngine extends BaseAIEngine {
 	name = "Gemini CLI";
 	cliCommand = "gemini";
 
-	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+	/**
+	 * Build CLI arguments for Gemini
+	 */
+	protected buildArgs(prompt: string, workDir: string, options?: EngineOptions): string[] {
 		const args = ["--output-format", "stream-json", "--yolo"];
 		if (options?.modelOverride) {
 			args.push("--model", options.modelOverride);
@@ -28,21 +29,32 @@ export class GeminiEngine extends BaseAIEngine {
 		if (options?.engineArgs && options.engineArgs.length > 0) {
 			args.push(...options.engineArgs);
 		}
+		// Pass prompt via stdin
+		args.push("-p");
+		return args;
+	}
 
-		// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content
-		let stdinContent: string | undefined;
-		if (isWindows) {
-			args.push("-p");
-			stdinContent = prompt;
-		} else {
-			args.push("-p", prompt);
-		}
+	/**
+	 * Process CLI output into AIResult
+	 */
+	protected processCliResult(
+		_stdout: string,
+		_stderr: string,
+		_exitCode: number,
+		_workDir: string,
+	): AIResult {
+		throw new Error("GeminiEngine: use execute() or executeStreaming() directly");
+	}
+
+	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+		const args = this.buildArgs(prompt, workDir, options);
+		const stdinContent = prompt;
 
 		const { stdout, stderr, exitCode } = await execCommand(
 			this.cliCommand,
 			args,
 			workDir,
-			undefined,
+			this.getEnv(options),
 			stdinContent,
 		);
 
@@ -88,23 +100,8 @@ export class GeminiEngine extends BaseAIEngine {
 		onProgress: ProgressCallback,
 		options?: EngineOptions,
 	): Promise<AIResult> {
-		const args = ["--output-format", "stream-json", "--yolo"];
-		if (options?.modelOverride) {
-			args.push("--model", options.modelOverride);
-		}
-		// Add any additional engine-specific arguments
-		if (options?.engineArgs && options.engineArgs.length > 0) {
-			args.push(...options.engineArgs);
-		}
-
-		// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content
-		let stdinContent: string | undefined;
-		if (isWindows) {
-			args.push("-p");
-			stdinContent = prompt;
-		} else {
-			args.push("-p", prompt);
-		}
+		const args = this.buildArgs(prompt, workDir, options);
+		const stdinContent = prompt;
 
 		const outputLines: string[] = [];
 
@@ -121,7 +118,7 @@ export class GeminiEngine extends BaseAIEngine {
 					onProgress(step);
 				}
 			},
-			undefined,
+			this.getEnv(options),
 			stdinContent,
 		);
 
diff --git a/cli/src/engines/index.ts b/cli/src/engines/index.ts
index 4b7fc70e..3bdb1e16 100644
--- a/cli/src/engines/index.ts
+++ b/cli/src/engines/index.ts
@@ -1,13 +1,13 @@
-export * from "./types.ts";
 export * from "./base.ts";
 export * from "./claude.ts";
-export * from "./opencode.ts";
-export * from "./cursor.ts";
 export * from "./codex.ts";
-export * from "./qwen.ts";
-export * from "./droid.ts";
 export * from "./copilot.ts";
+export * from "./cursor.ts";
+export * from "./droid.ts";
 export * from "./gemini.ts";
+export * from "./opencode.ts";
+export * from "./qwen.ts";
+export * from "./types.ts";
 
 import { ClaudeEngine } from "./claude.ts";
 import { CodexEngine } from "./codex.ts";
@@ -56,5 +56,6 @@ export function getEngineName(name: AIEngineName): string {
  * Check if an engine is available
  */
 export async function isEngineAvailable(name: AIEngineName): Promise<boolean> {
-	return createEngine(name).isAvailable();
+	const engine = createEngine(name);
+	return await engine.isAvailable();
 }
diff --git a/cli/src/engines/opencode.ts b/cli/src/engines/opencode.ts
index 9315e4bb..570f8742 100644
--- a/cli/src/engines/opencode.ts
+++ b/cli/src/engines/opencode.ts
@@ -1,44 +1,303 @@
+import * as fs from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import { logDebug as debugLog } from "../ui/logger.ts";
+import {
+	StepFinishSchema,
+	TextSchema,
+	extractSessionId,
+	parseJsonLine,
+} from "../utils/json-validation.ts";
 import { BaseAIEngine, checkForErrors, execCommand, formatCommandError } from "./base.ts";
+import { detectStepFromOutput as baseDetectStepFromOutput } from "./parsers.ts";
 import type { AIResult, EngineOptions } from "./types.ts";
 
-const isWindows = process.platform === "win32";
-
-/**
- * OpenCode AI Engine
- */
+/** OpenCode AI Engine */
 export class OpenCodeEngine extends BaseAIEngine {
 	name = "OpenCode";
 	cliCommand = "opencode";
+	protected lastUsedModel?: string;
 
-	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+	/** Set up environment variables for OpenCode engine */
+	protected getEnv(options?: EngineOptions): Record<string, string> | undefined {
+		const env: Record<string, string> = {
+			// Add rate limiting to prevent overwhelming the API
+			OPENCODE_REQUEST_DELAY: "1000",
+			...(options?.env || {}),
+		};
+
+		if (options?.debugOpenCode) {
+			env.DEBUG_OPENCODE = "true";
+		}
+
+		// Allow OpenCode broad filesystem access only when explicitly opted in.
+		if (options?.allowOpenCodeSandboxAccess === true) {
+			env.OPENCODE_PERMISSION = '{"*":"allow"}';
+		}
+
+		return env;
+	}
+
+	protected buildArgs(_prompt: string, _workDir: string, options?: EngineOptions): string[] {
 		const args = ["run", "--format", "json"];
 		if (options?.modelOverride) {
 			args.push("--model", options.modelOverride);
+			this.lastUsedModel = options.modelOverride;
+		} else {
+			this.lastUsedModel = "";
 		}
-		// Add any additional engine-specific arguments
+
 		if (options?.engineArgs && options.engineArgs.length > 0) {
 			args.push(...options.engineArgs);
 		}
+		// Prompt is passed via stdin by the execute() method for cross-platform compatibility
+		return args;
+	}
 
-		// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content
-		let stdinContent: string | undefined;
-		if (isWindows) {
-			stdinContent = prompt;
-		} else {
-			args.push(prompt);
-		}
+	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+		const args = this.buildArgs(prompt, workDir, options);
+
+		// Pass prompt via stdin for cross-platform compatibility
+		// This avoids shell escaping issues and argument length limits on all platforms
+		const stdinContent = prompt;
 
 		const { stdout, stderr, exitCode } = await execCommand(
 			this.cliCommand,
 			args,
 			workDir,
-			{ OPENCODE_PERMISSION: '{"*":"allow"}' },
+			this.getEnv(options),
 			stdinContent,
 		);
 
+		const combinedOutput = stdout + stderr;
+
+		// Diagnostics: capture session-related artifacts only when explicit debug is enabled
+		if (options?.debugOpenCode || process.env.RALPHY_DEBUG === "true") {
+			try {
+				const diagLogPath = path.join(os.tmpdir(), "ralphy-opencode_diag.log");
+				let sessionId: string | undefined;
+				// Attempt to extract a sessionId from any JSON lines in the output
+				for (const line of combinedOutput.split(/\r?\n/)) {
+					if (!line?.trim()) continue;
+					try {
+						const obj = JSON.parse(line);
+						if (obj?.sessionID) {
+							sessionId = String(obj.sessionID);
+						} else if (obj?.sessionId) {
+							sessionId = String(obj.sessionId);
+						} else if (obj?.session_id) {
+							sessionId = String(obj.session_id);
+						}
+					} catch {
+						// Ignore non-JSON lines in diagnostic extraction.
+					}
+				}
+
+				const diag = {
+					timestamp: new Date().toISOString(),
+					command: this.cliCommand,
+					argsCount: args.length,
+					workDir,
+					platform: process.platform,
+					exitCode,
+					sessionId,
+					stateDirHint: "[REDACTED]",
+					envSnapshot: {
+						HOME: "[REDACTED]",
+						USERPROFILE: "[REDACTED]",
+						XDG_STATE_HOME: "[REDACTED]",
+					},
+					stdoutBytes: Buffer.byteLength(stdout, "utf8"),
+					stderrBytes: Buffer.byteLength(stderr, "utf8"),
+					hasOutput: stdout.length > 0 || stderr.length > 0,
+				};
+				// Ensure the log directory exists and append the diagnostic entry
+				try {
+					fs.mkdirSync(path.dirname(diagLogPath), { recursive: true });
+					// Check file size limit before appending
+					const MAX_DIAG_SIZE = 10 * 1024 * 1024; // 10MB limit
+					if (fs.existsSync(diagLogPath)) {
+						const stats = fs.statSync(diagLogPath);
+						if (stats.size > MAX_DIAG_SIZE) {
+							// Rotate log file
+							fs.renameSync(diagLogPath, `${diagLogPath}.old`);
+						}
+					}
+					fs.appendFileSync(diagLogPath, `${JSON.stringify(diag)}\n`);
+				} catch (err) {
+					// Log but don't crash on logging failures
+					debugLog(`Failed to write diagnostic log: ${err}`);
+				}
+			} catch (diagErr) {
+				// If diagnostics fail for any reason, do not crash the engine
+				debugLog(`OpenCode: Diagnostic error (non-critical): ${diagErr}`);
+			}
+		}
+
+		return this.processCliResult(stdout, stderr, exitCode, workDir);
+	}
+
+	private parseOutput(output: string): {
+		response: string;
+		inputTokens: number;
+		outputTokens: number;
+		cost?: string;
+		sessionId?: string;
+	} {
+		const lines = output.split("\n").filter(Boolean);
+		let response = "";
+		let inputTokens = 0;
+		let outputTokens = 0;
+		let cost: string | undefined;
+		let sessionId: string | undefined;
+
+		// Find step_finish and other events for token counts and session ID
+		for (const line of lines) {
+			const result = parseJsonLine(line);
+			if (!result) continue;
+			const { event } = result;
+
+			// Extract session ID from any event that has it
+			const extractedSessionId = extractSessionId(event);
+			if (extractedSessionId) {
+				sessionId = extractedSessionId;
+			}
+
+			const stepFinishResult = StepFinishSchema.safeParse(event);
+			if (stepFinishResult.success) {
+				const stepFinish = stepFinishResult.data;
+				const tokens = stepFinish.part?.tokens || stepFinish.tokens;
+				inputTokens = tokens?.input || 0;
+				outputTokens = tokens?.output || 0;
+				cost = String(stepFinish.cost || stepFinish.part?.cost || "");
+			}
+		}
+
+		// Get text response from text events and tool_use events with file content
+		const textParts: string[] = [];
+		for (const line of lines) {
+			const result = parseJsonLine(line);
+			if (!result) continue;
+			const { event } = result;
+
+			const textResult = TextSchema.safeParse(event);
+			if (textResult.success) {
+				textParts.push(textResult.data.part.text);
+			}
+		}
+
+		// If no text parts found, check if this is a raw tool_use response (planning phase issue)
+		// In this case, we return an empty response so the caller can detect this condition
+		if (textParts.length === 0) {
+			// Check for raw tool_use response
+			for (const line of lines) {
+				const trimmed = line.trim();
+				if (trimmed.startsWith('{"type":"tool_use"')) {
+					// Return empty response - this will be detected by planning.ts
+					return { response: "", inputTokens, outputTokens, cost, sessionId };
+				}
+			}
+		}
+
+		response = textParts.join("") || "Task completed";
+
+		return { response, inputTokens, outputTokens, cost, sessionId };
+	}
+
+	/** Detect step from output for progress tracking */
+	detectStepFromOutput(line: string, logThoughts = false): string | null {
+		const trimmed = line.trim();
+		const lowerLine = trimmed.toLowerCase();
+
+		// Handle JSON tool calls first (specific to OpenCode)
+		try {
+			const parsed = JSON.parse(trimmed);
+			if (parsed?.tool && parsed?.file_path) {
+				const fileName = parsed.file_path.split("/").pop() || parsed.file_path;
+				switch (parsed.tool) {
+					case "read":
+						return `Reading ${fileName}`;
+					case "write":
+					case "edit":
+						return `Implementing ${fileName}`;
+					default:
+						return `${parsed.tool.charAt(0).toUpperCase() + parsed.tool.slice(1)} ${fileName}`;
+				}
+			}
+			if (parsed?.type === "text" && parsed?.part?.text) {
+				const text = parsed.part.text;
+				// Truncate long text
+				if (text.length > 150) {
+					return text.substring(0, 150);
+				}
+				return text;
+			}
+		} catch (err) {
+			// Not JSON, continue with text processing
+			debugLog(`OpenCode: JSON parse error in step detection: ${err}`);
+		}
+
+		// OpenCode-specific step detection before base implementation
+		if (lowerLine.includes("reading") || lowerLine.includes("loading")) {
+			// Check for "Reading file <filename>" pattern first
+			const readingFileMatch = trimmed.match(/Reading\s+file\s+["']?([^"']+)["']?/i);
+			if (readingFileMatch) {
+				const fileName = readingFileMatch[1].split("/").pop() || readingFileMatch[1];
+				return `Reading ${fileName}`;
+			}
+			// Extract filename if present
+			const fileMatch = trimmed.match(/(?:file|reading)\s+(?:"?')?([^"'\s]+)/i);
+			if (fileMatch) {
+				const fileName = fileMatch[1].split("/").pop() || fileMatch[1];
+				return `Reading ${fileName}`;
+			}
+			if (lowerLine.includes("file")) return "Reading code";
+		}
+		// Handle cat command pattern
+		if (trimmed.startsWith("cat ")) {
+			const fileMatch = trimmed.match(/cat\s+(?:"?')?([^"'\s]+)/i);
+			if (fileMatch) {
+				const fileName = fileMatch[1].split("/").pop() || fileMatch[1];
+				return `Reading ${fileName}`;
+			}
+		}
+		if (
+			lowerLine.includes("writing") ||
+			lowerLine.includes("editing") ||
+			lowerLine.includes("implementing")
+		) {
+			if (lowerLine.includes("test")) return "Writing tests";
+			return "Implementing";
+		}
+
+		// Use base implementation for other cases
+		const baseResult = baseDetectStepFromOutput(line, logThoughts);
+		if (baseResult !== null && baseResult !== undefined) {
+			return baseResult;
+		}
+
+		// OpenCode-specific step detection
+		if (lowerLine.includes("lint") || lowerLine.includes("formatting")) {
+			return "Linting";
+		}
+		if (lowerLine.includes("commit")) return "Committing";
+		if (lowerLine.includes("staging")) return "Staging";
+
+		return null;
+	}
+
+	protected processCliResult(
+		stdout: string,
+		stderr: string,
+		exitCode: number,
+		_workDir: string,
+	): AIResult {
 		const output = stdout + stderr;
 
-		// Check for errors
+		// Parse OpenCode JSON format
+		const { response, inputTokens, outputTokens, cost, sessionId } = this.parseOutput(output);
+
+		// Check for errors first
 		const error = checkForErrors(output);
 		if (error) {
 			return {
@@ -47,12 +306,10 @@ export class OpenCodeEngine extends BaseAIEngine {
 				inputTokens: 0,
 				outputTokens: 0,
 				error,
+				sessionId,
 			};
 		}
 
-		// Parse OpenCode JSON format
-		const { response, inputTokens, outputTokens, cost } = this.parseOutput(output);
-
 		// If command failed with non-zero exit code, provide a meaningful error
 		if (exitCode !== 0) {
 			return {
@@ -61,6 +318,7 @@ export class OpenCodeEngine extends BaseAIEngine {
 				inputTokens,
 				outputTokens,
 				error: formatCommandError(exitCode, output),
+				sessionId,
 			};
 		}
 
@@ -70,52 +328,7 @@ export class OpenCodeEngine extends BaseAIEngine {
 			inputTokens,
 			outputTokens,
 			cost,
+			sessionId,
 		};
 	}
-
-	private parseOutput(output: string): {
-		response: string;
-		inputTokens: number;
-		outputTokens: number;
-		cost?: string;
-	} {
-		const lines = output.split("\n").filter(Boolean);
-		let response = "";
-		let inputTokens = 0;
-		let outputTokens = 0;
-		let cost: string | undefined;
-
-		// Find step_finish for token counts
-		for (const line of lines) {
-			try {
-				const parsed = JSON.parse(line);
-				if (parsed.type === "step_finish") {
-					inputTokens = parsed.part?.tokens?.input || 0;
-					outputTokens = parsed.part?.tokens?.output || 0;
-					if (parsed.part?.cost) {
-						cost = String(parsed.part.cost);
-					}
-				}
-			} catch {
-				// Ignore non-JSON lines
-			}
-		}
-
-		// Get text response from text events
-		const textParts: string[] = [];
-		for (const line of lines) {
-			try {
-				const parsed = JSON.parse(line);
-				if (parsed.type === "text" && parsed.part?.text) {
-					textParts.push(parsed.part.text);
-				}
-			} catch {
-				// Ignore non-JSON lines
-			}
-		}
-
-		response = textParts.join("") || "Task completed";
-
-		return { response, inputTokens, outputTokens, cost };
-	}
 }
diff --git a/cli/src/engines/parsers.ts b/cli/src/engines/parsers.ts
new file mode 100644
index 00000000..db906d3b
--- /dev/null
+++ b/cli/src/engines/parsers.ts
@@ -0,0 +1,360 @@
+import type { z } from "zod";
+import { ErrorSchema, StepFinishSchema, parseJsonLine } from "../utils/json-validation.ts";
+import type { AIResult } from "./types.ts";
+
+/**
+ * Parsed result with token counts
+ */
+export interface ParsedResult {
+	response: string;
+	inputTokens: number;
+	outputTokens: number;
+}
+
+/**
+ * Token counts
+ */
+export interface TokenCounts {
+	input: number;
+	output: number;
+}
+
+function isAuthenticationMessage(message: string): boolean {
+	const lower = message.toLowerCase();
+	return (
+		lower.includes("invalid api key") ||
+		lower.includes("authentication") ||
+		lower.includes("not authenticated") ||
+		lower.includes("unauthorized") ||
+		lower.includes("/login")
+	);
+}
+
+export function extractAuthenticationError(output: string): string | null {
+	const lines = output
+		.split("\n")
+		.map((line) => line.trim())
+		.filter(Boolean);
+
+	for (const line of lines) {
+		let event: Record<string, unknown> | null = null;
+		let rawEvent: Record<string, unknown> | null = null;
+
+		if (line.startsWith("{")) {
+			try {
+				const parsed = JSON.parse(line);
+				if (parsed && typeof parsed === "object") {
+					rawEvent = parsed as Record<string, unknown>;
+				}
+			} catch {
+				// ignore malformed JSON lines
+			}
+		}
+
+		const parsedLine = parseJsonLine(line);
+		if (parsedLine?.event && typeof parsedLine.event === "object") {
+			event = parsedLine.event as Record<string, unknown>;
+		} else if (rawEvent) {
+			event = rawEvent;
+		}
+
+		if (!event) continue;
+
+		const type = typeof event.type === "string" ? event.type : "";
+
+		if (type === "error") {
+			const errorObj = event.error;
+			const errorMessage =
+				errorObj &&
+				typeof errorObj === "object" &&
+				typeof (errorObj as { message?: unknown }).message === "string"
+					? (errorObj as { message: string }).message
+					: typeof event.message === "string"
+						? event.message
+						: "";
+			if (errorMessage && isAuthenticationMessage(errorMessage)) return errorMessage;
+		}
+
+		const source = rawEvent || event;
+		if (
+			type === "result" &&
+			source.is_error === true &&
+			typeof source.result === "string" &&
+			isAuthenticationMessage(source.result)
+		) {
+			return source.result;
+		}
+
+		if (type === "assistant") {
+			const messageObj = event.message;
+			const hasAuthError =
+				event.error === "authentication_failed" ||
+				(messageObj &&
+					typeof messageObj === "object" &&
+					(messageObj as { error?: unknown }).error === "authentication_failed");
+			if (!hasAuthError) continue;
+
+			if (messageObj && typeof messageObj === "object") {
+				const content = (messageObj as { content?: unknown }).content;
+				if (Array.isArray(content)) {
+					for (const item of content) {
+						if (!item || typeof item !== "object") continue;
+						const text = (item as { text?: unknown }).text;
+						if (typeof text === "string" && isAuthenticationMessage(text)) return text;
+					}
+				}
+			}
+		}
+	}
+
+	return null;
+}
+
+/**
+ * Check for errors in stream-json output or general CLI output.
+ */
+export function checkForErrors(output: string): string | null {
+	const lines = output.split("\n").filter(Boolean);
+
+	for (const line of lines) {
+		const trimmed = line.trim();
+		// Try JSON parsing with schema validation
+		if (trimmed.startsWith("{")) {
+			const parsed = parseJsonLine(line);
+			if (parsed && ErrorSchema.safeParse(parsed.event).success) {
+				const errorData = parsed.event as z.infer<typeof ErrorSchema>;
+				return errorData.error?.message || errorData.message || "Unknown error";
+			}
+		}
+
+		// Look for common error patterns in plain text (case-insensitive)
+		const lowerTrimmed = trimmed.toLowerCase();
+		const hasExplicitErrorPrefix = lowerTrimmed.startsWith("fatal:");
+		const hasKnownModelErrorToken =
+			lowerTrimmed.includes("providermodelnotfounderror") ||
+			lowerTrimmed.includes("modelnotfounderror") ||
+			(lowerTrimmed.includes("model not found") && lowerTrimmed.includes("error")) ||
+			(lowerTrimmed.includes("invalid model") && lowerTrimmed.includes("error"));
+
+		if (hasExplicitErrorPrefix || hasKnownModelErrorToken) {
+			// Improve specific error messages
+			if (lowerTrimmed.includes("rate limit")) {
+				return "OpenCode Rate Limit: Too many requests. Try: Wait 30-60s";
+			}
+			if (lowerTrimmed.includes("quota")) {
+				return "OpenCode Quota Exceeded: You've reached your usage limit. Check your OpenCode plan";
+			}
+			if (lowerTrimmed.includes("connection") || lowerTrimmed.includes("timeout")) {
+				return "OpenCode Connection Error: Unable to connect to the service. Check internet connection";
+			}
+			return trimmed;
+		}
+	}
+
+	// Secondary check for common fatal strings
+	const fatalTerms = ["Permission denied", "command not found"];
+	const fatalLine = output.split("\n").find((line) => {
+		const trimmedLine = line.trim();
+		const lowerLine = trimmedLine.toLowerCase();
+		const hasFatalPrefix = /^(fatal:|error:|err:|\/bin\/sh:|sh:|bash:|zsh:)/i.test(trimmedLine);
+		return (
+			(hasFatalPrefix && fatalTerms.some((term) => lowerLine.includes(term.toLowerCase()))) ||
+			lowerLine.includes("providermodelnotfounderror")
+		);
+	});
+	if (fatalLine) {
+		return fatalLine.trim() || "Access or command error";
+	}
+
+	return null;
+}
+
+/**
+ * Format a command failure with useful output context.
+ */
+export function formatCommandError(exitCode: number, output: string): string {
+	const trimmed = output.trim();
+	if (!trimmed) {
+		return `Command failed with exit code ${exitCode}`;
+	}
+
+	// Try to find a meaningful error message first
+	const authError = extractAuthenticationError(output);
+	if (authError) {
+		return authError;
+	}
+
+	const extractedError = checkForErrors(output);
+	if (extractedError) {
+		return `Command failed with exit code ${exitCode}. Output:\n${extractedError}`;
+	}
+
+	const lines = trimmed.split("\n").filter(Boolean);
+	const snippet = lines.slice(-12).join("\n");
+	return `Command failed with exit code ${exitCode}. Output:\n${snippet}`;
+}
+
+/**
+ * Parse JSON result from AI output
+ */
+export function parseStreamJsonResult(output: string): ParsedResult {
+	const lines = output.split("\n").filter(Boolean);
+	let response = "";
+	let inputTokens = 0;
+	let outputTokens = 0;
+
+	for (const line of lines) {
+		try {
+			const parsed = JSON.parse(line);
+			if (parsed.type === "result") {
+				response = parsed.result || "Task completed";
+				inputTokens = parsed.usage?.input_tokens || 0;
+				outputTokens = parsed.usage?.output_tokens || 0;
+			}
+		} catch {
+			// Ignore non-JSON lines
+		}
+	}
+
+	return { response: response || "Task completed", inputTokens, outputTokens };
+}
+
+/**
+ * Extract token counts from JSON response
+ */
+export function extractTokenCounts(output: string): TokenCounts | null {
+	const lines = output.split("\n").filter(Boolean);
+	for (const line of lines) {
+		if (line.trim().startsWith("{")) {
+			const parsed = parseJsonLine(line);
+			if (!parsed) continue;
+
+			const stepFinishResult = StepFinishSchema.safeParse(parsed.event);
+			if (stepFinishResult.success) {
+				const stepFinish = stepFinishResult.data;
+				const tokens = stepFinish.part?.tokens || stepFinish.tokens;
+				if (tokens) {
+					return {
+						input: tokens.input || 0,
+						output: tokens.output || 0,
+					};
+				}
+			}
+		}
+	}
+	return null;
+}
+
+/**
+ * Detect step from AI output line
+ */
+export function detectStepFromOutput(line: string, logThoughts = true): string | null {
+	const trimmed = line.trim();
+
+	// Skip empty lines and obvious non-step lines
+	if (!trimmed || trimmed.startsWith("```") || trimmed.startsWith("$") || trimmed.startsWith("{")) {
+		return null;
+	}
+
+	// Skip markdown headings and list bullets - they often produce noisy false positives
+	if (/^#{1,6}\s+/.test(trimmed) || /^[-*+]\s+/.test(trimmed)) {
+		return null;
+	}
+
+	// Check for natural language patterns that indicate a step
+	const stepPatterns = [
+		// Action-oriented phrases
+		/^(I will|I'll|Let me|Now I'll|Next I'll|First I'll|Then I'll|Finally I'll)\s+/i,
+		// Status indicators
+		/^(Step\s+\d+|Phase\s+\d+|Stage\s+\d+):?/i,
+		// Progress markers
+		/^(Working on|Starting|Proceeding with|Moving to|Switching to)\s+/i,
+		// Analysis phrases
+		/^(Analyzing|Examining|Reviewing|Checking|Investigating)\s+/i,
+		// Action verbs at start
+		/^(Reading|Writing|Editing|Creating|Deleting|Moving|Renaming|Running|Testing|Building|Searching|Finding|Getting|Setting|Updating|Modifying|Implementing|Adding|Removing|Fixing|Refactoring|Optimizing|Converting|Generating|Validating|Formatting|Organizing|Preparing|Executing|Completing)\s+/i,
+		// Special markers
+		/^\[(READ|WRITE|EDIT|CREATE|DELETE|RUN|TEST|BUILD|SEARCH|ANALYZE|FIX|REFACTOR)\]/i,
+	];
+
+	for (const pattern of stepPatterns) {
+		const match = trimmed.match(pattern);
+		if (match) {
+			// Avoid generic assistant chatter being treated as an execution step
+			if (/^(i can|i think|i need|it looks like|we should)\b/i.test(trimmed)) {
+				return null;
+			}
+
+			// Extract the full action description (first sentence or up to 100 chars)
+			let step = trimmed;
+			const sentenceEnd = step.match(/[.!?](?:\s|$)/);
+			if (sentenceEnd?.index && sentenceEnd.index < 100) {
+				step = step.slice(0, sentenceEnd.index + 1);
+			} else if (step.length > 100) {
+				step = `${step.slice(0, 97)}...`;
+			}
+
+			// Don't log thoughts if disabled
+			if (!logThoughts && /^(?:(?:Let me|I'll|I will) think|thinking|analyzing the)/i.test(step)) {
+				return null;
+			}
+
+			return step;
+		}
+	}
+
+	return null;
+}
+
+/**
+ * Extract the most meaningful line from output for display
+ */
+export function extractMeaningfulLine(output: string): string | null {
+	const lines = output.split("\n").filter((line) => line.trim());
+
+	for (const line of lines) {
+		const step = detectStepFromOutput(line, false);
+		if (step) {
+			return step;
+		}
+	}
+
+	return lines[0] || null;
+}
+
+/**
+ * Create error result from exit code and output
+ */
+export function createErrorResult(
+	exitCode: number,
+	output: string,
+	response = "",
+	inputTokens = 0,
+	outputTokens = 0,
+): AIResult {
+	return {
+		success: false,
+		response,
+		inputTokens,
+		outputTokens,
+		error: formatCommandError(exitCode, output),
+	};
+}
+
+/**
+ * Create success result with response and token counts
+ */
+export function createSuccessResult(
+	response: string,
+	inputTokens: number,
+	outputTokens: number,
+	extra?: Record<string, unknown>,
+): AIResult {
+	return {
+		success: true,
+		response,
+		inputTokens,
+		outputTokens,
+		...extra,
+	};
+}
diff --git a/cli/src/engines/qwen.ts b/cli/src/engines/qwen.ts
index a0ddeef8..4162e853 100644
--- a/cli/src/engines/qwen.ts
+++ b/cli/src/engines/qwen.ts
@@ -1,15 +1,6 @@
-import {
-	BaseAIEngine,
-	checkForErrors,
-	detectStepFromOutput,
-	execCommand,
-	execCommandStreaming,
-	formatCommandError,
-	parseStreamJsonResult,
-} from "./base.ts";
-import type { AIResult, EngineOptions, ProgressCallback } from "./types.ts";
-
-const isWindows = process.platform === "win32";
+import { BaseAIEngine, checkForErrors } from "./base.ts";
+import { createErrorResult, createSuccessResult, parseStreamJsonResult } from "./parsers.ts";
+import type { AIResult, EngineOptions } from "./types.ts";
 
 /**
  * Qwen-Code AI Engine
@@ -18,145 +9,33 @@ export class QwenEngine extends BaseAIEngine {
 	name = "Qwen-Code";
 	cliCommand = "qwen";
 
-	async execute(prompt: string, workDir: string, options?: EngineOptions): Promise<AIResult> {
+	protected buildArgs(_prompt: string, _workDir: string, options?: EngineOptions): string[] {
 		const args = ["--output-format", "stream-json", "--approval-mode", "yolo"];
 		if (options?.modelOverride) {
 			args.push("--model", options.modelOverride);
 		}
-		// Add any additional engine-specific arguments
-		if (options?.engineArgs && options.engineArgs.length > 0) {
+		if (options?.engineArgs) {
 			args.push(...options.engineArgs);
 		}
-
-		// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content
-		let stdinContent: string | undefined;
-		if (isWindows) {
-			args.push("-p");
-			stdinContent = prompt;
-		} else {
-			args.push("-p", prompt);
-		}
-
-		const { stdout, stderr, exitCode } = await execCommand(
-			this.cliCommand,
-			args,
-			workDir,
-			undefined,
-			stdinContent,
-		);
-
-		const output = stdout + stderr;
-
-		// Check for errors
-		const error = checkForErrors(output);
-		if (error) {
-			return {
-				success: false,
-				response: "",
-				inputTokens: 0,
-				outputTokens: 0,
-				error,
-			};
-		}
-
-		// Parse result (same format as Claude)
-		const { response, inputTokens, outputTokens } = parseStreamJsonResult(output);
-
-		// If command failed with non-zero exit code, provide a meaningful error
-		if (exitCode !== 0) {
-			return {
-				success: false,
-				response,
-				inputTokens,
-				outputTokens,
-				error: formatCommandError(exitCode, output),
-			};
-		}
-
-		return {
-			success: true,
-			response,
-			inputTokens,
-			outputTokens,
-		};
+		// Note: The prompt is passed via stdin by the base engine for cross-platform compatibility
+		// The -p flag tells Qwen to read from stdin
+		args.push("-p");
+		return args;
 	}
 
-	async executeStreaming(
-		prompt: string,
-		workDir: string,
-		onProgress: ProgressCallback,
-		options?: EngineOptions,
-	): Promise<AIResult> {
-		const args = ["--output-format", "stream-json", "--approval-mode", "yolo"];
-		if (options?.modelOverride) {
-			args.push("--model", options.modelOverride);
-		}
-		// Add any additional engine-specific arguments
-		if (options?.engineArgs && options.engineArgs.length > 0) {
-			args.push(...options.engineArgs);
-		}
-
-		// On Windows, pass prompt via stdin to avoid cmd.exe argument parsing issues with multi-line content
-		let stdinContent: string | undefined;
-		if (isWindows) {
-			args.push("-p");
-			stdinContent = prompt;
-		} else {
-			args.push("-p", prompt);
-		}
-
-		const outputLines: string[] = [];
-
-		const { exitCode } = await execCommandStreaming(
-			this.cliCommand,
-			args,
-			workDir,
-			(line) => {
-				outputLines.push(line);
-
-				// Detect and report step changes
-				const step = detectStepFromOutput(line);
-				if (step) {
-					onProgress(step);
-				}
-			},
-			undefined,
-			stdinContent,
-		);
-
-		const output = outputLines.join("\n");
-
-		// Check for errors
+	protected processCliResult(stdout: string, stderr: string, exitCode: number): AIResult {
+		const output = stdout + stderr;
 		const error = checkForErrors(output);
 		if (error) {
-			return {
-				success: false,
-				response: "",
-				inputTokens: 0,
-				outputTokens: 0,
-				error,
-			};
+			return { success: false, response: "", inputTokens: 0, outputTokens: 0, error };
 		}
 
-		// Parse result (same format as Claude)
 		const { response, inputTokens, outputTokens } = parseStreamJsonResult(output);
 
-		// If command failed with non-zero exit code, provide a meaningful error
 		if (exitCode !== 0) {
-			return {
-				success: false,
-				response,
-				inputTokens,
-				outputTokens,
-				error: formatCommandError(exitCode, output),
-			};
+			return createErrorResult(exitCode, output, response, inputTokens, outputTokens);
 		}
 
-		return {
-			success: true,
-			response,
-			inputTokens,
-			outputTokens,
-		};
+		return createSuccessResult(response, inputTokens, outputTokens);
 	}
 }
diff --git a/cli/src/engines/types.ts b/cli/src/engines/types.ts
index 6e9f9c8f..f3bb342f 100644
--- a/cli/src/engines/types.ts
+++ b/cli/src/engines/types.ts
@@ -9,6 +9,8 @@ export interface AIResult {
 	/** Actual cost in dollars (if provided by engine) or duration in ms */
 	cost?: string;
 	error?: string;
+	/** Session ID if the engine supports it (like OpenCode) */
+	sessionId?: string;
 }
 
 /**
@@ -19,6 +21,18 @@ export interface EngineOptions {
 	modelOverride?: string;
 	/** Additional arguments to pass to the engine CLI */
 	engineArgs?: string[];
+	/** Additional environment variables for the engine CLI */
+	env?: Record<string, string>;
+	/** Enable comprehensive OpenCode debugging */
+	debugOpenCode?: boolean;
+	/** Allow OpenCode to access sandbox directories without permission prompts */
+	allowOpenCodeSandboxAccess?: boolean;
+	/** General debug flag */
+	debug?: boolean;
+	/** Whether this is a dry run (no actual AI execution) */
+	dryRun?: boolean;
+	/** Log AI thoughts/reasoning to console */
+	logThoughts?: boolean;
 }
 
 /**
@@ -26,6 +40,19 @@ export interface EngineOptions {
  */
 export type ProgressCallback = (step: string) => void;
 
+/**
+ * Process reference type for child processes
+ * Compatible with both Bun and Node.js child processes
+ * BUG FIX: Use proper union type instead of any for type safety
+ */
+export type ChildProcess =
+	| (Bun.Subprocess & { kill: (signal?: string) => void; pid: number; exited: Promise<number> })
+	| (import("node:child_process").ChildProcess & {
+			kill: (signal?: string) => void;
+			pid: number;
+			exited?: Promise<number>;
+	  });
+
 /**
  * AI Engine interface - one per AI tool
  */
diff --git a/cli/src/engines/validation.ts b/cli/src/engines/validation.ts
new file mode 100644
index 00000000..6219ae96
--- /dev/null
+++ b/cli/src/engines/validation.ts
@@ -0,0 +1,178 @@
+import { logDebug } from "../ui/logger.ts";
+
+// Check platform
+const isWindows = process.platform === "win32";
+const DEBUG = process.env.RALPHY_DEBUG === "true";
+
+/**
+ * Maximum lengths to prevent DoS attacks
+ */
+const MAX_COMMAND_LENGTH = 1000;
+const MAX_ARG_LENGTH = 10000;
+const MAX_TOTAL_ARGS_LENGTH = 100000;
+const MAX_ARG_COUNT = 1000;
+
+function debugLog(...args: unknown[]): void {
+	if (DEBUG || (globalThis as { verboseMode?: boolean }).verboseMode === true) {
+		logDebug(args.map((a) => String(a)).join(" "));
+	}
+}
+
+/**
+ * Validate command name to prevent command injection
+ * Only allows alphanumeric characters, hyphens, underscores, and dots
+ * Also allows forward slashes for path-based commands (e.g., ./node_modules/.bin/cli)
+ */
+function tokenizeCommand(command: string): string[] {
+	const tokens: string[] = [];
+	const regex = /[^\s"']+|"([^"]*)"|'([^']*)'/g;
+	let match = regex.exec(command);
+	while (match !== null) {
+		tokens.push(match[1] ?? match[2] ?? match[0]);
+		match = regex.exec(command);
+	}
+
+	return tokens;
+}
+
+export function validateCommand(command: string): string | null {
+	const trimmedCommand = command.trim();
+	if (!trimmedCommand) {
+		debugLog("Command validation failed: command is empty");
+		return null;
+	}
+
+	// Check command length to prevent DoS
+	if (trimmedCommand.length > MAX_COMMAND_LENGTH) {
+		debugLog(
+			`Command validation failed: command too long (${trimmedCommand.length} > ${MAX_COMMAND_LENGTH})`,
+		);
+		return null;
+	}
+
+	// Block shell metacharacters and dangerous patterns
+	const dangerousPatterns = [
+		/[;&|`$]/, // Shell metacharacters
+		/\$\{/, // Variable expansion
+		/\$\(/, // Command substitution
+		/`/, // Backtick substitution
+		/\|\|/, // OR operator
+		/&&/, // AND operator
+		/[<>]/, // Redirection
+	];
+
+	for (const pattern of dangerousPatterns) {
+		if (pattern.test(trimmedCommand)) {
+			debugLog(`Command validation failed: dangerous pattern detected in "${trimmedCommand}"`);
+			return null;
+		}
+	}
+
+	const tokens = tokenizeCommand(trimmedCommand);
+	if (tokens.length === 0) {
+		debugLog("Command validation failed: no command token found");
+		return null;
+	}
+
+	const [commandToken, ...args] = tokens;
+
+	// Allow executable characters: alphanumeric, hyphen, underscore, dot, slashes.
+	// Windows also needs drive-letter colon support (e.g., C:\tools\bun.exe).
+	const validCommandPattern = isWindows ? /^[a-zA-Z0-9._\-\\/:]+$/ : /^[a-zA-Z0-9._\-/]+$/;
+
+	if (!validCommandPattern.test(commandToken)) {
+		debugLog(`Command validation failed: invalid command token "${commandToken}"`);
+		return null;
+	}
+
+	if (args.length > 0 && !validateArgs(args)) {
+		debugLog(`Command validation failed: invalid args in "${trimmedCommand}"`);
+		return null;
+	}
+
+	return trimmedCommand;
+}
+
+/**
+ * Validate command arguments to prevent injection
+ * Returns null if any argument contains dangerous patterns
+ */
+export function validateArgs(args: string[]): string[] | null {
+	// Check argument count to prevent DoS
+	if (args.length > MAX_ARG_COUNT) {
+		debugLog(`Argument validation failed: too many arguments (${args.length} > ${MAX_ARG_COUNT})`);
+		return null;
+	}
+
+	// Check total arguments length
+	const totalLength = args.reduce((sum, arg) => sum + arg.length, 0);
+	if (totalLength > MAX_TOTAL_ARGS_LENGTH) {
+		debugLog(
+			`Argument validation failed: total arguments too long (${totalLength} > ${MAX_TOTAL_ARGS_LENGTH})`,
+		);
+		return null;
+	}
+
+	const dangerousPatterns = [
+		/[;&|`]/, // Shell metacharacters
+		/\$\{/, // Variable expansion
+		/\$\(/, // Command substitution
+		/`/, // Backtick substitution
+		/\|\|/, // OR operator
+		/&&/, // AND operator
+	];
+
+	for (const arg of args) {
+		// Check individual argument length
+		if (arg.length > MAX_ARG_LENGTH) {
+			debugLog(`Argument validation failed: argument too long (${arg.length} > ${MAX_ARG_LENGTH})`);
+			return null;
+		}
+
+		for (const pattern of dangerousPatterns) {
+			if (pattern.test(arg)) {
+				debugLog(`Argument validation failed: dangerous pattern in "${arg}"`);
+				return null;
+			}
+		}
+	}
+
+	return args;
+}
+
+/**
+ * Validation result type
+ */
+export interface ValidationResult {
+	valid: boolean;
+	command?: string;
+	args?: string[];
+	error?: string;
+}
+
+/**
+ * Validate both command and arguments in one call
+ */
+export function validateCommandAndArgs(command: string, args: string[]): ValidationResult {
+	const validatedCommand = validateCommand(command);
+	if (!validatedCommand) {
+		return {
+			valid: false,
+			error: "Invalid command - potential command injection detected",
+		};
+	}
+
+	const validatedArgs = validateArgs(args);
+	if (!validatedArgs) {
+		return {
+			valid: false,
+			error: "Invalid arguments - potential command injection detected",
+		};
+	}
+
+	return {
+		valid: true,
+		command: validatedCommand,
+		args: validatedArgs,
+	};
+}
diff --git a/cli/src/execution/parallel.ts b/cli/src/execution/parallel.ts
index 5318088e..765618a7 100644
--- a/cli/src/execution/parallel.ts
+++ b/cli/src/execution/parallel.ts
@@ -1,5 +1,5 @@
 import { copyFileSync, cpSync, existsSync, mkdirSync } from "node:fs";
-import { join } from "node:path";
+import { dirname, join, resolve, sep } from "node:path";
 import simpleGit from "simple-git";
 import { PROGRESS_FILE, RALPHY_DIR } from "../config/loader.ts";
 import { logTaskProgress } from "../config/writer.ts";
@@ -41,6 +41,23 @@ interface ParallelAgentResult {
 	usedSandbox?: boolean;
 }
 
+function resolveRelativePathInside(baseDir: string, relativePath: string): string | null {
+	if (!relativePath || relativePath.startsWith("/") || relativePath.startsWith("\\")) {
+		return null;
+	}
+	if (/^[a-zA-Z]:/.test(relativePath)) {
+		return null;
+	}
+
+	const resolvedBase = resolve(baseDir);
+	const resolvedPath = resolve(baseDir, relativePath);
+	if (resolvedPath !== resolvedBase && !resolvedPath.startsWith(`${resolvedBase}${sep}`)) {
+		return null;
+	}
+
+	return resolvedPath;
+}
+
 /**
  * Run a single agent in a worktree
  */
@@ -80,17 +97,28 @@ async function runAgentInWorktree(
 		logDebug(`Agent ${agentNum}: Created worktree at ${worktreeDir}`);
 
 		// Copy PRD file or folder to worktree
-		if (prdSource === "markdown" || prdSource === "yaml" || prdSource === "json") {
-			const srcPath = join(originalDir, prdFile);
-			const destPath = join(worktreeDir, prdFile);
-			if (existsSync(srcPath)) {
+		if (
+			prdSource === "markdown" ||
+			prdSource === "yaml" ||
+			prdSource === "json" ||
+			prdSource === "csv"
+		) {
+			const srcPath = resolveRelativePathInside(originalDir, prdFile);
+			const destPath = resolveRelativePathInside(worktreeDir, prdFile);
+			if (srcPath && destPath && existsSync(srcPath)) {
+				mkdirSync(dirname(destPath), { recursive: true });
 				copyFileSync(srcPath, destPath);
+			} else if (!srcPath || !destPath) {
+				throw new Error(`Invalid PRD path: ${prdFile}`);
 			}
 		} else if (prdSource === "markdown-folder" && prdIsFolder) {
-			const srcPath = join(originalDir, prdFile);
-			const destPath = join(worktreeDir, prdFile);
-			if (existsSync(srcPath)) {
+			const srcPath = resolveRelativePathInside(originalDir, prdFile);
+			const destPath = resolveRelativePathInside(worktreeDir, prdFile);
+			if (srcPath && destPath && existsSync(srcPath)) {
+				mkdirSync(dirname(destPath), { recursive: true });
 				cpSync(srcPath, destPath, { recursive: true });
+			} else if (!srcPath || !destPath) {
+				throw new Error(`Invalid PRD path: ${prdFile}`);
 			}
 		}
 
@@ -173,17 +201,28 @@ async function runAgentInSandbox(
 		);
 
 		// Copy PRD file or folder to sandbox (same as worktree mode)
-		if (prdSource === "markdown" || prdSource === "yaml" || prdSource === "json") {
-			const srcPath = join(originalDir, prdFile);
-			const destPath = join(sandboxDir, prdFile);
-			if (existsSync(srcPath)) {
+		if (
+			prdSource === "markdown" ||
+			prdSource === "yaml" ||
+			prdSource === "json" ||
+			prdSource === "csv"
+		) {
+			const srcPath = resolveRelativePathInside(originalDir, prdFile);
+			const destPath = resolveRelativePathInside(sandboxDir, prdFile);
+			if (srcPath && destPath && existsSync(srcPath)) {
+				mkdirSync(dirname(destPath), { recursive: true });
 				copyFileSync(srcPath, destPath);
+			} else if (!srcPath || !destPath) {
+				throw new Error(`Invalid PRD path: ${prdFile}`);
 			}
 		} else if (prdSource === "markdown-folder" && prdIsFolder) {
-			const srcPath = join(originalDir, prdFile);
-			const destPath = join(sandboxDir, prdFile);
-			if (existsSync(srcPath)) {
+			const srcPath = resolveRelativePathInside(originalDir, prdFile);
+			const destPath = resolveRelativePathInside(sandboxDir, prdFile);
+			if (srcPath && destPath && existsSync(srcPath)) {
+				mkdirSync(dirname(destPath), { recursive: true });
 				cpSync(srcPath, destPath, { recursive: true });
+			} else if (!srcPath || !destPath) {
+				throw new Error(`Invalid PRD path: ${prdFile}`);
 			}
 		}
 
@@ -381,12 +420,13 @@ export async function runParallel(
 		// Run agents in parallel (using sandbox or worktree mode)
 		const promises = batch.map((task) => {
 			globalAgentNum++;
+			const agentId = globalAgentNum;
 
 			const runInSandbox = () =>
 				runAgentInSandbox(
 					engine,
 					task,
-					globalAgentNum,
+					agentId,
 					getSandboxBase(workDir),
 					workDir,
 					prdSource,
@@ -408,7 +448,7 @@ export async function runParallel(
 			return runAgentInWorktree(
 				engine,
 				task,
-				globalAgentNum,
+				agentId,
 				baseBranch,
 				isolationBase,
 				workDir,
@@ -424,7 +464,7 @@ export async function runParallel(
 				engineArgs,
 			).then((res) => {
 				if (shouldFallbackToSandbox(res.error)) {
-					logWarn(`Agent ${globalAgentNum}: Worktree unavailable, retrying in sandbox mode.`);
+					logWarn(`Agent ${agentId}: Worktree unavailable, retrying in sandbox mode.`);
 					if (res.worktreeDir) {
 						cleanupAgentWorktree(res.worktreeDir, res.branchName, workDir).catch(() => {
 							// Ignore cleanup failures during fallback
diff --git a/cli/src/execution/retry.ts b/cli/src/execution/retry.ts
index 9eb4f293..a9d7707c 100644
--- a/cli/src/execution/retry.ts
+++ b/cli/src/execution/retry.ts
@@ -34,7 +34,7 @@ export function calculateBackoffDelay(
 	useJitter: boolean,
 ): number {
 	// Exponential backoff: baseDelay * 2^(attempt-1)
-	let delay = baseDelayMs * Math.pow(2, attempt - 1);
+	let delay = baseDelayMs * 2 ** (attempt - 1);
 
 	// Cap at maximum delay
 	delay = Math.min(delay, maxDelayMs);
diff --git a/cli/src/execution/sandbox-git.ts b/cli/src/execution/sandbox-git.ts
index 45ceb1e9..b4ee4a85 100644
--- a/cli/src/execution/sandbox-git.ts
+++ b/cli/src/execution/sandbox-git.ts
@@ -12,7 +12,7 @@ class GitMutex {
 	private queue: Promise<void> = Promise.resolve();
 
 	async acquire<T>(fn: () => Promise<T>): Promise<T> {
-		let release: () => void;
+		let release: (() => void) | undefined;
 		const next = new Promise<void>((resolve) => {
 			release = resolve;
 		});
@@ -22,7 +22,7 @@ class GitMutex {
 		try {
 			return await fn();
 		} finally {
-			release!();
+			release?.();
 		}
 	}
 }
diff --git a/cli/src/execution/sandbox.ts b/cli/src/execution/sandbox.ts
index d27e76c9..00a9252e 100644
--- a/cli/src/execution/sandbox.ts
+++ b/cli/src/execution/sandbox.ts
@@ -32,21 +32,27 @@ export async function rmRF(path: string): Promise<void> {
 			// Using force: true and recursive: true is standard
 			rmSync(path, { recursive: true, force: true });
 			return;
-		} catch (err: any) {
-			const isLockError = err.code === "EBUSY" || err.code === "EPERM" || err.code === "ENOTEMPTY";
+		} catch (err: unknown) {
+			const errorCode =
+				typeof err === "object" && err !== null && "code" in err
+					? String((err as { code?: string }).code)
+					: "";
+			const isLockError =
+				errorCode === "EBUSY" || errorCode === "EPERM" || errorCode === "ENOTEMPTY";
 
 			if (isLockError && i < retries - 1) {
 				// Wait with exponential backoff: 500, 1000, 2000, 4000...
-				const delay = 500 * Math.pow(2, i);
+				const delay = 500 * 2 ** i;
 				await new Promise((resolve) => setTimeout(resolve, delay));
 				continue;
 			}
 
 			// On final failure for lock errors, log warning and swallow.
 			// For non-lock errors (any time), throw immediately.
+			const errorMessage = err instanceof Error ? err.message : String(err);
 			if (isLockError && i === retries - 1) {
 				logWarn(
-					`Failed to clean up ${path} after ${retries} attempts: ${err.message}. This may be due to a file lock. Proceeding anyway.`,
+					`Failed to clean up ${path} after ${retries} attempts: ${errorMessage}. This may be due to a file lock. Proceeding anyway.`,
 				);
 			} else {
 				throw err;
@@ -252,7 +258,6 @@ export function verifySandboxIsolation(sandboxDir: string, symlinkDirs: string[]
 				const stat = lstatSync(sandboxPath);
 				if (stat.isSymbolicLink()) {
 					// Good - it's a symlink
-					continue;
 				}
 			} catch {
 				// Error checking - assume not isolated
diff --git a/cli/src/telemetry/collector.ts b/cli/src/telemetry/collector.ts
index 37b38fe9..1d33a037 100644
--- a/cli/src/telemetry/collector.ts
+++ b/cli/src/telemetry/collector.ts
@@ -18,6 +18,43 @@ import type {
 // Package version (loaded lazily)
 let cachedVersion: string | undefined;
 
+function sanitizeSecrets(input: string): string {
+	const patterns = [
+		{ regex: /sk-[a-zA-Z0-9]{48}/g, replacement: "[API_KEY_REDACTED]" },
+		{ regex: /sk-ant-[a-zA-Z0-9_-]{16,256}/g, replacement: "[ANTHROPIC_KEY_REDACTED]" },
+		{ regex: /ghp_[a-zA-Z0-9]{36}/g, replacement: "[GITHUB_TOKEN_REDACTED]" },
+		{ regex: /gho_[a-zA-Z0-9]{52}/g, replacement: "[GITHUB_OAUTH_REDACTED]" },
+		{ regex: /AKIA[0-9A-Z]{16}/g, replacement: "[AWS_KEY_REDACTED]" },
+		{ regex: /\b[0-9a-f]{64}\b/g, replacement: "[HEX_SECRET_REDACTED]" },
+	];
+
+	let result = input;
+	for (const { regex, replacement } of patterns) {
+		result = result.replace(regex, replacement);
+	}
+	return result;
+}
+
+function sanitizeTelemetryValue(value: unknown): unknown {
+	if (typeof value === "string") {
+		return sanitizeSecrets(value);
+	}
+
+	if (Array.isArray(value)) {
+		return value.map((item) => sanitizeTelemetryValue(item));
+	}
+
+	if (value && typeof value === "object") {
+		const sanitized: Record<string, unknown> = {};
+		for (const [key, nested] of Object.entries(value)) {
+			sanitized[key] = sanitizeTelemetryValue(nested);
+		}
+		return sanitized;
+	}
+
+	return value;
+}
+
 function getCliVersion(): string {
 	if (cachedVersion) return cachedVersion;
 	try {
@@ -116,8 +153,8 @@ export class TelemetryCollector {
 
 		// Store prompts/responses for full mode
 		if (this.level === "full") {
-			if (prompt) this.prompts.push(prompt);
-			if (response) this.responses.push(response);
+			if (prompt) this.prompts.push(sanitizeSecrets(prompt));
+			if (response) this.responses.push(sanitizeSecrets(response));
 		}
 	}
 
@@ -131,7 +168,10 @@ export class TelemetryCollector {
 			startTime: Date.now(),
 			toolName,
 			parameterKeys: parameters ? Object.keys(parameters) : undefined,
-			parameters: this.level === "full" ? parameters : undefined,
+			parameters:
+				this.level === "full"
+					? (sanitizeTelemetryValue(parameters) as Record<string, unknown> | undefined)
+					: undefined,
 		};
 
 		// Track file paths in full mode
@@ -164,7 +204,7 @@ export class TelemetryCollector {
 		// Add full mode data
 		if (this.level === "full") {
 			toolCall.parameters = this.activeToolCall.parameters;
-			if (result) toolCall.result = result;
+			if (result) toolCall.result = sanitizeSecrets(result);
 		}
 
 		this.toolCalls.push(toolCall);
@@ -199,8 +239,10 @@ export class TelemetryCollector {
 		};
 
 		if (this.level === "full") {
-			toolCall.parameters = options?.parameters;
-			toolCall.result = options?.result;
+			toolCall.parameters = sanitizeTelemetryValue(options?.parameters) as
+				| Record<string, unknown>
+				| undefined;
+			toolCall.result = options?.result ? sanitizeSecrets(options.result) : undefined;
 
 			// Track file paths
 			if (options?.parameters) {
@@ -303,7 +345,7 @@ export class TelemetryCollector {
 				fullSession.response = this.responses.join("\n\n---\n\n");
 			}
 			if (this.filePaths.size > 0) {
-				fullSession.filePaths = Array.from(this.filePaths);
+				fullSession.filePaths = Array.from(this.filePaths).map((path) => sanitizeSecrets(path));
 			}
 			return { session: fullSession, toolCalls: this.toolCalls };
 		}
diff --git a/cli/src/telemetry/exporter.ts b/cli/src/telemetry/exporter.ts
index c4b4a2be..bef8331c 100644
--- a/cli/src/telemetry/exporter.ts
+++ b/cli/src/telemetry/exporter.ts
@@ -161,7 +161,7 @@ export class TelemetryExporter {
 
 		await this.ensureExportsDir();
 		const filePath = outputPath || join(this.exportsDir, "openai-evals.jsonl");
-		await writeFile(filePath, entries.join("\n") + "\n", "utf-8");
+		await writeFile(filePath, `${entries.join("\n")}\n`, "utf-8");
 
 		return filePath;
 	}
@@ -194,7 +194,7 @@ export class TelemetryExporter {
 
 		await this.ensureExportsDir();
 		const filePath = outputPath || join(this.exportsDir, "raw-telemetry.jsonl");
-		const lines = entries.map((e) => JSON.stringify(e)).join("\n") + "\n";
+		const lines = `${entries.map((e) => JSON.stringify(e)).join("\n")}\n`;
 		await writeFile(filePath, lines, "utf-8");
 
 		return filePath;
diff --git a/cli/src/telemetry/types.ts b/cli/src/telemetry/types.ts
index 41650f3c..11424fa8 100644
--- a/cli/src/telemetry/types.ts
+++ b/cli/src/telemetry/types.ts
@@ -78,6 +78,51 @@ export interface ToolCall {
  */
 export type TelemetryLevel = "anonymous" | "full";
 
+/**
+ * Full session data for webhook
+ */
+export interface WebhookSessionData {
+	sessionId: string;
+	engine: string;
+	mode: string;
+	cliVersion: string;
+	platform: string;
+	totalTokensIn: number;
+	totalTokensOut: number;
+	totalDurationMs: number;
+	taskCount: number;
+	successCount: number;
+	failedCount: number;
+	toolCalls: {
+		toolName: string;
+		callCount: number;
+		successCount: number;
+		failedCount: number;
+		avgDurationMs: number;
+	}[];
+	tags?: string[];
+}
+
+/**
+ * Full session details for webhook (full privacy mode)
+ */
+export interface WebhookSessionDetails {
+	prompt?: string;
+	response?: string;
+	filePaths?: string[];
+}
+
+/**
+ * Telemetry webhook payload
+ */
+export interface TelemetryWebhookPayload {
+	event: string;
+	version: string;
+	timestamp: string;
+	session: WebhookSessionData;
+	details?: WebhookSessionDetails;
+}
+
 /**
  * Telemetry configuration
  */
diff --git a/cli/src/telemetry/webhook.ts b/cli/src/telemetry/webhook.ts
index c305e0e0..6ce965d3 100644
--- a/cli/src/telemetry/webhook.ts
+++ b/cli/src/telemetry/webhook.ts
@@ -76,11 +76,18 @@ export async function sendTelemetryWebhook(
 	}
 
 	const payload = buildPayload(session, level);
+	const controller = new AbortController();
+	const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout
+	const safeWebhookTarget = (() => {
+		try {
+			const parsed = new URL(webhookUrl);
+			return `${parsed.protocol}//${parsed.host}`;
+		} catch {
+			return "[invalid-webhook-url]";
+		}
+	})();
 
 	try {
-		const controller = new AbortController();
-		const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout
-
 		const response = await fetch(webhookUrl, {
 			method: "POST",
 			headers: {
@@ -90,14 +97,12 @@ export async function sendTelemetryWebhook(
 			signal: controller.signal,
 		});
 
-		clearTimeout(timeoutId);
-
 		if (!response.ok) {
 			const text = await response.text().catch(() => "");
 			throw new Error(`HTTP ${response.status}${text ? `: ${text}` : ""}`);
 		}
 
-		logDebug(`Telemetry webhook sent successfully to ${webhookUrl}`);
+		logDebug(`Telemetry webhook sent successfully to ${safeWebhookTarget}`);
 	} catch (error) {
 		if (error instanceof Error && error.name === "AbortError") {
 			logError("Telemetry webhook timed out after 10 seconds");
@@ -107,5 +112,7 @@ export async function sendTelemetryWebhook(
 			);
 		}
 		// Don't throw - webhook failures shouldn't break the session
+	} finally {
+		clearTimeout(timeoutId);
 	}
 }
diff --git a/cli/src/telemetry/writer.ts b/cli/src/telemetry/writer.ts
index ab64c48f..8b0d9569 100644
--- a/cli/src/telemetry/writer.ts
+++ b/cli/src/telemetry/writer.ts
@@ -7,6 +7,7 @@
 import { existsSync } from "node:fs";
 import { appendFile, mkdir, readFile, readdir } from "node:fs/promises";
 import { dirname, join } from "node:path";
+import { logDebug } from "../ui/logger.ts";
 import type { Session, SessionFull, ToolCall } from "./types.js";
 
 const DEFAULT_OUTPUT_DIR = ".ralphy/telemetry";
@@ -56,7 +57,7 @@ export class TelemetryWriter {
 	async writeSession(session: Session | SessionFull): Promise<void> {
 		await this.ensureDir();
 		const path = join(this.outputDir, SESSIONS_FILE);
-		const line = JSON.stringify(session) + "\n";
+		const line = `${JSON.stringify(session)}\n`;
 		await appendFile(path, line, "utf-8");
 	}
 
@@ -68,7 +69,7 @@ export class TelemetryWriter {
 
 		await this.ensureDir();
 		const path = join(this.outputDir, TOOL_CALLS_FILE);
-		const lines = toolCalls.map((call) => JSON.stringify(call)).join("\n") + "\n";
+		const lines = `${toolCalls.map((call) => JSON.stringify(call)).join("\n")}\n`;
 		await appendFile(path, lines, "utf-8");
 	}
 
@@ -91,8 +92,16 @@ export class TelemetryWriter {
 
 		const content = await readFile(path, "utf-8");
 		const lines = content.trim().split("\n").filter(Boolean);
+		const sessions: Array<Session | SessionFull> = [];
+		for (const line of lines) {
+			try {
+				sessions.push(JSON.parse(line) as Session | SessionFull);
+			} catch (error) {
+				logDebug(`Skipping invalid telemetry session line: ${error}`);
+			}
+		}
 
-		return lines.map((line) => JSON.parse(line) as Session | SessionFull);
+		return sessions;
 	}
 
 	/**
@@ -107,8 +116,16 @@ export class TelemetryWriter {
 
 		const content = await readFile(path, "utf-8");
 		const lines = content.trim().split("\n").filter(Boolean);
+		const toolCalls: ToolCall[] = [];
+		for (const line of lines) {
+			try {
+				toolCalls.push(JSON.parse(line) as ToolCall);
+			} catch (error) {
+				logDebug(`Skipping invalid telemetry tool-call line: ${error}`);
+			}
+		}
 
-		return lines.map((line) => JSON.parse(line) as ToolCall);
+		return toolCalls;
 	}
 
 	/**
diff --git a/cli/src/utils/ai-output-parser.ts b/cli/src/utils/ai-output-parser.ts
new file mode 100644
index 00000000..a5e6b5fc
--- /dev/null
+++ b/cli/src/utils/ai-output-parser.ts
@@ -0,0 +1,155 @@
+import { parseJsonLine, TextSchema, ToolUseSchema } from "./json-validation.ts";
+
+export interface ParsedStep {
+	thought?: string;
+	tool?: string;
+	toolArgs?: string;
+	reading?: string;
+	writing?: string;
+	running?: string;
+	executed?: string;
+	raw?: string;
+}
+
+/**
+ * Parse AI output step and extract structured information
+ */
+export function parseAIStep(step: string): ParsedStep {
+	const parsed: ParsedStep = { raw: step };
+
+	// Try to parse as JSON first
+	const result = parseJsonLine(step);
+	if (result) {
+		const { event, remaining } = result;
+
+		// If there's remaining text, treat it as a thought/progress message
+		if (remaining) {
+			parsed.thought = remaining;
+		}
+
+		// Extract text/response content
+		const textResult = TextSchema.safeParse(event);
+		if (textResult.success) {
+			const textEvent = textResult.data;
+			if (textEvent.part?.text) {
+				const text = textEvent.part.text;
+
+				// Try to categorize based on content patterns
+				if (text.startsWith("[thinking") || text.startsWith("Thinking")) {
+					parsed.thought = text;
+				} else if (text.startsWith("Running") || text.startsWith("Executing")) {
+					parsed.running = text;
+				} else if (text.startsWith("Reading") || text.startsWith("Examining")) {
+					parsed.reading = text;
+				} else if (text.startsWith("Writing") || text.startsWith("Creating") || text.startsWith("Updating")) {
+					parsed.writing = text;
+				} else if (text.startsWith("Executed") || text.startsWith("Finished")) {
+					parsed.executed = text;
+				} else {
+					// Default: if it's text content, treat as general progress
+					parsed.thought = text;
+				}
+			}
+		}
+
+		// Extract tool use content
+		const toolUseResult = ToolUseSchema.safeParse(event);
+		if (toolUseResult.success) {
+			const toolUse = toolUseResult.data;
+			const toolName = toolUse.tool || toolUse.part?.tool || "";
+			const toolInput = toolUse.part?.state?.input;
+
+			if (toolName) {
+				parsed.tool = toolName;
+
+				// Extract meaningful summary of arguments
+				let argSummary = "";
+				if (toolInput) {
+					if (typeof toolInput === "string") {
+						argSummary = toolInput;
+					} else if (typeof toolInput === "object" && toolInput !== null && !Array.isArray(toolInput)) {
+						// Heuristics for common tools
+						const input = toolInput as Record<string, unknown>;
+						argSummary =
+							String(input.pattern || "") ||
+							String(input.path || "") ||
+							String(input.filePath || "") ||
+							String(input.command || "") ||
+							JSON.stringify(toolInput);
+					}
+				}
+
+				parsed.toolArgs = argSummary;
+
+				// Map specific tools to display categories with refined messages
+				const lowerTool = toolName.toLowerCase();
+				const shortArgs = argSummary;
+
+				if (lowerTool === "read") {
+					parsed.reading = `Read: ${shortArgs} `;
+				} else if (lowerTool === "glob") {
+					parsed.reading = `Glob: ${shortArgs} `;
+				} else if (lowerTool === "grep") {
+					parsed.reading = `Grep: ${shortArgs} `;
+				} else if (lowerTool === "ls") {
+					parsed.reading = `List: ${shortArgs} `;
+				} else if (lowerTool === "write" || lowerTool === "edit" || lowerTool === "create") {
+					parsed.writing = `Write: ${shortArgs} `;
+				} else if (lowerTool === "run" || lowerTool === "execute" || lowerTool === "terminal") {
+					parsed.running = `Run: ${shortArgs} `;
+				} else {
+					// Catch-all for other tools
+					parsed.tool = `${toolName}: ${shortArgs} `;
+				}
+			}
+		}
+	}
+
+	return parsed;
+}
+
+/**
+ * Format parsed step for display
+ */
+export function formatParsedStep(step: ParsedStep, agentNum?: number): string | null {
+	const prefix = agentNum !== undefined ? `Agent ${agentNum}: ` : "";
+
+	// Prioritize concrete actions over generic thoughts
+	if (step.writing) {
+		return `${prefix}${step.writing} `;
+	}
+
+	if (step.reading) {
+		return `${prefix}${step.reading} `;
+	}
+
+	if (step.running) {
+		return `${prefix}${step.running} `;
+	}
+
+	if (step.thought) {
+		// Clean up common "Thinking:" prefixes since we wrap in {} later
+		const cleanedThought = step.thought.replace(/^(Thinking|Analyzing|Considering|Warning|Waiting)[:\s]*/i, "").trim();
+		return `${prefix}${cleanedThought} `;
+	}
+
+	if (step.executed) {
+		return `${prefix} Done: ${step.executed.substring(0, 100)} `;
+	}
+
+	if (step.tool) {
+		return `${prefix} Tool: ${step.tool} `;
+	}
+
+	// If raw but couldn't parse, truncate and show
+	if (step.raw && step.raw.length > 0) {
+		const trimmed = step.raw.trim();
+		if (trimmed.startsWith("{") || trimmed.startsWith('"')) {
+			// It's JSON we couldn't parse, skip it
+			return null;
+		}
+		return `${prefix}${trimmed.substring(0, 100)} `;
+	}
+
+	return null;
+}
diff --git a/cli/src/utils/cleanup.ts b/cli/src/utils/cleanup.ts
new file mode 100644
index 00000000..211458cb
--- /dev/null
+++ b/cli/src/utils/cleanup.ts
@@ -0,0 +1,157 @@
+import type { ChildProcess } from "node:child_process";
+import { spawnSync } from "node:child_process";
+import { logDebug, logWarn } from "../ui/logger.ts";
+
+type CleanupFn = () => Promise<void> | void;
+
+const cleanupRegistry: Set<CleanupFn> = new Set();
+const trackedProcesses: Set<ChildProcess> = new Set();
+let isCleaningUp = false;
+
+function isProcessRunning(proc: ChildProcess): boolean {
+	return proc.exitCode === null && proc.signalCode === null;
+}
+
+/**
+ * Register a function to be called on process exit or manual cleanup
+ */
+export function registerCleanup(fn: CleanupFn): () => void {
+	cleanupRegistry.add(fn);
+	return () => cleanupRegistry.delete(fn);
+}
+
+/**
+ * Register a child process to be tracked and killed on exit
+ */
+export function registerProcess(proc: ChildProcess): () => void {
+	trackedProcesses.add(proc);
+
+	const remove = () => trackedProcesses.delete(proc);
+
+	proc.on("exit", remove);
+	proc.on("error", remove);
+
+	return remove;
+}
+
+/**
+ * Run all registered cleanup functions and kill tracked processes
+ */
+export async function runCleanup(): Promise<void> {
+	if (isCleaningUp) return;
+	isCleaningUp = true;
+
+	// 1. Kill all tracked child processes with verification
+	for (const proc of trackedProcesses) {
+		try {
+			if (proc.pid && isProcessRunning(proc)) {
+				const pid = proc.pid;
+
+				if (process.platform === "win32") {
+					// Windows needs taskkill for robust child tree termination
+					const result = spawnSync("taskkill", ["/pid", String(pid), "/f", "/t"], {
+						stdio: "pipe",
+					});
+
+					// Verify the process was actually killed
+					// Status 128 = process already exited, which is fine
+					if (result.status !== 0 && result.status !== 128) {
+						logWarn(`taskkill may have failed for PID ${pid} (exit code: ${result.status})`);
+						if (result.stderr) {
+							logDebug(`taskkill stderr: ${result.stderr.toString()}`);
+						}
+					}
+
+					await new Promise((resolve) => setTimeout(resolve, 500));
+					if (isProcessRunning(proc)) {
+						logWarn(`Process ${pid} may still be running after taskkill`);
+					}
+				} else {
+					// Try graceful termination first
+					proc.kill("SIGTERM");
+
+					// Wait a bit and verify it's dead
+					await new Promise((resolve) => setTimeout(resolve, 1000));
+
+					// Check if process is still running
+					if (isProcessRunning(proc)) {
+						proc.kill("SIGKILL");
+
+						// Final verification
+						await new Promise((resolve) => setTimeout(resolve, 500));
+						if (isProcessRunning(proc)) {
+							logWarn(`Failed to terminate process ${pid} after SIGKILL`);
+						}
+					}
+				}
+			}
+		} catch (err) {
+			// Process termination failed, continue cleanup
+			logDebug(`Failed to terminate process ${proc.pid}: ${err}`);
+		}
+	}
+	trackedProcesses.clear();
+
+	// 2. Run registered cleanup functions
+	const promises: Promise<void>[] = [];
+	for (const fn of cleanupRegistry) {
+		try {
+			const result = fn();
+			if (result instanceof Promise) {
+				promises.push(result);
+			}
+		} catch (err) {
+			// Log sync errors but continue with other cleanup functions
+			promises.push(Promise.reject(err));
+		}
+	}
+
+	const results = await Promise.allSettled(promises);
+	for (const result of results) {
+		if (result.status === "rejected") {
+			logWarn(`Cleanup task failed: ${result.reason}`);
+		}
+	}
+	cleanupRegistry.clear();
+	isCleaningUp = false;
+}
+
+let isShuttingDown = false;
+let handlersRegistered = false;
+
+/**
+ * Setup process signal handlers for cleanup
+ */
+export function setupSignalHandlers(): void {
+	if (handlersRegistered) {
+		return;
+	}
+	handlersRegistered = true;
+
+	const signals: NodeJS.Signals[] = ["SIGINT", "SIGTERM"];
+
+	for (const signal of signals) {
+		process.on(signal, async () => {
+			// Prevent duplicate cleanup runs
+			if (isShuttingDown) {
+				process.stdout.write(`\nReceived ${signal}, cleanup already in progress...\n`);
+				return;
+			}
+			isShuttingDown = true;
+
+			// Use writeSync to avoid event loop issues during exit
+			process.stdout.write(`\nReceived ${signal}, cleaning up processes and files...\n`);
+
+			try {
+				await runCleanup();
+				process.exit(0);
+			} catch (error) {
+				process.stderr.write(`\nCleanup failed: ${error}\n`);
+				process.exit(1);
+			}
+		});
+	}
+
+	// Note: uncaughtException is handled in cli/src/index.ts for the main process
+	// This avoids duplicate handlers and ensures consistent error handling
+}
diff --git a/cli/src/utils/json-validation.ts b/cli/src/utils/json-validation.ts
new file mode 100644
index 00000000..e5a069dc
--- /dev/null
+++ b/cli/src/utils/json-validation.ts
@@ -0,0 +1,179 @@
+import { z } from "zod";
+
+export const StepFinishSchema = z.object({
+	type: z.literal("step_finish"),
+	part: z
+		.object({
+			tokens: z
+				.object({
+					input: z.number().optional(),
+					output: z.number().optional(),
+				})
+				.optional(),
+			input: z.number().optional(),
+			output: z.number().optional(),
+			cost: z.number().optional(),
+		})
+		.optional(),
+	tokens: z
+		.object({
+			input: z.number().optional(),
+			output: z.number().optional(),
+		})
+		.optional(),
+	cost: z.number().optional(),
+	// Session ID fields (various naming conventions)
+	sessionID: z.string().optional(),
+	sessionId: z.string().optional(),
+	session_id: z.string().optional(),
+});
+
+export const StepStartSchema = z.object({
+	type: z.literal("step_start"),
+});
+
+export const TextSchema = z.object({
+	type: z.literal("text"),
+	part: z.object({
+		text: z.string(),
+	}),
+});
+
+export const ErrorSchema = z.object({
+	type: z.literal("error"),
+	error: z
+		.object({
+			message: z.string().optional(),
+		})
+		.optional(),
+	message: z.string().optional(),
+});
+
+export const ResultSchema = z.object({
+	type: z.literal("result"),
+	result: z.string().optional(),
+	usage: z
+		.object({
+			input_tokens: z.number().optional(),
+			output_tokens: z.number().optional(),
+		})
+		.optional(),
+});
+
+export const ToolUseSchema = z.object({
+	type: z.literal("tool_use"),
+	part: z
+		.object({
+			tool: z.string().optional(),
+			state: z
+				.object({
+					input: z
+						.union([z.string(), z.number(), z.boolean(), z.object({}).passthrough(), z.array(z.unknown())])
+						.optional(),
+					status: z.string().optional(),
+				})
+				.optional(),
+		})
+		.optional(),
+	tool: z.string().optional(),
+	callID: z.string().optional(),
+});
+
+export const StreamJsonEventSchema = z.union([
+	StepFinishSchema,
+	StepStartSchema,
+	TextSchema,
+	ErrorSchema,
+	ResultSchema,
+	ToolUseSchema,
+]);
+
+export type StreamJsonEvent = z.infer<typeof StreamJsonEventSchema>;
+export type StepFinish = z.infer<typeof StepFinishSchema>;
+export type TextEvent = z.infer<typeof TextSchema>;
+export type ToolUseEvent = z.infer<typeof ToolUseSchema>;
+
+/**
+ * Safely parse a JSON line with schema validation
+ * Returns null if parsing fails or schema is invalid
+ *
+ * This function handles:
+ * - Complete JSON objects
+ * - JSON followed by additional text (splits at proper object boundary)
+ * - Truncated JSON (attempts to recover)
+ * - Special characters in strings (properly handles escape sequences)
+ */
+export function parseJsonLine(line: string): { event: StreamJsonEvent; remaining?: string } | null {
+	try {
+		const trimmed = line.trim();
+		if (!trimmed) return null;
+
+		// Handle cases where JSON is followed by text without a newline
+		// Search for the end of the JSON object
+		let jsonStr = trimmed;
+		let remaining: string | undefined;
+
+		if (trimmed.startsWith("{")) {
+			let depth = 0;
+			let inString = false;
+			let isEscaped = false;
+			let jsonEndIndex = -1;
+
+			for (let i = 0; i < trimmed.length; i++) {
+				const char = trimmed[i];
+				if (isEscaped) {
+					isEscaped = false;
+					continue;
+				}
+				if (char === "\\") {
+					isEscaped = true;
+					continue;
+				}
+				if (char === '"' && !isEscaped) {
+					inString = !inString;
+					continue;
+				}
+				if (!inString) {
+					if (char === "{") depth++;
+					if (char === "}") {
+						depth--;
+						if (depth === 0) {
+							jsonEndIndex = i;
+							break;
+						}
+					}
+				}
+			}
+
+			// If we found a complete JSON object, split it from any remaining text
+			if (jsonEndIndex >= 0) {
+				jsonStr = trimmed.substring(0, jsonEndIndex + 1);
+				remaining = trimmed.substring(jsonEndIndex + 1).trim();
+			}
+			// If we didn't find a complete object but started with '{',
+			// it might be truncated - still try to parse what we have
+		}
+
+		const parsed = JSON.parse(jsonStr);
+		const event = StreamJsonEventSchema.parse(parsed);
+		return { event, remaining: remaining || undefined };
+	} catch {
+		return null;
+	}
+}
+
+/**
+ * Extract session ID from a parsed JSON event
+ */
+export function extractSessionId(event: StreamJsonEvent): string | null {
+	if ("sessionID" in event && typeof event.sessionID === "string") {
+		return event.sessionID;
+	}
+	if ("sessionId" in event && typeof event.sessionId === "string") {
+		return event.sessionId;
+	}
+	if ("session_id" in event && typeof event.session_id === "string") {
+		return event.session_id;
+	}
+	return null;
+}