diff --git a/cli/README.md b/cli/README.md index e4a2e199..ea770950 100644 --- a/cli/README.md +++ b/cli/README.md @@ -198,6 +198,18 @@ tasks: parallel_group: 2 # runs after group 1 ``` +## Planning and test orchestration + +Parallel and sequential runs now use explicit task state transitions (`pending`, `running`, `completed`, `failed`, `deferred`, `skipped`) to avoid duplicate work and race conditions. + +Planning behavior: +- The planner can generate structured analysis (`ANALYSIS`, `PLAN`, `FILES`, `OPTIMIZATION`) before execution. +- Planned file lists are cached and reused when repo fingerprints match. + +Testing behavior: +- Test-heavy tasks can run through an orchestrated test loop with stricter retry/defer handling. +- Locking/state checks prevent multiple agents from claiming the same task at once. + ## Branch Workflow ```bash diff --git a/cli/__tests__/locking-security.test.ts b/cli/__tests__/locking-security.test.ts new file mode 100644 index 00000000..c622c950 --- /dev/null +++ b/cli/__tests__/locking-security.test.ts @@ -0,0 +1,254 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { createHash } from "node:crypto"; +import { existsSync, mkdirSync, readdirSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import process from "node:process"; + +import { LOCK_DIR } from "../src/config/constants.ts"; +import { + acquireFileLock, + acquireLocksForFiles, + cleanupStaleLocks, + normalizePathForLocking, + releaseFileLock, +} from "../src/execution/locking.ts"; + +const TEST_BASE = join(tmpdir(), "ralphy-locking-test"); + +describe("Lock Management Security and Reliability Tests", () => { + beforeEach(() => { + // Clean up any existing test directory + if (existsSync(TEST_BASE)) { + rmSync(TEST_BASE, { recursive: true, force: true }); + } + mkdirSync(TEST_BASE, { recursive: true }); + }); + + afterEach(() => { + // Clean up test directory + if 
(existsSync(TEST_BASE)) { + rmSync(TEST_BASE, { recursive: true, force: true }); + } + // Clean up any stale locks + cleanupStaleLocks(); + }); + + describe("Lock Acquisition Security Tests", () => { + it("should reject concurrent access from different owner", async () => { + const testFile = join(TEST_BASE, "test.txt"); + writeFileSync(testFile, "test content"); + + // Manually create a lock valid for another process + const hash = createHash("sha256") + .update(normalizePathForLocking(testFile, TEST_BASE)) + .digest("hex"); + const lockDir = join(TEST_BASE, LOCK_DIR); + mkdirSync(lockDir, { recursive: true }); + const lockPath = join(lockDir, `${hash}.lock`); + + writeFileSync( + lockPath, + JSON.stringify({ + timestamp: Date.now(), + timeout: 30000, + owner: "other-process-123", + refreshCount: 0, + }), + ); + + // Verify lock was created + if (!existsSync(lockPath)) { + console.warn(`Test setup failed: Lock file not created at ${lockPath}`); + } + + // Try to acquire lock (should fail as it's owned by "other-process") + const lockResult = acquireFileLock(testFile, TEST_BASE); + expect(lockResult).toBe(false); + + // Cleanup + releaseFileLock(testFile, TEST_BASE); // This might fail to delete others cert, but we clean up directory anyway + }); + + it("should allow re-entrant access for same owner", async () => { + const testFile = join(TEST_BASE, "reentrant.txt"); + writeFileSync(testFile, "test content"); + + // Acquire lock first time + const lock1 = acquireFileLock(testFile, TEST_BASE); + expect(lock1).toBe(true); + + // Acquire same lock again (re-entrant) + const lock2 = acquireFileLock(testFile, TEST_BASE, 5, true); + expect(lock2).toBe(true); + }); + + // ... 
+ + it("should rollback on partial failure", () => { + const testFiles = [ + join(TEST_BASE, "test1.txt"), + join(TEST_BASE, "test2.txt"), + join(TEST_BASE, "test3.txt"), + ]; + + // Create test files + for (const file of testFiles) { + writeFileSync(file, "test content"); + } + + // Block the second file with a lock from another process + const file2 = testFiles[1]; + const hash = createHash("sha256") + .update(normalizePathForLocking(file2, TEST_BASE)) + .digest("hex"); + const lockDir = join(TEST_BASE, LOCK_DIR); + mkdirSync(lockDir, { recursive: true }); + const lockPath = join(lockDir, `${hash}.lock`); + writeFileSync( + lockPath, + JSON.stringify({ + timestamp: Date.now(), + timeout: 30000, + owner: "other-process-999", + refreshCount: 0, + }), + ); + + // Try to acquire all locks (should fail because of file2) + const success = acquireLocksForFiles(testFiles, TEST_BASE); + expect(success).toBe(false); + + // Should NOT hold locks for 1 and 3 (rollback) + // But wait, acquireLocksForFiles releases locks it ACQUIRED. It didn't acquire file2. + // It acquired file1. So file1 should be released. + // But we can check if we can acquire them now? + // If they were held, we wouldn't be able to acquire them IF we weren't re-entrant. + // Since we are re-entrant, we can always acquire them if we own them. + // So we need to check if the LOCK FILE exists? + // Verify lock for file1 is gone? 
+ + const hash1 = createHash("sha256") + .update(normalizePathForLocking(testFiles[0], TEST_BASE)) + .digest("hex"); + const lockPath1 = join(lockDir, `${hash1}.lock`); + expect(existsSync(lockPath1)).toBe(false); + }); + }); + + describe("Path Normalization Security Tests", () => { + it("should normalize paths consistently", () => { + const paths = [ + "test.txt", + "./test.txt", + "test/../test.txt", + "test\\file.txt", + "test/file.txt", + ]; + + const normalizedPaths = paths.map((path) => normalizePathForLocking(path, TEST_BASE)); + + // All should be resolved to absolute paths within TEST_BASE + for (const path of normalizedPaths) { + const expectedBase = process.platform === "win32" ? TEST_BASE.toLowerCase() : TEST_BASE; + expect(path).toContain(expectedBase); + expect(path).not.toContain(".."); + } + }); + + it("should handle cross-platform paths", () => { + const windowsPath = "src\\components\\Button.tsx"; + const unixPath = "src/components/Button.tsx"; + + const normalizedWindows = normalizePathForLocking(windowsPath, TEST_BASE); + const normalizedUnix = normalizePathForLocking(unixPath, TEST_BASE); + + // Should resolve to same structure + expect(normalizedWindows).toContain("components"); + expect(normalizedUnix).toContain("components"); + }); + }); + + describe("Lock File Integrity Tests", () => { + it("should create lock files with proper permissions", () => { + const testFile = join(TEST_BASE, "permissions.txt"); + writeFileSync(testFile, "test content"); + + const success = acquireFileLock(testFile, TEST_BASE); + expect(success).toBe(true); + + // Lock file should exist + const lockDir = join(TEST_BASE, LOCK_DIR); + const lockFiles: string[] = []; + try { + if (existsSync(lockDir)) { + lockFiles.push(...readdirSync(lockDir)); + } + } catch { + console.warn("Could not check lock files"); + } + + // Should not allow unlimited locks + expect(lockFiles.length).toBeLessThan(5050); // Some limit should be enforced + releaseFileLock(testFile, TEST_BASE); + 
}); + + it("should handle lock file corruption gracefully", () => { + const testFile = join(TEST_BASE, "corrupt.txt"); + writeFileSync(testFile, "test content"); + + // Create corrupted lock file + const lockDir = join(TEST_BASE, LOCK_DIR); + mkdirSync(lockDir, { recursive: true }); + const lockFile = join(lockDir, "corrupt.lock"); + writeFileSync(lockFile, "invalid json content"); + + // Should still work (fallback to corrupted file handling) + const success = acquireFileLock(testFile, TEST_BASE); + expect(success).toBe(true); + + releaseFileLock(testFile, TEST_BASE); + }); + }); + + describe("Cleanup and Maintenance Tests", () => { + it("should clean up expired locks", () => { + const testFile = join(TEST_BASE, "cleanup.txt"); + writeFileSync(testFile, "test content"); + + // Acquire lock + const success = acquireFileLock(testFile, TEST_BASE); + expect(success).toBe(true); + + // Simulate time passing + const originalNow = Date.now; + const mockDateNow = () => originalNow() + 61000; // 61 seconds in future (to trigger LOCK_CLEANUP_INTERVAL_MS) + + // Mock Date.now for cleanup function + const originalDateNow = Date.now; + Date.now = mockDateNow; + + cleanupStaleLocks(); + + // Should be able to acquire lock again (old one cleaned up) + // Trigger cleanup by keeping the time in the future so acquireFileLock triggers internal cleanup + const lock2 = acquireFileLock(testFile, TEST_BASE); + expect(lock2).toBe(true); + + // Restore Date.now + Date.now = originalDateNow; + }); + + it("should handle lock cleanup errors", () => { + const testFile = join(TEST_BASE, "cleanup-error.txt"); + writeFileSync(testFile, "test content"); + + // Acquire lock + const success = acquireFileLock(testFile, TEST_BASE); + expect(success).toBe(true); + + // Cleanup should not throw + expect(() => cleanupStaleLocks()).not.toThrow(); + }); + }); +}); diff --git a/cli/__tests__/locking.test.ts b/cli/__tests__/locking.test.ts new file mode 100644 index 00000000..fb3f6cd8 --- /dev/null +++ 
b/cli/__tests__/locking.test.ts @@ -0,0 +1,136 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { existsSync, mkdirSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { + acquireFileLock, + acquireLocksForFiles, + normalizePathForLocking, + releaseFileLock, +} from "../src/execution/locking"; + +describe("Locking System", () => { + let testDir: string; + let workDir: string; + + beforeEach(() => { + testDir = join(tmpdir(), `ralphy-test-${Date.now()}`); + workDir = testDir; + mkdirSync(testDir, { recursive: true }); + }); + + afterEach(() => { + if (existsSync(testDir)) { + rmSync(testDir, { recursive: true, force: true }); + } + }); + + describe("Path Normalization", () => { + it("should normalize paths correctly", () => { + const normalized = normalizePathForLocking("./test/file.txt", workDir); + expect(normalized).toBeTruthy(); + expect(normalized).toContain("test"); + }); + + it("should handle absolute paths", () => { + const normalized = normalizePathForLocking(`${workDir}/test.txt`, workDir); + expect(normalized).toContain("test.txt"); + }); + }); + + describe("Lock Acquisition", () => { + it("should acquire a lock successfully", () => { + const result = acquireFileLock(join(workDir, "test.txt"), workDir); + expect(result).toBe(true); + }); + + it("should allow re-entrant access for same owner", () => { + const testFile = join(workDir, "test.txt"); + const lock1 = acquireFileLock(testFile, workDir); + expect(lock1).toBe(true); + + const lock2 = acquireFileLock(testFile, workDir, 5, true); + expect(lock2).toBe(true); + }); + + it("should create lock file in correct location", () => { + const testFile = join(workDir, "test.txt"); + acquireFileLock(testFile, workDir); + + const lockDir = join(workDir, ".ralphy", "locks"); + const lockFiles = existsSync(lockDir); + expect(lockFiles).toBe(true); + }); + }); + + describe("Lock Release", () => { + it("should release a lock 
successfully", () => { + const testFile = join(workDir, "test.txt"); + acquireFileLock(testFile, workDir); + releaseFileLock(testFile, workDir); + + // Should be able to acquire lock again + const result = acquireFileLock(testFile, workDir); + expect(result).toBe(true); + }); + }); + + describe("Multiple Locks", () => { + it("should acquire multiple locks for different files", () => { + const files = [ + join(workDir, "file1.txt"), + join(workDir, "file2.txt"), + join(workDir, "file3.txt"), + ]; + + const result = acquireLocksForFiles(files, workDir); + expect(result).toBe(true); + + for (const file of files) { + releaseFileLock(file, workDir); + } + }); + + it("should fail if any file is already locked", () => { + const file1 = join(workDir, "file1.txt"); + const file2 = join(workDir, "file2.txt"); + + acquireFileLock(file1, workDir); + + const files = [file1, file2]; + const result = acquireLocksForFiles(files, workDir); + expect(result).toBe(false); + }); + + it("should rollback all locks if acquisition fails", () => { + const file1 = join(workDir, "file1.txt"); + const file2 = join(workDir, "file2.txt"); + + acquireFileLock(file1, workDir); + + const files = [file1, file2]; + const result = acquireLocksForFiles(files, workDir); + expect(result).toBe(false); + + // file1 should still be locked (not released by acquireLocksForFiles since it was pre-locked) + // We can re-acquire it with allowReentrant + const canReacquireFile1 = acquireFileLock(file1, workDir, 5, true); + expect(canReacquireFile1).toBe(true); + }); + }); + + describe("Lock File Security", () => { + it("should use hash-based lock filenames to prevent collisions", () => { + const file1 = join(workDir, "subdir", "file.txt"); + const file2 = join(workDir, "otherdir", "file.txt"); + + acquireFileLock(file1, workDir); + acquireFileLock(file2, workDir); + + // Different files should have different lock files + // (if they had collision, second acquisition would fail) + expect(acquireFileLock(file1, 
workDir)).toBe(false); + expect(acquireFileLock(file2, workDir)).toBe(false); + }); + }); +}); diff --git a/cli/__tests__/orchestrator.test.ts b/cli/__tests__/orchestrator.test.ts new file mode 100644 index 00000000..9e09514a --- /dev/null +++ b/cli/__tests__/orchestrator.test.ts @@ -0,0 +1,335 @@ +import { describe, expect, test } from "bun:test"; +import { executeWithOrchestrator, shouldUseOrchestrator } from "../src/execution/orchestrator.ts"; +import type { AIEngine, AIResult } from "../src/engines/types.ts"; + +// Mock engine for testing - tracks calls per instance +function createMockEngine(responses: AIResult[]): AIEngine & { getCallCount(): number } { + let callIndex = 0; + return { + name: "MockEngine", + cliCommand: "mock", + async isAvailable(): Promise { + return true; + }, + async execute(_prompt: string, _workDir: string, _options?: { modelOverride?: string }): Promise { + const response = responses[callIndex] ?? responses[responses.length - 1] ?? { success: false, response: "", inputTokens: 0, outputTokens: 0, error: "No responses" }; + callIndex++; + return response; + }, + getCallCount(): number { + return callIndex; + }, + }; +} + +describe("Orchestrator Pattern", () => { + describe("shouldUseOrchestrator", () => { + test("returns false when no testModel provided", () => { + expect(shouldUseOrchestrator("implement feature", "build something", undefined)).toBe(false); + }); + + test("returns true for test-related keywords", () => { + expect(shouldUseOrchestrator("add tests", "write test suite", "test-model")).toBe(true); + expect(shouldUseOrchestrator("fix jest tests", "debug failing specs", "test-model")).toBe(true); + }); + + test("returns true for implementation keywords", () => { + expect(shouldUseOrchestrator("implement login", "build auth system", "test-model")).toBe(true); + expect(shouldUseOrchestrator("create feature", "develop new module", "test-model")).toBe(true); + }); + + test("returns true for fix/debug keywords", () => { + 
expect(shouldUseOrchestrator("fix bug", "debug issue", "test-model")).toBe(true); + }); + }); + + describe("executeWithOrchestrator", () => { + test("successfully completes when tests pass", async () => { + const mainResponses: AIResult[] = [ + { + success: true, + response: "Implemented the feature successfully", + inputTokens: 100, + outputTokens: 50, + }, + ]; + const testResponses: AIResult[] = [ + { + success: true, + response: "All tests passed! ✓ 5 passed, 0 failed", + inputTokens: 50, + outputTokens: 25, + }, + ]; + + const mainEngine = createMockEngine(mainResponses); + const testEngine = createMockEngine(testResponses); + + const result = await executeWithOrchestrator( + "Implement a login feature", + { + mainEngine, + testEngine, + mainModel: "main-model", + testModel: "test-model", + workDir: "/tmp/test", + }, + ); + + expect(result.success).toBe(true); + expect(result.mainModelCalls).toBe(1); + expect(result.testModelCalls).toBe(1); + expect(result.iterations).toBe(1); + expect(result.response).toContain("Implemented the feature"); + expect(result.response).toContain("All tests passed"); + }); + + test("delegates to test model and requests fixes when tests fail", async () => { + const mainResponses: AIResult[] = [ + { + success: true, + response: "Initial implementation with bug", + inputTokens: 100, + outputTokens: 50, + }, + { + success: true, + response: "Fixed implementation, bug resolved", + inputTokens: 100, + outputTokens: 50, + }, + ]; + const testResponses: AIResult[] = [ + { + success: true, + response: "Tests failed! 
✗ 2 failed\nError: null pointer exception", + inputTokens: 50, + outputTokens: 25, + }, + ]; + + const mainEngine = createMockEngine(mainResponses); + const testEngine = createMockEngine(testResponses); + + const result = await executeWithOrchestrator( + "Implement feature", + { + mainEngine, + testEngine, + mainModel: "main-model", + testModel: "test-model", + workDir: "/tmp/test", + }, + ); + + expect(result.success).toBe(true); + // Orchestrator runs: main -> test -> main(fix) = 2 main, 1 test + expect(result.mainModelCalls).toBe(2); + expect(result.testModelCalls).toBe(1); + expect(result.response).toContain("Fixed implementation"); + }); + + test("reports failure when main model fails", async () => { + const mainResponses: AIResult[] = [ + { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error: "API rate limit exceeded", + }, + ]; + + const mainEngine = createMockEngine(mainResponses); + const testEngine = createMockEngine([]); + + const result = await executeWithOrchestrator( + "Implement feature", + { + mainEngine, + testEngine, + mainModel: "main-model", + testModel: "test-model", + workDir: "/tmp/test", + }, + ); + + expect(result.success).toBe(false); + expect(result.mainModelCalls).toBe(1); + expect(result.testModelCalls).toBe(0); // Test model never called + expect(result.error).toContain("API rate limit exceeded"); + }); + + test("delegates to test model automatically without markers", async () => { + let testPromptReceived = ""; + const mainEngine = createMockEngine([ + { + success: true, + response: "Code implementation complete", + inputTokens: 100, + outputTokens: 50, + }, + ]); + const testEngine: AIEngine = { + name: "TestEngine", + cliCommand: "test", + async isAvailable(): Promise { + return true; + }, + async execute(prompt: string): Promise { + testPromptReceived = prompt; + return { + success: true, + response: "Tests verified implementation", + inputTokens: 50, + outputTokens: 25, + }; + }, + }; + + await 
executeWithOrchestrator( + "Build auth system", + { + mainEngine, + testEngine, + mainModel: "main-model", + testModel: "test-model", + workDir: "/tmp/test", + }, + ); + + // Verify test model was called with appropriate prompt + expect(testPromptReceived).toContain("test"); + expect(testPromptReceived).toContain("Code implementation complete"); + }); + + test("handles test model failure gracefully", async () => { + const mainEngine = createMockEngine([ + { + success: true, + response: "Implementation done", + inputTokens: 100, + outputTokens: 50, + }, + ]); + // Test engine that returns failure (not connection error - those retry) + const testEngine: AIEngine = { + name: "TestEngine", + cliCommand: "test", + async isAvailable(): Promise { + return true; + }, + async execute(): Promise { + return { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error: "Test execution failed: compilation error", + }; + }, + }; + + const result = await executeWithOrchestrator( + "Implement feature", + { + mainEngine, + testEngine, + mainModel: "main-model", + testModel: "test-model", + workDir: "/tmp/test", + }, + ); + + // Should still report success since main model succeeded + // Test failure is noted in results but not fatal + expect(result.success).toBe(true); + expect(result.testModelCalls).toBe(1); + expect(result.response).toContain("Test execution failed"); + }); + + test("works with opencode/kimi-k2.5-free model and delegates to test model", async () => { + let testPromptReceived = ""; + const mainEngine = createMockEngine([ + { + success: true, + response: "Created factorial function with TypeScript", + inputTokens: 150, + outputTokens: 80, + }, + ]); + const testEngine: AIEngine & { getCallCount(): number } = { + name: "TestEngine", + cliCommand: "test", + async isAvailable(): Promise { + return true; + }, + async execute(prompt: string): Promise { + testPromptReceived = prompt; + return { + success: true, + response: "TEST RESULTS:\n- Framework: 
jest\n- Command: npm test\n- Passed: 5\n- Failed: 0\n- Status: PASS", + inputTokens: 80, + outputTokens: 40, + }; + }, + getCallCount(): number { + return 1; + }, + }; + + const result = await executeWithOrchestrator( + "Create factorial function with tests", + { + mainEngine, + testEngine, + mainModel: "opencode/kimi-k2.5-free", + testModel: "opencode/gpt-5-nano", + workDir: "/tmp/test", + }, + ); + + expect(result.success).toBe(true); + expect(result.mainModelCalls).toBe(1); + expect(result.testModelCalls).toBe(1); + // Verify test model received the prompt with test instructions + expect(testPromptReceived).toContain("test runner"); + expect(testPromptReceived).toContain("npm test"); + expect(result.response).toContain("TEST RESULTS"); + }); + + test("progress callback receives updates", async () => { + const progressMessages: string[] = []; + const mainEngine = createMockEngine([ + { + success: true, + response: "Done", + inputTokens: 100, + outputTokens: 50, + }, + ]); + const testEngine = createMockEngine([ + { + success: true, + response: "Tests pass", + inputTokens: 50, + outputTokens: 25, + }, + ]); + + await executeWithOrchestrator( + "Test task", + { + mainEngine, + testEngine, + workDir: "/tmp/test", + }, + (msg) => progressMessages.push(msg), + ); + + expect(progressMessages.length).toBeGreaterThan(0); + expect(progressMessages.some((m) => m.includes("main model"))).toBe(true); + expect(progressMessages.some((m) => m.includes("test"))).toBe(true); + }); + }); +}); diff --git a/cli/src/cli/commands/run.ts b/cli/src/cli/commands/run.ts index e9fc95b7..6482f168 100644 --- a/cli/src/cli/commands/run.ts +++ b/cli/src/cli/commands/run.ts @@ -40,19 +40,19 @@ export async function runLoop(options: RuntimeOptions): Promise { if (!existsSync(options.prdFile)) { logError(`${options.prdFile} not found in current directory`); logInfo(`Create a ${options.prdFile} file with tasks`); - process.exit(1); + throw new Error(`PRD source not found: ${options.prdFile}`); } } 
else if (options.prdSource === "markdown-folder") { if (!existsSync(options.prdFile)) { logError(`PRD folder ${options.prdFile} not found`); logInfo(`Create a ${options.prdFile}/ folder with markdown files containing tasks`); - process.exit(1); + throw new Error(`PRD folder not found: ${options.prdFile}`); } } if (options.prdSource === "github" && !options.githubRepo) { logError("GitHub repository not specified. Use --github owner/repo"); - process.exit(1); + throw new Error("GitHub repository not specified"); } // Check engine availability @@ -61,7 +61,7 @@ export async function runLoop(options: RuntimeOptions): Promise { if (!available) { logError(`${engine.name} CLI not found. Make sure '${engine.cliCommand}' is in your PATH.`); - process.exit(1); + throw new Error(`${engine.name} CLI not available`); } // Create task source with caching for better performance @@ -91,7 +91,7 @@ export async function runLoop(options: RuntimeOptions): Promise { logError("Cannot run in parallel/branch mode: repository has no commits yet."); logInfo("Please make an initial commit first:"); logInfo(' git add . 
&& git commit -m "Initial commit"'); - process.exit(1); + throw new Error("Repository has no commits yet"); } } @@ -195,6 +195,6 @@ export async function runLoop(options: RuntimeOptions): Promise { } if (result.tasksFailed > 0) { - process.exit(1); + throw new Error(`${result.tasksFailed} task(s) failed`); } } diff --git a/cli/src/cli/commands/task.ts b/cli/src/cli/commands/task.ts index 312977e7..f34d633c 100644 --- a/cli/src/cli/commands/task.ts +++ b/cli/src/cli/commands/task.ts @@ -28,7 +28,7 @@ export async function runTask(task: string, options: RuntimeOptions): Promise(); +const lockOwner = `${process.pid.toString()}-${Date.now()}`; +const sleepBuffer = new SharedArrayBuffer(4); +const sleepArray = new Int32Array(sleepBuffer); +function sleepBlocking(ms: number): void { + if (ms <= 0) return; + + if (typeof Bun !== "undefined" && Bun.sleepSync) { + Bun.sleepSync(ms); + return; + } + + try { + // Node runtime fallback. If unavailable in current runtime/thread, skip blocking delay. + Atomics.wait(sleepArray, 0, 0, ms); + } catch { + // No-op fallback. 
+ } +} + +function refreshLock(normalizedPath: string, workDir: string): void { + const lockInfo = locks.get(normalizedPath); + if (!lockInfo) return; + + const updatedLockInfo: LockInfo = { + ...lockInfo, + timestamp: Date.now(), + refreshCount: lockInfo.refreshCount + 1, + }; + + // Update lock file on disk + const lockFile = getLockFilePath(normalizedPath, workDir); + try { + writeFileSync(lockFile, JSON.stringify(updatedLockInfo)); + locks.set(normalizedPath, updatedLockInfo); + } catch (err) { + logDebug(`Failed to refresh lock ${normalizedPath}: ${err}`); + } +} + +// Define global state interface for type safety +declare global { + interface RalphyGlobalState { + _lockState?: { + _lastLockCleanup?: number; + }; + verboseMode?: boolean; + } +} + +// Register for global cleanup +registerCleanup(() => { + locks.clear(); +}); + +function getLockFilePath(normalizedPath: string, workDir: string): string { + const hash = createHash("sha256").update(normalizedPath).digest("hex"); + const lockDir = join(workDir, LOCK_DIR); + return join(lockDir, `${hash}.lock`); +} + +function ensureLockDir(workDir: string): void { + const lockDir = join(workDir, LOCK_DIR); + try { + mkdirSync(lockDir, { recursive: true }); + } catch (err) { + // Directory may already exist, that's OK + if ((err as NodeJS.ErrnoException).code !== "EEXIST") { + throw err; + } + } +} + +function cleanupStaleLockFiles(workDir: string): void { + const lockDir = join(workDir, LOCK_DIR); + if (!existsSync(lockDir)) return; + + const files = readdirSync(lockDir); + const now = Date.now(); + + for (const file of files) { + if (!file.endsWith(".lock")) continue; + const filePath = join(lockDir, file); + try { + const content = readFileSync(filePath, "utf8"); + const lockInfo: LockInfo = JSON.parse(content); + if (now - lockInfo.timestamp >= lockInfo.timeout) { + try { + unlinkSync(filePath); + } catch { + // Best-effort cleanup: lock may be removed by another process. 
+ } + } + } catch { + try { + unlinkSync(filePath); + } catch { + // Best-effort cleanup: lock may be removed by another process. + } + } + } +} + +export function normalizePathForLocking(filePath: string, workDir: string): string { + // Resolve to absolute path first + const absolutePath = resolve(workDir, filePath); + + // Normalize path separators and resolve .. etc. + const normalized = normalize(absolutePath); + + // On Windows, convert to lowercase for case-insensitive comparison + if (process.platform === "win32") { + return normalized.toLowerCase(); + } + + return normalized; +} + +export function isInRalphyDir(filePath: string): boolean { + return filePath.includes(".ralphy") || filePath.includes(".ralphy-worktrees"); +} + +function getGlobalLockState(): NonNullable { + if (!(globalThis as RalphyGlobalState)._lockState) { + (globalThis as RalphyGlobalState)._lockState = { _lastLockCleanup: 0 }; + } + // biome-ignore lint/style/noNonNullAssertion: guaranteed to be set above + return (globalThis as RalphyGlobalState)._lockState!; +} + +export function acquireFileLock( + filePath: string, + workDir: string, + maxRetries = 5, + allowReentrant = false, +): boolean { + const normalizedPath = normalizePathForLocking(filePath, workDir); + const now = Date.now(); + + // CRITICAL FIX: Check in-memory lock FIRST before any file operations + // This handles re-entrant locks without file I/O + const existing = locks.get(normalizedPath); + if (existing && now - existing.timestamp < existing.timeout) { + if (existing.owner === lockOwner && allowReentrant) { + refreshLock(normalizedPath, workDir); + return true; + } + return false; // Someone else owns it + } + + ensureLockDir(workDir); + const lockState = getGlobalLockState(); + const lastCleanupTime = lockState._lastLockCleanup || 0; + + if (now - lastCleanupTime > LOCK_CLEANUP_INTERVAL_MS) { + cleanupStaleLocks(); + cleanupStaleLockFiles(workDir); + lockState._lastLockCleanup = now; + } + + const lockFile = 
getLockFilePath(normalizedPath, workDir); + + // Atomic lock acquisition using writeFileSync with exclusive flag + // This is the ONLY source of truth - in-memory cache is updated AFTER file succeeds + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + const lockInfo = { + timestamp: Date.now(), + timeout: LOCK_TIMEOUT_MS, + owner: lockOwner, + refreshCount: 0, + }; + + // CRITICAL: Use writeFileSync with 'wx' flag for atomic creation + // This is the race condition prevention - only one process can succeed + writeFileSync(lockFile, JSON.stringify(lockInfo), { flag: "wx" }); + + // ONLY update in-memory cache AFTER successful file write + // This ensures file is the source of truth + locks.set(normalizedPath, lockInfo); + + return true; + } catch (_error) { + const currentTime = Date.now(); + + // Check if we should retry based on lock file state + if (existsSync(lockFile)) { + try { + const content = readFileSync(lockFile, "utf8"); + + // Handle empty or corrupt lock file + if (!content || content.trim().length === 0) { + logDebug(`Lock file ${lockFile} is empty, removing`); + unlinkSync(lockFile); + continue; + } + + let fileLockInfo: unknown; + try { + fileLockInfo = JSON.parse(content); + } catch (parseError) { + logDebug(`Failed to parse lock file ${lockFile}: ${parseError}`); + unlinkSync(lockFile); + continue; + } + + // Validate lock info and check if stale + if ( + fileLockInfo && + typeof fileLockInfo === "object" && + "timestamp" in fileLockInfo && + typeof fileLockInfo.timestamp === "number" && + "timeout" in fileLockInfo && + typeof fileLockInfo.timeout === "number" + ) { + // Check if lock is stale + if (currentTime - fileLockInfo.timestamp >= fileLockInfo.timeout) { + logDebug(`Removing stale lock file ${lockFile}`); + unlinkSync(lockFile); + continue; // Retry after removing stale lock + } + + // Lock is valid and held by someone else + logDebug(`Lock file ${lockFile} is held by another process`); + + // Check if it's our own lock 
(file exists but memory doesn't have it) + // Use type assertion for owner/refreshCount which may not be in older lock files + const typedLockInfo = fileLockInfo as LockInfo; + if (typedLockInfo.owner === lockOwner && allowReentrant) { + logDebug(`Reclaiming our own lock ${lockFile}`); + // Reclaim the lock in memory + locks.set(normalizedPath, { + timestamp: typedLockInfo.timestamp, + timeout: typedLockInfo.timeout, + owner: typedLockInfo.owner, + refreshCount: typedLockInfo.refreshCount || 0, + }); + refreshLock(normalizedPath, workDir); + return true; + } + } + } catch (readError) { + logDebug(`Error reading lock file ${lockFile}: ${readError}`); + try { + unlinkSync(lockFile); + } catch (unlinkError) { + logDebug(`Failed to remove lock file ${lockFile}: ${unlinkError}`); + } + } + } + + // Exponential backoff with jitter - use non-blocking approach + if (attempt < maxRetries) { + const baseDelay = 2 ** attempt * 100; // 100, 200, 400, 800, 1600ms + // Use cryptographically secure random for jitter (not Math.random()) + const jitter = Number.parseInt(randomBytes(2).toString("hex"), 16) % 50; // 0-50ms jitter + const delay = Math.min(baseDelay + jitter, 5000); // Max 5 seconds + + logDebug( + `Lock acquisition attempt ${attempt}/${maxRetries} failed, retrying in ${Math.round(delay)}ms`, + ); + sleepBlocking(delay); + } + } + } + logDebug(`Failed to acquire lock after ${maxRetries} attempts: ${normalizedPath}`); + return false; +} + +export function releaseFileLock(filePath: string, workDir: string): void { + const normalizedPath = normalizePathForLocking(filePath, workDir); + const inMemory = locks.get(normalizedPath); + if (inMemory && inMemory.owner !== lockOwner) { + logDebug(`Skipping release of lock not owned by this process: ${normalizedPath}`); + return; + } + locks.delete(normalizedPath); + + // Remove persistent lock file + const lockFile = getLockFilePath(normalizedPath, workDir); + if (existsSync(lockFile)) { + try { + const content = 
readFileSync(lockFile, "utf8"); + const fileLock = JSON.parse(content) as Partial; + if (fileLock.owner && fileLock.owner !== lockOwner) { + logDebug(`Skipping delete of lock file owned by ${fileLock.owner}: ${lockFile}`); + return; + } + unlinkSync(lockFile); + } catch (err) { + logDebug(`Failed to delete lock file ${lockFile}: ${err}`); + } + } +} + +export function acquireLocksForFiles(files: string[], workDir: string): boolean { + // Remove duplicates by normalizing paths first + const fileMap = new Map(); + + for (const file of files) { + const normalizedPath = normalizePathForLocking(file, workDir); + if (!fileMap.has(normalizedPath)) { + fileMap.set(normalizedPath, file); + } + } + + const uniqueFiles = Array.from(fileMap.values()); + const acquiredThisAttempt: string[] = []; + + try { + for (const file of uniqueFiles) { + if (acquireFileLock(file, workDir)) { + acquiredThisAttempt.push(file); + } else { + // Rollback: release only locks acquired in THIS attempt + for (const acquiredFile of acquiredThisAttempt) { + releaseFileLock(acquiredFile, workDir); + } + return false; + } + } + return true; + } catch (err) { + // Rollback on error + for (const acquiredFile of acquiredThisAttempt) { + releaseFileLock(acquiredFile, workDir); + } + throw err; + } +} + +export function releaseLocksForFiles(files: string[], workDir: string): void { + for (const file of files) { + releaseFileLock(file, workDir); + } +} + +export function clearAllLocks(): void { + locks.clear(); +} + +export function getActiveLocks(): string[] { + return Array.from(locks.keys()); +} + +export function cleanupStaleLocks(): void { + const now = Date.now(); + const locksToEvict: string[] = []; + + // Remove expired locks first + for (const [path, lockInfo] of locks.entries()) { + if (now - lockInfo.timestamp > lockInfo.timeout) { + locksToEvict.push(path); + } + } + + // Notify before eviction + for (const path of locksToEvict) { + const lockInfo = locks.get(path); + if (lockInfo && 
lockInfo.owner !== lockOwner) { + logDebug(`Evicting lock owned by ${lockInfo.owner}: ${path}`); + } + locks.delete(path); + } + + // If still too many, remove oldest but check ownership + if (locks.size > LOCK_MAX_LOCKS) { + logWarn( + `Lock registry size (${locks.size}) exceeded ${LOCK_MAX_LOCKS}. Evicting oldest non-own locks.`, + ); + + const sorted = Array.from(locks.entries()).sort((a, b) => a[1].timestamp - b[1].timestamp); + + // Keep all locks owned by this process, evict oldest of others first + const others = sorted.filter(([_path, info]) => info.owner !== lockOwner); + const overflow = locks.size - LOCK_MAX_LOCKS; + const toEvictOthers = others.slice(0, Math.max(overflow, 0)); + + for (const [path] of toEvictOthers) { + logDebug(`Evicting lock from other process: ${path}`); + locks.delete(path); + } + } +} diff --git a/cli/src/execution/orchestrator.ts b/cli/src/execution/orchestrator.ts new file mode 100644 index 00000000..3299d8c4 --- /dev/null +++ b/cli/src/execution/orchestrator.ts @@ -0,0 +1,348 @@ +/** + * Simplified Orchestrator for Test Model Integration + * + * Automatically runs tests after main model completes, no special markers needed. + * Test model analyzes results and suggests fixes if tests fail. 
+ */ + +import type { AIEngine, AIResult } from "../engines/types.ts"; +import { logDebug, logError, logWarn } from "../ui/logger.ts"; +import { StaticAgentDisplay } from "../ui/static-agent-display.ts"; +import { + canMakeConnectionAttempt, + circuitBreaker, + sleep, + waitForConnectionRestore, +} from "./retry.ts"; + +const MAX_CONTEXT_CHARS = 12000; + +function truncateContext(mainOutput: string): string { + if (mainOutput.length <= MAX_CONTEXT_CHARS) { + return mainOutput; + } + + const omitted = mainOutput.length - MAX_CONTEXT_CHARS; + return `${mainOutput.slice(0, MAX_CONTEXT_CHARS)}\n\n[...output truncated, ${omitted} chars omitted...]`; +} + +export interface OrchestratorOptions { + mainEngine: AIEngine; + testEngine?: AIEngine; + mainModel?: string; + testModel?: string; + workDir: string; + maxIterations?: number; + debug?: boolean; + /** Agent number for display updates */ + agentNum?: number; +} + +export interface OrchestratorResult { + success: boolean; + response: string; + iterations: number; + mainModelCalls: number; + testModelCalls: number; + error?: string; +} + +async function executeWithRetry( + engine: AIEngine, + prompt: string, + workDir: string, + options: { modelOverride?: string }, + maxRetries = 3, +): Promise { + let lastError: string | undefined; + + const circuitCheck = canMakeConnectionAttempt(); + if (!circuitCheck.allowed) { + logError(`Circuit breaker preventing execution: ${circuitCheck.reason}`); + const restored = await waitForConnectionRestore(60000); + if (!restored) { + return { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error: circuitCheck.reason || "Connection circuit open - too many failures", + }; + } + } + + for (let attempt = 1; attempt <= maxRetries; attempt++) { + const attemptCheck = canMakeConnectionAttempt(); + if (!attemptCheck.allowed) { + logError(`Circuit breaker preventing retry: ${attemptCheck.reason}`); + return { + success: false, + response: "", + inputTokens: 0, + 
outputTokens: 0, + error: attemptCheck.reason || "Connection circuit open - stopping retries", + }; + } + + let result: AIResult; + try { + result = await engine.execute(prompt, workDir, options); + } catch (error) { + result = { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error: error instanceof Error ? error.message : String(error), + }; + } + + if (result.success) { + circuitBreaker.recordSuccess(); + return result; + } + + lastError = result.error; + + const isConnectionError = + /connection|network|timeout|unable to connect|internet connection|econnrefused|econnreset|socket hang up|dns|ENOTFOUND/i.test( + result.error || "", + ); + + if (isConnectionError) { + circuitBreaker.recordFailure(new Error(result.error || "Connection error")); + + if (attempt < maxRetries) { + const delayMs = Math.min(2000 * 2 ** (attempt - 1), 30000); + logWarn( + `Connection error on attempt ${attempt}/${maxRetries}. Retrying in ${delayMs}ms...`, + ); + await sleep(delayMs); + + const postFailureCheck = canMakeConnectionAttempt(); + if (!postFailureCheck.allowed) { + logError(`Circuit opened after ${attempt} attempts: ${postFailureCheck.reason}`); + return { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error: postFailureCheck.reason || `Connection failed after ${attempt} attempts`, + }; + } + } else { + break; + } + } else if (attempt >= maxRetries) { + break; + } else { + const delayMs = Math.min(1000 * 2 ** (attempt - 1), 10000); + logWarn( + `Attempt ${attempt}/${maxRetries} failed: ${result.error || "Unknown error"}. Retrying in ${delayMs}ms...`, + ); + await sleep(delayMs); + } + } + + return { + success: false, + response: "", + inputTokens: 0, + outputTokens: 0, + error: lastError || "All retry attempts failed", + }; +} + +function buildTestPrompt(mainOutput: string, _workDir: string): string { + return `You are a test runner. Your job is to verify that the implementation is correct by RUNNING the actual tests. 
+ +## Previous Implementation Work + +${truncateContext(mainOutput)} + +## Your Task + +1. First, identify what test framework is being used (jest, pytest, npm test, cargo test, etc.) +2. Run the tests using the appropriate command +3. Report the results clearly: + - How many tests passed/failed + - Any error messages + - Specific files that failed + +## Commands to try (in order): +- npm test +- npm run test +- yarn test +- pnpm test +- pytest +- python -m pytest +- cargo test +- go test +- make test + +## Output Format + +Report your findings in this format: + +TEST RESULTS: +- Framework: +- Command: +- Passed: +- Failed: +- Status: PASS / FAIL / PARTIAL + +DETAILS: +`; +} + +function buildFixPrompt(originalPrompt: string, mainOutput: string, testResults: string): string { + return `${originalPrompt} + +## Your Previous Implementation + +${truncateContext(mainOutput)} + +## Test Results + +${testResults} + +## Instructions + +The tests have revealed issues. Please: +1. Fix the problems identified in the test results +2. Run tests again to verify fixes +3. 
Provide the corrected implementation`; +} + +/** + * Execute with orchestrator pattern - automatically runs tests after main model + */ +export async function executeWithOrchestrator( + prompt: string, + options: OrchestratorOptions, + onProgress?: (step: string) => void, +): Promise { + const { mainEngine, testEngine, mainModel, testModel, workDir, debug = false } = options; + + const reportProgress = (message: string) => { + if (debug) logDebug(`[Orchestrator] ${message}`); + onProgress?.(message); + }; + + reportProgress("Starting execution with test feedback"); + + // Step 1: Run main model to implement the task + reportProgress("Running main model..."); + const mainResult = await executeWithRetry(mainEngine, prompt, workDir, { + modelOverride: mainModel, + }); + + if (!mainResult.success) { + return { + success: false, + response: mainResult.response, + iterations: 1, + mainModelCalls: 1, + testModelCalls: 0, + error: `Main model failed: ${mainResult.error}`, + }; + } + + const mainOutput = mainResult.response || ""; + reportProgress("Main model complete, running tests..."); + + // Update display to show test model is running + const display = StaticAgentDisplay.getInstance(); + if (display && options.agentNum !== undefined) { + const currentTitle = display.getAgentTaskTitle(options.agentNum) || "Orchestrator task"; + display.setAgentStatus( + options.agentNum, + currentTitle, + "working", + "testing", + testModel || "test", + ); + } + + // Step 2: Run test model to verify the work + reportProgress(`Sending to test model (${testModel || "default"})...`); + const testPrompt = buildTestPrompt(mainOutput, workDir); + const testEngineToUse = testEngine || mainEngine; + reportProgress("Test prompt ready, executing test model..."); + const testResult = await executeWithRetry(testEngineToUse, testPrompt, workDir, { + modelOverride: testModel, + }); + + const testOutput = testResult.success + ? 
testResult.response || "Tests completed" + : `Test execution failed: ${testResult.error}`; + + reportProgress(`Test model complete. Response length: ${testOutput.length} chars`); + reportProgress(`Test output preview: ${testOutput.slice(0, 100)}...`); + + // Check if tests indicate failures that need fixing + const hasFailures = + /\b\d+\s*(tests?|specs?|assertions?)\s*(failed|failing)\b/i.test(testOutput) || + /\b[1-9]\d*\s+failed\b/i.test(testOutput) || + /\bfailed:\s*[1-9]\d*\b/i.test(testOutput) || + /[✗❌]\s*\d+/i.test(testOutput); + + if (!hasFailures) { + // Tests passed or no issues found + return { + success: true, + response: `${mainOutput}\n\n---\n\nTest Results:\n${testOutput}`, + iterations: 1, + mainModelCalls: 1, + testModelCalls: 1, + }; + } + + // Step 3: Tests failed - run main model again with fix instructions + reportProgress("Issues found, requesting fixes..."); + const fixPrompt = buildFixPrompt(prompt, mainOutput, testOutput); + const fixResult = await executeWithRetry(mainEngine, fixPrompt, workDir, { + modelOverride: mainModel, + }); + + if (!fixResult.success) { + return { + success: false, + response: `${mainOutput}\n\n---\n\nTest Results:\n${testOutput}`, + iterations: 2, + mainModelCalls: 2, + testModelCalls: 1, + error: `Failed to fix issues: ${fixResult.error}`, + }; + } + + return { + success: true, + response: `${fixResult.response}\n\n---\n\nOriginal Test Results:\n${testOutput}`, + iterations: 2, + mainModelCalls: 2, + testModelCalls: 1, + }; +} + +/** + * Check if orchestrator pattern should be used for this task + */ +export function shouldUseOrchestrator( + taskTitle: string, + taskDescription: string, + testModel?: string, +): boolean { + if (!testModel) return false; + + const combined = `${taskTitle} ${taskDescription}`.toLowerCase(); + + // Use orchestrator for tasks that likely need testing + const testKeywords = ["test", "spec", "jest", "vitest", "mocha", "cypress", "playwright"]; + const implKeywords = ["implement", 
"create feature", "fix bug", "debug", "failing"]; + + return ( + testKeywords.some((kw) => combined.includes(kw)) || + implKeywords.some((kw) => combined.includes(kw)) + ); +} diff --git a/cli/src/execution/parallel.ts b/cli/src/execution/parallel.ts index 5318088e..53f2a6a0 100644 --- a/cli/src/execution/parallel.ts +++ b/cli/src/execution/parallel.ts @@ -1,5 +1,5 @@ import { copyFileSync, cpSync, existsSync, mkdirSync } from "node:fs"; -import { join } from "node:path"; +import { dirname, isAbsolute, join, normalize, relative, resolve, sep } from "node:path"; import simpleGit from "simple-git"; import { PROGRESS_FILE, RALPHY_DIR } from "../config/loader.ts"; import { logTaskProgress } from "../config/writer.ts"; @@ -41,6 +41,30 @@ interface ParallelAgentResult { usedSandbox?: boolean; } +function resolveSafeRelativePath(baseDir: string, candidatePath: string): string | null { + if (!candidatePath || isAbsolute(candidatePath)) { + return null; + } + + const normalized = normalize(candidatePath); + const resolved = resolve(baseDir, normalized); + const rel = relative(baseDir, resolved); + + if (rel === "" || rel === ".") { + return normalized; + } + + if (rel.startsWith(`..${sep}`) || rel === "..") { + return null; + } + + if (isAbsolute(rel)) { + return null; + } + + return rel; +} + /** * Run a single agent in a worktree */ @@ -66,6 +90,11 @@ async function runAgentInWorktree( let branchName = ""; try { + const safePrdPath = resolveSafeRelativePath(originalDir, prdFile); + if (!safePrdPath) { + throw new Error(`Invalid PRD path outside project: ${prdFile}`); + } + // Create worktree const worktree = await createAgentWorktree( task.title, @@ -80,16 +109,23 @@ async function runAgentInWorktree( logDebug(`Agent ${agentNum}: Created worktree at ${worktreeDir}`); // Copy PRD file or folder to worktree - if (prdSource === "markdown" || prdSource === "yaml" || prdSource === "json") { - const srcPath = join(originalDir, prdFile); - const destPath = join(worktreeDir, 
prdFile); + if ( + prdSource === "markdown" || + prdSource === "yaml" || + prdSource === "json" || + prdSource === "csv" + ) { + const srcPath = join(originalDir, safePrdPath); + const destPath = join(worktreeDir, safePrdPath); if (existsSync(srcPath)) { + mkdirSync(dirname(destPath), { recursive: true }); copyFileSync(srcPath, destPath); } } else if (prdSource === "markdown-folder" && prdIsFolder) { - const srcPath = join(originalDir, prdFile); - const destPath = join(worktreeDir, prdFile); + const srcPath = join(originalDir, safePrdPath); + const destPath = join(worktreeDir, safePrdPath); if (existsSync(srcPath)) { + mkdirSync(dirname(destPath), { recursive: true }); cpSync(srcPath, destPath, { recursive: true }); } } @@ -161,6 +197,11 @@ async function runAgentInSandbox( const branchName = ""; try { + const safePrdPath = resolveSafeRelativePath(originalDir, prdFile); + if (!safePrdPath) { + throw new Error(`Invalid PRD path outside project: ${prdFile}`); + } + // Create sandbox const sandboxResult = await createSandbox({ originalDir, @@ -173,16 +214,23 @@ async function runAgentInSandbox( ); // Copy PRD file or folder to sandbox (same as worktree mode) - if (prdSource === "markdown" || prdSource === "yaml" || prdSource === "json") { - const srcPath = join(originalDir, prdFile); - const destPath = join(sandboxDir, prdFile); + if ( + prdSource === "markdown" || + prdSource === "yaml" || + prdSource === "json" || + prdSource === "csv" + ) { + const srcPath = join(originalDir, safePrdPath); + const destPath = join(sandboxDir, safePrdPath); if (existsSync(srcPath)) { + mkdirSync(dirname(destPath), { recursive: true }); copyFileSync(srcPath, destPath); } } else if (prdSource === "markdown-folder" && prdIsFolder) { - const srcPath = join(originalDir, prdFile); - const destPath = join(sandboxDir, prdFile); + const srcPath = join(originalDir, safePrdPath); + const destPath = join(sandboxDir, safePrdPath); if (existsSync(srcPath)) { + mkdirSync(dirname(destPath), { 
recursive: true }); cpSync(srcPath, destPath, { recursive: true }); } } @@ -380,13 +428,13 @@ export async function runParallel( // Run agents in parallel (using sandbox or worktree mode) const promises = batch.map((task) => { - globalAgentNum++; + const agentId = ++globalAgentNum; const runInSandbox = () => runAgentInSandbox( engine, task, - globalAgentNum, + agentId, getSandboxBase(workDir), workDir, prdSource, @@ -408,7 +456,7 @@ export async function runParallel( return runAgentInWorktree( engine, task, - globalAgentNum, + agentId, baseBranch, isolationBase, workDir, @@ -424,7 +472,7 @@ export async function runParallel( engineArgs, ).then((res) => { if (shouldFallbackToSandbox(res.error)) { - logWarn(`Agent ${globalAgentNum}: Worktree unavailable, retrying in sandbox mode.`); + logWarn(`Agent ${agentId}: Worktree unavailable, retrying in sandbox mode.`); if (res.worktreeDir) { cleanupAgentWorktree(res.worktreeDir, res.branchName, workDir).catch(() => { // Ignore cleanup failures during fallback diff --git a/cli/src/execution/planning.ts b/cli/src/execution/planning.ts new file mode 100644 index 00000000..dc1e4b50 --- /dev/null +++ b/cli/src/execution/planning.ts @@ -0,0 +1,613 @@ +import { createHash } from "node:crypto"; +import { + existsSync, + lstatSync, + readFileSync, + readdirSync, + unlinkSync, + writeFileSync, +} from "node:fs"; +import { isAbsolute, join, normalize } from "node:path"; +import { gunzipSync, gzipSync } from "node:zlib"; +import { DEFAULT_MAX_REPLANS, PLANNING_CACHE_FILE } from "../config/constants.ts"; +import { RALPHY_DIR } from "../config/loader.ts"; +import type { AIEngine, AIResult } from "../engines/types.ts"; +import type { Task } from "../tasks/types.ts"; +import { logDebug, logWarn } from "../ui/logger.ts"; +import { extractTaskKeywords, getRelevantFilesForTask } from "../utils/file-indexer.ts"; +import type { PlanningProgressCallback, PlanningProgressEvent } from "./progress-types.ts"; +import { buildPlanningPrompt } from 
"./prompt.ts"; + +// Re-export PlanningProgressEvent from ui module for backward compatibility +export type { PlanningProgressCallback, PlanningProgressEvent } from "./progress-types.ts"; + +export function getPlanningCacheFile(workDir: string): string { + return join(workDir, RALPHY_DIR, PLANNING_CACHE_FILE); +} + +interface RepoFingerprint { + fileStates: Map; + dirHash: string; + timestamp: number; +} + +const fingerprintCache = new Map(); + +export function generateRepoFingerprint(workDir: string): string { + const cached = fingerprintCache.get(workDir); + const now = Date.now(); + + // Check if cache is very recent (1 minute) for high-frequency calls + if (cached && now - cached.timestamp < 60000) { + return cached.dirHash; + } + + const keyFiles = [ + "package.json", + "pyproject.toml", + "Cargo.toml", + "go.mod", + "requirements.txt", + "pnpm-lock.yaml", + "package-lock.json", + "yarn.lock", + ]; + const fileStates = new Map(); + let changed = !cached; + + for (const file of keyFiles) { + const filePath = join(workDir, file); + if (existsSync(filePath)) { + try { + const stat = lstatSync(filePath); + const mtime = stat.mtimeMs; + const size = stat.size; + + const cachedState = cached?.fileStates.get(file); + if (cachedState && cachedState.mtime === mtime && cachedState.size === size) { + fileStates.set(file, cachedState); + } else { + const content = readFileSync(filePath); + const hash = createHash("sha256").update(content).digest("hex"); + fileStates.set(file, { mtime, size, hash }); + changed = true; + } + } catch { + // Ignore errors + } + } + } + + // Also factor in top-level directory structure changes + let dirFingerprint = ""; + try { + const entries = readdirSync(workDir, { withFileTypes: true }) + .filter((entry) => entry.isDirectory()) + .map((entry) => entry.name) + .sort(); + dirFingerprint = entries.join(","); + if (cached && cached.fileStates?.get("dirs")?.hash !== dirFingerprint) { + changed = true; + } + fileStates.set("dirs", { mtime: 0, 
size: 0, hash: dirFingerprint });
  } catch {
    // Ignore errors
  }

  if (!changed && cached) {
    // Nothing changed: refresh the timestamp but keep the cached dirHash.
    cached.timestamp = now;
    return cached.dirHash;
  }

  // Deterministic combined hash: sort entries by file name before joining.
  const combinedHashes = Array.from(fileStates.entries())
    .sort(([a], [b]) => a.localeCompare(b))
    .map(([file, state]) => `${file}:${state.hash}`)
    .join("|");

  const dirHash = createHash("sha256").update(combinedHashes).digest("hex");

  fingerprintCache.set(workDir, {
    fileStates,
    dirHash,
    timestamp: now,
  });

  return dirHash;
}

/**
 * Load the persisted planning cache for `workDir`.
 *
 * Prefers the gzip-compressed cache file; falls back to the plain JSON file,
 * and returns an empty map on any read/parse failure.
 *
 * NOTE(review): the Map's generic arguments were lost in the source (bare
 * `Map`). Restored as `Map<string, unknown>` since entries round-trip through
 * JSON here; confirm the real entry type against callers.
 */
export function loadPlanningCache(
  workDir: string,
): Map<string, unknown> {
  const cacheFile = getPlanningCacheFile(workDir);
  const compressedCacheFile = `${cacheFile}.gz`;

  if (existsSync(compressedCacheFile)) {
    try {
      const compressed = readFileSync(compressedCacheFile);
      const data = JSON.parse(gunzipSync(compressed).toString("utf-8"));
      return new Map(Object.entries(data));
    } catch (error) {
      logWarn(`Failed to load compressed planning cache: ${error}`);
      // Fall through to the uncompressed file.
    }
  }

  if (!existsSync(cacheFile)) {
    return new Map();
  }

  try {
    const data = JSON.parse(readFileSync(cacheFile, "utf-8"));
    return new Map(Object.entries(data));
  } catch (error) {
    logWarn(`Failed to load planning cache: ${error}`);
    return new Map();
  }
}

/**
 * Persist the planning cache for `workDir` as gzip-compressed JSON, deleting
 * the legacy uncompressed file on success. Falls back to writing plain JSON
 * if compression or the compressed write fails.
 *
 * NOTE(review): generic arguments restored as `Map<string, unknown>` — see
 * loadPlanningCache.
 */
export function savePlanningCache(
  workDir: string,
  cache: Map<string, unknown>,
): void {
  const cacheFile = getPlanningCacheFile(workDir);
  const compressedCacheFile = `${cacheFile}.gz`;
  const data = Object.fromEntries(cache);
  const jsonStr = JSON.stringify(data);

  try {
    const compressed = gzipSync(Buffer.from(jsonStr, "utf-8"));
    writeFileSync(compressedCacheFile, compressed);

    if (existsSync(cacheFile)) {
      // Best-effort removal of the legacy uncompressed cache.
      try {
        unlinkSync(cacheFile);
      } catch {}
    }
  } catch {
    // Compression failed: fall back to readable JSON.
    writeFileSync(cacheFile, JSON.stringify(data, null, 2));
  }
}

/**
 * Stable short hash identifying a task for cache keying.
 * NOTE(review): only id + title are hashed — description changes do not
 * invalidate cached plans; confirm this is intentional.
 */
export function generateTaskHash(task: Task): string {
  const raw = `${task.id}:${task.title}`;
  return
createHash("sha256").update(raw).digest("hex").slice(0, 16); +} + +export function normalizePlannedPath(filePath: string): string { + let processed = filePath.trim(); + + // Strip leading bullets (*, -, +) + processed = processed.replace(/^[*\-+]\s+/, ""); + + // Strip leading numbering (1., 1), etc.) + processed = processed.replace(/^\d+[.)]\s+/, ""); + + // Strip wrapping backticks if present + processed = processed.replace(/^`+|`+$/g, ""); + + // Remove leading ./ + if (processed.startsWith("./")) { + processed = processed.substring(2); + } + + // Normalize path separators + processed = normalize(processed); + + if (!processed || isAbsolute(processed) || processed.startsWith("..")) { + return ""; + } + + return processed; +} + +export function parsePlannedFiles(response: string): string[] { + const files = new Set(); + + // Robust Regex approach for blocks + const filesMatch = response.match(/([\s\S]*?)<\/FILES>/i); + if (filesMatch) { + const content = filesMatch[1]; + const lines = content.split(/\r?\n/); + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed && !trimmed.startsWith("#") && !trimmed.startsWith("<")) { + const normalizedPath = normalizePlannedPath(trimmed); + if (normalizedPath) { + files.add(normalizedPath); + } + } + } + } else { + // Fallback: look for lines that look like paths if the block tags are missing/broken + const lines = response.split(/\r?\n/); + let inManualBlock = false; + for (const line of lines) { + const trimmed = line.trim(); + if ( + trimmed.toUpperCase().includes("FILES") && + (trimmed.includes("<") || trimmed.includes("[")) + ) { + inManualBlock = true; + continue; + } + if (inManualBlock && trimmed === "") continue; + if ( + inManualBlock && + (trimmed.startsWith("/") || + trimmed.startsWith("./") || + trimmed.startsWith("../") || + /^[a-zA-Z0-9_\-.]+\/[a-zA-Z0-9_\-./]+/.test(trimmed)) + ) { + const normalizedPath = normalizePlannedPath(trimmed); + if (normalizedPath) { + files.add(normalizedPath); + 
} + } + } + } + + return Array.from(files); +} + +function parseEnhancedPlanning(response: string): { + analysis?: string; + plan?: string[]; + optimization?: string; +} { + // Use robust regex approach for tags + const analysisMatch = response.match(/([\s\S]*?)<\/ANALYSIS>/i); + const planMatch = response.match(/([\s\S]*?)<\/PLAN>/i); + const optimizationMatch = response.match(/([\s\S]*?)<\/OPTIMIZATION>/i); + + const analysis = analysisMatch ? analysisMatch[1].trim() : undefined; + const optimization = optimizationMatch ? optimizationMatch[1].trim() : undefined; + + let plan: string[] | undefined; + if (planMatch) { + const content = planMatch[1]; + const lines = content.split(/\r?\n/); + const planSteps: string[] = []; + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed) { + // Parse numbered steps (strip bullet points if present) + let stepText = trimmed; + if (stepText.startsWith("- ")) { + stepText = stepText.substring(2); + } + const stepMatch = stepText.match(/^\d+\.\s*(.*)/); + if (stepMatch) { + planSteps.push(stepMatch[1]); + } else if (!line.startsWith("<") && !line.startsWith(" 0) { + planSteps.push(stepText); + } + } + } + if (planSteps.length > 0) { + plan = planSteps; + } + } + + return { + analysis, + plan, + optimization, + }; +} + +export interface PlanningResult { + files: string[]; + analysis?: string; + plan?: string[]; + optimization?: string; + noFilesNeeded?: boolean; + error?: string; +} + +export async function planTaskFiles( + engine: AIEngine, + task: Task, + workDir: string, + modelOverride?: string, + maxReplans = DEFAULT_MAX_REPLANS, + planningModel?: string, + fullTasksContext?: string, + debug?: boolean, + onProgress?: PlanningProgressCallback, + debugOpenCode?: boolean, + logThoughts?: boolean, + engineArgs?: string[], +): Promise { + const taskId = task.title && task.title !== "No title" ? 
task.title : task.id || "unknown"; + + // Use semantic chunking to get relevant files for this task + let relevantFiles: string[] = []; + try { + const taskDescription = `${task.title || ""} ${task.description || ""}`; + relevantFiles = await getRelevantFilesForTask(workDir, taskDescription, { + maxFiles: 50, + minRelevance: 0.1, + }); + logDebug(`Semantic chunking found ${relevantFiles.length} relevant files for task "${taskId}"`); + + // Log extracted keywords for debugging + const keywords = extractTaskKeywords(taskDescription); + logDebug(`Task keywords: ${keywords.join(", ")}`); + } catch (error) { + logDebug(`Failed to get relevant files for task: ${error}`); + // Continue without semantic chunking - planning will use full codebase + } + + // Build prompt with relevant files context if available + const prompt = buildPlanningPrompt(task, fullTasksContext, relevantFiles); + + // Emit planning started + if (onProgress) { + try { + onProgress({ + taskId, + status: "started", + timestamp: Date.now(), + message: + relevantFiles.length > 0 + ? 
`Planning with ${relevantFiles.length} relevant files...` + : "Planning...", + }); + } catch (error) { + // Don't let progress callback errors break planning + logDebug(`Progress callback error: ${error}`); + } + } + + // Use planningModel if provided, otherwise default to modelOverride or engine default + const options = { + modelOverride: planningModel || modelOverride || undefined, + ...(debugOpenCode && { debugOpenCode }), + ...(logThoughts !== undefined && { logThoughts }), + ...(engineArgs && engineArgs.length > 0 && { engineArgs }), + }; + + let result: AIResult; + if (onProgress && engine.executeStreaming) { + // Emit starting status + try { + onProgress({ + taskId, + status: "started", + timestamp: Date.now(), + message: "Starting planning analysis...", + }); + } catch (error) { + logDebug(`Progress callback error: ${error}`); + } + + // Create wrapper for streaming progress + const streamingCallback = (step: string) => { + try { + // Parse step to determine status and extract meaningful action + let status: PlanningProgressEvent["status"] = "thinking"; + let message = step; + + // Detect specific actions for better display + if (step.includes("analyzing") || step.includes("I need to") || step.includes("I should")) { + status = "analyzing"; + } else if ( + step.includes("planning") || + step.includes("I'll create") || + step.includes("Let me create") + ) { + status = "planning"; + } else if ( + step.includes("Reading") || + step.includes("Looking at") || + step.includes("Let me examine") + ) { + status = "analyzing"; + message = "Reading project structure and files"; + } else if ( + step.includes("identifying") || + step.includes("found") || + step.includes("need to modify") + ) { + status = "planning"; + message = "Identifying files that need changes"; + } else if (step.includes("completed") || step.includes("done") || step.includes("ready")) { + status = "completed"; + message = "Planning complete - ready to implement"; + } else if 
(step.includes("failed") || step.includes("error")) { + status = "failed"; + message = "Planning encountered an issue"; + } + + // Extract reward if present in step (e.g., "reward: 0.85") + const rewardMatch = step.match(/reward:\s*([0-9.]+)/i); + const reward = rewardMatch ? Number.parseFloat(rewardMatch[1]) : undefined; + + onProgress({ + taskId, + status, + reward, + message: message, + timestamp: Date.now(), + }); + } catch (error) { + logDebug(`Streaming progress callback error: ${error}`); + } + }; + result = await engine.executeStreaming(prompt, workDir, streamingCallback, options); + } else { + // Non-streaming: emit thinking status before execution + if (onProgress) { + try { + onProgress({ + taskId, + status: "thinking", + timestamp: Date.now(), + message: "Processing planning request...", + }); + } catch (error) { + logDebug(`Progress callback error: ${error}`); + } + } + result = await engine.execute(prompt, workDir, options); + } + + if (!result.success) { + const rawResponse = result.response || ""; + const error = result.error || "Planning failed"; + + // Detect if AI returned raw tool_use JSON instead of structured planning + const isRawToolUse = rawResponse.trim().startsWith('{"type":"tool_use"'); + + if (isRawToolUse) { + // Try to extract what file/tool the AI was trying to access + let toolInfo = ""; + try { + const parsed = JSON.parse(rawResponse); + if (parsed.part?.tool) { + toolInfo = ` (tool: ${parsed.part.tool})`; + } + if (parsed.part?.state?.input?.filePath) { + toolInfo += ` file: ${parsed.part.state.input.filePath}`; + } + } catch { + // Ignore parse errors + } + + const helpfulError = `Planning failed: AI returned tool output${toolInfo} instead of planning analysis. The AI may have started executing prematurely. This usually indicates the planning prompt was too complex or the AI engine interrupted the planning phase.`; + logDebug( + `Raw tool use detected instead of planning format. 
Response: ${rawResponse.substring(0, 500)}...`, + ); + + if (onProgress) { + try { + onProgress({ + taskId, + status: "failed", + timestamp: Date.now(), + message: helpfulError, + }); + } catch (err) { + logDebug(`Progress callback error: ${err}`); + } + } + + if (maxReplans > 0) { + logDebug( + `Planning failed with malformed response, retrying... (${maxReplans} attempts left)`, + ); + return planTaskFiles( + engine, + task, + workDir, + modelOverride, + maxReplans - 1, + planningModel, + fullTasksContext, + debug, + onProgress, + debugOpenCode, + logThoughts, + engineArgs, + ); + } + return { files: [], error: helpfulError }; + } + + // Regular failure - emit failed status + if (onProgress) { + try { + onProgress({ + taskId, + status: "failed", + timestamp: Date.now(), + message: error, + }); + } catch (err) { + logDebug(`Progress callback error: ${err}`); + } + } + + if (maxReplans > 0) { + // Check if this is a connection error that warrants a longer retry delay + const isConnectionError = + /connection|network|timeout|unable to connect|internet connection|econnrefused|econnreset/i.test( + error, + ); + const attemptNumber = DEFAULT_MAX_REPLANS - maxReplans + 1; + + if (isConnectionError) { + // Exponential backoff for connection errors: 2s, 4s, 8s + const delayMs = Math.min(2000 * 2 ** (attemptNumber - 1), 10000); + logWarn( + `Connection error detected. Retrying in ${delayMs}ms... (${maxReplans} attempts left)`, + ); + + if (onProgress) { + try { + onProgress({ + taskId, + status: "thinking", + timestamp: Date.now(), + message: `Connection error. Retrying in ${delayMs}ms... (${maxReplans} attempts left)`, + }); + } catch (err) { + logDebug(`Progress callback error: ${err}`); + } + } + + await new Promise((resolve) => setTimeout(resolve, delayMs)); + } else { + logDebug(`Planning failed, retrying... (${maxReplans} attempts left). 
Error: ${error}`); + } + + return planTaskFiles( + engine, + task, + workDir, + modelOverride, + maxReplans - 1, + planningModel, + fullTasksContext, + debug, + onProgress, + debugOpenCode, + logThoughts, + engineArgs, + ); + } + return { files: [], error }; + } + + const files = parsePlannedFiles(result.response || ""); + const parsed = parseEnhancedPlanning(result.response || ""); + + // Emit completed status + if (onProgress) { + try { + onProgress({ + taskId, + status: "completed", + timestamp: Date.now(), + message: `Planned ${files.length} files with ${parsed.plan?.length || 0} steps`, + metadata: { + fileCount: files.length, + files: files.slice(0, 10), + hasAnalysis: !!parsed.analysis, + hasPlan: !!parsed.plan, + hasOptimization: !!parsed.optimization, + }, + }); + } catch (error) { + logDebug(`Progress callback error: ${error}`); + } + } + + return { + files, + analysis: parsed.analysis, + plan: parsed.plan, + optimization: parsed.optimization, + }; +} diff --git a/cli/src/execution/progress-types.ts b/cli/src/execution/progress-types.ts new file mode 100644 index 00000000..9dba9c07 --- /dev/null +++ b/cli/src/execution/progress-types.ts @@ -0,0 +1,42 @@ +/** + * High-level execution phase - stable throughout the workflow + */ +export type ExecutionPhase = "planning" | "execution" | "testing"; + +/** + * Detailed current activity - for display purposes only, shown below + */ +export type CurrentActivity = "analyzing" | "reading" | "writing" | "thinking" | "running-tests" | "debugging" | "idle"; + +export interface AgentProgress { + agentNum: number; + taskTitle: string; + worktreeDir: string; + status: "planning" | "working" | "completed" | "failed"; + /** High-level phase: PLANNING → EXECUTION → TESTING */ + phase?: ExecutionPhase; + /** Which model is currently running (e.g., "main", "planning", "test") */ + modelName?: string; + /** Detailed current action shown below */ + currentActivity?: string; + progress?: string; + currentStep?: string; + 
recentSteps?: string[];
+  /** Steps the agent plans to do (extracted from agent's output) */
+  plannedSteps?: string[];
+  /** The model's thought pipeline - what it's thinking, goals, what it needs to do */
+  thoughtPipeline?: string[];
+  startTime: number;
+}
+
+
+export interface PlanningProgressEvent {
+  taskId: string;
+  status: "started" | "thinking" | "completed" | "error" | string;
+  timestamp: number;
+  message?: string;
+  metadata?: Record<string, unknown>;
+  reward?: number;
+}
+
+export type PlanningProgressCallback = (event: PlanningProgressEvent) => void;
diff --git a/cli/src/execution/prompt.test.ts b/cli/src/execution/prompt.test.ts
index 25e85238..2b5b6735 100644
--- a/cli/src/execution/prompt.test.ts
+++ b/cli/src/execution/prompt.test.ts
@@ -1,4 +1,4 @@
-import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test";
+import { afterEach, beforeEach, describe, expect, it } from "bun:test";
 import { mkdirSync, rmSync, writeFileSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
diff --git a/cli/src/execution/prompt.ts b/cli/src/execution/prompt.ts
index 19690778..bff43b62 100644
--- a/cli/src/execution/prompt.ts
+++ b/cli/src/execution/prompt.ts
@@ -1,7 +1,60 @@
-import { existsSync } from "node:fs";
+import type { Dirent } from "node:fs";
+import { existsSync, lstatSync, readFileSync, readdirSync } from "node:fs";
 import { join } from "node:path";
+import process from "node:process";
 import { loadBoundaries, loadProjectContext, loadRules } from "../config/loader.ts";
+import type { Task } from "../tasks/types.ts";
+import { logDebug } from "../ui/logger.ts";
 import { getBrowserInstructions, isBrowserAvailable } from "./browser.ts";
+import { getSkillsAsCsv } from "./skill-compress.ts";
+
+// =============================================================================
+// CONSTANTS
+// =============================================================================
+
+const RALPHY_PROTECTED_PATHS = [
+  
".ralphy/progress.txt",
+  ".ralphy-worktrees",
+  ".ralphy-sandboxes",
+] as const;
+
+const SKILL_DIRECTORIES = [".opencode/skills", ".claude/skills", ".skills"] as const;
+
+const PLANNING_SECTIONS = [
+  "<ANALYSIS>",
+  " - Problem: [What is the actual problem being solved?]",
+  " - Goal: [What is the desired end state?]",
+  " - Complexity: [low/medium/high]",
+  " - Risks: [Potential challenges or edge cases]",
+  "</ANALYSIS>",
+  "",
+  "<PLAN>",
+  "1. [Step 1: What to do first]",
+  "2. [Step 2: Analysis or research needed]",
+  "3. [Step 3: Implementation approach]",
+  "4. [Step 4: Testing/validation]",
+  "5. [Step 5: Final integration or cleanup]",
+  "</PLAN>",
+  "",
+  "<FILES>",
+  "path/to/file1.ext",
+  "path/to/file2.ext",
+  "...",
+  "</FILES>",
+  "",
+  "<OPTIMIZATION>",
+  " - Most efficient approach: [How to implement this optimally]",
+  " - Key considerations: [Technical factors to remember]",
+  " - Potential shortcuts: [Ways to accomplish this faster/better]",
+  "</OPTIMIZATION>",
+] as const;
+
+// Default rules that should always be included
+const DEFAULT_RULES = ["Keep changes focused and minimal. Do not refactor unrelated code."];
+
+// =============================================================================
+// TYPES
+// =============================================================================

 interface PromptOptions {
   task: string;
@@ -11,226 +64,526 @@ interface PromptOptions {
   skipTests?: boolean;
   skipLint?: boolean;
   prdFile?: string;
+  progressFile?: string;
 }

-/**
- * Detect skill/playbook directories that can guide the agent.
- * We keep this engine-agnostic: OpenCode can load skills via `skill` tool,
- * other engines can still read these docs as repo guidance.
- */ -function detectAgentSkills(workDir: string): string[] { - const candidates = [ - join(workDir, ".opencode", "skills"), - join(workDir, ".claude", "skills"), - join(workDir, ".github", "skills"), - join(workDir, ".skills"), - ]; +interface ParallelPromptOptions extends PromptOptions { + allowCommit?: boolean; + planningAnalysis?: string; + planningSteps?: string[]; + enableOrchestrator?: boolean; +} - return candidates.filter((p) => existsSync(p)); +interface EnvironmentInfo { + language?: string; + framework?: string; + buildTool?: string; + testFramework?: string; + projectType?: string; + packageManager?: string; } -/** - * Build the full prompt with project context, rules, boundaries, and task - */ -export function buildPrompt(options: PromptOptions): string { - const { - task, - autoCommit = true, - workDir = process.cwd(), - browserEnabled = "auto", - skipTests = false, - skipLint = false, - prdFile, - } = options; +// ============================================================================= +// CACHE +// ============================================================================= - const parts: string[] = []; +const envCache = new Map(); - // Add project context if available - const context = loadProjectContext(workDir); - if (context) { - parts.push(`## Project Context\n${context}`); +// ============================================================================= +// ENVIRONMENT DETECTION +// ============================================================================= + +export function detectEnvironment(workDir: string): EnvironmentInfo { + const cached = envCache.get(workDir); + if (cached) return cached; + + const result: EnvironmentInfo = {}; + + const packageJsonPath = join(workDir, "package.json"); + if (existsSync(packageJsonPath)) { + try { + const pkg = JSON.parse(readFileSync(packageJsonPath, "utf-8")); + Object.assign(result, extractEnvironmentInfo(pkg)); + } catch (error) { + logDebug(`Failed to parse package.json: ${error}`); + } } 
- // Add rules if available - const rules = loadRules(workDir); - const codeChangeRules = [ - "Keep changes focused and minimal. Do not refactor unrelated code.", - "One logical change per commit. If a task is too large, break it into subtasks.", - "Write concise code. Avoid over-engineering.", - "Don't leave dead code. Delete unused code completely.", - "Quality over speed. Small steps compound into big progress.", - ...rules, - ]; - if (codeChangeRules.length > 0) { - parts.push( - `## Rules (you MUST follow these)\n${codeChangeRules.map((r) => `- ${r}`).join("\n")}`, - ); + if (existsSync(join(workDir, "pyproject.toml"))) { + result.language = "Python"; + result.buildTool = "setuptools/poetry"; + result.packageManager = "pip/poetry"; + } else if (existsSync(join(workDir, "go.mod"))) { + result.language = "Go"; + result.packageManager = "go mod"; + } else if (existsSync(join(workDir, "Cargo.toml"))) { + result.language = "Rust"; + result.packageManager = "cargo"; } - // Add boundaries - combine system boundaries with user-defined boundaries - // System boundaries come first to ensure they are prominently visible - const userBoundaries = loadBoundaries(workDir); - const systemBoundaries = [ - prdFile || "the PRD file", - ".ralphy/progress.txt", - ".ralphy-worktrees", - ".ralphy-sandboxes", - ]; - const allBoundaries = [...systemBoundaries, ...userBoundaries]; - parts.push( - `## Boundaries\nDo NOT modify these files/directories:\n${allBoundaries.map((b) => `- ${b}`).join("\n")}`, - ); + envCache.set(workDir, result); + return result; +} - // Agent skills/playbooks (optional) - const skillRoots = detectAgentSkills(workDir); - if (skillRoots.length > 0) { - parts.push( - [ - "## Agent Skills", - "This repo includes skill/playbook docs that describe preferred patterns, workflows, or tooling:", - ...skillRoots.map((p) => `- ${p}`), - "", - "Before you start coding:", - "- Read and follow any relevant skill docs from the paths above.", - "- If your engine supports a 
`skill` tool (e.g. OpenCode), use it to load the relevant skills before implementing.",
+function extractEnvironmentInfo(pkg: {
+  dependencies?: Record<string, string>;
+  devDependencies?: Record<string, string>;
+  scripts?: Record<string, string>;
+  private?: boolean;
+  workspaces?: unknown;
+  bin?: unknown;
+  bun?: unknown;
+  packageManager?: string;
+}): Partial<EnvironmentInfo> {
+  const deps = { ...pkg.dependencies, ...pkg.devDependencies };
+  const scripts = pkg.scripts || {};
+
+  return {
+    language: detectLanguage(deps, scripts),
+    framework: detectFramework(pkg.dependencies || {}),
+    buildTool: detectBuildTool(scripts),
+    testFramework: detectTestFramework(deps, scripts),
+    projectType: detectProjectType(pkg),
+    packageManager: detectPackageManager(pkg),
+  };
+}
+
+function detectLanguage(
+  deps: Record<string, string>,
+  scripts: Record<string, string>,
+): string | undefined {
+  if (deps.typescript || deps["@types/node"] || deps["@types/react"]) {
+    return "TypeScript/JavaScript";
+  }
+  if (deps.react || deps.vue || deps.angular || deps.express || deps.fastify) {
+    return "TypeScript/JavaScript";
+  }
+  const scriptText = Object.values(scripts).join(" ").toLowerCase();
+  if (scriptText.includes("python") || scriptText.includes("pytest")) return "Python";
+  return undefined;
+}
+
+function detectFramework(deps: Record<string, string>): string | undefined {
+  if (deps.next) return "Next.js";
+  if (deps.nuxt) return "Nuxt.js";
+  if (deps["@remix-run/react"]) return "Remix";
+  if (deps["@astrojs/astro"]) return "Astro";
+  if (deps.react || deps["react-dom"]) return "React";
+  if (deps.vue) return "Vue.js";
+  if (deps.svelte) return "Svelte";
+  if (deps.angular) return "Angular";
+  if (deps.express) return "Express.js";
+  if (deps.fastify) return "Fastify";
+  return undefined;
+}
+
+function detectBuildTool(scripts: Record<string, string>): string | undefined {
+  const buildScript = scripts.build?.toLowerCase() || "";
+  if (scripts.vite || /\bvite\b/.test(buildScript)) return "Vite";
+  if (scripts.webpack ||
/\bwebpack\b/.test(buildScript)) return "Webpack"; + if (scripts.rollup || /\brollup\b/.test(buildScript)) return "Rollup"; + if (scripts.esbuild || /\besbuild\b/.test(buildScript)) return "esbuild"; + if (/\bnext\b/.test(buildScript)) return "Next.js Build"; + if (/\bnuxt\b/.test(buildScript)) return "Nuxt.js Build"; + if (scripts.tsc || /\btsc\b/.test(buildScript)) return "TypeScript Compiler"; + if (/\bbun\b/.test(buildScript)) return "Bun"; + return undefined; +} + +function detectTestFramework( + deps: Record, + scripts: Record, +): string | undefined { + if (deps.vitest || scripts.test?.includes("vitest")) return "Vitest"; + if (deps.jest || scripts.test?.includes("jest")) return "Jest"; + if (deps.cypress) return "Cypress"; + if (deps["@playwright/test"]) return "Playwright"; + if (deps.pytest) return "Pytest"; + return undefined; +} + +function detectProjectType(pkg: { + private?: boolean; + workspaces?: unknown; + bin?: unknown; +}): string | undefined { + if (pkg.private) return "Private Package"; + if (pkg.workspaces) return "Monorepo"; + if (pkg.bin) return "CLI Tool/Library"; + return undefined; +} + +function detectPackageManager(pkg: { bun?: unknown; packageManager?: string }): string { + if (pkg.bun) return "Bun"; + if (pkg.packageManager?.startsWith("pnpm")) return "pnpm"; + if (pkg.packageManager?.startsWith("yarn")) return "Yarn"; + return "npm"; +} + +// ============================================================================= +// UTILITY FUNCTIONS +// ============================================================================= + +function detectSymlinks(workDir: string): string[] { + if (!existsSync(workDir)) return []; + + let dirents: Dirent[]; + try { + dirents = readdirSync(workDir, { withFileTypes: true }) as Dirent[]; + } catch { + return []; + } + + return dirents + .filter((d) => { + try { + return lstatSync(join(workDir, d.name as string)).isSymbolicLink(); + } catch { + return false; + } + }) + .map((d) => d.name as string); +} + 
+function buildEnvironmentSection(workDir: string): string { + const env = detectEnvironment(workDir); + const lines: string[] = []; + + const envFields = [ + ["Language", env.language], + ["Framework", env.framework], + ["Build Tool", env.buildTool], + ["Test Framework", env.testFramework], + ["Project Type", env.projectType], + ["Package Manager", env.packageManager], + ].filter(([, val]) => val) as [string, string][]; + + if (envFields.length > 0) { + lines.push("## Environment Detection", ""); + for (const [label, value] of envFields) { + lines.push(`**${label}:** ${value}`); + } + lines.push( + "", + "Use this information to:", + "- Choose appropriate build/test commands based on detected framework", + "- Consider framework-specific patterns and best practices", + "- Understand project structure and conventions", + "", ); } - // Add browser instructions if available - if (isBrowserAvailable(browserEnabled)) { - parts.push(getBrowserInstructions()); + const symlinks = detectSymlinks(workDir); + if (symlinks.length > 0) { + lines.push( + "## Symlink Analysis", + "", + `**Detected ${symlinks.length} symlink(s):**`, + ...symlinks.map((s) => `- ${s}`), + "", + "Note: Symlinks can affect file system operations and tool behavior.", + "", + ); } - // Add the task - parts.push(`## Task\n${task}`); + return lines.join("\n"); +} - // Add instructions - const instructions = ["1. Implement the task described above"]; +function buildSkillsSection(workDir: string): string { + const skillsCsv = getSkillsAsCsv(workDir); + if (skillsCsv) { + return `## Agent Skills +This repo includes compressed skill/playbook documentation for token efficiency: +${skillsCsv} + +Before you start coding: +- Read and follow any relevant skill docs from compressed list above. +- If your engine supports a \`skill\` tool (e.g. OpenCode), use it to load relevant skills before implementing. 
+- If none apply, continue normally.`; + } + + const skillRoots = SKILL_DIRECTORIES.map((dir) => join(workDir, dir)).filter(existsSync); + if (skillRoots.length > 0) { + return `## Agent Skills +This repo includes skill/playbook docs that describe preferred patterns, workflows, or tooling: +${skillRoots.map((p) => `- ${p}`).join("\n")} + +Before you start coding: +- Read and follow any relevant skill docs from paths above. +- If your engine supports a \`skill\` tool (e.g. OpenCode), use it to load relevant skills before implementing. +- If none apply, continue normally.`; + } + + return ""; +} + +function buildInstructions(options: { + skipTests: boolean; + skipLint: boolean; + autoCommit: boolean; + progressFile: string; +}): string[] { + const { skipTests, skipLint, autoCommit, progressFile } = options; + const instructions: string[] = []; + let step = 1; + + instructions.push(`${step++}. Implement the task described above`); - let step = 2; if (!skipTests) { - instructions.push(`${step}. Write tests for the feature`); - step++; - instructions.push(`${step}. Run tests and ensure they pass before proceeding`); - step++; + instructions.push(`${step++}. Write tests for the feature`); + instructions.push(`${step++}. Run tests and ensure they pass before proceeding`); } if (!skipLint) { - instructions.push(`${step}. Run linting and ensure it passes`); - step++; + instructions.push(`${step++}. Run linting and ensure it passes`); } - instructions.push(`${step}. Ensure the code works correctly`); - step++; + instructions.push(`${step++}. Update ${progressFile} with what you did`); if (autoCommit) { - instructions.push(`${step}. Commit your changes with a descriptive message`); + instructions.push(`${step++}. Commit your changes with a descriptive message`); + } else { + instructions.push(`${step++}. 
Do NOT run git commit; changes will be collected automatically`); } - parts.push(`## Instructions\n${instructions.join("\n")}`); + return instructions; +} - return parts.join("\n\n"); +function buildProtectedPathsWarning(prdFile?: string, boundaries: string[] = []): string { + const systemPaths = [ + `- ${prdFile || "the PRD file"}`, + ...RALPHY_PROTECTED_PATHS.map((p) => `- ${p}`), + ]; + const userPaths = boundaries.map((b) => (b.startsWith("- ") ? b : `- ${b}`)); + return [...systemPaths, ...userPaths].join("\n"); } -interface ParallelPromptOptions { - task: string; - progressFile: string; - prdFile?: string; - workDir?: string; - skipTests?: boolean; - skipLint?: boolean; - browserEnabled?: "auto" | "true" | "false"; - allowCommit?: boolean; +// ============================================================================= +// MAIN PROMPT BUILDERS +// ============================================================================= + +export function buildPrompt(options: PromptOptions): string { + const { + task, + autoCommit = true, + workDir = process.cwd(), + browserEnabled = "auto", + skipTests = false, + skipLint = false, + prdFile, + progressFile = "progress.txt", + } = options; + + const instructions = buildInstructions({ skipTests, skipLint, autoCommit, progressFile }); + const boundaries = loadBoundaries(workDir); + const sections = [ + buildEnvironmentSection(workDir), + buildContextSection(workDir), + buildSkillsSection(workDir), + isBrowserAvailable(browserEnabled) ? getBrowserInstructions() : "", + `## Boundaries\nDo NOT modify these files/directories:\n${buildProtectedPathsWarning(prdFile, boundaries)}`, + `## Task\n${task}`, + `## Instructions\n${instructions.join("\n")}`, + ].filter(Boolean); + + return `You are working on a specific task. Focus ONLY on this task: + +TASK: ${task} + +${sections.join("\n\n")} + +Protected paths are listed in the Boundaries section. +Do NOT Read, Glob, or Search inside .ralphy-sandboxes or .ralphy-worktrees. 
+Do NOT mark tasks complete - that will be handled separately. +Focus only on implementing: ${task}`; } -/** - * Build a prompt for parallel agent execution - */ -export function buildParallelPrompt(options: ParallelPromptOptions): string { +function buildContextSection(workDir: string): string { + const context = loadProjectContext(workDir); + const rules = loadRules(workDir); + + const sections: string[] = []; + if (context) sections.push(`## Project Context\n${context}`); + + // Always include rules section with default rules + const allRules = [...DEFAULT_RULES, ...rules]; + sections.push(`## Rules (you MUST follow these)\n${allRules.join("\n")}`); + + // Boundaries are included in the protected paths warning section. + + return sections.join("\n\n"); +} + +export function buildExecutionPrompt(options: ParallelPromptOptions): string { const { task, progressFile, prdFile, - workDir = process.cwd(), skipTests = false, skipLint = false, browserEnabled = "auto", allowCommit = true, + planningAnalysis, + planningSteps, + enableOrchestrator, + workDir = process.cwd(), } = options; + const instructions = buildInstructions({ + skipTests, + skipLint, + autoCommit: allowCommit, + progressFile: progressFile || ".progress.json", + }); - // Parallel execution typically runs in a worktree - const skillRoots = detectAgentSkills(workDir); - const skillsSection = - skillRoots.length > 0 - ? `\n\nAgent Skills:\nThis repo includes skill/playbook docs:\n${skillRoots - .map((p) => `- ${p}`) - .join( - "\n", - )}\nBefore coding, read relevant skills. If your engine supports a \`skill\` tool, load them before implementing.` - : ""; - - const browserSection = isBrowserAvailable(browserEnabled) - ? `\n\n${getBrowserInstructions()}` - : ""; - - // Load rules from config + const context = loadProjectContext(workDir); const rules = loadRules(workDir); - const codeChangeRules = [ - "Keep changes focused and minimal. Do not refactor unrelated code.", - "One logical change per commit. 
If a task is too large, break it into subtasks.", - "Write concise code. Avoid over-engineering.", - "Don't leave dead code. Delete unused code completely.", - "Quality over speed. Small steps compound into big progress.", - ...rules, - ]; - const rulesSection = - codeChangeRules.length > 0 - ? `\n\nRules (you MUST follow these):\n${codeChangeRules.map((r) => `- ${r}`).join("\n")}` - : ""; - - // Build boundaries section - combine system boundaries with user-defined boundaries - // System boundaries come first to ensure they are prominently visible - const userBoundaries = loadBoundaries(workDir); + const boundaries = loadBoundaries(workDir); + + // Build sections in the order tests expect + const sections: string[] = []; + + // Task at the top + sections.push(`TASK: ${task}`); + + // Environment section + const envSection = buildEnvironmentSection(workDir); + if (envSection) sections.push(envSection); + + // Context section + if (context) sections.push(`## Project Context\n${context}`); + + // Rules section with specific format for tests + const allRules = [...DEFAULT_RULES, ...rules]; + sections.push(`Rules (you MUST follow these):\n${allRules.join("\n")}`); + + // Boundaries section with specific format for tests - system first, then user const systemBoundaries = [ - prdFile || "the PRD file", - ".ralphy/progress.txt", - ".ralphy-worktrees", - ".ralphy-sandboxes", + `- ${prdFile || "the PRD file"}`, + "- .ralphy/progress.txt", + "- .ralphy-worktrees", + "- .ralphy-sandboxes", ]; + const userBoundaries = boundaries.map((b) => (b.startsWith("- ") ? b : `- ${b}`)); const allBoundaries = [...systemBoundaries, ...userBoundaries]; - const boundariesSection = `\n\nBoundaries - Do NOT modify:\n${allBoundaries.map((b) => `- ${b}`).join("\n")}\n\nDo NOT mark tasks complete - that will be handled separately.`; - - const instructions = ["1. 
Implement this specific task completely"]; + sections.push(`Boundaries - Do NOT modify:\n${allBoundaries.join("\n")}`); - let step = 2; - if (!skipTests) { - instructions.push(`${step}. Write tests for the feature`); - step++; - instructions.push(`${step}. Run tests and ensure they pass before proceeding`); - step++; + // Planning section if provided + if (planningAnalysis && planningSteps) { + sections.push(buildPlanningSection(planningAnalysis, planningSteps)); } - if (!skipLint) { - instructions.push(`${step}. Run linting and ensure it passes`); - step++; + // Skills section + const skillsSection = buildSkillsSection(workDir); + if (skillsSection) sections.push(skillsSection); + + // Browser instructions + if (isBrowserAvailable(browserEnabled)) { + sections.push(getBrowserInstructions()); } - instructions.push(`${step}. Update ${progressFile} with what you did`); - step++; - if (allowCommit) { - instructions.push(`${step}. Commit your changes with a descriptive message`); - } else { - instructions.push(`${step}. Do NOT run git commit; changes will be collected automatically`); + // Instructions section with specific format for tests + const instructionLines = instructions.map((line) => + line.replace("Implement the task described above", "Implement this specific task completely"), + ); + sections.push(`Instructions:\n${instructionLines.join("\n")}`); + + // Orchestrator section if enabled + if (enableOrchestrator) { + sections.push(buildOrchestratorSection()); } return `You are working on a specific task. Focus ONLY on this task: -TASK: ${task}${rulesSection}${boundariesSection}${browserSection}${skillsSection} - -Instructions: -${instructions.join("\n")} +${sections.join("\n\n")} +Do NOT mark tasks complete - that will be handled separately. 
Focus only on implementing: ${task}`; } + +function buildPlanningSection(analysis: string, steps: string[]): string { + return `## Planning Analysis (Completed Earlier) +${analysis} + +## Planned Implementation Steps +${steps.map((s, i) => `${i + 1}. ${s}`).join("\n")} + +Follow these steps. If they don't apply to the current situation, explain why and propose an alternative approach.`; +} + +function buildOrchestratorSection(): string { + return `## Test Delegation (Orchestrator Mode Enabled) + +You have access to a specialized test model. When you need tests run, use these markers: + +### Quick Test Request +Use [RUN_TESTS] or [RUN_TESTS:command] to request tests: +- \`[RUN_TESTS]\` - Run default test command +- \`[RUN_TESTS:npm test]\` - Run specific command + +### Detailed Test Request +For complex testing scenarios, use: +\`\`\` +[TEST_REQUEST] +command: npm test -- --grep "feature name" +files: src/feature.ts, tests/feature.test.ts +context: Brief context about what to test +[/TEST_REQUEST] +\`\`\` + +### Completion +When done, signal completion with: +\`\`\` +[TEST_COMPLETE] +Your final summary here +[/TEST_COMPLETE] +\`\`\` + +The test model will analyze results and return them to you. You can iterate: implement → request tests → review results → fix → request tests again.`; +} + +export function buildPlanningPrompt( + task: Task, + fullTasksContext?: string, + relevantFiles?: string[], +): string { + const relevantFilesSection = relevantFiles?.length + ? `\nRELEVANT FILES (prioritize these in your analysis):\n${relevantFiles + .slice(0, 30) + .map((f) => `- ${f}`) + .join("\n")}\n` + : ""; + + return `You are a senior engineering planner. Your job is to create a comprehensive plan for this task. + +TASK: ${task.title || task.id} +${task.description ? `DESCRIPTION: ${task.description}` : ""} +${task.dependencies?.length ? `DEPENDENCIES: ${task.dependencies.join(", ")}` : ""} +${relevantFilesSection} + +${fullTasksContext ? 
`FULL PROJECT TASKS CONTEXT:\n${fullTasksContext}\n\n` : ""}
+
+First, analyze this task thoroughly and provide structured output in this format:
+
+${PLANNING_SECTIONS.join("\n")}
+
+IMPORTANT INSTRUCTIONS FOR PLANNING PHASE:
+1. You may use read/glob/grep tools to EXPLORE the codebase and understand the task
+2. DO NOT write, edit, create, or modify any files during planning
+3. DO NOT execute any implementation - this is a planning-only phase
+4. After exploring, return the structured plan above in your final response
+5. Your entire response must contain the <ANALYSIS>, <PLAN>, <FILES>, and <OPTIMIZATION> tags
+6. Return ONLY the planning analysis, not partial results from tool exploration
+
+Think step by step, explaining your reasoning clearly. Use tools to explore the codebase before finalizing your plan.`;
+}
+
+// Backward compatibility
+export function buildParallelPrompt(options: ParallelPromptOptions): string {
+  const { planningAnalysis, planningSteps, ...rest } = options;
+
+  if (planningAnalysis && planningSteps) {
+    return buildExecutionPrompt({ ...rest, planningAnalysis, planningSteps });
+  }
+
+  return buildExecutionPrompt(rest);
+}
diff --git a/cli/src/execution/retry.ts b/cli/src/execution/retry.ts
index 9eb4f293..31272c06 100644
--- a/cli/src/execution/retry.ts
+++ b/cli/src/execution/retry.ts
@@ -1,15 +1,177 @@
-import { logDebug, logWarn } from "../ui/logger.ts";
+import { logDebug, logError, logWarn } from "../ui/logger.ts";
+import { isRetryableError, standardizeError } from "../utils/errors.ts";
 
 interface RetryOptions {
   maxRetries: number;
-  retryDelay: number; // base delay in seconds
-  onRetry?: (attempt: number, error?: string, nextDelayMs?: number) => void;
-  /** Use exponential backoff (default: true) */
+  retryDelay: number; // in seconds
+  onRetry?: (attempt: number, error: string, delayMs: number) => void;
+  /** Enable exponential backoff for connection errors */
   exponentialBackoff?: boolean;
   /** Maximum delay in seconds (default: 60) */
   maxDelay?: number;
   /** Add 
random jitter to delay (default: true) */ jitter?: boolean; + /** Optional task ID for tracking connection state */ + taskId?: string; + /** Optional circuit-breaker instance to isolate retry state */ + connectionManager?: ConnectionStateManager; +} + +/** + * Circuit breaker states + */ +type CircuitState = "CLOSED" | "OPEN" | "HALF_OPEN"; + +interface CircuitBreakerConfig { + /** Number of failures before opening the circuit */ + failureThreshold: number; + /** Time in ms before attempting to close the circuit */ + resetTimeoutMs: number; + /** Half-open max attempts to test if service recovered */ + halfOpenMaxAttempts: number; +} + +/** + * Connection state manager to track global connection health + * Prevents infinite retries when connection is consistently failing + */ +class ConnectionStateManager { + private static instance: ConnectionStateManager; + private circuitState: CircuitState = "CLOSED"; + private consecutiveFailures = 0; + private lastFailureTime: number | null = null; + + private halfOpenAttempts = 0; + + private readonly config: CircuitBreakerConfig = { + failureThreshold: 3, // Open after 3 consecutive failures + resetTimeoutMs: 30000, // Wait 30s before trying again + halfOpenMaxAttempts: 2, // Try 2 times in half-open state + }; + + static getInstance(): ConnectionStateManager { + if (!ConnectionStateManager.instance) { + ConnectionStateManager.instance = new ConnectionStateManager(); + } + return ConnectionStateManager.instance; + } + + /** + * Check if we should attempt a request (circuit allows it) + */ + canAttempt(): { allowed: boolean; reason?: string } { + const now = Date.now(); + + switch (this.circuitState) { + case "CLOSED": + return { allowed: true }; + + case "OPEN": { + // Check if we should transition to half-open + if (this.lastFailureTime && now - this.lastFailureTime > this.config.resetTimeoutMs) { + this.circuitState = "HALF_OPEN"; + this.halfOpenAttempts = 0; + logWarn("Circuit breaker entering HALF_OPEN state - testing 
connection..."); + return { allowed: true }; + } + const remainingMs = this.config.resetTimeoutMs - (now - (this.lastFailureTime || 0)); + return { + allowed: false, + reason: `Connection circuit OPEN - too many failures. Waiting ${Math.ceil(remainingMs / 1000)}s before retry...`, + }; + } + + case "HALF_OPEN": + if (this.halfOpenAttempts >= this.config.halfOpenMaxAttempts) { + // BUG FIX: Too many attempts in half-open, go back to open and BLOCK the request + this.circuitState = "OPEN"; + this.lastFailureTime = now; + return { + allowed: false, + reason: `Connection circuit OPEN - service still unavailable after ${this.config.halfOpenMaxAttempts} test attempts`, + }; + } + this.halfOpenAttempts++; + return { allowed: true }; + } + } + + /** + * Record a successful request + */ + recordSuccess(): void { + if (this.circuitState === "HALF_OPEN") { + // Success in half-open closes the circuit + this.circuitState = "CLOSED"; + this.consecutiveFailures = 0; + this.halfOpenAttempts = 0; + logWarn("Circuit breaker CLOSED - connection restored"); + } else { + this.consecutiveFailures = 0; + } + } + + /** + * Record a failed request + */ + recordFailure(error: Error): void { + const isConnectionError = this.isConnectionRelatedError(error); + + if (!isConnectionError) { + // Non-connection errors don't affect circuit breaker + return; + } + + this.consecutiveFailures++; + this.lastFailureTime = Date.now(); + + if (this.circuitState === "HALF_OPEN") { + // Failure in half-open goes back to open + this.circuitState = "OPEN"; + logWarn( + `Circuit breaker OPEN - connection failed in half-open state (failure ${this.consecutiveFailures})`, + ); + } else if (this.consecutiveFailures >= this.config.failureThreshold) { + this.circuitState = "OPEN"; + logError( + `Circuit breaker OPEN - ${this.consecutiveFailures} consecutive connection failures. 
Stopping retries for ${this.config.resetTimeoutMs / 1000}s`, + ); + } + } + + /** + * Check if error is connection-related + */ + private isConnectionRelatedError(error: Error): boolean { + return ( + isRetryableError(error) && + /connection|network|timeout|unable to connect|internet connection|econnrefused|econnreset|socket hang up|dns|ENOTFOUND/i.test( + error.message, + ) + ); + } + + /** + * Get current circuit state for debugging + */ + getState(): { state: CircuitState; consecutiveFailures: number; lastFailureTime: number | null } { + return { + state: this.circuitState, + consecutiveFailures: this.consecutiveFailures, + lastFailureTime: this.lastFailureTime, + }; + } + + /** + * Force reset the circuit (for manual recovery) + */ + reset(): void { + this.circuitState = "CLOSED"; + this.consecutiveFailures = 0; + this.halfOpenAttempts = 0; + this.lastFailureTime = null; + logWarn("Circuit breaker manually reset to CLOSED"); + } } /** @@ -20,42 +182,82 @@ export function sleep(ms: number): Promise { } /** - * Calculate delay with exponential backoff and optional jitter - * - * @param attempt - Current attempt number (1-based) - * @param baseDelayMs - Base delay in milliseconds - * @param maxDelayMs - Maximum delay cap in milliseconds - * @param useJitter - Add random jitter (0-25% of delay) + * Global circuit breaker instance + */ +export const circuitBreaker = ConnectionStateManager.getInstance(); + +/** + * Check if connection is healthy enough to attempt requests */ -export function calculateBackoffDelay( +export function canMakeConnectionAttempt(): { allowed: boolean; reason?: string } { + return circuitBreaker.canAttempt(); +} + +/** + * Reset connection circuit breaker (for manual recovery) + */ +export function resetConnectionCircuit(): void { + circuitBreaker.reset(); +} + +/** + * Get current connection health status + */ +export function getConnectionHealth(): { + state: CircuitState; + consecutiveFailures: number; + lastFailureTime: number | null; +} 
{ + return circuitBreaker.getState(); +} + +/** + * Calculate delay with exponential backoff for connection errors + */ +function calculateDelay( + baseDelaySeconds: number, attempt: number, - baseDelayMs: number, - maxDelayMs: number, + error: Error, + exponentialBackoff: boolean, + maxDelaySeconds: number, useJitter: boolean, ): number { - // Exponential backoff: baseDelay * 2^(attempt-1) - let delay = baseDelayMs * Math.pow(2, attempt - 1); + const maxDelayMs = maxDelaySeconds * 1000; + const baseDelayMs = baseDelaySeconds * 1000; + + if (!exponentialBackoff) { + const delay = Math.min(baseDelayMs, maxDelayMs); + if (!useJitter) return delay; + const jitter = Math.floor(delay * 0.25 * Math.random()); + return Math.min(delay + jitter, maxDelayMs); + } - // Cap at maximum delay - delay = Math.min(delay, maxDelayMs); + // Check if this is a connection/network error + const isConnectionError = + isRetryableError(error) && + /connection|network|timeout|unable to connect|internet connection|econnrefused|econnreset|socket hang up/i.test( + error.message, + ); - // Add jitter (0-25% of delay) to prevent thundering herd - if (useJitter) { - const jitter = delay * 0.25 * Math.random(); - delay += jitter; + if (isConnectionError) { + // Exponential backoff based on configured base delay. 
+ let delayMs = Math.min(baseDelayMs * 2 ** (attempt - 1), maxDelayMs); + if (useJitter) { + delayMs = Math.min(delayMs + Math.floor(delayMs * 0.25 * Math.random()), maxDelayMs); + } + logDebug(`Connection error detected, using exponential backoff: ${delayMs}ms`); + return delayMs; } - return Math.floor(delay); + let delay = Math.min(baseDelayMs, maxDelayMs); + if (useJitter) { + delay = Math.min(delay + Math.floor(delay * 0.25 * Math.random()), maxDelayMs); + } + return delay; } /** - * Execute a function with retry logic and exponential backoff - * - * Features: - * - Exponential backoff (2^attempt * baseDelay) - * - Optional jitter to prevent thundering herd - * - Configurable maximum delay cap - * - Progress callbacks with next delay info + * Execute a function with retry logic and circuit breaker */ export async function withRetry(fn: () => Promise, options: RetryOptions): Promise { const { @@ -65,32 +267,68 @@ export async function withRetry(fn: () => Promise, options: RetryOptions): exponentialBackoff = true, maxDelay = 60, jitter = true, + taskId, + connectionManager, } = options; - - const baseDelayMs = retryDelay * 1000; - const maxDelayMs = maxDelay * 1000; let lastError: Error | null = null; + const breaker = connectionManager || circuitBreaker; + + // Check circuit breaker before attempting + const circuitCheck = breaker.canAttempt(); + if (!circuitCheck.allowed) { + logError(`Circuit breaker preventing retry: ${circuitCheck.reason}`); + throw new Error(circuitCheck.reason || "Connection circuit open - too many failures"); + } for (let attempt = 1; attempt <= maxRetries; attempt++) { try { - return await fn(); + const result = await fn(); + // Success - record it to close circuit if in half-open + breaker.recordSuccess(); + return result; } catch (error) { - lastError = error instanceof Error ? 
error : new Error(String(error)); + lastError = standardizeError(error); + + // Record failure for circuit breaker tracking + if (!lastError) { + continue; + } + breaker.recordFailure(lastError); if (attempt < maxRetries) { const errorMsg = lastError.message; - // Calculate delay with exponential backoff - const delayMs = exponentialBackoff - ? calculateBackoffDelay(attempt, baseDelayMs, maxDelayMs, jitter) - : baseDelayMs; + // Check if circuit is now open + const currentState = breaker.canAttempt(); + if (!currentState.allowed) { + logError(`Connection circuit opened after ${attempt} attempts: ${currentState.reason}`); + // Don't throw immediately - finish current retry loop but warn user + if (taskId) { + logWarn(`Task ${taskId} will be paused due to connection issues`); + } + } + + const delayMs = calculateDelay( + retryDelay, + attempt, + lastError, + exponentialBackoff, + maxDelay, + jitter, + ); - const delaySecs = (delayMs / 1000).toFixed(1); - logWarn(`Attempt ${attempt}/${maxRetries} failed: ${errorMsg}`); + logWarn( + `Attempt ${attempt}/${maxRetries} failed: ${errorMsg}. 
Retrying in ${delayMs}ms...`, + ); onRetry?.(attempt, errorMsg, delayMs); - logDebug(`Waiting ${delaySecs}s before retry (exponential backoff)...`); await sleep(delayMs); + + // Re-check circuit state before next attempt + const recheck = breaker.canAttempt(); + if (!recheck.allowed) { + throw new Error(recheck.reason || "Connection circuit open - stopping retries"); + } } } } @@ -99,32 +337,94 @@ export async function withRetry(fn: () => Promise, options: RetryOptions): } /** - * Check if an error is retryable (e.g., rate limit, network error) + * Connection fallback options for graceful degradation */ -export function isRetryableError(error: string): boolean { - const retryablePatterns = [ - /rate limit/i, - /rate_limit/i, - /hit your limit/i, - /quota/i, - /too many requests/i, - /429/, - /timeout/i, - /network/i, - /connection/i, - /ECONNRESET/, - /ETIMEDOUT/, - /ENOTFOUND/, - /overloaded/i, - ]; +export interface ConnectionFallbackOptions { + /** Save task state when connection fails */ + saveState?: () => Promise; + /** Skip current task and continue with next */ + skipTask?: () => void; + /** Pause execution and wait for manual intervention */ + pauseExecution?: () => void; +} + +/** + * Handle connection failure with graceful degradation + * This is called when all retries are exhausted due to connection issues + */ +export async function handleConnectionFailure( + taskId: string, + error: Error, + options?: ConnectionFallbackOptions, +): Promise<{ action: "retry" | "skip" | "pause" | "abort"; message: string }> { + const state = circuitBreaker.getState(); - return retryablePatterns.some((pattern) => pattern.test(error)); + logError(`Connection failure for task ${taskId}: ${error.message}`); + logError(`Circuit state: ${state.state}, Failures: ${state.consecutiveFailures}`); + + // If circuit is open, we should not retry immediately + if (state.state === "OPEN") { + const message = `Connection lost. Circuit breaker OPEN. 
${state.consecutiveFailures} consecutive failures.\nWaiting ${30000 / 1000}s before next attempt.\nYou can:\n1. Wait for automatic retry\n2. Press Ctrl+C to stop and resume later\n3. Check your internet connection`; + + logWarn(message); + + // Try to save state if provided + if (options?.saveState) { + try { + await options.saveState(); + logWarn("Task state saved for later resumption"); + } catch (saveError) { + logError(`Failed to save task state: ${saveError}`); + } + } + + return { action: "pause", message }; + } + + // For other cases, return the error + return { + action: "abort", + message: `Connection failure after maximum retries: ${error.message}`, + }; } +/** + * Wait for connection to be restored with timeout + */ +export async function waitForConnectionRestore(timeoutMs = 300000): Promise { + const checkInterval = 5000; // Check every 5 seconds + const startTime = Date.now(); + + logWarn("Waiting for connection to be restored..."); + + while (Date.now() - startTime < timeoutMs) { + const state = circuitBreaker.canAttempt(); + + if (state.allowed) { + logWarn("Connection restored - resuming execution"); + return true; + } + + const elapsed = Math.floor((Date.now() - startTime) / 1000); + const remaining = Math.floor((timeoutMs - (Date.now() - startTime)) / 1000); + logWarn( + `Still waiting for connection... (${elapsed}s elapsed, ${remaining}s timeout remaining)`, + ); + + await sleep(checkInterval); + } + + logError("Connection restore timeout reached"); + return false; +} + +/** + * Re-export isRetryableError from utils/errors.ts for backward compatibility + */ +export { isRetryableError } from "../utils/errors.ts"; + /** * Check if an error is fatal and should abort all remaining tasks. - * Fatal errors indicate a configuration or authentication problem that - * will affect all subsequent tasks. 
*/ export function isFatalError(error: string): boolean { const fatalPatterns = [ @@ -138,7 +438,7 @@ export function isFatalError(error: string): boolean { /\b403\b/i, /command not found/i, /not installed/i, - /is not recognized/i, // Windows "command not recognized" + /is not recognized/i, ]; return fatalPatterns.some((pattern) => pattern.test(error)); diff --git a/cli/src/execution/sandbox-git.ts b/cli/src/execution/sandbox-git.ts index 45ceb1e9..b4ee4a85 100644 --- a/cli/src/execution/sandbox-git.ts +++ b/cli/src/execution/sandbox-git.ts @@ -12,7 +12,7 @@ class GitMutex { private queue: Promise = Promise.resolve(); async acquire(fn: () => Promise): Promise { - let release: () => void; + let release: (() => void) | undefined; const next = new Promise((resolve) => { release = resolve; }); @@ -22,7 +22,7 @@ class GitMutex { try { return await fn(); } finally { - release!(); + release?.(); } } } diff --git a/cli/src/execution/sandbox.ts b/cli/src/execution/sandbox.ts index d27e76c9..00a9252e 100644 --- a/cli/src/execution/sandbox.ts +++ b/cli/src/execution/sandbox.ts @@ -32,21 +32,27 @@ export async function rmRF(path: string): Promise { // Using force: true and recursive: true is standard rmSync(path, { recursive: true, force: true }); return; - } catch (err: any) { - const isLockError = err.code === "EBUSY" || err.code === "EPERM" || err.code === "ENOTEMPTY"; + } catch (err: unknown) { + const errorCode = + typeof err === "object" && err !== null && "code" in err + ? String((err as { code?: string }).code) + : ""; + const isLockError = + errorCode === "EBUSY" || errorCode === "EPERM" || errorCode === "ENOTEMPTY"; if (isLockError && i < retries - 1) { // Wait with exponential backoff: 500, 1000, 2000, 4000... - const delay = 500 * Math.pow(2, i); + const delay = 500 * 2 ** i; await new Promise((resolve) => setTimeout(resolve, delay)); continue; } // On final failure for lock errors, log warning and swallow. // For non-lock errors (any time), throw immediately. 
+ const errorMessage = err instanceof Error ? err.message : String(err); if (isLockError && i === retries - 1) { logWarn( - `Failed to clean up ${path} after ${retries} attempts: ${err.message}. This may be due to a file lock. Proceeding anyway.`, + `Failed to clean up ${path} after ${retries} attempts: ${errorMessage}. This may be due to a file lock. Proceeding anyway.`, ); } else { throw err; @@ -252,7 +258,6 @@ export function verifySandboxIsolation(sandboxDir: string, symlinkDirs: string[] const stat = lstatSync(sandboxPath); if (stat.isSymbolicLink()) { // Good - it's a symlink - continue; } } catch { // Error checking - assume not isolated diff --git a/cli/src/execution/sequential.ts b/cli/src/execution/sequential.ts index 813bc859..7d95205c 100644 --- a/cli/src/execution/sequential.ts +++ b/cli/src/execution/sequential.ts @@ -7,9 +7,11 @@ import type { Task, TaskSource } from "../tasks/types.ts"; import { logDebug, logError, logInfo, logSuccess, logWarn } from "../ui/logger.ts"; import { notifyTaskComplete, notifyTaskFailed } from "../ui/notify.ts"; import { ProgressSpinner } from "../ui/spinner.ts"; +import { standardizeError } from "../utils/errors.ts"; import { clearDeferredTask, recordDeferredTask } from "./deferred.ts"; import { buildPrompt } from "./prompt.ts"; -import { isFatalError, isRetryableError, sleep, withRetry } from "./retry.ts"; +import { isFatalError, isRetryableError, withRetry } from "./retry.ts"; +import { type StateFormat, TaskState, TaskStateManager, detectStateFormat } from "./task-state.ts"; export interface ExecutionOptions { engine: AIEngine; @@ -34,11 +36,31 @@ export interface ExecutionOptions { modelOverride?: string; /** Skip automatic branch merging after parallel execution */ skipMerge?: boolean; + /** Additional environment variables for the engine CLI */ + env?: Record; /** Use lightweight sandboxes instead of git worktrees for parallel execution */ useSandbox?: boolean; /** Additional arguments to pass to the engine CLI */ 
engineArgs?: string[]; - /** GitHub issue number to sync PRD with on each iteration */ + /** Separate model for planning phase (cheaper/faster) */ + planningModel?: string; + /** Separate model for test-related tasks (cheaper/faster) */ + testModel?: string; + /** Force non-git parallel execution (sandboxes) even in git repos */ + noGitParallel?: boolean; + /** Log AI thoughts/reasoning to console */ + logThoughts?: boolean; + /** Enable full debug logging (cli errors, full ai responses) */ + debug?: boolean; + /** Enable comprehensive OpenCode debugging */ + debugOpenCode?: boolean; + /** Allow OpenCode to access sandbox directories without permission prompts */ + allowOpenCodeSandboxAccess?: boolean; + /** Progress callback for progress reporting */ + onProgress?: (step: string) => void; + /** Task state manager for centralized state tracking */ + taskStateManager?: TaskStateManager; + /** Optional GitHub issue number to sync progress to */ syncIssue?: number; } @@ -72,7 +94,7 @@ export async function runSequential(options: ExecutionOptions): Promise(); + // BUG FIX: Safety counter to prevent infinite loops + let safetyCounter = 0; + const MAX_SAFETY_ITERATIONS = 10000; while (true) { + // Safety check to prevent infinite loops + if (safetyCounter++ > MAX_SAFETY_ITERATIONS) { + throw new Error("Safety limit exceeded - possible infinite loop in sequential execution"); + } // Check iteration limit if (maxIterations > 0 && iteration >= maxIterations) { logInfo(`Reached max iterations (${maxIterations})`); break; } - // Get next task - const task = await taskSource.getNextTask(); - if (!task) { + // Get next pending task from state manager + const pendingTask = taskStateManager.getNextPendingTask(); + if (!pendingTask) { logSuccess("All tasks completed!"); break; } + // Find the full task in the source + let task = taskIndex.get(pendingTask.id); + if (!task) { + for (const refreshedTask of await taskSource.getAllTasks()) { + taskIndex.set(refreshedTask.id, 
refreshedTask); + } + task = taskIndex.get(pendingTask.id); + } + if (!task) { + logError(`Task ${pendingTask.id} not found in source`); + await taskStateManager.transitionState(pendingTask.id, TaskState.SKIPPED); + continue; + } + + // BUG FIX: Check max attempts and claim atomically in claimTaskForExecution + // to prevent race condition where state could change between check and claim + const claimed = await taskStateManager.claimTaskForExecution(task.id); + if (!claimed) { + // Task could be: already running, completed, or exceeded max attempts + // Check if it was max attempts + if (taskStateManager.hasExceededMaxAttempts(task.id, maxRetries)) { + logWarn(`Task "${task.title}" has exceeded max attempts (${maxRetries}), skipping...`); + await taskStateManager.transitionState(task.id, TaskState.SKIPPED); + await taskSource.markComplete(task.id); + result.tasksFailed++; + notifyTaskFailed(task.title, "Exceeded maximum retry attempts"); + clearDeferredTask(taskSource.type, task, workDir, options.prdFile); + } else { + logDebug(`Task "${task.title}" is already being executed, skipping...`); + } + continue; + } + iteration++; - const remaining = await taskSource.countRemaining(); + const remaining = taskStateManager.countPending(); logInfo(`Task ${iteration}: ${task.title} (${remaining} remaining)`); // Create branch if needed @@ -129,7 +221,7 @@ export async function runSequential(options: ExecutionOptions): Promise 0 && { engineArgs }), + ...(options.debugOpenCode && { debugOpenCode: options.debugOpenCode }), + ...(options.logThoughts !== undefined && { logThoughts: options.logThoughts }), + ...(dryRun && { dryRun: true }), }; + if (engine.executeStreaming) { return await engine.executeStreaming( prompt, @@ -170,21 +266,24 @@ export async function runSequential(options: ExecutionOptions): Promise= maxRetries) { logError(`Task "${task.title}" failed after ${deferrals} deferrals: ${errMsg}`); + await taskStateManager.transitionState(task.id, TaskState.FAILED, 
errMsg); logTaskProgress(task.title, "failed", workDir); result.tasksFailed++; notifyTaskFailed(task.title, errMsg); @@ -217,34 +321,42 @@ export async function runSequential(options: ExecutionOptions): Promise= maxRetries) { logError(`Task "${task.title}" failed after ${deferrals} deferrals: ${errorMsg}`); + await taskStateManager.transitionState(task.id, TaskState.FAILED, errorMsg); logTaskProgress(task.title, "failed", workDir); result.tasksFailed++; notifyTaskFailed(task.title, errorMsg); @@ -252,23 +364,30 @@ export async function runSequential(options: ExecutionOptions): Promise { + if (segment.startsWith("```")) { + return segment; + } + + return ( + segment + // Remove multiple consecutive blank lines + .replace(/\n{3,}/g, "\n\n") + // Remove trailing whitespace from lines + .replace(/[ \t]+$/gm, "") + // Remove whitespace-only lines + .replace(/^\s+$/gm, "") + // Compress verbose phrases + .replace(/Please note that /gi, "Note: ") + .replace(/In order to /gi, "To ") + .replace(/Make sure to /gi, "") + .replace(/You should /gi, "") + .replace(/You must /gi, "Must ") + .replace(/It is important to /gi, "") + .replace(/Keep in mind that /gi, "") + // Remove redundant markdown emphasis in instructions + .replace(/\*\*Note\*\*:/g, "Note:") + .replace(/\*\*Important\*\*:/g, "Important:") + // Technical Jargon Compression + .replace(/\bimplementation\b/gi, "impl") + .replace(/\binformation\b/gi, "info") + .replace(/\bdirectory\b/gi, "dir") + .replace(/\bdirectories\b/gi, "dirs") + .replace(/\binitialization\b/gi, "init") + .replace(/\bconfiguration\b/gi, "config") + .replace(/\bparameters\b/gi, "params") + .replace(/\benvironment\b/gi, "env") + .replace(/\bdocumentation\b/gi, "docs") + ); + }) + .join(""); + + return compressed.trim(); +} + +function csvEscape(value: string): string { + const escaped = value.replace(/"/g, '""'); + if (/[",\n\r]/.test(escaped)) { + return `"${escaped}"`; + } + return escaped; +} + +/** + * Copy and compress skill folders + */ 
+export function copyAndCompressSkillFolders(originalDir: string, sandboxDir: string): number { + const skillDirs = [".opencode/skills", ".claude/skills", ".skills"]; + let totalSaved = 0; + + for (const dir of skillDirs) { + const srcPath = join(originalDir, dir); + if (!existsSync(srcPath)) continue; + + const destPath = join(sandboxDir, dir); + mkdirSync(destPath, { recursive: true }); + + const saved = compressDirectory(srcPath, destPath); + totalSaved += saved; + } + + if (totalSaved > 0) { + logDebug(`[SKILLS] Compressed skill files, saved ~${totalSaved} chars`); + } + + return totalSaved; +} + +/** + * Recursively compress markdown files in a directory + */ +function compressDirectory(srcDir: string, destDir: string): number { + let saved = 0; + // Handle case where srcDir doesn't exist (though checked above) + if (!existsSync(srcDir)) return 0; + + const entries = readdirSync(srcDir, { withFileTypes: true }); + + for (const entry of entries) { + const srcPath = join(srcDir, entry.name); + const destPath = join(destDir, entry.name); + + if (entry.isDirectory()) { + mkdirSync(destPath, { recursive: true }); + saved += compressDirectory(srcPath, destPath); + } else if (entry.name.endsWith(".md")) { + const original = readFileSync(srcPath, "utf-8"); + const compressed = compressMarkdown(original); + writeFileSync(destPath, compressed, "utf-8"); + saved += original.length - compressed.length; + } else { + // Copy non-markdown files as-is + const content = readFileSync(srcPath); + writeFileSync(destPath, content); + } + } + + return saved; +} + +/** + * Get all skills as a compact CSV string for LLM context + * Format: SkillName,Instructions + */ +export function getSkillsAsCsv(workDir: string): string { + const skillDirs = [".opencode/skills", ".claude/skills", ".skills"]; + const rows: string[] = []; + + for (const dir of skillDirs) { + const srcPath = join(workDir, dir); + if (!existsSync(srcPath)) continue; + + const entries = readdirSync(srcPath, { 
withFileTypes: true }); + for (const entry of entries) { + if (entry.isFile() && entry.name.endsWith(".md")) { + const content = readFileSync(join(srcPath, entry.name), "utf-8"); + const compressed = compressMarkdown(content).replace(/\n/g, " "); + + const name = entry.name.replace(".md", ""); + const nameFinal = csvEscape(name); + const contentFinal = csvEscape(compressed); + + rows.push(`${nameFinal},${contentFinal}`); + } + } + } + + if (rows.length === 0) return ""; + return `Name,Instructions\n${rows.join("\n")}`; +} diff --git a/cli/src/execution/task-state.ts b/cli/src/execution/task-state.ts new file mode 100644 index 00000000..d9f06d5d --- /dev/null +++ b/cli/src/execution/task-state.ts @@ -0,0 +1,767 @@ +/** + * Task State Manager + * + * Centralized state management for task execution. + * Provides a single source of truth for task states across all execution modes. + * State is persisted in the same format as the input source (YAML, JSON, CSV, MD). + */ + +import { + existsSync, + mkdirSync, + readFileSync, + renameSync, + unlinkSync, + writeFileSync, +} from "node:fs"; +import { dirname, join } from "node:path"; +import YAML from "yaml"; +import { RALPHY_DIR } from "../config/loader.ts"; +import type { Task, TaskSourceType } from "../tasks/types.ts"; +import { logDebug, logError } from "../ui/logger.ts"; +import { acquireFileLock, releaseFileLock } from "./locking.ts"; + +export enum TaskState { + PENDING = "pending", + RUNNING = "running", + COMPLETED = "completed", + FAILED = "failed", + DEFERRED = "deferred", + SKIPPED = "skipped", +} + +export interface TaskStateEntry { + id: string; + title: string; + state: TaskState; + attemptCount: number; + lastAttemptTime?: number; + errorHistory: string[]; + executionContext?: { + branch?: string; + worktree?: string; + sandbox?: string; + }; +} + +interface StateFileFormat { + version: number; + lastUpdated: string; + tasks: Record; +} + +export type StateFormat = "yaml" | "json" | "csv" | "md"; + +export 
function detectStateFormat(filePath: string | undefined): StateFormat { + if (!filePath) return "yaml"; + if (filePath.endsWith(".json")) return "json"; + if (filePath.endsWith(".csv")) return "csv"; + if (filePath.endsWith(".md")) return "md"; + return "yaml"; +} + +export class TaskStateManager { + private stateFilePath: string; + private tasks: Map = new Map(); + private format: StateFormat; + private sourceType: TaskSourceType; + private sourcePath: string; + private static readonly STATE_VERSION = 1; + + constructor( + workDir: string, + sourceType: TaskSourceType, + sourcePath: string, + format: StateFormat = "yaml", + ) { + this.sourceType = sourceType; + this.sourcePath = sourcePath; + this.format = format; + this.stateFilePath = join(workDir, RALPHY_DIR, `task-state.${format}`); + } + + /** + * Initialize the state manager with tasks from the source. + * Loads existing state if available, or creates new state from tasks. + */ + async initialize(tasksFromSource: Task[]): Promise { + // Ensure directory exists + const dir = dirname(this.stateFilePath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + + // Try to load existing state + if (existsSync(this.stateFilePath)) { + await this.loadState(); + } + + // Reset any RUNNING tasks to PENDING (they were interrupted) + // and any DEFERRED tasks that have exceeded max deferrals + let resetCount = 0; + for (const [_key, task] of this.tasks) { + if (task.state === TaskState.RUNNING) { + logDebug(`Resetting interrupted task ${task.id} from RUNNING to PENDING`); + task.state = TaskState.PENDING; + resetCount++; + } + } + if (resetCount > 0) { + logDebug(`Reset ${resetCount} interrupted tasks to PENDING`); + } + + // Merge with new tasks from source + for (const task of tasksFromSource) { + const key = this.buildTaskKey(task.id); + const existing = this.tasks.get(key); + + if (!existing) { + // New task - add with pending state + this.tasks.set(key, { + id: task.id, + title: task.title, + 
state: TaskState.PENDING, + attemptCount: 0, + errorHistory: [], + }); + } else { + // Existing task - update title if changed + existing.title = task.title; + } + } + + // Remove tasks that no longer exist in source + const validKeys = new Set(tasksFromSource.map((t) => this.buildTaskKey(t.id))); + for (const key of this.tasks.keys()) { + if (!validKeys.has(key)) { + this.tasks.delete(key); + } + } + + await this.persistState(); + logDebug(`TaskStateManager initialized with ${this.tasks.size} tasks`); + } + + /** + * Atomically claim a task for execution. + * Returns true if the task was claimed (was in PENDING state), false otherwise. + */ + async claimTaskForExecution(taskId: string): Promise { + const key = this.buildTaskKey(taskId); + const workDir = dirname(dirname(this.stateFilePath)); + const lockKey = `${this.stateFilePath}.claim`; + + if (!acquireFileLock(lockKey, workDir, 5)) { + logDebug(`Task ${taskId} could not acquire claim lock`); + return false; + } + + try { + await this.loadState(); + const task = this.tasks.get(key); + + if (!task) { + logError(`Task ${taskId} not found in state manager`); + return false; + } + + if (task.state !== TaskState.PENDING) { + logDebug(`Task ${taskId} cannot be claimed - state is ${task.state}`); + return false; + } + + task.state = TaskState.RUNNING; + task.attemptCount++; + task.lastAttemptTime = Date.now(); + await this.persistState(); + + logDebug(`Task ${taskId} claimed for execution (attempt ${task.attemptCount})`); + return true; + } finally { + releaseFileLock(lockKey, workDir); + } + } + + /** + * Transition a task to a new state. 
+ */ + async transitionState( + taskId: string, + newState: TaskState, + error?: string, + executionContext?: TaskStateEntry["executionContext"], + ): Promise { + const key = this.buildTaskKey(taskId); + const task = this.tasks.get(key); + + if (!task) { + logError(`Task ${taskId} not found in state manager`); + return; + } + + const oldState = task.state; + task.state = newState; + + if (error) { + task.errorHistory.push(error); + } + + if (executionContext) { + task.executionContext = { ...task.executionContext, ...executionContext }; + } + + await this.persistState(); + logDebug(`Task ${taskId} transitioned from ${oldState} to ${newState}`); + } + + /** + * Get the next pending task that can be executed. + */ + getNextPendingTask(): TaskStateEntry | null { + for (const task of this.tasks.values()) { + if (task.state === TaskState.PENDING) { + return task; + } + } + return null; + } + + /** + * Get all tasks in a specific state. + */ + getTasksByState(state: TaskState): TaskStateEntry[] { + return Array.from(this.tasks.values()).filter((t) => t.state === state); + } + + /** + * Get the current state of a task. + */ + getTaskState(taskId: string): TaskState | null { + const key = this.buildTaskKey(taskId); + return this.tasks.get(key)?.state ?? null; + } + + /** + * Check if a task has exceeded the maximum number of attempts. + */ + hasExceededMaxAttempts(taskId: string, maxRetries: number): boolean { + const key = this.buildTaskKey(taskId); + const task = this.tasks.get(key); + if (!task) return false; + return task.attemptCount >= maxRetries; + } + + /** + * Get the number of remaining pending tasks. + */ + countPending(): number { + return this.getTasksByState(TaskState.PENDING).length; + } + + /** + * Get summary statistics. 
+ */ + getStats(): { + total: number; + pending: number; + running: number; + completed: number; + failed: number; + deferred: number; + skipped: number; + } { + return { + total: this.tasks.size, + pending: this.getTasksByState(TaskState.PENDING).length, + running: this.getTasksByState(TaskState.RUNNING).length, + completed: this.getTasksByState(TaskState.COMPLETED).length, + failed: this.getTasksByState(TaskState.FAILED).length, + deferred: this.getTasksByState(TaskState.DEFERRED).length, + skipped: this.getTasksByState(TaskState.SKIPPED).length, + }; + } + + /** + * Reset a task to pending state (for retrying failed/skipped tasks). + * Also resets the attempt count so retries don't accumulate across program restarts. + */ + async resetTask(taskId: string): Promise { + const key = this.buildTaskKey(taskId); + const task = this.tasks.get(key); + + if (!task) { + logError(`Task ${taskId} not found in state manager`); + return; + } + + task.state = TaskState.PENDING; + task.attemptCount = 0; + task.errorHistory = []; + await this.persistState(); + logDebug(`Task ${taskId} reset to pending state`); + } + + /** + * Reset all failed/skipped tasks to pending. + * Also resets the attempt count so retries don't accumulate across program restarts. + */ + async resetAllFailed(): Promise { + let count = 0; + for (const [_key, task] of this.tasks) { + if (task.state === TaskState.FAILED || task.state === TaskState.SKIPPED) { + task.state = TaskState.PENDING; + task.attemptCount = 0; + task.errorHistory = []; + count++; + } + } + if (count > 0) { + await this.persistState(); + } + logDebug(`Reset ${count} failed/skipped tasks to pending`); + return count; + } + + /** + * Reset attempt counts for all tasks when starting a fresh run. + * This ensures retries don't persist across program restarts. 
+ */ + async resetAllAttemptCounts(): Promise { + for (const task of this.tasks.values()) { + task.attemptCount = 0; + } + await this.persistState(); + logDebug("Reset all task attempt counts"); + } + + /** + * Build a unique key for a task. + */ + private buildTaskKey(taskId: string): string { + const encodedSourceType = encodeURIComponent(this.sourceType); + const encodedSourcePath = encodeURIComponent(this.sourcePath); + const encodedTaskId = encodeURIComponent(taskId); + return `${encodedSourceType}:${encodedSourcePath}:${encodedTaskId}`; + } + + private extractTaskIdFromKey(key: string): string { + const parts = key.split(":"); + if (parts.length === 3) { + try { + return decodeURIComponent(parts[2]); + } catch { + // Fall through to legacy parsing + } + } + + // Legacy fallback for older unencoded keys. + const firstColon = key.indexOf(":"); + const secondColon = firstColon === -1 ? -1 : key.indexOf(":", firstColon + 1); + if (secondColon !== -1 && secondColon + 1 < key.length) { + return key.slice(secondColon + 1); + } + + return key; + } + + /** + * Check for prototype pollution keys in data + */ + private hasPrototypePollution(data: unknown): boolean { + if (data === null || typeof data !== "object") { + return false; + } + + const pollutionKeys = ["__proto__", "constructor", "prototype"]; + + for (const key of Object.keys(data)) { + if (pollutionKeys.includes(key)) { + return true; + } + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const value = (data as Record)[key]; + if (typeof value === "object" && value !== null) { + if (this.hasPrototypePollution(value)) { + return true; + } + } + } + + return false; + } + + /** + * Persist state to disk in the appropriate format. 
+ */ + private async persistState(): Promise { + // Validate format before proceeding + if (!this.format || !["yaml", "json", "csv", "md"].includes(this.format)) { + throw new Error(`Invalid state format: ${this.format}`); + } + + // Check for prototype pollution before persisting + const rawTasks = Object.fromEntries(this.tasks); + if (this.hasPrototypePollution(rawTasks)) { + throw new Error("State contains potentially malicious prototype pollution keys"); + } + + const data: StateFileFormat = { + version: TaskStateManager.STATE_VERSION, + lastUpdated: new Date().toISOString(), + tasks: rawTasks, + }; + + const tempPath = `${this.stateFilePath}.tmp`; + + try { + let content: string; + + switch (this.format) { + case "yaml": + content = YAML.stringify(data); + break; + case "json": + content = JSON.stringify(data, null, 2); + break; + case "csv": + content = this.toCSV(data); + break; + case "md": + content = this.toMarkdown(data); + break; + default: + content = YAML.stringify(data); + } + + // Write to temp file first, then rename for atomicity (TOCTOU-safe) + writeFileSync(tempPath, content, "utf-8"); + renameSync(tempPath, this.stateFilePath); + } catch (error) { + // Clean up temp file on error to prevent stale file accumulation + try { + if (existsSync(tempPath)) { + unlinkSync(tempPath); + } + } catch { + // Ignore cleanup errors + } + logError(`Failed to persist task state: ${error}`); + throw error; + } + } + + /** + * Load state from disk. 
+ */ + private async loadState(): Promise { + // Validate format before proceeding + if (!this.format || !["yaml", "json", "csv", "md"].includes(this.format)) { + logError(`Invalid state format: ${this.format}`); + this.tasks = new Map(); + return; + } + + try { + const content = readFileSync(this.stateFilePath, "utf-8"); + let data: StateFileFormat; + + switch (this.format) { + case "yaml": + data = YAML.parse(content) as StateFileFormat; + break; + case "json": + // SECURITY: Parse JSON safely and check for prototype pollution + try { + data = JSON.parse(content) as StateFileFormat; + } catch (parseError) { + throw new Error(`Invalid JSON in state file: ${parseError}`); + } + break; + case "csv": + data = this.fromCSV(content); + break; + case "md": + data = this.fromMarkdown(content); + break; + default: + data = YAML.parse(content) as StateFileFormat; + } + + // Validate data structure before using + if (!data || typeof data !== "object") { + throw new Error("State file contains invalid data structure"); + } + + // Validate no prototype pollution keys using deep check + if (this.hasPrototypePollution(data)) { + throw new Error("State file contains potentially malicious prototype pollution keys"); + } + + if (data.version !== TaskStateManager.STATE_VERSION) { + logDebug( + `Migrating state file from version ${data.version} to ${TaskStateManager.STATE_VERSION}`, + ); + } + + // Validate tasks is an object before creating Map + if (!data.tasks || typeof data.tasks !== "object") { + logDebug("State file has no tasks or invalid tasks structure"); + this.tasks = new Map(); + return; + } + + this.tasks = new Map(Object.entries(data.tasks)); + logDebug(`Loaded ${this.tasks.size} tasks from state file`); + } catch (error) { + logError(`Failed to load task state: ${error}`); + this.tasks = new Map(); + } + } + + /** + * Convert state to CSV format. 
+ */ + private toCSV(data: StateFileFormat): string { + const headers = [ + "key", + "id", + "title", + "state", + "attemptCount", + "lastAttemptTime", + "errorHistory", + ]; + const rows = Object.entries(data.tasks).map(([key, task]) => [ + this.escapeCsvField(key), + this.escapeCsvField(task.id), + this.escapeCsvField(task.title), + this.escapeCsvField(task.state), + this.escapeCsvField(String(task.attemptCount)), + this.escapeCsvField(task.lastAttemptTime != null ? String(task.lastAttemptTime) : ""), + this.escapeCsvField(task.errorHistory.join("|")), + ]); + + return [headers.join(","), ...rows.map((r) => r.join(","))].join("\n"); + } + + private escapeCsvField(value: string): string { + if (!/[",\n\r]/.test(value)) { + return value; + } + return `"${value.replace(/"/g, '""')}"`; + } + + private parseCsvLine(line: string): string[] { + const parts: string[] = []; + let current = ""; + let inQuotes = false; + + for (let i = 0; i < line.length; i++) { + const char = line[i]; + if (char === '"') { + if (inQuotes && line[i + 1] === '"') { + current += '"'; + i++; + } else { + inQuotes = !inQuotes; + } + continue; + } + if (char === "," && !inQuotes) { + parts.push(current); + current = ""; + continue; + } + current += char; + } + + parts.push(current); + return parts; + } + + /** + * Parse state from CSV format. 
+ */ + private fromCSV(content: string): StateFileFormat { + const records = this.parseCsvRecords(content); + if (records.length < 2) { + return { + version: TaskStateManager.STATE_VERSION, + lastUpdated: new Date().toISOString(), + tasks: {}, + }; + } + + const tasks: Record = {}; + for (let i = 1; i < records.length; i++) { + const line = records[i]; + if (!line || line.trim().length === 0) continue; + + const parts = this.parseCsvLine(line); + if (parts.length >= 4) { + const key = parts[0]?.trim(); + const id = parts[1]?.trim(); + const title = parts[2]?.trim(); + const state = parts[3]?.trim() as TaskState; + const attemptCount = parts[4]?.trim(); + const lastAttemptTime = parts[5]?.trim(); + const errorHistory = parts[6]?.trim(); + + // Skip entries with invalid key + if (!key) continue; + + // Validate state is a valid TaskState + const validStates = Object.values(TaskState); + if (!validStates.includes(state)) { + logDebug(`Skipping CSV row with invalid state: ${state}`); + continue; + } + + tasks[key] = { + id: id || key, + title: title || "Unknown", + state: state, + attemptCount: attemptCount ? Number.parseInt(attemptCount, 10) || 0 : 0, + lastAttemptTime: lastAttemptTime + ? Number.parseInt(lastAttemptTime, 10) || undefined + : undefined, + errorHistory: errorHistory ? 
errorHistory.split("|").filter(Boolean) : [], + }; + } + } + + return { + version: TaskStateManager.STATE_VERSION, + lastUpdated: new Date().toISOString(), + tasks, + }; + } + + private parseCsvRecords(content: string): string[] { + const records: string[] = []; + let current = ""; + let inQuotes = false; + + for (let i = 0; i < content.length; i++) { + const char = content[i]; + + if (char === '"') { + if (inQuotes && content[i + 1] === '"') { + current += '""'; + i++; + } else { + inQuotes = !inQuotes; + current += char; + } + continue; + } + + if ((char === "\n" || char === "\r") && !inQuotes) { + if (char === "\r" && content[i + 1] === "\n") { + i++; + } + if (current.trim().length > 0) { + records.push(current); + } + current = ""; + continue; + } + + current += char; + } + + if (current.trim().length > 0) { + records.push(current); + } + + return records; + } + + /** + * Convert state to Markdown format. + */ + private toMarkdown(data: StateFileFormat): string { + const lines = ["# Task State", "", `Last Updated: ${data.lastUpdated}`, ""]; + + for (const [key, task] of Object.entries(data.tasks)) { + lines.push(`## ${task.title} (${key})`); + lines.push(""); + lines.push(`- **State**: ${task.state}`); + lines.push(`- **Attempt Count**: ${task.attemptCount}`); + if (task.lastAttemptTime) { + lines.push(`- **Last Attempt**: ${new Date(task.lastAttemptTime).toISOString()}`); + } + if (task.errorHistory.length > 0) { + lines.push(`- **Errors**: ${JSON.stringify(task.errorHistory)}`); + } + lines.push(""); + } + + return lines.join("\n"); + } + + /** + * Parse state from Markdown format. 
+ */ + private fromMarkdown(content: string): StateFileFormat { + const tasks: Record = {}; + const sections = content.split(/\n## /); + + for (const section of sections.slice(1)) { + const lines = section.split("\n"); + const titleMatch = lines[0].match(/(.+) \((.+)\)/); + if (!titleMatch) continue; + + const [, title, key] = titleMatch; + const task: TaskStateEntry = { + id: "", + title, + state: TaskState.PENDING, + attemptCount: 0, + errorHistory: [], + }; + + for (const line of lines) { + if (line.startsWith("- **State**: ")) { + task.state = line.replace("- **State**: ", "").trim() as TaskState; + } else if (line.startsWith("- **Attempt Count**: ")) { + task.attemptCount = Number.parseInt(line.replace("- **Attempt Count**: ", ""), 10) || 0; + } else if (line.startsWith("- **Last Attempt**: ")) { + const dateStr = line.replace("- **Last Attempt**: ", "").trim(); + task.lastAttemptTime = new Date(dateStr).getTime(); + } else if (line.startsWith("- **Errors**: ")) { + const errorsRaw = line.replace("- **Errors**: ", "").trim(); + if (errorsRaw.startsWith("[")) { + try { + const parsed = JSON.parse(errorsRaw); + if (Array.isArray(parsed)) { + task.errorHistory = parsed + .map((item) => String(item)) + .filter((item) => item.length > 0); + continue; + } + } catch { + // Fall back to legacy format parsing below. 
+ } + } + + task.errorHistory = errorsRaw + .split(", ") + .map((s) => s.trim()) + .filter(Boolean); + } + } + + // Extract ID from key + task.id = this.extractTaskIdFromKey(key); + + tasks[key] = task; + } + + return { + version: TaskStateManager.STATE_VERSION, + lastUpdated: new Date().toISOString(), + tasks, + }; + } +} diff --git a/cli/src/tasks/types.ts b/cli/src/tasks/types.ts index a4ba6379..0c5c6c4f 100644 --- a/cli/src/tasks/types.ts +++ b/cli/src/tasks/types.ts @@ -10,6 +10,10 @@ export interface Task { body?: string; /** Parallel group number (0 = sequential, >0 = can run in parallel with same group) */ parallelGroup?: number; + /** Optional description from PRD */ + description?: string; + /** Optional dependencies (task IDs) */ + dependencies?: string[]; /** Whether the task is completed */ completed: boolean; } @@ -17,7 +21,7 @@ export interface Task { /** * Task source type */ -export type TaskSourceType = "markdown" | "markdown-folder" | "yaml" | "json" | "github"; +export type TaskSourceType = "markdown" | "markdown-folder" | "yaml" | "csv" | "github" | "json"; /** * Task source interface - one per format @@ -37,4 +41,6 @@ export interface TaskSource { countCompleted(): Promise; /** Get tasks in a specific parallel group */ getTasksInGroup?(group: number): Promise; + /** Get compact format of all tasks (for planning context) */ + toCompactFormat?(): Promise; } diff --git a/cli/src/telemetry/collector.ts b/cli/src/telemetry/collector.ts index 37b38fe9..08fbdd7f 100644 --- a/cli/src/telemetry/collector.ts +++ b/cli/src/telemetry/collector.ts @@ -18,6 +18,40 @@ import type { // Package version (loaded lazily) let cachedVersion: string | undefined; +function sanitizeSecrets(input: string): string { + const patterns = [ + { regex: /sk-[a-zA-Z0-9]{48}/g, replacement: "[API_KEY_REDACTED]" }, + { regex: /sk-ant-[a-zA-Z0-9_-]{16,256}/g, replacement: "[ANTHROPIC_KEY_REDACTED]" }, + { regex: /ghp_[a-zA-Z0-9]{36}/g, replacement: "[GITHUB_TOKEN_REDACTED]" }, + 
{ regex: /gho_[a-zA-Z0-9]{52}/g, replacement: "[GITHUB_OAUTH_REDACTED]" }, + { regex: /AKIA[0-9A-Z]{16}/g, replacement: "[AWS_KEY_REDACTED]" }, + { regex: /\b[0-9a-f]{64}\b/g, replacement: "[HEX_SECRET_REDACTED]" }, + ]; + + let result = input; + for (const { regex, replacement } of patterns) { + result = result.replace(regex, replacement); + } + return result; +} + +function sanitizeTelemetryValue(value: unknown): unknown { + if (typeof value === "string") { + return sanitizeSecrets(value); + } + if (Array.isArray(value)) { + return value.map((item) => sanitizeTelemetryValue(item)); + } + if (value && typeof value === "object") { + const sanitized: Record = {}; + for (const [key, nested] of Object.entries(value as Record)) { + sanitized[key] = sanitizeTelemetryValue(nested); + } + return sanitized; + } + return value; +} + function getCliVersion(): string { if (cachedVersion) return cachedVersion; try { @@ -116,8 +150,8 @@ export class TelemetryCollector { // Store prompts/responses for full mode if (this.level === "full") { - if (prompt) this.prompts.push(prompt); - if (response) this.responses.push(response); + if (prompt) this.prompts.push(sanitizeSecrets(prompt)); + if (response) this.responses.push(sanitizeSecrets(response)); } } @@ -131,7 +165,10 @@ export class TelemetryCollector { startTime: Date.now(), toolName, parameterKeys: parameters ? Object.keys(parameters) : undefined, - parameters: this.level === "full" ? parameters : undefined, + parameters: + this.level === "full" + ? 
(sanitizeTelemetryValue(parameters) as Record | undefined) + : undefined, }; // Track file paths in full mode @@ -164,7 +201,7 @@ export class TelemetryCollector { // Add full mode data if (this.level === "full") { toolCall.parameters = this.activeToolCall.parameters; - if (result) toolCall.result = result; + if (result) toolCall.result = sanitizeSecrets(result); } this.toolCalls.push(toolCall); @@ -199,8 +236,10 @@ export class TelemetryCollector { }; if (this.level === "full") { - toolCall.parameters = options?.parameters; - toolCall.result = options?.result; + toolCall.parameters = options?.parameters + ? (sanitizeTelemetryValue(options.parameters) as Record) + : undefined; + toolCall.result = options?.result ? sanitizeSecrets(options.result) : undefined; // Track file paths if (options?.parameters) { diff --git a/cli/src/telemetry/exporter.ts b/cli/src/telemetry/exporter.ts index c4b4a2be..bef8331c 100644 --- a/cli/src/telemetry/exporter.ts +++ b/cli/src/telemetry/exporter.ts @@ -161,7 +161,7 @@ export class TelemetryExporter { await this.ensureExportsDir(); const filePath = outputPath || join(this.exportsDir, "openai-evals.jsonl"); - await writeFile(filePath, entries.join("\n") + "\n", "utf-8"); + await writeFile(filePath, `${entries.join("\n")}\n`, "utf-8"); return filePath; } @@ -194,7 +194,7 @@ export class TelemetryExporter { await this.ensureExportsDir(); const filePath = outputPath || join(this.exportsDir, "raw-telemetry.jsonl"); - const lines = entries.map((e) => JSON.stringify(e)).join("\n") + "\n"; + const lines = `${entries.map((e) => JSON.stringify(e)).join("\n")}\n`; await writeFile(filePath, lines, "utf-8"); return filePath; diff --git a/cli/src/telemetry/types.ts b/cli/src/telemetry/types.ts index 41650f3c..11424fa8 100644 --- a/cli/src/telemetry/types.ts +++ b/cli/src/telemetry/types.ts @@ -78,6 +78,51 @@ export interface ToolCall { */ export type TelemetryLevel = "anonymous" | "full"; +/** + * Full session data for webhook + */ +export 
interface WebhookSessionData { + sessionId: string; + engine: string; + mode: string; + cliVersion: string; + platform: string; + totalTokensIn: number; + totalTokensOut: number; + totalDurationMs: number; + taskCount: number; + successCount: number; + failedCount: number; + toolCalls: { + toolName: string; + callCount: number; + successCount: number; + failedCount: number; + avgDurationMs: number; + }[]; + tags?: string[]; +} + +/** + * Full session details for webhook (full privacy mode) + */ +export interface WebhookSessionDetails { + prompt?: string; + response?: string; + filePaths?: string[]; +} + +/** + * Telemetry webhook payload + */ +export interface TelemetryWebhookPayload { + event: string; + version: string; + timestamp: string; + session: WebhookSessionData; + details?: WebhookSessionDetails; +} + /** * Telemetry configuration */ diff --git a/cli/src/telemetry/webhook.ts b/cli/src/telemetry/webhook.ts index c305e0e0..6ce965d3 100644 --- a/cli/src/telemetry/webhook.ts +++ b/cli/src/telemetry/webhook.ts @@ -76,11 +76,18 @@ export async function sendTelemetryWebhook( } const payload = buildPayload(session, level); + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout + const safeWebhookTarget = (() => { + try { + const parsed = new URL(webhookUrl); + return `${parsed.protocol}//${parsed.host}`; + } catch { + return "[invalid-webhook-url]"; + } + })(); try { - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout - const response = await fetch(webhookUrl, { method: "POST", headers: { @@ -90,14 +97,12 @@ export async function sendTelemetryWebhook( signal: controller.signal, }); - clearTimeout(timeoutId); - if (!response.ok) { const text = await response.text().catch(() => ""); throw new Error(`HTTP ${response.status}${text ? 
`: ${text}` : ""}`); } - logDebug(`Telemetry webhook sent successfully to ${webhookUrl}`); + logDebug(`Telemetry webhook sent successfully to ${safeWebhookTarget}`); } catch (error) { if (error instanceof Error && error.name === "AbortError") { logError("Telemetry webhook timed out after 10 seconds"); @@ -107,5 +112,7 @@ export async function sendTelemetryWebhook( ); } // Don't throw - webhook failures shouldn't break the session + } finally { + clearTimeout(timeoutId); } } diff --git a/cli/src/telemetry/writer.ts b/cli/src/telemetry/writer.ts index ab64c48f..8b0d9569 100644 --- a/cli/src/telemetry/writer.ts +++ b/cli/src/telemetry/writer.ts @@ -7,6 +7,7 @@ import { existsSync } from "node:fs"; import { appendFile, mkdir, readFile, readdir } from "node:fs/promises"; import { dirname, join } from "node:path"; +import { logDebug } from "../ui/logger.ts"; import type { Session, SessionFull, ToolCall } from "./types.js"; const DEFAULT_OUTPUT_DIR = ".ralphy/telemetry"; @@ -56,7 +57,7 @@ export class TelemetryWriter { async writeSession(session: Session | SessionFull): Promise { await this.ensureDir(); const path = join(this.outputDir, SESSIONS_FILE); - const line = JSON.stringify(session) + "\n"; + const line = `${JSON.stringify(session)}\n`; await appendFile(path, line, "utf-8"); } @@ -68,7 +69,7 @@ export class TelemetryWriter { await this.ensureDir(); const path = join(this.outputDir, TOOL_CALLS_FILE); - const lines = toolCalls.map((call) => JSON.stringify(call)).join("\n") + "\n"; + const lines = `${toolCalls.map((call) => JSON.stringify(call)).join("\n")}\n`; await appendFile(path, lines, "utf-8"); } @@ -91,8 +92,16 @@ export class TelemetryWriter { const content = await readFile(path, "utf-8"); const lines = content.trim().split("\n").filter(Boolean); + const sessions: Array = []; + for (const line of lines) { + try { + sessions.push(JSON.parse(line) as Session | SessionFull); + } catch (error) { + logDebug(`Skipping invalid telemetry session line: ${error}`); + } 
+ } - return lines.map((line) => JSON.parse(line) as Session | SessionFull); + return sessions; } /** @@ -107,8 +116,16 @@ export class TelemetryWriter { const content = await readFile(path, "utf-8"); const lines = content.trim().split("\n").filter(Boolean); + const toolCalls: ToolCall[] = []; + for (const line of lines) { + try { + toolCalls.push(JSON.parse(line) as ToolCall); + } catch (error) { + logDebug(`Skipping invalid telemetry tool-call line: ${error}`); + } + } - return lines.map((line) => JSON.parse(line) as ToolCall); + return toolCalls; } /** diff --git a/cli/src/ui/static-agent-display.ts b/cli/src/ui/static-agent-display.ts new file mode 100644 index 00000000..9d684716 --- /dev/null +++ b/cli/src/ui/static-agent-display.ts @@ -0,0 +1,304 @@ +import type { AgentProgress, ExecutionPhase } from "../execution/progress-types.ts"; +import { formatDuration } from "./logger.ts"; + +const c = { + rst: "\x1b[0m", + bld: "\x1b[1m", + dim: "\x1b[2m", + red: "\x1b[31m", + grn: "\x1b[32m", + yel: "\x1b[33m", + blu: "\x1b[34m", + mag: "\x1b[35m", + cyn: "\x1b[36m", + wht: "\x1b[37m", + gry: "\x1b[90m", +}; + +function sanitizeTerminalText(value: string): string { + return ( + value + // biome-ignore lint/suspicious/noControlCharactersInRegex: ANSI escape removal + .replace(/\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])/g, "") + // biome-ignore lint/suspicious/noControlCharactersInRegex: terminal control chars + .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "") + ); +} + +export class StaticAgentDisplay { + private static instance: StaticAgentDisplay | null = null; + private agentProgressMap = new Map(); + private displayInterval: NodeJS.Timeout | null = null; + + constructor() { + StaticAgentDisplay.instance?.stopDisplay(); + StaticAgentDisplay.instance = this; + } + static getInstance(): StaticAgentDisplay | null { + return StaticAgentDisplay.instance; + } + + log(_message: string): void { + // Logs interrupt display - will be redrawn + } + + updateAgent(agentNum: number, 
step: string): void { + const current = this.agentProgressMap.get(agentNum); + if (!current) return; + if (!current.recentSteps) current.recentSteps = []; + + const cleanStep = sanitizeTerminalText(step) + .trim() + .replace(/^\[RAW OPENCODE OUTPUT\]\s*/i, "") + .replace(/^Thinking:\s*/i, ""); + + // Skip garbled/encoded content + if (cleanStep.match(/^[A-Za-z0-9+/]{30,}$/)) return; + if (!cleanStep || cleanStep.length < 3) return; + + if (current.recentSteps[current.recentSteps.length - 1] === cleanStep) return; + + current.recentSteps.push(cleanStep); + if (current.recentSteps.length > 5) current.recentSteps.shift(); + } + + updateAgentFromOpenCode(agentNum: number, jsonLine: string): void { + try { + // Defensive: ensure jsonLine is a string + if (typeof jsonLine !== "string") { + return; + } + // Defensive: check for empty or whitespace-only strings + if (!jsonLine || jsonLine.trim().length === 0) { + return; + } + const normalized = jsonLine.replace(/^\[RAW OPENCODE OUTPUT\]\s*/i, "").trim(); + if (!normalized.startsWith("{")) { + return; + } + const parsed = JSON.parse(normalized); + // Defensive: validate parsed is an object + if (!parsed || typeof parsed !== "object") { + return; + } + if (parsed.type === "text" && parsed.part?.text) { + const text = parsed.part.text.trim(); + if (text && text.length > 3 && !text.startsWith("{")) { + this.updateAgent(agentNum, text); + } + } else if (parsed.type === "tool_use" && parsed.part?.tool) { + const tool = parsed.part.tool; + const input = parsed.part.state?.input || {}; + const file = input.filePath || input.path || ""; + this.updateAgent(agentNum, file ? `${tool}: ${file}` : tool); + } else if (parsed.type === "step_finish" && parsed.part?.tokens) { + const t = parsed.part.tokens; + // Defensive: validate token values are numbers + const inputTokens = typeof t.input === "number" ? t.input : 0; + const outputTokens = typeof t.output === "number" ? 
t.output : 0; + this.updateAgent(agentNum, `${inputTokens}→${outputTokens} tokens`); + } + } catch { + // Not JSON - ignore silently + } + } + + startDisplay(): void { + if (this.displayInterval) return; + this.render(); + this.displayInterval = setInterval(() => this.render(), 1000); + } + + stopDisplay(): void { + if (this.displayInterval) { + clearInterval(this.displayInterval); + this.displayInterval = null; + } + this.render(); + this.agentProgressMap.clear(); + } + + private render(): void { + const agents = Array.from(this.agentProgressMap.values()); + if (agents.length === 0) return; + + // Get current phase from first agent + const currentPhase = agents[0]?.phase || "execution"; + + const width = process.stdout.columns || 80; + + // Clear screen and move to top + process.stdout.write("\x1b[2J\x1b[0;0H"); + + // Workflow bar + console.log(); + console.log(this.renderWorkflowLine(currentPhase, width)); + console.log(); + + // Header + const title = " AGENTS "; + const side = Math.floor((width - title.length) / 2); + console.log( + `${c.cyn}${"─".repeat(side)}${c.bld}${title}${c.rst}${c.cyn}${"─".repeat(width - side - title.length)}${c.rst}`, + ); + console.log(); + + // Each agent with 5 numbered steps + for (const agent of agents) { + console.log(this.renderAgentLine(agent)); + + const steps = agent.recentSteps || []; + // Pad to always show 5 lines + for (let i = 0; i < 5; i++) { + const num = i + 1; + if (i < steps.length) { + const formatted = this.formatStepWithColors(steps[steps.length - 1 - i]); + console.log(` ${c.gry}${num}.${c.rst} ${formatted}`); + } else { + console.log(` ${c.gry}${num}.${c.rst}`); + } + } + console.log(); + } + + // Instructions at bottom + console.log(`${c.gry}Press Ctrl+C to stop${c.rst}`); + } + + private renderWorkflowLine(phase: ExecutionPhase, width: number): string { + const phases: ExecutionPhase[] = ["planning", "execution", "testing"]; + const phaseIndex = phases.indexOf(phase); + + const parts: string[] = []; + for 
(let i = 0; i < phases.length; i++) { + const p = phases[i]; + const isActive = i === phaseIndex; + const isPast = i < phaseIndex; + + if (isActive) { + const color = p === "planning" ? c.cyn : p === "execution" ? c.mag : c.yel; + parts.push(`${c.bld}${color}▓▓▓ ${p.toUpperCase()} ▓▓▓${c.rst}`); + } else if (isPast) { + parts.push(`${c.gry}░ ${p.toUpperCase()} ░${c.rst}`); + } else { + parts.push(`${c.gry}${c.dim} ${p.toUpperCase()} ${c.rst}`); + } + + if (i < phases.length - 1) { + parts.push(isPast ? `${c.cyn} → ${c.rst}` : `${c.gry} → ${c.rst}`); + } + } + + const content = parts.join(""); + const pad = Math.max(0, Math.floor((width - this.stripAnsi(content).length) / 2)); + return " ".repeat(pad) + content; + } + + private stripAnsi(str: string): string { + // biome-ignore lint/suspicious/noControlCharactersInRegex: ANSI escape sequences are intentional + return str.replace(/\x1b\[[0-9;]*m/g, ""); + } + + private formatStepWithColors(step: string): string { + // Match patterns like "Tool: bash: command" or "Glob: pattern" or "Read: filepath" + const toolMatch = step.match( + /^(Tool|Read|Write|Edit|Create|Delete|Glob|Grep|Search|Analyze|Run|Test|Execute|Build|Fix|Debug)\s*:\s*(.+)/i, + ); + if (toolMatch) { + const action = toolMatch[1]; + const rest = toolMatch[2]; + // Split rest by first colon if present (e.g., "bash: ls -la") + const subMatch = rest.match(/^([^:]+):\s*(.+)/); + if (subMatch) { + const tool = subMatch[1]; + const args = subMatch[2]; + // Color the action type + const actionColor = this.getActionColor(action); + return `${actionColor}${action}${c.rst}: ${c.cyn}${tool}${c.rst}: ${c.gry}${args.slice(0, 50)}${c.rst}`; + } + // No sub-colon, just action: rest + const actionColor = this.getActionColor(action); + return `${actionColor}${action}${c.rst}: ${c.gry}${rest.slice(0, 55)}${c.rst}`; + } + // For plain text steps, return as-is (will be white) + return `${c.wht}${step.slice(0, 60)}${c.rst}`; + } + + private getActionColor(action: string): 
string { + const lower = action.toLowerCase(); + if (lower === "tool" || lower === "run" || lower === "execute") return c.yel; + if ( + lower === "read" || + lower === "glob" || + lower === "grep" || + lower === "search" || + lower === "analyze" + ) + return c.blu; + if (lower === "write" || lower === "edit" || lower === "create" || lower === "delete") + return c.mag; + if (lower === "test" || lower === "build") return c.grn; + if (lower === "fix" || lower === "debug") return c.red; + return c.wht; + } + + private renderAgentLine(agent: AgentProgress): string { + const phase = agent.phase || "execution"; + const model = agent.modelName || "main"; + const elapsed = formatDuration(Date.now() - agent.startTime); + const status = + agent.status === "completed" + ? `${c.grn}✓${c.rst}` + : agent.status === "failed" + ? `${c.red}✗${c.rst}` + : `${c.cyn}●${c.rst}`; + + const phaseColor = phase === "planning" ? c.cyn : phase === "execution" ? c.mag : c.yel; + const phaseTag = `${phaseColor}[${phase.toUpperCase()}]${c.rst}`; + const modelTag = `${c.gry}[${c.blu}${model}${c.gry}]${c.rst}`; + const title = + agent.taskTitle.length > 30 ? 
`${agent.taskTitle.slice(0, 27)}...` : agent.taskTitle; + + return `${status} ${c.bld}Agent ${agent.agentNum}${c.rst} ${phaseTag} ${c.wht}${title}${c.rst} ${modelTag} ${c.gry}${elapsed}${c.rst}`; + } + + setAgentStatus( + agentNum: number, + taskTitle: string, + status: "planning" | "working" | "completed" | "failed", + phase?: ExecutionPhase, + modelName?: string, + ): void { + const current = this.agentProgressMap.get(agentNum); + if (!current) { + this.agentProgressMap.set(agentNum, { + agentNum, + taskTitle, + status, + phase: phase || "execution", + modelName: modelName || "main", + worktreeDir: "", + startTime: Date.now(), + recentSteps: [], + }); + } else { + current.taskTitle = taskTitle; + current.status = status; + if (phase) current.phase = phase; + if (modelName) current.modelName = modelName; + } + } + + getAgentTaskTitle(agentNum: number): string | undefined { + return this.agentProgressMap.get(agentNum)?.taskTitle; + } + + clearAgentSteps(agentNum: number): void { + const current = this.agentProgressMap.get(agentNum); + if (current) current.recentSteps = []; + } + + agentComplete(agentNum: number): void { + this.agentProgressMap.delete(agentNum); + } +} diff --git a/cli/src/utils/cleanup.ts b/cli/src/utils/cleanup.ts new file mode 100644 index 00000000..211458cb --- /dev/null +++ b/cli/src/utils/cleanup.ts @@ -0,0 +1,157 @@ +import type { ChildProcess } from "node:child_process"; +import { spawnSync } from "node:child_process"; +import { logDebug, logWarn } from "../ui/logger.ts"; + +type CleanupFn = () => Promise | void; + +const cleanupRegistry: Set = new Set(); +const trackedProcesses: Set = new Set(); +let isCleaningUp = false; + +function isProcessRunning(proc: ChildProcess): boolean { + return proc.exitCode === null && proc.signalCode === null; +} + +/** + * Register a function to be called on process exit or manual cleanup + */ +export function registerCleanup(fn: CleanupFn): () => void { + cleanupRegistry.add(fn); + return () => 
cleanupRegistry.delete(fn); +} + +/** + * Register a child process to be tracked and killed on exit + */ +export function registerProcess(proc: ChildProcess): () => void { + trackedProcesses.add(proc); + + const remove = () => trackedProcesses.delete(proc); + + proc.on("exit", remove); + proc.on("error", remove); + + return remove; +} + +/** + * Run all registered cleanup functions and kill tracked processes + */ +export async function runCleanup(): Promise { + if (isCleaningUp) return; + isCleaningUp = true; + + // 1. Kill all tracked child processes with verification + for (const proc of trackedProcesses) { + try { + if (proc.pid && isProcessRunning(proc)) { + const pid = proc.pid; + + if (process.platform === "win32") { + // Windows needs taskkill for robust child tree termination + const result = spawnSync("taskkill", ["/pid", String(pid), "/f", "/t"], { + stdio: "pipe", + }); + + // Verify the process was actually killed + // Status 128 = process already exited, which is fine + if (result.status !== 0 && result.status !== 128) { + logWarn(`taskkill may have failed for PID ${pid} (exit code: ${result.status})`); + if (result.stderr) { + logDebug(`taskkill stderr: ${result.stderr.toString()}`); + } + } + + await new Promise((resolve) => setTimeout(resolve, 500)); + if (isProcessRunning(proc)) { + logWarn(`Process ${pid} may still be running after taskkill`); + } + } else { + // Try graceful termination first + proc.kill("SIGTERM"); + + // Wait a bit and verify it's dead + await new Promise((resolve) => setTimeout(resolve, 1000)); + + // Check if process is still running + if (isProcessRunning(proc)) { + proc.kill("SIGKILL"); + + // Final verification + await new Promise((resolve) => setTimeout(resolve, 500)); + if (isProcessRunning(proc)) { + logWarn(`Failed to terminate process ${pid} after SIGKILL`); + } + } + } + } + } catch (err) { + // Process termination failed, continue cleanup + logDebug(`Failed to terminate process ${proc.pid}: ${err}`); + } + } + 
trackedProcesses.clear(); + + // 2. Run registered cleanup functions + const promises: Promise[] = []; + for (const fn of cleanupRegistry) { + try { + const result = fn(); + if (result instanceof Promise) { + promises.push(result); + } + } catch (err) { + // Log sync errors but continue with other cleanup functions + promises.push(Promise.reject(err)); + } + } + + const results = await Promise.allSettled(promises); + for (const result of results) { + if (result.status === "rejected") { + logWarn(`Cleanup task failed: ${result.reason}`); + } + } + cleanupRegistry.clear(); + isCleaningUp = false; +} + +let isShuttingDown = false; +let handlersRegistered = false; + +/** + * Setup process signal handlers for cleanup + */ +export function setupSignalHandlers(): void { + if (handlersRegistered) { + return; + } + handlersRegistered = true; + + const signals: NodeJS.Signals[] = ["SIGINT", "SIGTERM"]; + + for (const signal of signals) { + process.on(signal, async () => { + // Prevent duplicate cleanup runs + if (isShuttingDown) { + process.stdout.write(`\nReceived ${signal}, cleanup already in progress...\n`); + return; + } + isShuttingDown = true; + + // Use writeSync to avoid event loop issues during exit + process.stdout.write(`\nReceived ${signal}, cleaning up processes and files...\n`); + + try { + await runCleanup(); + process.exit(0); + } catch (error) { + process.stderr.write(`\nCleanup failed: ${error}\n`); + process.exit(1); + } + }); + } + + // Note: uncaughtException is handled in cli/src/index.ts for the main process + // This avoids duplicate handlers and ensures consistent error handling +} diff --git a/cli/src/utils/errors.ts b/cli/src/utils/errors.ts new file mode 100644 index 00000000..d5aa81a6 --- /dev/null +++ b/cli/src/utils/errors.ts @@ -0,0 +1,131 @@ +/** + * Standardized error handling utilities for consistent error types across the codebase + */ + +export class RalphyError extends Error { + public readonly code: string; + public readonly context?: 
Record; + + constructor(message: string, code = "RALPHY_ERROR", context?: Record) { + super(message); + this.name = "RalphyError"; + this.code = code; + this.context = context; + + // Maintains proper stack trace for where our error was thrown (only available on V8) + if (Error.captureStackTrace) { + Error.captureStackTrace(this, RalphyError); + } + } +} + +export class ValidationError extends RalphyError { + constructor(message: string, context?: Record) { + super(message, "VALIDATION_ERROR", context); + this.name = "ValidationError"; + } +} + +export class TimeoutError extends RalphyError { + constructor(message: string, context?: Record) { + super(message, "TIMEOUT_ERROR", context); + this.name = "TimeoutError"; + } +} + +export class LockError extends RalphyError { + constructor(message: string, context?: Record) { + super(message, "LOCK_ERROR", context); + this.name = "LockError"; + } +} + +export class ProcessError extends RalphyError { + constructor(message: string, context?: Record) { + super(message, "PROCESS_ERROR", context); + this.name = "ProcessError"; + } +} + +export class SandboxError extends RalphyError { + constructor(message: string, context?: Record) { + super(message, "SANDBOX_ERROR", context); + this.name = "SandboxError"; + } +} + +/** + * Convert any error to a standardized format + */ +export function standardizeError(error: unknown): RalphyError { + if (error instanceof RalphyError) { + return error; + } + + if (error instanceof Error) { + return new RalphyError(error.message, "UNKNOWN_ERROR", { + originalName: error.name, + originalStack: error.stack, + }); + } + + if (typeof error === "string") { + return new RalphyError(error, "STRING_ERROR"); + } + + return new RalphyError(String(error), "UNKNOWN_ERROR", { originalType: typeof error }); +} + +/** + * Check if an error is retryable + */ +export function isRetryableError(error: unknown): boolean { + const standardized = standardizeError(error); + + const retryableCodes = 
["TIMEOUT_ERROR", "LOCK_ERROR", "PROCESS_ERROR", "NETWORK_ERROR", "RATE_LIMIT_ERROR"]; + + const retryableMessages = [ + "timeout", + "connection refused", + "network", + "rate limit", + "too many requests", + "temporary failure", + "try again", + "locked", + "conflict", + "connection error", + "unable to connect", + "internet connection", + "econnrefused", + "econnreset", + "socket hang up", + "fetch failed", + ]; + + const message = standardized.message.toLowerCase(); + + // Check error code + if (retryableCodes.includes(standardized.code)) { + return true; + } + + // Check error message + return retryableMessages.some((pattern) => message.includes(pattern)); +} + +/** + * Create error with context for logging + */ +export function createErrorWithContext(error: unknown, context: Record): RalphyError { + const standardized = standardizeError(error); + + if (standardized.context) { + return new RalphyError(standardized.message, standardized.code, { + ...standardized.context, + ...context, + }); + } + + return new RalphyError(standardized.message, standardized.code, context); +} diff --git a/cli/src/utils/file-indexer.ts b/cli/src/utils/file-indexer.ts new file mode 100644 index 00000000..024dd3ce --- /dev/null +++ b/cli/src/utils/file-indexer.ts @@ -0,0 +1,1025 @@ +/** + * File Indexer Module + * + * Provides semantic chunking for large codebases and file hash caching for unchanged files. + * This module indexes the codebase with file metadata (path, hash, size, mtime, keywords) + * and provides semantic search to find relevant files based on task keywords. 
+ */ + +import { createHash } from "node:crypto"; +import { existsSync, mkdirSync, readdirSync, readFileSync, rmSync, statSync, writeFileSync } from "node:fs"; +import { join, relative } from "node:path"; +import { DEFAULT_IGNORE_PATTERNS, MAX_FILE_SIZE_FOR_HASH } from "../config/constants.ts"; +import { RALPHY_DIR } from "../config/loader.ts"; +import { logDebug } from "../ui/logger.ts"; + +// Constants +const FILE_INDEX_CACHE = "file-index.json"; +const MAX_KEYWORDS_PER_FILE = 20; +const MAX_CONTENT_PREVIEW_LENGTH = 500; +const RELEVANCE_THRESHOLD = 0.1; + +/** + * Maximum glob pattern length to prevent ReDoS attacks + */ +const MAX_GLOB_PATTERN_LENGTH = 1000; + +/** + * File metadata entry in the index + */ +export interface FileIndexEntry { + /** Relative path from workspace root */ + path: string; + /** File content hash (sha256, first 16 chars) */ + hash: string; + /** File size in bytes */ + size: number; + /** Last modification time (ms since epoch) */ + mtime: number; + /** Extracted keywords from path and content */ + keywords: string[]; + /** Content preview for semantic analysis */ + preview?: string; + /** File extension */ + extension: string; + /** Directory depth */ + depth: number; +} + +/** + * The complete file index for a workspace + */ +export interface FileIndex { + /** Version for cache invalidation */ + version: number; + /** Timestamp of index creation */ + timestamp: number; + /** Workspace root path */ + workDir: string; + /** Map of relative paths to file entries */ + files: Map; + /** Total files indexed */ + totalFiles: number; + /** Total size of all indexed files */ + totalSize: number; +} + +/** + * Serialized version of FileIndex for JSON storage + */ +interface SerializedFileIndex { + version: number; + timestamp: number; + workDir: string; + files: Record; + totalFiles: number; + totalSize: number; +} + +// In-memory cache of file indexes +const indexCache = new Map(); + +// Track promises for workspaces being indexed to allow 
waiting +const indexingPromises = new Map>(); + +/** + * Deep clone a FileIndex to return an immutable copy + * Prevents callers from modifying the shared cache + */ +function cloneFileIndex(index: FileIndex): FileIndex { + return { + version: index.version, + timestamp: index.timestamp, + workDir: index.workDir, + files: new Map(index.files), + totalFiles: index.totalFiles, + totalSize: index.totalSize, + }; +} + +/** + * Get the path to the file index cache + */ +function getIndexCachePath(workDir: string): string { + return join(workDir, RALPHY_DIR, FILE_INDEX_CACHE); +} + +/** + * Check if a file should be ignored based on patterns + */ +function shouldIgnoreFile(filePath: string, ignorePatterns: string[]): boolean { + const normalizedPath = filePath.replace(/\\/g, "/"); + + for (const pattern of ignorePatterns) { + if (matchesGlob(normalizedPath, pattern)) { + return true; + } + } + + return false; +} + +/** + * Convert glob pattern to regex + */ +function matchesGlob(filePath: string, pattern: string): boolean { + // Handle ** patterns properly + const regexPattern = globToRegex(pattern); + return regexPattern.test(filePath); +} + +/** + * Convert glob pattern to regex + * + * SECURITY NOTE: This function includes protections against ReDoS attacks: + * - Input length is limited to MAX_GLOB_PATTERN_LENGTH + * - Uses non-backtracking patterns where possible + */ +function globToRegex(pattern: string): RegExp { + const safePattern = + pattern.length > MAX_GLOB_PATTERN_LENGTH + ? pattern.slice(0, MAX_GLOB_PATTERN_LENGTH) + : pattern; + + // Limit pattern length to prevent ReDoS attacks + if (safePattern.length < pattern.length) { + logDebug(`Glob pattern too long (${pattern.length} > ${MAX_GLOB_PATTERN_LENGTH}), truncating`); + } + + // Escape special regex characters except * and ? 
+ // Use a bounded approach to prevent catastrophic backtracking + let regex = safePattern + .replace(/[.+^${}()|[\]\\]/g, "\\$&") + .replace(/\*\*/g, "\0DOUBLESTAR\0") // Temporarily mark ** + .replace(/\*/g, "[^/]*") // Single * matches anything except / + .replace(/\?/g, "[^/]"); // ? matches single char except / + + // Handle ** (match any number of directories) using non-capturing group + // The (?:.*/)? pattern is bounded - it won't cause catastrophic backtracking + regex = regex.replace(/\0DOUBLESTAR\0/g, "(?:.*/)?"); + + // Handle directory separators + regex = regex.replace(/\//g, "[/\\\\]"); + + // Anchor to start + regex = `^${regex}`; + + // Match at end if pattern doesn't end with /** + if (!safePattern.endsWith("/**")) { + regex += "$"; + } + + return new RegExp(regex, "i"); +} + +/** + * Extract keywords from a file path + */ +function extractPathKeywords(filePath: string): string[] { + const keywords = new Set(); + + // Split path into components + const parts = filePath.split(/[/\\]/); + + for (const part of parts) { + // Skip empty parts and common non-descriptive names + if (!part || part === "." 
|| part === "..") continue; + + // Extract words from camelCase, PascalCase, snake_case, kebab-case + const words = part + .replace(/\.[^.]+$/, "") // Remove extension + .split(/[_-]/) // Split by underscore and hyphen + .flatMap((word) => { + // Split camelCase/PascalCase + return word + .replace(/([a-z])([A-Z])/g, "$1 $2") + .split(/\s+/) + .filter((w) => w.length > 2); + }); + + for (const word of words) { + const lower = word.toLowerCase(); + if (isSignificantKeyword(lower)) { + keywords.add(lower); + } + } + + // Add the full filename (without extension) as a keyword + const nameWithoutExt = part.replace(/\.[^.]+$/, "").toLowerCase(); + if (nameWithoutExt.length > 2 && !isCommonWord(nameWithoutExt)) { + keywords.add(nameWithoutExt); + } + } + + // Add extension as keyword + const ext = filePath.split(".").pop()?.toLowerCase(); + if (ext && ext !== filePath) { + keywords.add(ext); + } + + return Array.from(keywords); +} + +/** + * Extract keywords from file content + */ +function extractContentKeywords(content: string, maxKeywords = 10): string[] { + const keywords = new Set(); + + // Extract function/class/variable names from code + const patterns = [ + // Function declarations + /(?:function|def|fn|func)\s+(\w+)/g, + // Class declarations + /(?:class|interface|type|struct)\s+(\w+)/g, + // Variable declarations (const, let, var) + /(?:const|let|var)\s+(\w+)\s*[=:]/g, + // Export declarations + /export\s+(?:default\s+)?(?:class|function|const|let|var)?\s*(\w+)/g, + // Import statements - extract imported names + /import\s+{([^}]+)}/g, + // Python imports + /from\s+\S+\s+import\s+([^\n]+)/g, + // Go/Rust function signatures + /fn\s+(\w+)\s*\(/g, + // React components (PascalCase functions) + /const\s+([A-Z][a-zA-Z0-9]*)\s*[:=]/g, + ]; + + for (const pattern of patterns) { + let match: RegExpExecArray | null = null; + // biome-ignore lint/suspicious/noAssignInExpressions: Standard regex loop pattern + while ((match = pattern.exec(content)) !== null) { + const 
names = match[1] + .split(/[,\s]+/) + .map((n) => n.trim()) + .filter((n) => n.length > 2 && isSignificantKeyword(n.toLowerCase())); + + for (const name of names) { + keywords.add(name.toLowerCase()); + } + } + } + + // Extract common words that appear frequently + const words = content.toLowerCase().match(/\b[a-z]{3,}\b/g) || []; + + const wordFreq = new Map(); + for (const word of words) { + if (!isCommonWord(word) && isSignificantKeyword(word)) { + wordFreq.set(word, (wordFreq.get(word) || 0) + 1); + } + } + + // Add most frequent words + const sortedWords = Array.from(wordFreq.entries()) + .sort((a, b) => b[1] - a[1]) + .slice(0, maxKeywords); + + for (const [word] of sortedWords) { + keywords.add(word); + } + + return Array.from(keywords).slice(0, maxKeywords); +} + +/** + * Check if a word is a common/insignificant word + */ +function isCommonWord(word: string): boolean { + const commonWords = new Set([ + "the", + "and", + "for", + "are", + "but", + "not", + "you", + "all", + "can", + "had", + "her", + "was", + "one", + "our", + "out", + "day", + "get", + "has", + "him", + "his", + "how", + "its", + "may", + "new", + "now", + "old", + "see", + "two", + "who", + "boy", + "did", + "she", + "use", + "way", + "many", + "oil", + "sit", + "set", + "run", + "eat", + "far", + "sea", + "eye", + "ago", + "off", + "too", + "any", + "say", + "man", + "try", + "ask", + "end", + "why", + "let", + "put", + "own", + "tell", + "very", + "when", + "come", + "here", + "just", + "like", + "long", + "make", + "over", + "such", + "take", + "than", + "them", + "well", + "were", + "will", + "with", + "have", + "from", + "they", + "know", + "want", + "been", + "good", + "much", + "some", + "time", + "this", + "that", + "would", + "there", + "their", + "what", + "said", + "each", + "which", + "about", + "could", + "other", + "after", + "first", + "never", + "these", + "think", + "where", + "being", + "every", + "great", + "might", + "shall", + "still", + "those", + "while", + "true", 
+ "false", + "null", + "undefined", + "return", + "import", + "export", + "default", + "async", + "await", + "yield", + "throw", + "catch", + "finally", + "break", + "continue", + "switch", + "case", + "try", + "new", + ]); + return commonWords.has(word.toLowerCase()); +} + +/** + * Check if a keyword is significant (not too short, not numeric) + */ +function isSignificantKeyword(word: string): boolean { + if (word.length < 3) return false; + if (/^\d+$/.test(word)) return false; + if (/^[0-9a-f]{8,}$/i.test(word)) return false; // Likely a hash + return true; +} + +/** + * Extract keywords from a task description + */ +export function extractTaskKeywords(taskDescription: string): string[] { + const keywords = new Set(); + + // Extract file paths mentioned in the task + const pathMatches = taskDescription.match(/[\w\-./\\]+\.[\w]+/g) || []; + for (const path of pathMatches) { + const pathKeywords = extractPathKeywords(path); + for (const kw of pathKeywords) { + keywords.add(kw); + } + } + + // Extract camelCase/PascalCase words (likely identifiers) + const identifierMatches = taskDescription.match(/\b[a-z]+[A-Z][a-zA-Z0-9]*\b/g) || []; + for (const id of identifierMatches) { + const words = id + .replace(/([a-z])([A-Z])/g, "$1 $2") + .split(/\s+/) + .filter((w) => w.length > 2); + for (const word of words) { + keywords.add(word.toLowerCase()); + } + } + + // Extract technical terms and concepts + const techTerms = taskDescription.match(/\b[A-Z][a-z]+[A-Z][a-zA-Z]+\b/g) || []; + for (const term of techTerms) { + keywords.add(term.toLowerCase()); + } + + // Extract words that look like file names or components + const componentMatches = + taskDescription.match( + /\b[A-Z][a-zA-Z0-9]*(?:Component|Module|Service|Handler|Controller|Model|View|Util|Helper|Manager|Store|Context|Provider|Hook)\b/g, + ) || []; + for (const comp of componentMatches) { + keywords.add(comp.toLowerCase()); + } + + // Extract all significant words + const allWords = 
taskDescription.toLowerCase().match(/\b[a-z]{3,}\b/g) || []; + + for (const word of allWords) { + if (!isCommonWord(word) && isSignificantKeyword(word)) { + keywords.add(word); + } + } + + return Array.from(keywords); +} + +/** + * Calculate relevance score between task keywords and file entry + */ +function calculateRelevanceScore(taskKeywords: string[], fileEntry: FileIndexEntry): number { + let score = 0; + const fileKeywords = new Set(fileEntry.keywords); + + for (const taskKw of taskKeywords) { + // Exact match in file keywords + if (fileKeywords.has(taskKw)) { + score += 1.0; + continue; + } + + // Partial match (task keyword is substring of file keyword or vice versa) + for (const fileKw of fileKeywords) { + if (fileKw.includes(taskKw) || taskKw.includes(fileKw)) { + score += 0.5; + break; + } + } + + // Check if keyword appears in path + if (fileEntry.path.toLowerCase().includes(taskKw)) { + score += 0.3; + } + } + + // Normalize by number of task keywords + return taskKeywords.length > 0 ? 
score / taskKeywords.length : 0; +} + +/** + * Create a file index entry for a single file + */ +function createFileIndexEntry( + filePath: string, + relPath: string, + maxSizeForContent = MAX_FILE_SIZE_FOR_HASH, +): FileIndexEntry | null { + try { + const stat = statSync(filePath); + + if (!stat.isFile()) return null; + + // Calculate hash + let hash = ""; + let preview = ""; + let contentKeywords: string[] = []; + + if (stat.size <= maxSizeForContent) { + try { + const content = readFileSync(filePath, "utf-8"); + hash = createHash("sha256").update(content).digest("hex").slice(0, 16); + preview = content.slice(0, MAX_CONTENT_PREVIEW_LENGTH); + contentKeywords = extractContentKeywords(content, 10); + } catch { + // Binary or unreadable file - use mtime+size as pseudo-hash + hash = createHash("sha256").update(`${stat.mtimeMs}-${stat.size}`).digest("hex").slice(0, 16); + } + } else { + // Large file - use mtime+size as pseudo-hash + hash = createHash("sha256").update(`${stat.mtimeMs}-${stat.size}`).digest("hex").slice(0, 16); + } + + // Extract path keywords + const pathKeywords = extractPathKeywords(relPath); + + // Combine keywords + const allKeywords = [...new Set([...pathKeywords, ...contentKeywords])].slice(0, MAX_KEYWORDS_PER_FILE); + + // Get extension + const ext = relPath.split(".").pop()?.toLowerCase() || ""; + + // Calculate depth + const depth = relPath.split(/[/\\]/).length - 1; + + return { + path: relPath, + hash, + size: stat.size, + mtime: stat.mtimeMs, + keywords: allKeywords, + preview, + extension: ext, + depth, + }; + } catch (error) { + logDebug(`Failed to index file ${filePath}: ${error}`); + return null; + } +} + +/** + * Index all files in a directory recursively + * + * Thread-safe: Returns a cloned copy to prevent cache corruption. + * Concurrent calls for the same workspace will wait for a single indexing operation. 
+ */ +export async function indexWorkspace( + workDir: string, + options: { + ignorePatterns?: string[]; + forceRebuild?: boolean; + maxDepth?: number; + } = {}, +): Promise { + const { ignorePatterns = DEFAULT_IGNORE_PATTERNS, forceRebuild = false, maxDepth = 50 } = options; + + // Check memory cache first - return a clone to prevent mutation + const cached = indexCache.get(workDir); + if (!forceRebuild && cached) { + return cloneFileIndex(cached); + } + + // Check if another operation is already indexing this workspace + const existingPromise = indexingPromises.get(workDir); + if (existingPromise) { + logDebug(`Waiting for concurrent indexing of ${workDir}...`); + const result = await existingPromise; + // Return a clone even from the concurrent operation's result + return cloneFileIndex(result); + } + + // Create the indexing promise to lock this workspace + const indexingPromise = performIndexing(workDir, ignorePatterns, forceRebuild, maxDepth); + indexingPromises.set(workDir, indexingPromise); + + try { + const result = await indexingPromise; + // Return a cloned copy to prevent cache corruption + return cloneFileIndex(result); + } finally { + // Always clean up the promise lock + indexingPromises.delete(workDir); + } +} + +/** + * Perform the actual indexing operation + */ +async function performIndexing( + workDir: string, + ignorePatterns: string[], + forceRebuild: boolean, + maxDepth: number, +): Promise { + // Double-check cache after acquiring lock (another thread may have completed) + const cached = indexCache.get(workDir); + if (!forceRebuild && cached) { + return cached; + } + + // Try to load from disk cache + if (!forceRebuild) { + const diskCache = loadIndexFromDisk(workDir); + if (diskCache) { + // Perform incremental update + const updated = await incrementalUpdateIndex(workDir, diskCache, ignorePatterns, maxDepth); + indexCache.set(workDir, updated); + saveIndexToDisk(workDir, updated); + return updated; + } + } + + // Build fresh index + const 
index: FileIndex = { + version: 1, + timestamp: Date.now(), + workDir, + files: new Map(), + totalFiles: 0, + totalSize: 0, + }; + + // Ensure .ralphy directory exists + const ralphyDir = join(workDir, RALPHY_DIR); + if (!existsSync(ralphyDir)) { + mkdirSync(ralphyDir, { recursive: true }); + } + + // Collect all files + const filesToIndex: string[] = []; + + function collectFiles(dir: string, currentDepth: number) { + if (currentDepth > maxDepth) return; + + try { + const entries = readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = join(dir, entry.name); + const relPath = relative(workDir, fullPath); + + if (shouldIgnoreFile(relPath, ignorePatterns)) { + continue; + } + + if (entry.isDirectory()) { + collectFiles(fullPath, currentDepth + 1); + } else if (entry.isFile()) { + filesToIndex.push(fullPath); + } + } + } catch (error) { + logDebug(`Failed to read directory ${dir}: ${error}`); + } + } + + collectFiles(workDir, 0); + + // Index all collected files + for (const filePath of filesToIndex) { + const relPath = relative(workDir, filePath); + const entry = createFileIndexEntry(filePath, relPath); + if (entry) { + index.files.set(relPath, entry); + index.totalFiles++; + index.totalSize += entry.size; + } + } + + // Cache and save + indexCache.set(workDir, index); + saveIndexToDisk(workDir, index); + + logDebug(`Indexed ${index.totalFiles} files (${(index.totalSize / 1024 / 1024).toFixed(2)} MB)`); + + return index; +} + +/** + * Perform incremental update of file index + */ +async function incrementalUpdateIndex( + workDir: string, + existingIndex: FileIndex, + ignorePatterns: string[], + maxDepth: number, +): Promise { + const updatedIndex: FileIndex = { + version: existingIndex.version, + timestamp: Date.now(), + workDir, + files: new Map(existingIndex.files), + totalFiles: 0, + totalSize: 0, + }; + + const currentFiles = new Set(); + let reindexedCount = 0; + let unchangedCount = 0; + let removedCount = 0; + + 
function scanDirectory(dir: string, currentDepth: number) { + if (currentDepth > maxDepth) return; + + try { + const entries = readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = join(dir, entry.name); + const relPath = relative(workDir, fullPath); + + if (shouldIgnoreFile(relPath, ignorePatterns)) { + continue; + } + + if (entry.isDirectory()) { + scanDirectory(fullPath, currentDepth + 1); + } else if (entry.isFile()) { + currentFiles.add(relPath); + + const existingEntry = updatedIndex.files.get(relPath); + const stat = statSync(fullPath); + + if (existingEntry && existingEntry.mtime === stat.mtimeMs && existingEntry.size === stat.size) { + // File unchanged - keep existing entry + unchangedCount++; + } else { + // File changed or new - reindex + const newEntry = createFileIndexEntry(fullPath, relPath); + if (newEntry) { + updatedIndex.files.set(relPath, newEntry); + reindexedCount++; + } + } + } + } + } catch (error) { + logDebug(`Failed to scan directory ${dir}: ${error}`); + } + } + + scanDirectory(workDir, 0); + + // Remove deleted files from index + for (const [relPath] of updatedIndex.files) { + if (!currentFiles.has(relPath)) { + updatedIndex.files.delete(relPath); + removedCount++; + } + } + + // Recalculate totals + for (const entry of updatedIndex.files.values()) { + updatedIndex.totalFiles++; + updatedIndex.totalSize += entry.size; + } + + logDebug( + `Incremental index update: ${unchangedCount} unchanged, ${reindexedCount} reindexed, ${removedCount} removed`, + ); + + return updatedIndex; +} + +/** + * Load index from disk cache + */ +function loadIndexFromDisk(workDir: string): FileIndex | null { + const cachePath = getIndexCachePath(workDir); + + if (!existsSync(cachePath)) { + return null; + } + + try { + const content = readFileSync(cachePath, "utf-8"); + const serialized: SerializedFileIndex = JSON.parse(content); + + return { + version: serialized.version, + timestamp: serialized.timestamp, + workDir: 
serialized.workDir, + files: new Map(Object.entries(serialized.files)), + totalFiles: serialized.totalFiles, + totalSize: serialized.totalSize, + }; + } catch (error) { + logDebug(`Failed to load file index from disk: ${error}`); + return null; + } +} + +/** + * Save index to disk cache + */ +function saveIndexToDisk(workDir: string, index: FileIndex): void { + const cachePath = getIndexCachePath(workDir); + + try { + const serialized: SerializedFileIndex = { + version: index.version, + timestamp: index.timestamp, + workDir: index.workDir, + files: Object.fromEntries(index.files), + totalFiles: index.totalFiles, + totalSize: index.totalSize, + }; + + writeFileSync(cachePath, JSON.stringify(serialized, null, 2)); + } catch (error) { + logDebug(`Failed to save file index to disk: ${error}`); + } +} + +/** + * Get relevant files for a task based on semantic matching + */ +export async function getRelevantFilesForTask( + workDir: string, + taskDescription: string, + options: { + maxFiles?: number; + minRelevance?: number; + includeExtensions?: string[]; + excludeExtensions?: string[]; + } = {}, +): Promise { + const { + maxFiles = 50, + minRelevance = RELEVANCE_THRESHOLD, + includeExtensions, + excludeExtensions = ["log", "lock", "map", "min.js", "min.css"], + } = options; + + // Get or build file index + const index = await indexWorkspace(workDir); + + // Extract keywords from task + const taskKeywords = extractTaskKeywords(taskDescription); + logDebug(`Task keywords: ${taskKeywords.join(", ")}`); + + if (taskKeywords.length === 0) { + // No keywords extracted - return most recently modified files as fallback + return Array.from(index.files.values()) + .sort((a, b) => b.mtime - a.mtime) + .slice(0, maxFiles) + .map((e) => e.path); + } + + // Score all files + const scoredFiles: Array<{ path: string; score: number; entry: FileIndexEntry }> = []; + + for (const [path, entry] of index.files) { + // Filter by extension + if (includeExtensions && 
!includeExtensions.includes(entry.extension)) { + continue; + } + if (excludeExtensions.includes(entry.extension)) { + continue; + } + + const score = calculateRelevanceScore(taskKeywords, entry); + if (score >= minRelevance) { + scoredFiles.push({ path, score, entry }); + } + } + + // Sort by score (descending), then by mtime (most recent first for ties) + scoredFiles.sort((a, b) => { + if (b.score !== a.score) { + return b.score - a.score; + } + return b.entry.mtime - a.entry.mtime; + }); + + // Take top N files + const relevantFiles = scoredFiles.slice(0, maxFiles).map((s) => s.path); + + logDebug(`Found ${relevantFiles.length} relevant files for task (scored ${scoredFiles.length} total)`); + + return relevantFiles; +} + +/** + * Get file hash from index (useful for caching unchanged files) + */ +export async function getFileHashFromIndex(workDir: string, relPath: string): Promise { + const index = await indexWorkspace(workDir); + const entry = index.files.get(relPath); + return entry?.hash ?? null; +} + +/** + * Check if a file has changed based on index + */ +export async function hasFileChanged(workDir: string, relPath: string, expectedHash: string): Promise { + const currentHash = await getFileHashFromIndex(workDir, relPath); + if (currentHash === null) { + return true; // File not in index, assume changed + } + return currentHash !== expectedHash; +} + +/** + * Get file metadata from index + */ +export async function getFileMetadata(workDir: string, relPath: string): Promise { + const index = await indexWorkspace(workDir); + return index.files.get(relPath) ?? 
null; +} + +/** + * Clear the file index cache (both memory and disk) + */ +export function clearFileIndexCache(workDir: string): void { + indexCache.delete(workDir); + const cachePath = getIndexCachePath(workDir); + try { + if (existsSync(cachePath)) { + rmSync(cachePath); + } + } catch (error) { + logDebug(`Failed to clear file index cache: ${error}`); + } +} + +/** + * Get index statistics + */ +export async function getIndexStats(workDir: string): Promise<{ + totalFiles: number; + totalSize: number; + avgFileSize: number; + lastUpdated: number; +}> { + const index = await indexWorkspace(workDir); + return { + totalFiles: index.totalFiles, + totalSize: index.totalSize, + avgFileSize: index.totalFiles > 0 ? index.totalSize / index.totalFiles : 0, + lastUpdated: index.timestamp, + }; +} + +/** + * Force rebuild the file index + */ +export async function rebuildFileIndex(workDir: string): Promise { + clearFileIndexCache(workDir); + return indexWorkspace(workDir, { forceRebuild: true }); +} + +/** + * Find files by keyword (simple search) + */ +export async function findFilesByKeyword( + workDir: string, + keyword: string, + options: { maxResults?: number } = {}, +): Promise { + const { maxResults = 20 } = options; + const index = await indexWorkspace(workDir); + const results: FileIndexEntry[] = []; + const lowerKeyword = keyword.toLowerCase(); + + for (const entry of index.files.values()) { + // Check if keyword is in path + if (entry.path.toLowerCase().includes(lowerKeyword)) { + results.push(entry); + continue; + } + + // Check if keyword is in keywords + if (entry.keywords.some((k) => k.includes(lowerKeyword) || lowerKeyword.includes(k))) { + results.push(entry); + continue; + } + + // Check preview for code files + if (entry.preview?.toLowerCase().includes(lowerKeyword)) { + results.push(entry); + } + } + + return results.slice(0, maxResults); +}