diff --git a/cli/src/execution/parallel.ts b/cli/src/execution/parallel.ts index 5318088e..e7f9c31e 100644 --- a/cli/src/execution/parallel.ts +++ b/cli/src/execution/parallel.ts @@ -1,5 +1,5 @@ import { copyFileSync, cpSync, existsSync, mkdirSync } from "node:fs"; -import { join } from "node:path"; +import { basename, join } from "node:path"; import simpleGit from "simple-git"; import { PROGRESS_FILE, RALPHY_DIR } from "../config/loader.ts"; import { logTaskProgress } from "../config/writer.ts"; @@ -27,7 +27,15 @@ import { clearDeferredTask, recordDeferredTask } from "./deferred.ts"; import { buildParallelPrompt } from "./prompt.ts"; import { isRetryableError, withRetry } from "./retry.ts"; import { commitSandboxChanges } from "./sandbox-git.ts"; -import { cleanupSandbox, createSandbox, getModifiedFiles, getSandboxBase } from "./sandbox.ts"; +import { + cleanupSandbox, + createSandbox, + DEFAULT_IGNORED, + getModifiedFiles, + getSandboxBase, + isIgnored, + matchesPattern, +} from "./sandbox.ts"; import type { ExecutionOptions, ExecutionResult } from "./sequential.ts"; interface ParallelAgentResult { @@ -459,10 +467,47 @@ export async function runParallel( if (!failureReason && aiResult?.success && agentUsedSandbox && worktreeDir) { try { const modifiedFiles = await getModifiedFiles(worktreeDir, workDir); - if (modifiedFiles.length > 0) { + const filteredFiles = modifiedFiles.filter((f) => { + + if (f.trim() === "") { + return false; + } + + const normalized = f.replace(/\\/g, "/"); + + // Check against all default ignore patterns + for (const pattern of DEFAULT_IGNORED) { + if (pattern.endsWith("/")) { + const dir = pattern.slice(0, -1); + // Directory Patterns (e.g. ".ralphy/") + // Check for exact directory match or paths strictly inside it. + // We append "/" to the prefix check to ensure strict boundary matching, + // preventing false positives for lookalike directories (e.g. ".ralphy-config"). + if (normalized === dir || normalized.startsWith(dir + "/")) { + logDebug(`Agent ${agentNum}: Filtered infrastructure file: ${f}`); + return false; + } + } else { + // File/Glob Patterns (e.g. "nul", "*.log") + // We pass the basename to matchesPattern() to support recursive filtering. + // This ensures patterns like "*.log" match "src/debug.log" anywhere in the tree, + // mimicking standard gitignore behavior for patterns without slashes. + // Note: Per git docs, patterns WITHOUT a leading slash match at ANY level. + // Only patterns WITH a slash (e.g. "/nul") would be root-anchored. + const baseName = normalized.split("/").pop() || ""; + if (matchesPattern(baseName, pattern, false)) { + logDebug(`Agent ${agentNum}: Filtered ignored file: ${f}`); + return false; + } + } + } + + return true; + }); + if (filteredFiles.length > 0) { const commitResult = await commitSandboxChanges( workDir, - modifiedFiles, + filteredFiles, worktreeDir, task.title, agentNum, diff --git a/cli/src/execution/prompt.ts b/cli/src/execution/prompt.ts index a396c09d..186a10fa 100644 --- a/cli/src/execution/prompt.ts +++ b/cli/src/execution/prompt.ts @@ -181,8 +181,6 @@ export function buildParallelPrompt(options: ParallelPromptOptions): string { step++; } - instructions.push(`${step}. Update ${progressFile} with what you did`); - step++; if (allowCommit) { instructions.push(`${step}. Commit your changes with a descriptive message`); } else { diff --git a/cli/src/execution/sandbox-git.ts b/cli/src/execution/sandbox-git.ts index 45ceb1e9..5b5d8431 100644 --- a/cli/src/execution/sandbox-git.ts +++ b/cli/src/execution/sandbox-git.ts @@ -3,6 +3,7 @@ import { dirname, join } from "node:path"; import simpleGit, { type SimpleGit } from "simple-git"; import { slugify } from "../git/branch.ts"; import { logDebug } from "../ui/logger.ts"; +import { DEFAULT_IGNORED, matchesPattern } from "./sandbox.ts"; /** * Simple mutex to serialize git operations across sandbox agents. @@ -38,6 +39,44 @@ function generateUniqueId(): string { return `${timestamp}-${random}`; } +/** Safe git add: batches files (avoids ENAMETOOLONG) and filters ignored files */ +async function safeGitAdd(git: SimpleGit, files: string[], batchSize = 20): Promise { + if (!files?.length) return 0; + + // Filter out ignored files first + const ignoredSet = new Set(); + for (let i = 0; i < files.length; i += batchSize) { + try { + const result = await git.checkIgnore(files.slice(i, i + batchSize)); + result.forEach((f) => ignoredSet.add(f.replace(/\\/g, "/"))); + } catch { /* check-ignore exits 1 if nothing ignored */ } + } + const filtered = ignoredSet.size > 0 + ? files.filter((f) => !ignoredSet.has(f.replace(/\\/g, "/"))) + : files; + + if (!filtered.length) return 0; + + // Add in batches with retry for lock contention + for (let i = 0; i < filtered.length; i += batchSize) { + const batch = filtered.slice(i, i + batchSize); + for (let attempt = 1; attempt <= 3; attempt++) { + try { + await git.add(batch); + break; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + if ((msg.includes("index.lock") || msg.includes("lock file")) && attempt < 3) { + await new Promise((r) => setTimeout(r, attempt * 500)); + continue; + } + throw err; + } + } + } + return filtered.length; +} + /** * Result of committing sandbox changes to a branch */ @@ -65,13 +104,9 @@ export async function commitSandboxChanges( agentNum: number, baseBranch: string, ): Promise { - if (modifiedFiles.length === 0) { - return { - success: true, - branchName: "", - filesCommitted: 0, - }; - } + // Note: We don't return early even if modifiedFiles is empty, + // because composer/npm may have installed files in symlinked directories + // that we need to detect via git status in Phase 2. const uniqueId = generateUniqueId(); const branchName = `ralphy/agent-${agentNum}-${uniqueId}-${slugify(taskName)}`; @@ -104,14 +139,81 @@ export async function commitSandboxChanges( } } - // Stage all modified files - await git.add(modifiedFiles); + // TWO-PHASE STAGING APPROACH: + // Phase 1: Stage the explicitly filtered files passed from parallel.ts + // These are source files the agent intentionally modified + if (modifiedFiles.length > 0) { + const filesToStage = modifiedFiles.filter((f) => { + const fullPath = join(originalDir, f); + return existsSync(fullPath); + }); + const staged = await safeGitAdd(git, filesToStage); + if (staged > 0) { + logDebug(`Agent ${agentNum}: Staged ${staged} filtered files`); + } + } + + // Phase 2: Find additional untracked/modified files via git status + // This catches files installed by composer/npm in symlinked directories + // that aren't detected by the sandbox file comparison + const status = await git.status(); + const additionalFiles: string[] = []; + + // Collect untracked and modified files, excluding infrastructure + // Uses same logic as parallel.ts for consistency + for (const file of [...status.not_added, ...status.modified, ...status.created]) { + const normalized = file.replace(/\\/g, "/"); + let isInfra = false; + + for (const pattern of DEFAULT_IGNORED) { + if (pattern.endsWith("/")) { + const dir = pattern.slice(0, -1); + if (normalized === dir || normalized.startsWith(dir + "/")) { + isInfra = true; + break; + } + } else { + const baseName = normalized.split("/").pop() || ""; + if (matchesPattern(baseName, pattern, false)) { + isInfra = true; + break; + } + } + } + + // Exclude files already staged in Phase 1? + // NO: We purposefully check EVERYTHING in Phase 2 as a safety net. + // If Phase 1 failed to stage a file (e.g. path issues), Phase 2 MUST catch it. + // Git handles duplicate 'add' operations gracefully (idempotent). + if (!isInfra) { + additionalFiles.push(file); + } + } + + if (additionalFiles.length > 0) { + const staged = await safeGitAdd(git, additionalFiles); + if (staged > 0) { + logDebug(`Agent ${agentNum}: Staged ${staged} additional files (composer/npm installs)`); + } + } + + // Check if we have anything to commit + const finalStatus = await git.status(); + if (finalStatus.staged.length === 0) { + logDebug(`Agent ${agentNum}: No changes to commit after staging`); + await git.checkout(currentBranch); + return { + success: true, + branchName, + filesCommitted: 0, + }; + } // Commit const commitMessage = `feat: ${taskName}\n\nAutomated commit by Ralphy agent ${agentNum}`; await git.commit(commitMessage); - logDebug(`Agent ${agentNum}: Committed ${modifiedFiles.length} files to ${branchName}`); + logDebug(`Agent ${agentNum}: Committed ${finalStatus.staged.length} files to ${branchName}`); // Return to original branch await git.checkout(currentBranch); @@ -119,7 +221,7 @@ export async function commitSandboxChanges( return { success: true, branchName, - filesCommitted: modifiedFiles.length, + filesCommitted: finalStatus.staged.length, }; } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error); diff --git a/cli/src/execution/sandbox.test.ts b/cli/src/execution/sandbox.test.ts new file mode 100644 index 00000000..a773da54 --- /dev/null +++ b/cli/src/execution/sandbox.test.ts @@ -0,0 +1,151 @@ +import { describe, expect, it } from "bun:test"; +import { matchesPattern, isIgnored } from "./sandbox.ts"; + +describe("matchesPattern", () => { + describe("directory match (pattern/)", () => { + it("should match directory when isDirectory is true or undefined", () => { + expect(matchesPattern("node_modules", "node_modules/")).toBe(true); + expect(matchesPattern("node_modules", "node_modules/", true)).toBe(true); + expect(matchesPattern(".ralphy", ".ralphy/", true)).toBe(true); + }); + + it("should NOT match file when isDirectory is false", () => { + expect(matchesPattern("node_modules", "node_modules/", false)).toBe(false); + expect(matchesPattern(".ralphy", ".ralphy/", false)).toBe(false); + }); + }); + + describe("exact match", () => { + it("should match exact filenames", () => { + expect(matchesPattern("node_modules", "node_modules")).toBe(true); + expect(matchesPattern(".ralphy", ".ralphy")).toBe(true); + }); + + it("should not match different filenames", () => { + expect(matchesPattern("node_modules", "vendor")).toBe(false); + }); + }); + + describe("suffix match (*.ext)", () => { + it("should match files ending with extension", () => { + expect(matchesPattern("debug.log", "*.log")).toBe(true); + expect(matchesPattern("error.log", "*.log")).toBe(true); + expect(matchesPattern("test.sqlite", "*.sqlite")).toBe(true); + }); + + it("should NOT match when dot is treated as regex wildcard", () => { + // This is the key test - "*.log" should NOT match "testXlog" + // because we use string comparison, not regex + expect(matchesPattern("testXlog", "*.log")).toBe(false); + expect(matchesPattern("test-log", "*.log")).toBe(false); + }); + + it("should not match files without the extension", () => { + expect(matchesPattern("logfile", "*.log")).toBe(false); + expect(matchesPattern("log", "*.log")).toBe(false); + }); + }); + + describe("prefix match (prefix*)", () => { + it("should match files starting with prefix", () => { + expect(matchesPattern("test123", "test*")).toBe(true); + expect(matchesPattern("test.js", "test*")).toBe(true); + }); + + it("should NOT match when dot is treated as regex wildcard", () => { + // "test.*" as prefix should use string comparison + expect(matchesPattern("testXjs", "test.")).toBe(false); + }); + + it("should not match files without the prefix", () => { + expect(matchesPattern("mytest", "test*")).toBe(false); + }); + }); + + describe("tree match (dir/**)", () => { + it("should match all files under directory", () => { + expect(matchesPattern("src/foo.js", "src/**")).toBe(true); + expect(matchesPattern("src/nested/bar.ts", "src/**")).toBe(true); + }); + + it("should not match files outside directory", () => { + expect(matchesPattern("lib/foo.js", "src/**")).toBe(false); + }); + + it("should NOT match directories with same prefix (boundary check)", () => { + expect(matchesPattern("srcXtra/foo.js", "src/**")).toBe(false); + expect(matchesPattern("src-folder/foo.js", "src/**")).toBe(false); + }); + }); + + describe("full path vs basename behavior", () => { + it("suffix (*) should match recursively (match full path)", () => { + expect(matchesPattern("path/to/debug.log", "*.log")).toBe(true); + }); + + it("exact match should NOT match recursively if full path is passed (strict internal logic)", () => { + expect(matchesPattern("path/to/nul", "nul")).toBe(false); + // In parallel.ts, we pass the basename ("nul") which DOES match, creating recursive behavior via the caller + expect(matchesPattern("nul", "nul")).toBe(true); + }); + + it("directory match should NOT match prefix-lookalikes", () => { + expect(matchesPattern(".ralphy-config", ".ralphy/", true)).toBe(false); + }); + }); + + describe("middle wildcard (test.*.js)", () => { + it("should match with proper regex escaping", () => { + expect(matchesPattern("test.foo.js", "test.*.js")).toBe(true); + expect(matchesPattern("test.bar.js", "test.*.js")).toBe(true); + }); + + it("should escape dots properly - not match when dot is wildcard", () => { + // "test.*.js" should NOT match "testXfooXjs" because dots are escaped + expect(matchesPattern("testXfooXjs", "test.*.js")).toBe(false); + }); + + it("should escape other metacharacters", () => { + // Pattern with special chars should work correctly + expect(matchesPattern("file[1].txt", "file[*].txt")).toBe(true); + expect(matchesPattern("file(test).log", "file(*).log")).toBe(true); + }); + }); +}); + +describe("isIgnored", () => { + it("should return true if any pattern matches", () => { + expect(isIgnored(".ralphy", [".ralphy-sandboxes", ".ralphy"])).toBe(true); + expect(isIgnored("debug.log", ["*.log", "*.sqlite"])).toBe(true); + }); + + it("should return false if no pattern matches", () => { + expect(isIgnored("src", [".ralphy", "node_modules"])).toBe(false); + }); + + it("should handle DEFAULT_IGNORED patterns correctly", () => { + const DEFAULT_IGNORED = [ + ".ralphy-sandboxes/", + ".ralphy-worktrees/", + ".ralphy/", + "nul", + ]; + + // Matches because strict=undefined allows directory patterns to match + // This simulates typical lenient behavior if type isn't known, or just directory check + expect(isIgnored(".ralphy", DEFAULT_IGNORED)).toBe(true); + + // Should explicitly match if isDirectory=true + expect(isIgnored(".ralphy", DEFAULT_IGNORED, true)).toBe(true); + + // Should NOT match if isDirectory=false + expect(isIgnored(".ralphy", DEFAULT_IGNORED, false)).toBe(false); + + // Removed *.log from ignore list, so these should return false + expect(isIgnored("debug.log", DEFAULT_IGNORED)).toBe(false); + + expect(isIgnored("nul", DEFAULT_IGNORED)).toBe(true); + expect(isIgnored("src", DEFAULT_IGNORED)).toBe(false); + expect(isIgnored("package.json", DEFAULT_IGNORED)).toBe(false); + }); +}); diff --git a/cli/src/execution/sandbox.ts b/cli/src/execution/sandbox.ts index 52c6d4a8..2177bf77 100644 --- a/cli/src/execution/sandbox.ts +++ b/cli/src/execution/sandbox.ts @@ -14,6 +14,61 @@ import { import { dirname, join, sep } from "node:path"; import { logDebug } from "../ui/logger.ts"; +/** + * Simple glob matcher to avoid adding heavy dependencies. + * Supports: + * - Suffix: "*.log" (matches "debug.log", "path/to/debug.log") + * - Prefix: "test*" (matches "test1", "test-file") + * - Tree: "node_modules/**" (matches "node_modules", "node_modules/file.js") - Checks strict directory boundary + * - Exact: "node_modules" (matches "node_modules") + * - Middle: "test.*.js" (matches "test.foo.js") - Uses regex escaping + * + * @internal Exported for testing + */ +export function matchesPattern(filename: string, pattern: string, isDirectory?: boolean): boolean { + // Exact match: "node_modules" matches "node_modules" + if (pattern === filename) return true; + + // Directory match: "node_modules/" matches "node_modules" if it is a directory + if (pattern.endsWith("/")) { + // If we know it's a file, it can't match a directory pattern + if (isDirectory === false) return false; + // Check if filename matches pattern without trailing slash + return filename === pattern.slice(0, -1); + } + + // Tree match: "dir/**" matches "dir/foo/bar.js" + if (pattern.endsWith("/**")) { + const dir = pattern.slice(0, -3); + return filename === dir || filename.startsWith(dir + "/"); + } + + // Suffix match: "*.log" matches "debug.log" (single wildcard at start only) + // Uses endsWith() string comparison, not regex - so "." is treated literally + if (pattern.startsWith("*") && !pattern.includes("*", 1)) return filename.endsWith(pattern.slice(1)); + + // Prefix match: "test*" matches "test123" (single wildcard at end only) + // Uses startsWith() string comparison, not regex - so "." is treated literally + if (pattern.endsWith("*") && !pattern.slice(0, -1).includes("*")) return filename.startsWith(pattern.slice(0, -1)); + + // Middle/complex wildcards: "test.*.js" matches "test.foo.js" + // Escape regex metacharacters, then convert * to .* + if (pattern.includes("*")) { + const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, '\\$&').replace(/\*/g, ".*"); + return new RegExp(`^${escaped}$`).test(filename); + } + + return false; +} + +/** + * Check if a file should be ignored based on a list of patterns. + * @internal Exported for testing + */ +export function isIgnored(item: string, patterns: string[], isDirectory?: boolean): boolean { + return patterns.some((p) => matchesPattern(item, p, isDirectory)); +} + /** * Default directories to symlink (read-only dependencies). * These are never modified by agents, so sharing them saves disk space. @@ -69,6 +124,17 @@ export const DEFAULT_COPY_PATTERNS = [ "pyproject.toml", ]; +/** + * Directories/files that should ALWAYS be ignored (neither copied nor symlinked). + * Agents don't need .ralphy/ - config is read by main runner, progress is tracked by main runner. + */ +export const DEFAULT_IGNORED = [ + ".ralphy-sandboxes/", + ".ralphy-worktrees/", + ".ralphy/", + "nul", +]; + export interface SandboxOptions { /** Original working directory */ originalDir: string; @@ -119,8 +185,19 @@ export async function createSandbox(options: SandboxOptions): Promise { + const itemPath = join(originalDir, item); + // We need to check if it's a directory to support trailing slash patterns + let isDir = false; + try { + const stat = lstatSync(itemPath); + isDir = stat.isDirectory(); + } catch { + // If stat fails, assume false (or skip) + } + return !isIgnored(item, DEFAULT_IGNORED, isDir); + }); // Track which items we've handled const handled = new Set(); @@ -153,7 +230,7 @@ export async function createSandbox(options: SandboxOptions): Promise ${target}`); } } else if (stat.isDirectory()) { - // Copy directory recursively, preserving timestamps for change detection - cpSync(originalPath, sandboxPath, { recursive: true, preserveTimestamps: true }); + // Copy directory recursively using smart copy + const stats = copyRecursive( + originalPath, + sandboxPathItem, + DEFAULT_IGNORED, + symlinkDirs, + agentNum, + ); + // Count top-level directory as 1 copy (ignoring internal file count) filesCopied++; + symlinksCreated += stats.symlinks; } else if (stat.isFile()) { // Copy file and preserve timestamps for change detection - copyFileSync(originalPath, sandboxPath); + copyFileSync(originalPath, sandboxPathItem); try { - utimesSync(sandboxPath, stat.atime, stat.mtime); + utimesSync(sandboxPathItem, stat.atime, stat.mtime); } catch (utimeErr) { logDebug(`Agent ${agentNum}: Failed to preserve timestamps for ${item}: ${utimeErr}`); } @@ -336,3 +421,98 @@ export function getSandboxBase(workDir: string): string { } return sandboxBase; } + +/** + * Recursively copy a directory: + * - Skips directories in 'ignoreNames' + * - Creates symlinks for directories in 'symlinkNames' (instead of recursing) + * - Copies everything else + */ +function copyRecursive( + src: string, + dest: string, + ignoreNames: string[], + symlinkNames: string[], + agentNum: number, +): { files: number; symlinks: number } { + let files = 0; + let symlinks = 0; + + if (!existsSync(src)) return { files, symlinks }; + + if (!existsSync(dest)) { + mkdirSync(dest, { recursive: true }); + } + + const items = readdirSync(src); + for (const item of items) { + const srcPath = join(src, item); + + let stat; + try { + stat = lstatSync(srcPath); + } catch { + continue; + } + + // Skip ignored items (pass isDirectory flag) + if (isIgnored(item, ignoreNames, stat.isDirectory())) { + continue; + } + + const destPath = join(dest, item); + + try { + if (stat.isDirectory()) { + // Symlink read-only dependency dirs (node_modules, vendor, etc.) even when nested. + // This is intentional for performance - agents don't modify dependencies, only source files. + // Sharing these across sandboxes avoids duplicating GBs of packages per agent. + if (symlinkNames.includes(item)) { + try { + // Create a junction/symlink to the SOURCE directory + symlinkSync(srcPath, destPath, "junction"); + logDebug(`Agent ${agentNum}: Symlinked nested dir ${item}`); + symlinks++; + } catch (symlinkErr) { + // Fallback: if symlink fails, try to copy responsibly + logDebug( + `Agent ${agentNum}: Failed to symlink nested ${item}, falling back to copy: ${symlinkErr}`, + ); + const subStats = copyRecursive( + srcPath, + destPath, + ignoreNames, + symlinkNames, + agentNum, + ); + files += subStats.files; + symlinks += subStats.symlinks; + } + } else { + // Normal directory: recurse + const subStats = copyRecursive( + srcPath, + destPath, + ignoreNames, + symlinkNames, + agentNum, + ); + files += subStats.files; + symlinks += subStats.symlinks; + } + } else if (stat.isFile()) { + copyFileSync(srcPath, destPath); + files++; + try { + utimesSync(destPath, stat.atime, stat.mtime); + } catch { + // Ignore timestamp errors + } + } + } catch (err) { + logDebug(`Agent ${agentNum}: Failed to copy ${item} in recursive copy: ${err}`); + } + } + + return { files, symlinks }; +}