From ad2e1acab8a13b3b31d9eb99ce472a7e2ec8b6a4 Mon Sep 17 00:00:00 2001 From: philipgraffshapiro Date: Thu, 11 Jun 2026 18:42:49 -0400 Subject: [PATCH 1/5] fix: guard unsafe lumen index roots --- cmd/ancestor.go | 5 + cmd/ancestor_test.go | 53 +++++++---- cmd/hook.go | 46 +++++++-- cmd/hook_test.go | 169 ++++++++++++++++++++++++++------- cmd/index.go | 6 +- cmd/purge.go | 7 +- cmd/resolve.go | 15 ++- cmd/resolve_test.go | 33 +++++++ cmd/stdio.go | 11 ++- cmd/stdio_test.go | 90 ++++++++++++------ internal/git/worktree_test.go | 4 + internal/index/index_test.go | 5 +- internal/merkle/ignore.go | 21 +++- internal/merkle/ignore_test.go | 22 +++-- internal/merkle/merkle_test.go | 4 + 15 files changed, 382 insertions(+), 109 deletions(-) diff --git a/cmd/ancestor.go b/cmd/ancestor.go index 678e46b3..46c0686d 100644 --- a/cmd/ancestor.go +++ b/cmd/ancestor.go @@ -49,3 +49,8 @@ func findAncestorIndex(path, model string) string { } return "" } + +func hasLumenBoundaryFile(path string) bool { + info, err := os.Stat(filepath.Join(path, ".lumenignore")) + return err == nil && !info.IsDir() +} diff --git a/cmd/ancestor_test.go b/cmd/ancestor_test.go index f202bce9..ddfa80fa 100644 --- a/cmd/ancestor_test.go +++ b/cmd/ancestor_test.go @@ -29,7 +29,7 @@ func TestFindAncestorIndex(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) - got := findAncestorIndex("/some/deep/nonexistent/path", model) + got := findAncestorIndex(filepath.Join(t.TempDir(), "some", "deep", "path"), model) if got != "" { t.Fatalf("expected empty string, got %q", got) } @@ -39,8 +39,13 @@ func TestFindAncestorIndex(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) - // Create a fake DB for /project. - parentDBPath := config.DBPathForProject("/project", model) + project := filepath.Join(t.TempDir(), "project") + child := filepath.Join(project, "scripts", "util") + if err := os.MkdirAll(child, 0o755); err != nil { + t.Fatal(err) + } + + parentDBPath := config.DBPathForProject(project, model) if err := os.MkdirAll(filepath.Dir(parentDBPath), 0o755); err != nil { t.Fatal(err) } @@ -48,9 +53,9 @@ func TestFindAncestorIndex(t *testing.T) { t.Fatal(err) } - got := findAncestorIndex("/project/scripts/util", model) - if got != "/project" { - t.Fatalf("expected /project, got %q", got) + got := findAncestorIndex(child, model) + if got != project { + t.Fatalf("expected %q, got %q", project, got) } }) @@ -58,8 +63,13 @@ func TestFindAncestorIndex(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) - // Create a fake DB for /project. - parentDBPath := config.DBPathForProject("/project", model) + project := filepath.Join(t.TempDir(), "project") + child := filepath.Join(project, "testdata", "fixtures", "go") + if err := os.MkdirAll(child, 0o755); err != nil { + t.Fatal(err) + } + + parentDBPath := config.DBPathForProject(project, model) if err := os.MkdirAll(filepath.Dir(parentDBPath), 0o755); err != nil { t.Fatal(err) } @@ -67,9 +77,9 @@ func TestFindAncestorIndex(t *testing.T) { t.Fatal(err) } - // "testdata" is in merkle.SkipDirs — the parent index would never - // contain these files, so findAncestorIndex must return "". - got := findAncestorIndex("/project/testdata/fixtures/go", model) + // "testdata" is in merkle.SkipDirs; the parent index would never contain + // these files, so findAncestorIndex must return "". + got := findAncestorIndex(child, model) if got != "" { t.Fatalf("expected empty string (skip dir in route), got %q", got) } @@ -79,8 +89,8 @@ func TestFindAncestorIndex(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) - // No DBs exist anywhere — should return "" without panic. - got := findAncestorIndex("/a/b/c/d/e", model) + // No DBs exist anywhere; should return "" without panic. + got := findAncestorIndex(filepath.Join(t.TempDir(), "a", "b", "c", "d", "e"), model) if got != "" { t.Fatalf("expected empty string, got %q", got) } @@ -119,8 +129,14 @@ func TestFindAncestorIndex(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) - // Create fake DBs for both /project and /project/src. - for _, dir := range []string{"/project", "/project/src"} { + project := filepath.Join(t.TempDir(), "project") + src := filepath.Join(project, "src") + pkg := filepath.Join(src, "pkg") + if err := os.MkdirAll(pkg, 0o755); err != nil { + t.Fatal(err) + } + + for _, dir := range []string{project, src} { dbPath := config.DBPathForProject(dir, model) if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil { t.Fatal(err) @@ -130,10 +146,9 @@ func TestFindAncestorIndex(t *testing.T) { } } - // Searching from /project/src/pkg should find /project/src (nearest). - got := findAncestorIndex("/project/src/pkg", model) - if got != "/project/src" { - t.Fatalf("expected /project/src (nearest ancestor), got %q", got) + got := findAncestorIndex(pkg, model) + if got != src { + t.Fatalf("expected nearest ancestor %q, got %q", src, got) } }) } diff --git a/cmd/hook.go b/cmd/hook.go index 1f54c519..80b193ea 100644 --- a/cmd/hook.go +++ b/cmd/hook.go @@ -26,6 +26,7 @@ import ( "github.com/ory/lumen/internal/config" "github.com/ory/lumen/internal/git" + "github.com/ory/lumen/internal/merkle" "github.com/ory/lumen/internal/store" ) @@ -132,18 +133,43 @@ func generateSessionContextInternalWithDirective(directive, cwd string, findDono emb := newEmbedder(cfg) modelName := emb.ModelName() dims := cfg.ServerDims(0) + allowBackgroundIndex := false // Normalize cwd to the git repository root so the DB path matches what - // `lumen index` and the MCP handler use. For non-git directories, walk - // up to reuse an existing ancestor index. + // `lumen index` and the MCP handler use. For non-git directories, reuse an + // existing ancestor index, but do not invent a new background crawl at a + // plain parent/root just because a host opened a session there. if root, err := git.RepoRoot(cwd); err == nil { - cwd = root - } else if ancestor := findAncestorIndex(cwd, modelName); ancestor != "" { - cwd = ancestor + if unindexable, _ := merkle.IsRootUnindexable(root); !unindexable { + cwd = root + allowBackgroundIndex = true + } else if !hasLumenBoundaryFile(cwd) { + if ancestor := findAncestorIndex(cwd, modelName); ancestor != "" { + cwd = ancestor + allowBackgroundIndex = true + } + } + } else if !hasLumenBoundaryFile(cwd) { + if ancestor := findAncestorIndex(cwd, modelName); ancestor != "" { + cwd = ancestor + allowBackgroundIndex = true + } + } else { + allowBackgroundIndex = true + } + if hasLumenBoundaryFile(cwd) { + allowBackgroundIndex = true + } + if unindexable, reason := merkle.IsRootUnindexable(cwd); unindexable { + return directive + " Index root blocked: " + reason + "." } dbPath := config.DBPathForProject(cwd, modelName) if _, err := os.Stat(dbPath); err != nil { + if !allowBackgroundIndex { + return directive + " No index yet — auto-created on first semantic_search call." + } + // No index yet — kick off background pre-warming so the first search // in this session doesn't pay the full seed + embed cost synchronously. bgIndexer(cwd) @@ -162,10 +188,12 @@ func generateSessionContextInternalWithDirective(directive, cwd string, findDono // Spawn background indexer if the index is stale or has never been // successfully completed. This avoids spawning on every session start // when the index was recently updated. - if val, metaErr := s.GetMeta("last_indexed_at"); metaErr != nil || val == "" { - bgIndexer(cwd) - } else if t, parseErr := time.Parse(time.RFC3339, val); parseErr != nil || time.Since(t) > backgroundIndexStaleness { - bgIndexer(cwd) + if allowBackgroundIndex { + if val, metaErr := s.GetMeta("last_indexed_at"); metaErr != nil || val == "" { + bgIndexer(cwd) + } else if t, parseErr := time.Parse(time.RFC3339, val); parseErr != nil || time.Since(t) > backgroundIndexStaleness { + bgIndexer(cwd) + } } stats, err := s.Stats() diff --git a/cmd/hook_test.go b/cmd/hook_test.go index d0a6be0b..cd9ff538 100644 --- a/cmd/hook_test.go +++ b/cmd/hook_test.go @@ -221,32 +221,111 @@ func TestPreToolUseOutputJSON(t *testing.T) { } } -func TestGenerateSessionContextInternal_SpawnsWhenNoDB(t *testing.T) { - // No DB exists → bgIndexer must be called regardless of donor presence. +func TestGenerateSessionContextInternal_DoesNotSpawnForPlainRootWhenNoDB(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) + t.Setenv("XDG_CONFIG_HOME", tmpDir) + + plainDir := filepath.Join(t.TempDir(), "plain-root") + if err := os.MkdirAll(plainDir, 0o755); err != nil { + t.Fatal(err) + } - t.Run("with donor", func(t *testing.T) { - var bgCwd string - generateSessionContextInternal("/my/worktree", - func(_, _ string) string { return "/some/donor.db" }, - func(cwd string) { bgCwd = cwd }, - ) - if bgCwd != "/my/worktree" { - t.Fatalf("expected bgIndexer called with /my/worktree, got %q", bgCwd) - } - }) - - t.Run("without donor", func(t *testing.T) { - var bgCwd string - generateSessionContextInternal("/my/worktree", - func(_, _ string) string { return "" }, - func(cwd string) { bgCwd = cwd }, - ) - if bgCwd != "/my/worktree" { - t.Fatalf("expected bgIndexer called even without donor, got %q", bgCwd) - } - }) + var bgCwd string + result := generateSessionContextInternal(plainDir, + func(_, _ string) string { return "/some/donor.db" }, + func(cwd string) { bgCwd = cwd }, + ) + if bgCwd != "" { + t.Fatalf("bgIndexer must not be called for a plain fresh root, got %q", bgCwd) + } + if !strings.Contains(result, "auto-created on first semantic_search call") { + t.Fatalf("expected first-search message for plain fresh root, got: %s", result) + } +} + +func TestGenerateSessionContextInternal_SpawnsAtBoundaryWhenNoDB(t *testing.T) { + tmpDir := t.TempDir() + t.Setenv("XDG_DATA_HOME", tmpDir) + t.Setenv("XDG_CONFIG_HOME", tmpDir) + + boundaryDir := filepath.Join(t.TempDir(), "bounded-root") + if err := os.MkdirAll(boundaryDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(boundaryDir, ".lumenignore"), []byte("vendor/\n"), 0o644); err != nil { + t.Fatal(err) + } + + var bgCwd string + result := generateSessionContextInternal(boundaryDir, + func(_, _ string) string { return "/some/donor.db" }, + func(cwd string) { bgCwd = cwd }, + ) + if bgCwd != boundaryDir { + t.Fatalf("expected bgIndexer called with boundary root %q, got %q", boundaryDir, bgCwd) + } + if !strings.Contains(result, "indexing in background") { + t.Fatalf("expected background message for bounded fresh root, got: %s", result) + } +} + +func TestGenerateSessionContextInternal_SpawnsAtGitRootWhenNoDB(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not on PATH") + } + + tmpDir := t.TempDir() + t.Setenv("XDG_DATA_HOME", tmpDir) + t.Setenv("XDG_CONFIG_HOME", tmpDir) + + repoDir := filepath.Join(t.TempDir(), "repo") + subDir := filepath.Join(repoDir, "sub", "deep") + if err := os.MkdirAll(subDir, 0o755); err != nil { + t.Fatal(err) + } + runGit(t, repoDir, "init") + + resolvedRepo, err := filepath.EvalSymlinks(repoDir) + if err != nil { + t.Fatal(err) + } + resolvedSub := filepath.Join(resolvedRepo, "sub", "deep") + + var bgCwd string + result := generateSessionContextInternal(resolvedSub, + func(_, _ string) string { return "" }, + func(cwd string) { bgCwd = cwd }, + ) + if bgCwd != resolvedRepo { + t.Fatalf("expected bgIndexer called with git root %q, got %q", resolvedRepo, bgCwd) + } + if !strings.Contains(result, "indexing in background") { + t.Fatalf("expected background message for fresh git root, got: %s", result) + } +} + +func TestGenerateSessionContextInternal_BlocksUnindexableRoot(t *testing.T) { + tmpDir := t.TempDir() + t.Setenv("XDG_DATA_HOME", tmpDir) + t.Setenv("XDG_CONFIG_HOME", tmpDir) + + home, err := os.UserHomeDir() + if err != nil { + t.Fatal(err) + } + + called := false + result := generateSessionContextInternal(home, + func(_, _ string) string { return "" }, + func(_ string) { called = true }, + ) + if called { + t.Fatal("bgIndexer must not be called for an unindexable root") + } + if !strings.Contains(result, "Index root blocked: user home directory.") { + t.Fatalf("expected blocked-root message, got: %s", result) + } } func TestGenerateSessionContextInternal_NoSpawnWhenFresh(t *testing.T) { @@ -275,54 +354,78 @@ func TestGenerateSessionContextInternal_NoSpawnWhenFresh(t *testing.T) { } } -func TestGenerateSessionContextInternal_SpawnsWhenStale(t *testing.T) { +func TestGenerateSessionContextInternal_SpawnsWhenBoundedIndexIsStale(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) + t.Setenv("XDG_CONFIG_HOME", tmpDir) + + boundaryDir := filepath.Join(t.TempDir(), "bounded-root") + if err := os.MkdirAll(boundaryDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(boundaryDir, ".lumenignore"), []byte("vendor/\n"), 0o644); err != nil { + t.Fatal(err) + } cfg, err := config.NewConfigService("") if err != nil { t.Fatalf("NewConfigService: %v", err) } emb := newEmbedder(cfg) - dbPath := config.DBPathForProject("/myproject", emb.ModelName()) + dbPath := config.DBPathForProject(boundaryDir, emb.ModelName()) if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil { t.Fatal(err) } writeHookTestDB(t, dbPath, time.Now().Add(-10*time.Minute)) called := false - generateSessionContextInternal("/myproject", + generateSessionContextInternal(boundaryDir, func(_, _ string) string { return "" }, func(_ string) { called = true }, ) if !called { - t.Fatal("bgIndexer must be called when index is stale") + t.Fatal("bgIndexer must be called when a bounded index is stale") } } func TestGenerateSessionContextInternal_MessageWithDonor(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) + t.Setenv("XDG_CONFIG_HOME", tmpDir) + + boundaryDir := filepath.Join(t.TempDir(), "bounded-root") + if err := os.MkdirAll(boundaryDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(boundaryDir, ".lumenignore"), []byte("vendor/\n"), 0o644); err != nil { + t.Fatal(err) + } - result := generateSessionContextInternal("/my/worktree", + result := generateSessionContextInternal(boundaryDir, func(_, _ string) string { return "/some/donor.db" }, func(_ string) {}, ) - if !strings.Contains(result, "background") { - t.Errorf("expected 'background' in context when donor found, got: %s", result) + if !strings.Contains(result, "Sibling worktree index found") { + t.Errorf("expected donor background message, got: %s", result) } } func TestGenerateSessionContextInternal_MessageWithoutDonor(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) + t.Setenv("XDG_CONFIG_HOME", tmpDir) + + plainDir := filepath.Join(t.TempDir(), "plain-root") + if err := os.MkdirAll(plainDir, 0o755); err != nil { + t.Fatal(err) + } - result := generateSessionContextInternal("/my/worktree", + result := generateSessionContextInternal(plainDir, func(_, _ string) string { return "" }, func(_ string) {}, ) - if !strings.Contains(result, "background") { - t.Errorf("expected 'background' in context when no donor, got: %s", result) + if !strings.Contains(result, "auto-created on first semantic_search call") { + t.Errorf("expected first-search message when no donor, got: %s", result) } } diff --git a/cmd/index.go b/cmd/index.go index 334b64fa..7aad115e 100644 --- a/cmd/index.go +++ b/cmd/index.go @@ -83,8 +83,10 @@ func runIndex(cmd *cobra.Command, args []string) error { // For non-git directories, walk up to reuse an existing ancestor index. if root, err := git.RepoRoot(projectPath); err == nil { projectPath = root - } else if ancestor := findAncestorIndex(projectPath, modelName); ancestor != "" { - projectPath = ancestor + } else if !hasLumenBoundaryFile(projectPath) { + if ancestor := findAncestorIndex(projectPath, modelName); ancestor != "" { + projectPath = ancestor + } } // Re-check after normalization: git.RepoRoot can resolve upward to an diff --git a/cmd/purge.go b/cmd/purge.go index 37ec3cc8..72dd36e3 100644 --- a/cmd/purge.go +++ b/cmd/purge.go @@ -23,6 +23,7 @@ import ( "github.com/ory/lumen/internal/config" "github.com/ory/lumen/internal/git" + "github.com/ory/lumen/internal/merkle" "github.com/ory/lumen/internal/store" "github.com/spf13/cobra" ) @@ -146,8 +147,10 @@ func purgeOneTarget(stderr io.Writer, indexMap map[string][]string, seen map[str target := abs inGitRepo := false if root, err := git.RepoRoot(abs); err == nil { - target = root - inGitRepo = true + if unindexable, _ := merkle.IsRootUnindexable(root); !unindexable { + target = root + inGitRepo = true + } } match := "" diff --git a/cmd/resolve.go b/cmd/resolve.go index cd2e3505..8f0ff587 100644 --- a/cmd/resolve.go +++ b/cmd/resolve.go @@ -21,6 +21,7 @@ import ( "github.com/ory/lumen/internal/config" "github.com/ory/lumen/internal/git" + "github.com/ory/lumen/internal/merkle" ) // resolveIndexRoot determines the index root and search path for a given @@ -64,9 +65,17 @@ func resolveIndexRoot(pathFlag, cwdFlag, model string) (indexRoot, searchPath st // ancestor index walk. indexRoot = searchPath if root, gitErr := git.RepoRoot(searchPath); gitErr == nil { - indexRoot = root - } else if ancestor := findAncestorIndex(searchPath, model); ancestor != "" { - indexRoot = ancestor + if unindexable, _ := merkle.IsRootUnindexable(root); !unindexable { + indexRoot = root + } else if !hasLumenBoundaryFile(searchPath) { + if ancestor := findAncestorIndex(searchPath, model); ancestor != "" { + indexRoot = ancestor + } + } + } else if !hasLumenBoundaryFile(searchPath) { + if ancestor := findAncestorIndex(searchPath, model); ancestor != "" { + indexRoot = ancestor + } } // When cwd is provided and the search path resolved to itself (no git root, diff --git a/cmd/resolve_test.go b/cmd/resolve_test.go index c942d49b..27f0a837 100644 --- a/cmd/resolve_test.go +++ b/cmd/resolve_test.go @@ -194,6 +194,39 @@ func TestResolveIndexRoot(t *testing.T) { t.Fatalf("expected searchPath=%q, got %q", subDir, searchPath) } }) + + t.Run("lumenignore boundary does not adopt ancestor index", func(t *testing.T) { + tmp := resolvedTempDir(t) + t.Setenv("XDG_DATA_HOME", tmp) + + parentDir := filepath.Join(tmp, "workspace") + subDir := filepath.Join(parentDir, "dashboard") + if err := os.MkdirAll(subDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(subDir, ".lumenignore"), []byte("vendor/\n"), 0o644); err != nil { + t.Fatal(err) + } + + dbPath := config.DBPathForProject(parentDir, model) + if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(dbPath, []byte{}, 0o644); err != nil { + t.Fatal(err) + } + + indexRoot, searchPath, err := resolveIndexRoot(subDir, "", model) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if indexRoot != subDir { + t.Fatalf("expected indexRoot=%q (.lumenignore boundary), got %q", subDir, indexRoot) + } + if searchPath != subDir { + t.Fatalf("expected searchPath=%q, got %q", subDir, searchPath) + } + }) } func runGit(t *testing.T, dir string, args ...string) { diff --git a/cmd/stdio.go b/cmd/stdio.go index 551840d5..bb0b6ae5 100644 --- a/cmd/stdio.go +++ b/cmd/stdio.go @@ -336,7 +336,8 @@ func (ic *indexerCache) findEffectiveRoot(path string, model ...string) string { } } - if !pathCrossesSkipDir(candidate, path) { + unindexable, _ := merkle.IsRootUnindexable(candidate) + if !pathCrossesSkipDir(candidate, path) && !unindexable { if _, ok := ic.cacheGet(candidate, modelName); ok { return candidate } @@ -360,7 +361,9 @@ func (ic *indexerCache) findEffectiveRoot(path string, model ...string) string { // search path itself instead, so test fixtures and similar directories get // their own scoped index. if gitErr == nil && !pathCrossesSkipDir(gitRoot, path) { - return gitRoot + if unindexable, _ := merkle.IsRootUnindexable(gitRoot); !unindexable { + return gitRoot + } } return path } @@ -464,6 +467,10 @@ func (ic *indexerCache) getOrCreate(projectPath string, preferredRoot string, mo effectiveRoot = ic.findEffectiveRoot(projectPath, modelName) } + if unindexable, reason := merkle.IsRootUnindexable(effectiveRoot); unindexable { + return nil, "", "", fmt.Errorf("refusing to index %s: %s", effectiveRoot, reason) + } + // If a parent index is already cached, alias and return. // Guard: only reuse the cached entry if it is the actual owner of effectiveRoot // (entry.effectiveRoot == effectiveRoot). If the cache holds a guest-alias entry diff --git a/cmd/stdio_test.go b/cmd/stdio_test.go index 2b313ff9..b6a11882 100644 --- a/cmd/stdio_test.go +++ b/cmd/stdio_test.go @@ -49,7 +49,7 @@ var ( // is set, it writes got to the golden file instead. func assertGolden(t *testing.T, goldenPath, got string) { t.Helper() - got = strings.TrimRight(got, "\n") + got = strings.TrimRight(got, "\r\n") if *updateGolden { if err := os.WriteFile(goldenPath, []byte(got+"\n"), 0o644); err != nil { t.Fatalf("update golden: %v", err) @@ -60,7 +60,9 @@ func assertGolden(t *testing.T, goldenPath, got string) { if err != nil { t.Fatalf("read golden file: %v", err) } - want := strings.TrimRight(string(golden), "\n") + want := strings.TrimRight(string(golden), "\r\n") + want = strings.ReplaceAll(want, "\r\n", "\n") + got = strings.ReplaceAll(got, "\r\n", "\n") if got != want { t.Fatalf("output does not match golden file %s (run with -update-golden to refresh).\n\nGOT:\n%s\n\nWANT:\n%s", goldenPath, got, want) } @@ -142,24 +144,36 @@ func TestIndexerCache_FindEffectiveRoot(t *testing.T) { const model = "test-model" t.Run("returns path when no parent exists", func(t *testing.T) { + project := filepath.Join(t.TempDir(), "project") + pkg := filepath.Join(project, "src", "pkg") + if err := os.MkdirAll(pkg, 0o755); err != nil { + t.Fatal(err) + } + ic := &indexerCache{ cache: make(map[string]cacheEntry), embedder: &stubEmbedder{model: model}, } - root := ic.findEffectiveRoot("/project/src/pkg") - if root != "/project/src/pkg" { - t.Fatalf("expected original path, got %s", root) + root := ic.findEffectiveRoot(pkg) + if root != pkg { + t.Fatalf("expected original path %q, got %q", pkg, root) } }) t.Run("returns cached parent", func(t *testing.T) { + project := filepath.Join(t.TempDir(), "project") + pkg := filepath.Join(project, "src", "pkg") + if err := os.MkdirAll(pkg, 0o755); err != nil { + t.Fatal(err) + } + ic := &indexerCache{ - cache: map[string]cacheEntry{"/project": {idx: nil, effectiveRoot: "/project"}}, + cache: map[string]cacheEntry{project: {idx: nil, effectiveRoot: project}}, embedder: &stubEmbedder{model: model}, } - root := ic.findEffectiveRoot("/project/src/pkg") - if root != "/project" { - t.Fatalf("expected /project (cached parent), got %s", root) + root := ic.findEffectiveRoot(pkg) + if root != project { + t.Fatalf("expected cached parent %q, got %q", project, root) } }) @@ -167,8 +181,13 @@ func TestIndexerCache_FindEffectiveRoot(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) - // Create the DB file that would exist for /project with our model. - parentDBPath := config.DBPathForProject("/project", model) + project := filepath.Join(t.TempDir(), "project") + pkg := filepath.Join(project, "src", "pkg") + if err := os.MkdirAll(pkg, 0o755); err != nil { + t.Fatal(err) + } + + parentDBPath := config.DBPathForProject(project, model) if err := os.MkdirAll(filepath.Dir(parentDBPath), 0o755); err != nil { t.Fatal(err) } @@ -180,9 +199,9 @@ func TestIndexerCache_FindEffectiveRoot(t *testing.T) { cache: make(map[string]cacheEntry), embedder: &stubEmbedder{model: model}, } - root := ic.findEffectiveRoot("/project/src/pkg") - if root != "/project" { - t.Fatalf("expected /project (db on disk), got %s", root) + root := ic.findEffectiveRoot(pkg) + if root != project { + t.Fatalf("expected parent with db %q, got %q", project, root) } }) @@ -190,8 +209,13 @@ func TestIndexerCache_FindEffectiveRoot(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_DATA_HOME", tmpDir) - // Simulate a parent index at /project. - parentDBPath := config.DBPathForProject("/project", model) + project := filepath.Join(t.TempDir(), "project") + skippedPath := filepath.Join(project, "testdata", "fixtures", "go") + if err := os.MkdirAll(skippedPath, 0o755); err != nil { + t.Fatal(err) + } + + parentDBPath := config.DBPathForProject(project, model) if err := os.MkdirAll(filepath.Dir(parentDBPath), 0o755); err != nil { t.Fatal(err) } @@ -205,9 +229,9 @@ func TestIndexerCache_FindEffectiveRoot(t *testing.T) { } // "testdata" is in merkle.SkipDirs — the parent index would never // contain these files, so findEffectiveRoot must return the path itself. - root := ic.findEffectiveRoot("/project/testdata/fixtures/go") - if root != "/project/testdata/fixtures/go" { - t.Fatalf("expected original path (skip dir in route), got %s", root) + root := ic.findEffectiveRoot(skippedPath) + if root != skippedPath { + t.Fatalf("expected original path through skip dir %q, got %q", skippedPath, root) } }) } @@ -509,10 +533,11 @@ func TestIndexerCache_GetOrCreate_WorktreePathIgnoresPreferredRoot(t *testing.T) } // cwd=parentRepo is passed as preferredRoot (the outer monorepo). - _, effectiveRoot, _, err := ic.getOrCreate(worktreePath, parentRepo) + idx, effectiveRoot, _, err := ic.getOrCreate(worktreePath, parentRepo) if err != nil { t.Fatalf("getOrCreate: %v", err) } + t.Cleanup(func() { _ = idx.Close() }) // When path is a git worktree, the effective root must be the worktree // path, not the outer repo. Using the parent causes the entire monorepo to @@ -587,6 +612,16 @@ func TestIndexerCache_GetOrCreate_PreferredRoot(t *testing.T) { } func TestValidateSearchInput_CwdPathInteraction(t *testing.T) { + root := filepath.Join(t.TempDir(), "project") + src := filepath.Join(root, "src") + other := filepath.Join(t.TempDir(), "other") + if err := os.MkdirAll(src, 0o755); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(other, 0o755); err != nil { + t.Fatal(err) + } + tests := []struct { name string input SemanticSearchInput @@ -595,22 +630,22 @@ func TestValidateSearchInput_CwdPathInteraction(t *testing.T) { }{ { name: "cwd only — path defaults to cwd", - input: SemanticSearchInput{Cwd: "/project", Query: "test"}, - wantPath: "/project", + input: SemanticSearchInput{Cwd: root, Query: "test"}, + wantPath: root, }, { name: "path only — works as before", - input: SemanticSearchInput{Path: "/project/src", Query: "test"}, - wantPath: "/project/src", + input: SemanticSearchInput{Path: src, Query: "test"}, + wantPath: src, }, { name: "both valid — path under cwd", - input: SemanticSearchInput{Cwd: "/project", Path: "/project/src", Query: "test"}, - wantPath: "/project/src", + input: SemanticSearchInput{Cwd: root, Path: src, Query: "test"}, + wantPath: src, }, { name: "both invalid — path outside cwd", - input: SemanticSearchInput{Cwd: "/project", Path: "/other", Query: "test"}, + input: SemanticSearchInput{Cwd: root, Path: other, Query: "test"}, wantErr: "path must be equal to or under cwd", }, { @@ -1108,6 +1143,7 @@ func TestEnsureIndexed_SkipsWhenLockHeld(t *testing.T) { if err != nil { t.Fatalf("getOrCreate: %v", err) } + t.Cleanup(func() { _ = idx.Close() }) dbPath := config.DBPathForProject(effectiveRoot, ic.embedder.ModelName()) lockPath := indexlock.LockPathForDB(dbPath) diff --git a/internal/git/worktree_test.go b/internal/git/worktree_test.go index 3e7c50db..48afa2a2 100644 --- a/internal/git/worktree_test.go +++ b/internal/git/worktree_test.go @@ -160,6 +160,7 @@ func TestInternalWorktreePaths_NotARepo(t *testing.T) { t.Skip("git not on PATH") } dir := t.TempDir() + t.Setenv("GIT_CEILING_DIRECTORIES", filepath.Dir(dir)) paths := InternalWorktreePaths(dir) if len(paths) != 0 { t.Errorf("expected nil for non-repo, got %v", paths) @@ -172,6 +173,9 @@ func TestListWorktrees_NotARepo(t *testing.T) { } dir := t.TempDir() + + t.Setenv("GIT_CEILING_DIRECTORIES", filepath.Dir(dir)) + paths, err := ListWorktrees(dir) if err == nil { t.Fatalf("expected error, got paths: %v", paths) diff --git a/internal/index/index_test.go b/internal/index/index_test.go index b2b34dde..1b53e88a 100644 --- a/internal/index/index_test.go +++ b/internal/index/index_test.go @@ -21,6 +21,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" "testing" "time" @@ -619,7 +620,6 @@ func TestIndexer_StaleUnsupportedExtensionNotCountedAsRemoved(t *testing.T) { // The test passing without error means the ghost record was not propagated. } - // TestIndexer_StaleUnsupportedExtensionDeletedFromDB verifies that after a // reindex, stale file records with unsupported extensions (e.g. .md from // donor seeding) are purged from the DB. @@ -935,6 +935,9 @@ func Nested() {} } func TestIndexer_SkipsPermissionDeniedFile(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("Windows chmod does not reliably deny reads for the current user") + } if os.Getuid() == 0 { t.Skip("root bypasses file permission checks") } diff --git a/internal/merkle/ignore.go b/internal/merkle/ignore.go index 50f0c470..37cea5fe 100644 --- a/internal/merkle/ignore.go +++ b/internal/merkle/ignore.go @@ -19,6 +19,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" "sync" @@ -151,6 +152,8 @@ func (t *IgnoreTree) loadDir(dirRel string) *dirIgnore { // shouldSkip implements SkipFunc. It checks the six filtering layers: // 1. SkipDirs, 2. SkipFiles, 3. .gitignore, 4. .lumenignore, 5. .gitattributes, 6. extension. func (t *IgnoreTree) shouldSkip(relPath string, isDir bool) bool { + relPath = filepath.Clean(relPath) + slashRelPath := filepath.ToSlash(relPath) base := filepath.Base(relPath) if isDir && SkipDirs[base] { return true @@ -165,7 +168,7 @@ func (t *IgnoreTree) shouldSkip(relPath string, isDir bool) bool { t.mu.Lock() defer t.mu.Unlock() - if t.globalIgnore != nil && t.globalIgnore.MatchesPath(relPath) { + if t.globalIgnore != nil && t.globalIgnore.MatchesPath(slashRelPath) { return true } @@ -202,10 +205,10 @@ func (t *IgnoreTree) checkIgnoreRules(relPath, anc string, isDir bool) bool { func getPathFromAncestor(relPath, anc string) string { if anc == "" { - return relPath + return filepath.ToSlash(relPath) } pathFromAnc, _ := filepath.Rel(anc, relPath) - return pathFromAnc + return filepath.ToSlash(pathFromAnc) } // ancestorDirs returns the directory hierarchy from root ("") to dirRel. @@ -357,6 +360,9 @@ func IsRootUnindexable(dir string) (bool, string) { // while the cleaned-input check keeps "/etc" itself matching. clean := filepath.Clean(dir) resolved := resolvePath(dir) + if runtime.GOOS == "windows" && (isWindowsDriveRoot(clean) || isWindowsDriveRoot(resolved)) { + return true, "windows drive root" + } if refusedRoots[clean] || refusedRoots[resolved] { return true, "hardcoded system root" } @@ -385,6 +391,15 @@ func IsRootUnindexable(dir string) (bool, string) { return false, "" } +func isWindowsDriveRoot(path string) bool { + volume := filepath.VolumeName(path) + if volume == "" { + return false + } + rest := strings.TrimPrefix(path, volume) + return rest == `\` || rest == `/` || rest == "" +} + // MakeSkipWithExtra is like MakeSkip but also skips directories whose relative // paths are listed in extraSkipDirs. This is used to exclude git worktrees // that are checked out inside the project root directory. diff --git a/internal/merkle/ignore_test.go b/internal/merkle/ignore_test.go index e702360b..2869ac07 100644 --- a/internal/merkle/ignore_test.go +++ b/internal/merkle/ignore_test.go @@ -147,6 +147,7 @@ func TestIsRootUnindexable(t *testing.T) { } windowsPaths := []string{ `C:\`, + `D:\`, `C:\Windows`, `C:\Program Files`, `C:\Program Files (x86)`, @@ -171,6 +172,12 @@ func TestIsRootUnindexable(t *testing.T) { if !got { t.Errorf("expected %q to be refused as an index root", p) } + if runtime.GOOS == "windows" && filepath.VolumeName(p) != "" && filepath.Clean(p) == filepath.VolumeName(p)+`\` { + if reason != "windows drive root" { + t.Errorf("reason for %q = %q, want %q", p, reason, "windows drive root") + } + continue + } if reason != "hardcoded system root" { t.Errorf("reason for %q = %q, want %q", p, reason, "hardcoded system root") } @@ -558,8 +565,8 @@ func TestAncestorDirs(t *testing.T) { {"", []string{""}}, {".", []string{""}}, {"a", []string{"", "a"}}, - {"a/b", []string{"", "a", "a/b"}}, - {"a/b/c", []string{"", "a", "a/b", "a/b/c"}}, + {"a/b", []string{"", "a", filepath.Join("a", "b")}}, + {"a/b/c", []string{"", "a", filepath.Join("a", "b"), filepath.Join("a", "b", "c")}}, } for _, tt := range tests { @@ -632,9 +639,9 @@ func TestBuildTree_WithNestedGitignore(t *testing.T) { // main.go, sub/sub.go, sub/helper.go should be present // app.log (root .gitignore), sub/internal_helper.go (nested .gitignore) excluded expected := map[string]bool{ - "main.go": true, - "sub/sub.go": true, - "sub/helper.go": true, + "main.go": true, + filepath.Join("sub", "sub.go"): true, + filepath.Join("sub", "helper.go"): true, } if len(tree.Files) != len(expected) { t.Fatalf("expected %d files, got %d: %v", len(expected), len(tree.Files), tree.Files) @@ -647,7 +654,7 @@ func TestBuildTree_WithNestedGitignore(t *testing.T) { if _, ok := tree.Files["app.log"]; ok { t.Error("expected app.log to be excluded by root .gitignore") } - if _, ok := tree.Files["sub/internal_helper.go"]; ok { + if _, ok := tree.Files[filepath.Join("sub", "internal_helper.go")]; ok { t.Error("expected sub/internal_helper.go to be excluded by nested .gitignore") } } @@ -662,7 +669,7 @@ func TestIgnoreTree_GlobalGitignore(t *testing.T) { // Create git config pointing to it configPath := filepath.Join(globalIgnoreDir, "gitconfig") - configContent := fmt.Sprintf("[core]\n\texcludesFile = %s\n", globalIgnorePath) + configContent := fmt.Sprintf("[core]\n\texcludesFile = %s\n", filepath.ToSlash(globalIgnorePath)) if err := os.WriteFile(configPath, []byte(configContent), 0o644); err != nil { t.Fatal(err) } @@ -679,4 +686,3 @@ func TestIgnoreTree_GlobalGitignore(t *testing.T) { t.Error("main.go should not be skipped") } } - diff --git a/internal/merkle/merkle_test.go b/internal/merkle/merkle_test.go index 104a374a..600b947a 100644 --- a/internal/merkle/merkle_test.go +++ b/internal/merkle/merkle_test.go @@ -18,6 +18,7 @@ import ( "fmt" "os" "path/filepath" + "runtime" "testing" ) @@ -218,6 +219,9 @@ func TestCollectFilePaths_SkipsLargeFiles(t *testing.T) { } func TestBuildTree_SkipsPermissionDeniedFile(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("Windows chmod does not reliably deny reads for the current user") + } if os.Getuid() == 0 { t.Skip("root bypasses file permission checks") } From 7974d70da4059db8ca8b4ff308398d0ca88d4cd8 Mon Sep 17 00:00:00 2001 From: philipgraffshapiro Date: Thu, 11 Jun 2026 18:54:03 -0400 Subject: [PATCH 2/5] fix: resolve lumen windows launcher binary --- scripts/run | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/scripts/run b/scripts/run index b0bda40b..fe32f547 100755 --- a/scripts/run +++ b/scripts/run @@ -13,12 +13,16 @@ case "$ARCH" in x86_64) ARCH="amd64" ;; aarch64) ARCH="arm64" ;; esac +case "$OS" in + mingw*|msys*|cygwin*) OS="windows"; EXE=".exe" ;; + *) EXE="" ;; +esac # Find binary: check bin/ first, then goreleaser dist/ output, then download BINARY="" for candidate in \ - "${PLUGIN_ROOT}/bin/lumen" \ - "${PLUGIN_ROOT}/bin/lumen-${OS}-${ARCH}"; do + "${PLUGIN_ROOT}/bin/lumen${EXE}" \ + "${PLUGIN_ROOT}/bin/lumen-${OS}-${ARCH}${EXE}"; do if [ -x "$candidate" ]; then BINARY="$candidate" break @@ -27,7 +31,7 @@ done # Download on first run if no binary found if [ -z "$BINARY" ]; then - BINARY="${PLUGIN_ROOT}/bin/lumen-${OS}-${ARCH}" + BINARY="${PLUGIN_ROOT}/bin/lumen-${OS}-${ARCH}${EXE}" REPO="ory/lumen" @@ -43,7 +47,7 @@ if [ -z "$BINARY" ]; then exit 1 fi - ASSET="lumen-${VERSION#v}-${OS}-${ARCH}" + ASSET="lumen-${VERSION#v}-${OS}-${ARCH}${EXE}" URL="https://github.com/${REPO}/releases/download/${VERSION}/${ASSET}" echo "Downloading lumen ${VERSION} for ${OS}/${ARCH}..." >&2 @@ -70,7 +74,7 @@ if [ -z "$BINARY" ]; then echo "Falling back to ${LATEST_TAG}..." >&2 VERSION="$LATEST_TAG" - ASSET="lumen-${VERSION#v}-${OS}-${ARCH}" + ASSET="lumen-${VERSION#v}-${OS}-${ARCH}${EXE}" URL="https://github.com/${REPO}/releases/download/${VERSION}/${ASSET}" curl -fL --progress-bar --max-time 300 --retry 3 --retry-delay 2 "$URL" -o "$BINARY" From 647cc1b5cecbcd45cd467f238f5cd1816765dd3b Mon Sep 17 00:00:00 2001 From: philipgraffshapiro Date: Thu, 11 Jun 2026 19:18:40 -0400 Subject: [PATCH 3/5] fix: refuse agent session store index roots --- internal/merkle/ignore.go | 20 +++++++++++++++++ internal/merkle/ignore_test.go | 39 ++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/internal/merkle/ignore.go b/internal/merkle/ignore.go index 37cea5fe..791e99c6 100644 --- a/internal/merkle/ignore.go +++ b/internal/merkle/ignore.go @@ -373,6 +373,9 @@ func IsRootUnindexable(dir string) (bool, string) { return true, "user home directory" } } + if isAgentSessionStoreRoot(clean) || isAgentSessionStoreRoot(resolved) { + return true, "agent session store" + } gi, err := ignore.CompileIgnoreFile(filepath.Join(dir, ".lumenignore")) if err != nil || gi == nil { @@ -400,6 +403,23 @@ func isWindowsDriveRoot(path string) bool { return rest == `\` || rest == `/` || rest == "" } +func isAgentSessionStoreRoot(path string) bool { + parts := strings.FieldsFunc(filepath.Clean(path), func(r rune) bool { + return r == '/' || r == '\\' + }) + for i := 0; i+1 < len(parts); i++ { + parent := strings.ToLower(parts[i]) + child := strings.ToLower(parts[i+1]) + if parent == ".claude" && child == "projects" { + return true + } + if parent == ".codex" && (child == "sessions" || child == "history") { + return true + } + } + return false +} + // MakeSkipWithExtra is like MakeSkip but also skips directories whose relative // paths are listed in extraSkipDirs. This is used to exclude git worktrees // that are checked out inside the project root directory. diff --git a/internal/merkle/ignore_test.go b/internal/merkle/ignore_test.go index 2869ac07..b5514546 100644 --- a/internal/merkle/ignore_test.go +++ b/internal/merkle/ignore_test.go @@ -217,6 +217,45 @@ func TestIsRootUnindexable(t *testing.T) { } }) + t.Run("agent session stores are refused", func(t *testing.T) { + root := t.TempDir() + cases := []struct { + name string + path string + }{ + {name: "claude projects", path: filepath.Join(root, ".claude", "projects")}, + {name: "codex sessions", path: filepath.Join(root, ".codex", "sessions")}, + {name: "codex history", path: filepath.Join(root, ".codex", "history")}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if err := os.MkdirAll(tc.path, 0o755); err != nil { + t.Fatal(err) + } + got, reason := IsRootUnindexable(tc.path) + if !got { + t.Fatalf("expected %q to be refused as an agent session store", tc.path) + } + if reason != "agent session store" { + t.Fatalf("reason = %q, want %q", reason, "agent session store") + } + }) + } + }) + + t.Run("agent config directories remain indexable inside projects", func(t *testing.T) { + root := t.TempDir() + for _, dir := range []string{filepath.Join(root, ".claude"), filepath.Join(root, ".codex")} { + if err := os.MkdirAll(dir, 0o755); err != nil { + t.Fatal(err) + } + if got, reason := IsRootUnindexable(dir); got { + t.Fatalf("expected config dir %q to remain indexable, got reason %q", dir, reason) + } + } + }) + t.Run("hardcoded refusal does not falsely flag siblings of home", func(t *testing.T) { // A directory that is NOT in the refusal list and has no .lumenignore // must still be indexable. Use the test's tempdir which is outside any From 57666436d5c80c6e15f16449cb3a077d2d2760c4 Mon Sep 17 00:00:00 2001 From: philipgraffshapiro Date: Thu, 11 Jun 2026 19:39:05 -0400 Subject: [PATCH 4/5] fix: refuse protected system subtrees --- internal/merkle/ignore.go | 130 ----------------------- internal/merkle/ignore_test.go | 19 ++++ internal/merkle/root_guard.go | 187 +++++++++++++++++++++++++++++++++ 3 files changed, 206 insertions(+), 130 deletions(-) create mode 100644 internal/merkle/root_guard.go diff --git a/internal/merkle/ignore.go b/internal/merkle/ignore.go index 791e99c6..5d1e7330 100644 --- a/internal/merkle/ignore.go +++ b/internal/merkle/ignore.go @@ -19,7 +19,6 @@ import ( "os" "os/exec" "path/filepath" - "runtime" "strings" "sync" @@ -291,135 +290,6 @@ func parseLinguistExcluded(path string) *ignore.GitIgnore { return ignore.CompileIgnoreLines(patterns...) } -// refusedRoots are filesystem roots that are never appropriate as a Lumen -// index root regardless of configuration. Indexing them would walk huge, -// machine-managed trees and on macOS trigger TCC prompts for protected -// folders. The user's $HOME is added at lookup time via os.UserHomeDir. -// -// Entries cover Unix/macOS and Windows. Keys are compared against -// filepath.Clean(dir), which is platform-dependent (forward slashes on -// Unix, backslashes on Windows), so Windows entries only match on Windows -// and vice versa — harmless to include both. -var refusedRoots = map[string]bool{ - // Unix / macOS - "/": true, - "/Users": true, - "/home": true, - "/tmp": true, - "/private/tmp": true, - "/var": true, - "/private/var": true, - "/etc": true, - "/usr": true, - "/opt": true, - "/Applications": true, - "/Library": true, - "/System": true, - // Windows - `C:\`: true, - `C:\Windows`: true, - `C:\Program Files`: true, - `C:\Program Files (x86)`: true, - `C:\Users`: true, - `C:\ProgramData`: true, -} - -// resolvePath returns filepath.EvalSymlinks(dir) when it succeeds, falling -// back to filepath.Clean(dir) when it does not (e.g. dir does not exist on -// disk, or a symlink in the chain cannot be resolved). This ensures that a -// symlink to $HOME or a refused root is caught by IsRootUnindexable rather -// than slipping through on the symlink path alone. -func resolvePath(dir string) string { - if resolved, err := filepath.EvalSymlinks(dir); err == nil { - return resolved - } - return filepath.Clean(dir) -} - -// IsRootUnindexable reports whether dir is unsuitable as a Lumen index root. -// When true, the returned string is a short human-readable reason suitable for -// inclusion in an error message. When false, the reason is empty. -// -// Two checks combine: -// -// 1. Hardcoded refusal list — filesystem roots ($HOME, /, /Users, /tmp, -// /var, /etc, /usr, /Applications, /Library and macOS /private/* twins) -// that should never be project roots regardless of user config. -// 2. .lumenignore probe — if dir/.lumenignore contains patterns broad -// enough to match every file (e.g. "**", "**/*", "*"), the user has -// declared the directory un-indexable at its boundary. -// -// Without these guards the indexer walks the entire tree, ignores every -// file, produces an empty index, and on macOS triggers TCC prompts along -// the way. Callers (findAncestorIndex, `lumen index`) use this to refuse -// such roots upfront. -func IsRootUnindexable(dir string) (bool, string) { - // Check both the cleaned input and the symlink-resolved form against the - // refusal map. On macOS, /etc → /private/etc and /tmp → /private/tmp via - // symlinks: resolving lets a user-supplied "/private/etc" still match, - // while the cleaned-input check keeps "/etc" itself matching. - clean := filepath.Clean(dir) - resolved := resolvePath(dir) - if runtime.GOOS == "windows" && (isWindowsDriveRoot(clean) || isWindowsDriveRoot(resolved)) { - return true, "windows drive root" - } - if refusedRoots[clean] || refusedRoots[resolved] { - return true, "hardcoded system root" - } - if home, err := os.UserHomeDir(); err == nil { - homeClean := filepath.Clean(home) - homeResolved := resolvePath(home) - if homeClean == clean || homeClean == resolved || homeResolved == clean || homeResolved == resolved { - return true, "user home directory" - } - } - if isAgentSessionStoreRoot(clean) || isAgentSessionStoreRoot(resolved) { - return true, "agent session store" - } - - gi, err := ignore.CompileIgnoreFile(filepath.Join(dir, ".lumenignore")) - if err != nil || gi == nil { - return false, "" - } - // Probe with both a root-level entry and a nested entry using long random - // sentinels that no realistic specific pattern would match. Patterns like - // "*" alone match only the root probe (gitignore `*` doesn't cross `/`); - // patterns like "**", "**/*", "*/*" match both probes — which is what we - // take as "ignores everything". - const probeRoot = "lumen-root-probe-X9F2K7M3" - const probeNested = "lumen-root-probe-X9F2K7M3/L8B4Q1P5R6N2" - if gi.MatchesPath(probeRoot) && gi.MatchesPath(probeNested) { - return true, ".lumenignore catch-all pattern" - } - return false, "" -} - -func isWindowsDriveRoot(path string) bool { - volume := filepath.VolumeName(path) - if volume == "" { - return false - } - rest := strings.TrimPrefix(path, volume) - return rest == `\` || rest == `/` || rest == "" -} - -func isAgentSessionStoreRoot(path string) bool { - parts := strings.FieldsFunc(filepath.Clean(path), func(r rune) bool { - return r == '/' || r == '\\' - }) - for i := 0; i+1 < len(parts); i++ { - parent := strings.ToLower(parts[i]) - child := strings.ToLower(parts[i+1]) - if parent == ".claude" && child == "projects" { - return true - } - if parent == ".codex" && (child == "sessions" || child == "history") { - return true - } - } - return false -} - // MakeSkipWithExtra is like MakeSkip but also skips directories whose relative // paths are listed in extraSkipDirs. This is used to exclude git worktrees // that are checked out inside the project root directory. diff --git a/internal/merkle/ignore_test.go b/internal/merkle/ignore_test.go index b5514546..588ccfb4 100644 --- a/internal/merkle/ignore_test.go +++ b/internal/merkle/ignore_test.go @@ -184,6 +184,25 @@ func TestIsRootUnindexable(t *testing.T) { } }) + t.Run("windows system descendants are refused case-insensitively", func(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("windows-only path semantics") + } + for _, p := range []string{ + `C:\Windows\System32`, + `C:\WINDOWS\system32`, + `C:\Program Files\Vendor`, + `C:\ProgramData\Vendor`, + } { + got, reason := IsRootUnindexable(p) + if !got { + t.Fatalf("expected %q to be refused as a protected system subtree", p) + } + if reason != "hardcoded system root" { + t.Fatalf("reason for %q = %q, want %q", p, reason, "hardcoded system root") + } + } + }) t.Run("symlink to home is refused", func(t *testing.T) { home, err := os.UserHomeDir() if err != nil { diff --git a/internal/merkle/root_guard.go b/internal/merkle/root_guard.go new file mode 100644 index 00000000..b1372efb --- /dev/null +++ b/internal/merkle/root_guard.go @@ -0,0 +1,187 @@ +// Copyright 2026 Aeneas Rekkas +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package merkle + +import ( + "os" + "path/filepath" + "runtime" + "strings" + + ignore "github.com/sabhiram/go-gitignore" +) + +// refusedRoots are filesystem roots that are never appropriate as a Lumen +// index root regardless of configuration. Indexing them would walk huge, +// machine-managed trees and on macOS trigger TCC prompts for protected +// folders. The user's $HOME is added at lookup time via os.UserHomeDir. +// +// Entries cover Unix/macOS and Windows. Keys are compared against +// filepath.Clean(dir), which is platform-dependent (forward slashes on +// Unix, backslashes on Windows), so Windows entries only match on Windows +// and vice versa -- harmless to include both. +var refusedRoots = map[string]bool{ + // Unix / macOS + "/": true, + "/Users": true, + "/home": true, + "/tmp": true, + "/private/tmp": true, + "/var": true, + "/private/var": true, + "/etc": true, + "/usr": true, + "/opt": true, + "/Applications": true, + "/Library": true, + "/System": true, + // Windows + `C:\`: true, + `C:\Windows`: true, + `C:\Program Files`: true, + `C:\Program Files (x86)`: true, + `C:\Users`: true, + `C:\ProgramData`: true, +} + +// refusedRootSubtrees are protected Windows roots whose descendants are also +// machine-managed. Keep broad user containers such as C:\Users exact-only. +var refusedRootSubtrees = []string{ + `C:\Windows`, + `C:\Program Files`, + `C:\Program Files (x86)`, + `C:\ProgramData`, +} + +// resolvePath returns filepath.EvalSymlinks(dir) when it succeeds, falling +// back to filepath.Clean(dir) when it does not (e.g. dir does not exist on +// disk, or a symlink in the chain cannot be resolved). This ensures that a +// symlink to $HOME or a refused root is caught by IsRootUnindexable rather +// than slipping through on the symlink path alone. +func resolvePath(dir string) string { + if resolved, err := filepath.EvalSymlinks(dir); err == nil { + return resolved + } + return filepath.Clean(dir) +} + +// IsRootUnindexable reports whether dir is unsuitable as a Lumen index root. +// When true, the returned string is a short human-readable reason suitable for +// inclusion in an error message. When false, the reason is empty. +// +// Two checks combine: +// +// 1. Hardcoded refusal list -- filesystem roots ($HOME, /, /Users, /tmp, +// /var, /etc, /usr, /Applications, /Library and macOS /private/* twins) +// that should never be project roots regardless of user config. +// 2. .lumenignore probe -- if dir/.lumenignore contains patterns broad +// enough to match every file (e.g. "**", "**/*", "*"), the user has +// declared the directory un-indexable at its boundary. +// +// Without these guards the indexer walks the entire tree, ignores every +// file, produces an empty index, and on macOS triggers TCC prompts along +// the way. Callers (findAncestorIndex, `lumen index`) use this to refuse +// such roots upfront. +func IsRootUnindexable(dir string) (bool, string) { + // Check both the cleaned input and the symlink-resolved form against the + // refusal map. On macOS, /etc -> /private/etc and /tmp -> /private/tmp via + // symlinks: resolving lets a user-supplied "/private/etc" still match, + // while the cleaned-input check keeps "/etc" itself matching. + clean := filepath.Clean(dir) + resolved := resolvePath(dir) + if runtime.GOOS == "windows" && (isWindowsDriveRoot(clean) || isWindowsDriveRoot(resolved)) { + return true, "windows drive root" + } + if refusedRoots[clean] || refusedRoots[resolved] { + return true, "hardcoded system root" + } + if isRefusedRootSubtree(clean) || isRefusedRootSubtree(resolved) { + return true, "hardcoded system root" + } + if home, err := os.UserHomeDir(); err == nil { + homeClean := filepath.Clean(home) + homeResolved := resolvePath(home) + if homeClean == clean || homeClean == resolved || homeResolved == clean || homeResolved == resolved { + return true, "user home directory" + } + } + if isAgentSessionStoreRoot(clean) || isAgentSessionStoreRoot(resolved) { + return true, "agent session store" + } + + gi, err := ignore.CompileIgnoreFile(filepath.Join(dir, ".lumenignore")) + if err != nil || gi == nil { + return false, "" + } + // Probe with both a root-level entry and a nested entry using long random + // sentinels that no realistic specific pattern would match. Patterns like + // "*" alone match only the root probe (gitignore `*` doesn't cross `/`); + // patterns like "**", "**/*", "*/*" match both probes -- which is what we + // take as "ignores everything". + const probeRoot = "lumen-root-probe-X9F2K7M3" + const probeNested = "lumen-root-probe-X9F2K7M3/L8B4Q1P5R6N2" + if gi.MatchesPath(probeRoot) && gi.MatchesPath(probeNested) { + return true, ".lumenignore catch-all pattern" + } + return false, "" +} + +func isWindowsDriveRoot(path string) bool { + volume := filepath.VolumeName(path) + if volume == "" { + return false + } + rest := strings.TrimPrefix(path, volume) + return rest == `\` || rest == `/` || rest == "" +} + +func isRefusedRootSubtree(path string) bool { + if runtime.GOOS != "windows" { + return false + } + for _, root := range refusedRootSubtrees { + if sameOrUnderRoot(path, root) { + return true + } + } + return false +} + +func sameOrUnderRoot(path, root string) bool { + path = strings.ToLower(filepath.Clean(path)) + root = strings.ToLower(filepath.Clean(root)) + if path == root { + return true + } + root = strings.TrimRight(root, `\/`) + return strings.HasPrefix(path, root+string(filepath.Separator)) +} + +func isAgentSessionStoreRoot(path string) bool { + parts := strings.FieldsFunc(filepath.Clean(path), func(r rune) bool { + return r == '/' || r == '\\' + }) + for i := 0; i+1 < len(parts); i++ { + parent := strings.ToLower(parts[i]) + child := strings.ToLower(parts[i+1]) + if parent == ".claude" && child == "projects" { + return true + } + if parent == ".codex" && (child == "sessions" || child == "history") { + return true + } + } + return false +} From a8d1b9775c8494f3201d3a17c52c0a84eacf46fb Mon Sep 17 00:00:00 2001 From: philipgraffshapiro Date: Thu, 11 Jun 2026 23:08:54 -0400 Subject: [PATCH 5/5] fix: purge windows paths case-insensitively --- cmd/purge.go | 35 ++++++++++++++++++++++++++++++----- cmd/purge_test.go | 20 ++++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/cmd/purge.go b/cmd/purge.go index 72dd36e3..cab58ac4 100644 --- a/cmd/purge.go +++ b/cmd/purge.go @@ -19,6 +19,7 @@ import ( "io" "os" "path/filepath" + "runtime" "strings" "github.com/ory/lumen/internal/config" @@ -153,10 +154,8 @@ func purgeOneTarget(stderr io.Writer, indexMap map[string][]string, seen map[str } } - match := "" - if _, ok := indexMap[target]; ok { - match = target - } else if !inGitRepo { + match := lookupProjectIndex(indexMap, target) + if match == "" && !inGitRepo { // Non-git fallback: match the deepest stored path that contains the // target. Mirrors `findAncestorIndex` semantics used by index/search. match = longestAncestor(indexMap, target) @@ -183,12 +182,21 @@ func purgeOneTarget(stderr io.Writer, indexMap map[string][]string, seen map[str return removed, nil } +func lookupProjectIndex(indexMap map[string][]string, target string) string { + for stored := range indexMap { + if sameProjectPath(stored, target) { + return stored + } + } + return "" +} + // longestAncestor returns the longest key in indexMap that is either equal to // target or an ancestor directory of target, or "" if no such key exists. func longestAncestor(indexMap map[string][]string, target string) string { best := "" for stored := range indexMap { - if stored == target || strings.HasPrefix(target, stored+string(filepath.Separator)) { + if sameProjectPath(stored, target) || pathIsUnder(target, stored) { if len(stored) > len(best) { best = stored } @@ -197,6 +205,23 @@ func longestAncestor(indexMap map[string][]string, target string) string { return best } +func sameProjectPath(a, b string) bool { + if runtime.GOOS == "windows" { + return strings.EqualFold(a, b) + } + return a == b +} + +func pathIsUnder(path, root string) bool { + root = strings.TrimRight(root, `\/`) + candidate := root + string(filepath.Separator) + if runtime.GOOS == "windows" { + path = strings.ToLower(path) + candidate = strings.ToLower(candidate) + } + return strings.HasPrefix(path, candidate) +} + func pluralY(n int) string { if n == 1 { return "y" diff --git a/cmd/purge_test.go b/cmd/purge_test.go index 1a1badf8..1ca5673b 100644 --- a/cmd/purge_test.go +++ b/cmd/purge_test.go @@ -18,6 +18,7 @@ import ( "bytes" "os" "path/filepath" + "runtime" "strings" "testing" @@ -110,6 +111,25 @@ func TestPurge_SinglePath_RemovesOnlyThatProject(t *testing.T) { assert.NoError(t, err, "project B hash dir should be untouched") } +func TestPurge_WindowsStoredPathCaseInsensitive(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("windows path matching is case-insensitive") + } + tmp := resolvedTempDir(t) + t.Setenv("XDG_DATA_HOME", tmp) + + storedPath := `C:\WINDOWS\system32` + requestPath := `C:\Windows\System32` + hashDir := seedIndex(t, storedPath, embedder.DefaultModel) + + _, stderrOut, err := runPurgeCmd(t, []string{requestPath}) + require.NoError(t, err) + assert.Contains(t, stderrOut, "Removed 1 index directory") + + _, err = os.Stat(hashDir) + assert.True(t, os.IsNotExist(err), "case-variant Windows path should purge stored index") +} + func TestPurge_PathInsideGitRepo_ResolvesToGitRoot(t *testing.T) { tmp := resolvedTempDir(t) t.Setenv("XDG_DATA_HOME", tmp)