From 7d5180970826a6ae67c0b4e1c66e808924d16f8b Mon Sep 17 00:00:00 2001 From: Ismael Garrido Date: Sat, 30 May 2026 16:40:33 -0300 Subject: [PATCH 1/2] feat(purge): rework index cleanup with cwd default and new flags Change "lumen purge" with no arguments to remove only the current project's index (normalized to its git root) instead of wiping everything, and add explicit modes for the broader operations: --all Remove every index, including legacy indexes that predate project_path metadata. --missing Remove indexes whose recorded project folder no longer exists; only deletes when the folder is confirmed missing. --dry-run With --missing, preview removals without deleting. Each removed index is logged individually, mutually exclusive flag combinations are rejected, and the data-dir scan is shared across all modes. README and command help updated accordingly. Co-authored-by: Claude Opus 4.8 --- README.md | 6 +- cmd/purge.go | 158 +++++++++++++++++++++++++++++++++++++++------- cmd/purge_test.go | 146 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 272 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index e5e5c8e..acd1957 100644 --- a/README.md +++ b/README.md @@ -392,8 +392,10 @@ and binary version. Different models or Lumen versions automatically get separate indexes. No files are added to your repo, no `.gitignore` modifications needed. -You can safely delete the entire `lumen` directory to clear all indexes, or use -`lumen purge` to do it automatically. +You can safely delete the entire `lumen` directory to clear all indexes. Or use +`lumen purge` (current project only), `lumen purge --all` (every index), or +`lumen purge --missing` (drop indexes whose source folder was deleted; add +`--dry-run` to preview). **Git worktrees** are detected automatically. 
When you create a new worktree (`git worktree add` or `claude --worktree`), Lumen finds a sibling worktree's diff --git a/cmd/purge.go b/cmd/purge.go index 37ec3cc..a08a325 100644 --- a/cmd/purge.go +++ b/cmd/purge.go @@ -27,25 +27,49 @@ import ( "github.com/spf13/cobra" ) +const ( + flagAll = "all" + flagMissing = "missing" + flagDryRun = "dry-run" +) + func init() { + registerPurgeFlags(purgeCmd) rootCmd.AddCommand(purgeCmd) } +// registerPurgeFlags declares the purge command's flags. Shared by init and +// the test helper so tests exercise the real flag set. +func registerPurgeFlags(cmd *cobra.Command) { + cmd.Flags().Bool(flagAll, false, "Remove every index under the data directory") + cmd.Flags().Bool(flagMissing, false, "Remove indexes whose project folder no longer exists") + cmd.Flags().Bool(flagDryRun, false, "With --missing, list what would be removed without deleting") +} + +// lumenDataDir returns the directory holding all lumen index databases. +func lumenDataDir() string { + return filepath.Join(config.XDGDataDir(), "lumen") +} + var purgeCmd = &cobra.Command{ Use: "purge [path...]", Short: "Remove lumen index data", Long: `Deletes lumen index databases under ~/.local/share/lumen/. -With no arguments, removes every index (irreversible — all indexes will be -rebuilt on the next search). +With no arguments, removes only the index for the current working directory's +project (the path is normalized to its git root first). -With one or more paths, removes only the index directories associated with -those projects. Each path is normalized to its git root first, then matched -against the project_path recorded inside each index database, so switching -embedding models or using custom models never leaves orphan indexes. +With one or more paths, removes the index directories associated with those +projects. 
Each path is normalized to its git root, then matched against the +project_path recorded inside each index database, so switching embedding models +or using custom models never leaves orphan indexes. -Indexes created by older binaries that did not record project_path cannot be -matched by path; run "lumen purge" with no arguments to wipe those. + --all Remove every index (irreversible — all indexes will be rebuilt on + the next search). Also clears legacy indexes created by older + binaries that did not record project_path. + --missing Remove every index whose recorded project folder no longer exists + on disk. Only deletes when the folder is confirmed missing. + --dry-run With --missing, list what would be removed without deleting. Note: a concurrently running indexer for a purged project may log a write error and exit; re-run "lumen index" afterwards to rebuild.`, @@ -54,14 +78,53 @@ error and exit; re-run "lumen index" afterwards to rebuild.`, } func runPurge(cmd *cobra.Command, args []string) error { - if len(args) == 0 { - return purgeAll(cmd.ErrOrStderr()) + all, _ := cmd.Flags().GetBool(flagAll) + missing, _ := cmd.Flags().GetBool(flagMissing) + dryRun, _ := cmd.Flags().GetBool(flagDryRun) + + if err := validatePurgeFlags(all, missing, dryRun, len(args)); err != nil { + return err + } + + stderr := cmd.ErrOrStderr() + stdout := cmd.OutOrStdout() + + switch { + case all: + return purgeAll(stderr) + case missing: + return purgeMissing(stderr, stdout, dryRun) + default: + if len(args) == 0 { + cwd, err := os.Getwd() + if err != nil { + return fmt.Errorf("determine working directory: %w", err) + } + args = []string{cwd} + } + return purgeProjects(stderr, stdout, args) + } +} + +// validatePurgeFlags enforces mutual exclusivity of the purge modes. 
+func validatePurgeFlags(all, missing, dryRun bool, nArgs int) error { + if all && missing { + return fmt.Errorf("--all and --missing cannot be combined") + } + if all && nArgs > 0 { + return fmt.Errorf("--all cannot be combined with explicit paths") + } + if missing && nArgs > 0 { + return fmt.Errorf("--missing cannot be combined with explicit paths") } - return purgeProjects(cmd.ErrOrStderr(), cmd.OutOrStdout(), args) + if dryRun && !missing { + return fmt.Errorf("--dry-run is only valid with --missing") + } + return nil } func purgeAll(stderr io.Writer) error { - dataDir := filepath.Join(config.XDGDataDir(), "lumen") + dataDir := lumenDataDir() info, err := os.Stat(dataDir) if err != nil { @@ -75,6 +138,19 @@ func purgeAll(stderr io.Writer) error { return fmt.Errorf("%s is not a directory", dataDir) } + // Log each index directory before wiping, matching the per-index logging + // used by the other purge modes. Legacy dirs without project_path metadata + // are logged by path alone. + indexMap, legacy, _ := scanIndexes(dataDir) + for projectPath, hashDirs := range indexMap { + for _, hashDir := range hashDirs { + _, _ = fmt.Fprintf(stderr, "Removed %s (%s)\n", hashDir, projectPath) + } + } + for _, hashDir := range legacy { + _, _ = fmt.Fprintf(stderr, "Removed %s\n", hashDir) + } + if err := os.RemoveAll(dataDir); err != nil { return fmt.Errorf("remove index data: %w", err) } @@ -83,8 +159,7 @@ func purgeAll(stderr io.Writer) error { } func purgeProjects(stderr, stdout io.Writer, args []string) error { - dataDir := filepath.Join(config.XDGDataDir(), "lumen") - indexMap, err := scanIndexes(dataDir) + indexMap, _, err := scanIndexes(lumenDataDir()) if err != nil { return err } @@ -102,18 +177,54 @@ func purgeProjects(stderr, stdout io.Writer, args []string) error { return nil } +func purgeMissing(stderr, stdout io.Writer, dryRun bool) error { + indexMap, _, err := scanIndexes(lumenDataDir()) + if err != nil { + return err + } + + verb := "Removed" + if dryRun { + 
verb = "Would remove" + } + + removed := 0 + for projectPath, hashDirs := range indexMap { + if _, statErr := os.Stat(projectPath); statErr == nil { + continue // folder still exists — keep the index + } else if !os.IsNotExist(statErr) { + // Conservative: any error other than "not exist" must never delete. + _, _ = fmt.Fprintf(stderr, "Skipping %s: cannot stat (%v)\n", projectPath, statErr) + continue + } + for _, hashDir := range hashDirs { + if !dryRun { + if err := os.RemoveAll(hashDir); err != nil { + return fmt.Errorf("remove %s: %w", hashDir, err) + } + } + _, _ = fmt.Fprintf(stderr, "%s %s (%s)\n", verb, hashDir, projectPath) + removed++ + } + } + + _, _ = fmt.Fprintf(stdout, "%s %d index director%s whose folder no longer exists.\n", verb, removed, pluralY(removed)) + return nil +} + // scanIndexes walks dataDir (one level deep) and returns a map of stored -// project_path → list of hash directories for that project. Hash directories -// that can't be read or lack project_path metadata are silently skipped so a -// single broken index never blocks purging of others. -func scanIndexes(dataDir string) (map[string][]string, error) { - result := make(map[string][]string) +// project_path → list of hash directories for that project, plus the hash +// directories that can't be read or lack project_path metadata (legacy +// indexes). Path-based purge modes ignore the legacy slice so a single broken +// index never blocks purging of others; --all uses it to log those dirs. 
+func scanIndexes(dataDir string) (indexMap map[string][]string, legacy []string, err error) { + indexMap = make(map[string][]string) entries, err := os.ReadDir(dataDir) if err != nil { if os.IsNotExist(err) { - return result, nil + return indexMap, nil, nil } - return nil, fmt.Errorf("read data dir: %w", err) + return nil, nil, fmt.Errorf("read data dir: %w", err) } for _, entry := range entries { if !entry.IsDir() { @@ -123,11 +234,12 @@ func scanIndexes(dataDir string) (map[string][]string, error) { dbPath := filepath.Join(hashDir, "index.db") stored, err := store.ReadMetaAt(dbPath, "project_path") if err != nil || stored == "" { + legacy = append(legacy, hashDir) continue } - result[stored] = append(result[stored], hashDir) + indexMap[stored] = append(indexMap[stored], hashDir) } - return result, nil + return indexMap, legacy, nil } // purgeOneTarget resolves arg to a project root and removes every hash diff --git a/cmd/purge_test.go b/cmd/purge_test.go index 1a1badf..778255c 100644 --- a/cmd/purge_test.go +++ b/cmd/purge_test.go @@ -43,46 +43,90 @@ func seedIndex(t *testing.T, projectPath, model string) string { return filepath.Dir(dbPath) } -// runPurgeCmd invokes runPurge with the provided args and returns captured -// stdout, stderr, and the error (if any). -func runPurgeCmd(t *testing.T, args []string) (stdout, stderr string, err error) { +// runPurgeCmd invokes runPurge with the provided args and optional flag names +// (each set to "true") and returns captured stdout, stderr, and the error. 
+func runPurgeCmd(t *testing.T, args []string, flags ...string) (stdout, stderr string, err error) { t.Helper() outBuf := new(bytes.Buffer) errBuf := new(bytes.Buffer) cmd := &cobra.Command{} + registerPurgeFlags(cmd) + for _, f := range flags { + require.NoError(t, cmd.Flags().Set(f, "true")) + } cmd.SetOut(outBuf) cmd.SetErr(errBuf) err = runPurge(cmd, args) return outBuf.String(), errBuf.String(), err } -func TestPurge_NoArgs_RemovesEverything(t *testing.T) { +func TestPurge_NoArgs_PurgesCwdOnly(t *testing.T) { tmp := resolvedTempDir(t) t.Setenv("XDG_DATA_HOME", tmp) - seedIndex(t, "/project/a", embedder.DefaultModel) - seedIndex(t, "/project/b", embedder.DefaultModel) + projectA := filepath.Join(tmp, "projectA") + projectB := filepath.Join(tmp, "projectB") + require.NoError(t, os.MkdirAll(projectA, 0o755)) + require.NoError(t, os.MkdirAll(projectB, 0o755)) + runGit(t, projectA, "init") + runGit(t, projectB, "init") - lumenRoot := filepath.Join(tmp, "lumen") - entries, err := os.ReadDir(lumenRoot) + hashDirA := seedIndex(t, projectA, embedder.DefaultModel) + hashDirB := seedIndex(t, projectB, embedder.DefaultModel) + + // No-args default operates on the current working directory. 
+ t.Chdir(projectA) + + _, _, err := runPurgeCmd(t, nil) require.NoError(t, err) - require.Len(t, entries, 2, "should have seeded two hash dirs") + + _, err = os.Stat(hashDirA) + assert.True(t, os.IsNotExist(err), "cwd project A hash dir should be gone") + _, err = os.Stat(hashDirB) + assert.NoError(t, err, "project B hash dir should be untouched") +} + +func TestPurge_NoArgs_CwdWithoutIndex_ReportsNone(t *testing.T) { + tmp := resolvedTempDir(t) + t.Setenv("XDG_DATA_HOME", tmp) + + empty := filepath.Join(tmp, "empty") + require.NoError(t, os.MkdirAll(empty, 0o755)) + t.Chdir(empty) _, stderrOut, err := runPurgeCmd(t, nil) require.NoError(t, err) + assert.Contains(t, strings.ToLower(stderrOut), "no index found") +} + +func TestPurge_All_RemovesEverything(t *testing.T) { + tmp := resolvedTempDir(t) + t.Setenv("XDG_DATA_HOME", tmp) + + seedIndex(t, "/project/a", embedder.DefaultModel) + hashDirB := seedIndex(t, "/project/b", embedder.DefaultModel) + + // A legacy index dir with no project_path metadata must also be wiped. 
+ lumenRoot := filepath.Join(tmp, "lumen") + legacyDir := filepath.Join(lumenRoot, "legacyhash") + require.NoError(t, os.MkdirAll(legacyDir, 0o755)) + + _, stderrOut, err := runPurgeCmd(t, nil, flagAll) + require.NoError(t, err) assert.Contains(t, stderrOut, "Removed all index data") + assert.Contains(t, stderrOut, hashDirB, "should log each removed index dir") _, err = os.Stat(lumenRoot) assert.True(t, os.IsNotExist(err), "lumen data dir should be gone, got err=%v", err) } -func TestPurge_NoArgs_NothingToPurge(t *testing.T) { +func TestPurge_All_WithPaths_Errors(t *testing.T) { tmp := resolvedTempDir(t) t.Setenv("XDG_DATA_HOME", tmp) - _, stderrOut, err := runPurgeCmd(t, nil) - require.NoError(t, err) - assert.Contains(t, stderrOut, "No index data found") + _, _, err := runPurgeCmd(t, []string{"/some/path"}, flagAll) + require.Error(t, err) + assert.Contains(t, err.Error(), "--all") } func TestPurge_SinglePath_RemovesOnlyThatProject(t *testing.T) { @@ -220,3 +264,79 @@ func TestPurge_MultiplePaths_MixedHitsAndMisses(t *testing.T) { assert.True(t, os.IsNotExist(err), "project A hash dir should be gone") assert.Contains(t, strings.ToLower(stderrOut), "no index found", "miss should be reported") } + +func TestPurge_Missing_RemovesOnlyDeletedFolders(t *testing.T) { + tmp := resolvedTempDir(t) + t.Setenv("XDG_DATA_HOME", tmp) + + gone := filepath.Join(tmp, "gone") + alive := filepath.Join(tmp, "alive") + require.NoError(t, os.MkdirAll(gone, 0o755)) + require.NoError(t, os.MkdirAll(alive, 0o755)) + + hashGone := seedIndex(t, gone, embedder.DefaultModel) + hashAlive := seedIndex(t, alive, embedder.DefaultModel) + + // Delete one project's folder. 
+ require.NoError(t, os.RemoveAll(gone)) + + _, stderrOut, err := runPurgeCmd(t, nil, flagMissing) + require.NoError(t, err) + assert.Contains(t, stderrOut, hashGone, "should log the removed index dir") + + _, err = os.Stat(hashGone) + assert.True(t, os.IsNotExist(err), "index for deleted folder should be removed") + _, err = os.Stat(hashAlive) + assert.NoError(t, err, "index for existing folder should be kept") +} + +func TestPurge_Missing_AllFoldersExist_RemovesNothing(t *testing.T) { + tmp := resolvedTempDir(t) + t.Setenv("XDG_DATA_HOME", tmp) + + alive := filepath.Join(tmp, "alive") + require.NoError(t, os.MkdirAll(alive, 0o755)) + hashAlive := seedIndex(t, alive, embedder.DefaultModel) + + _, _, err := runPurgeCmd(t, nil, flagMissing) + require.NoError(t, err) + + _, err = os.Stat(hashAlive) + assert.NoError(t, err, "index for existing folder should be kept") +} + +func TestPurge_Missing_WithPaths_Errors(t *testing.T) { + tmp := resolvedTempDir(t) + t.Setenv("XDG_DATA_HOME", tmp) + + _, _, err := runPurgeCmd(t, []string{"/some/path"}, flagMissing) + require.Error(t, err) + assert.Contains(t, err.Error(), "--missing") +} + +func TestPurge_DryRun_Missing_DeletesNothing(t *testing.T) { + tmp := resolvedTempDir(t) + t.Setenv("XDG_DATA_HOME", tmp) + + gone := filepath.Join(tmp, "gone") + require.NoError(t, os.MkdirAll(gone, 0o755)) + hashGone := seedIndex(t, gone, embedder.DefaultModel) + require.NoError(t, os.RemoveAll(gone)) + + _, stderrOut, err := runPurgeCmd(t, nil, flagMissing, flagDryRun) + require.NoError(t, err) + assert.Contains(t, stderrOut, "Would remove") + assert.Contains(t, stderrOut, hashGone) + + _, err = os.Stat(hashGone) + assert.NoError(t, err, "dry-run must not delete the index dir") +} + +func TestPurge_DryRun_WithoutMissing_Errors(t *testing.T) { + tmp := resolvedTempDir(t) + t.Setenv("XDG_DATA_HOME", tmp) + + _, _, err := runPurgeCmd(t, nil, flagDryRun) + require.Error(t, err) + assert.Contains(t, err.Error(), "--dry-run is only valid with 
--missing") +} From a924d2056cf2af722010f86ffabf661c6a12575b Mon Sep 17 00:00:00 2001 From: Ismael Garrido Date: Sun, 31 May 2026 17:49:04 -0300 Subject: [PATCH 2/2] feat(purge): add --legacy flag and refine index classification Add --legacy flag to remove only legacy indexes (created by older binaries without project_path metadata) plus unreadable/corrupt index directories. Legacy indexes remain usable by the system (located by path hash) but are invisible to path- and --missing-based purge. --- cmd/purge.go | 151 ++++++++++++++++++------- cmd/purge_test.go | 276 +++++++++++++++++++++++++++++++--------------- 2 files changed, 303 insertions(+), 124 deletions(-) diff --git a/cmd/purge.go b/cmd/purge.go index a08a325..47c9be4 100644 --- a/cmd/purge.go +++ b/cmd/purge.go @@ -30,6 +30,7 @@ import ( const ( flagAll = "all" flagMissing = "missing" + flagLegacy = "legacy" flagDryRun = "dry-run" ) @@ -43,6 +44,7 @@ func init() { func registerPurgeFlags(cmd *cobra.Command) { cmd.Flags().Bool(flagAll, false, "Remove every index under the data directory") cmd.Flags().Bool(flagMissing, false, "Remove indexes whose project folder no longer exists") + cmd.Flags().Bool(flagLegacy, false, "Remove only legacy/unreadable indexes lacking project_path metadata") cmd.Flags().Bool(flagDryRun, false, "With --missing, list what would be removed without deleting") } @@ -68,7 +70,13 @@ or using custom models never leaves orphan indexes. the next search). Also clears legacy indexes created by older binaries that did not record project_path. --missing Remove every index whose recorded project folder no longer exists - on disk. Only deletes when the folder is confirmed missing. + on disk (only deletes a project index when its folder is confirmed + missing), plus any unreadable/corrupt index directories. + --legacy Remove only legacy indexes created by older binaries that did not + record project_path, plus any unreadable/corrupt index directories. 
+ Legacy indexes are still usable by the system (located by path + hash) but invisible to path- and --missing-based purge. Cannot be + combined with other flags or paths. --dry-run With --missing, list what would be removed without deleting. Note: a concurrently running indexer for a purged project may log a write @@ -80,9 +88,10 @@ error and exit; re-run "lumen index" afterwards to rebuild.`, func runPurge(cmd *cobra.Command, args []string) error { all, _ := cmd.Flags().GetBool(flagAll) missing, _ := cmd.Flags().GetBool(flagMissing) + legacy, _ := cmd.Flags().GetBool(flagLegacy) dryRun, _ := cmd.Flags().GetBool(flagDryRun) - if err := validatePurgeFlags(all, missing, dryRun, len(args)); err != nil { + if err := validatePurgeFlags(all, missing, legacy, dryRun, len(args)); err != nil { return err } @@ -92,6 +101,8 @@ func runPurge(cmd *cobra.Command, args []string) error { switch { case all: return purgeAll(stderr) + case legacy: + return purgeLegacy(stderr, stdout) case missing: return purgeMissing(stderr, stdout, dryRun) default: @@ -106,16 +117,21 @@ func runPurge(cmd *cobra.Command, args []string) error { } } -// validatePurgeFlags enforces mutual exclusivity of the purge modes. -func validatePurgeFlags(all, missing, dryRun bool, nArgs int) error { - if all && missing { - return fmt.Errorf("--all and --missing cannot be combined") +// validatePurgeFlags enforces mutual exclusivity of the purge modes. --all, +// --missing, and --legacy are the three exclusive whole-dataset modes; explicit +// paths select the default per-project mode and combine with none of them. 
+func validatePurgeFlags(all, missing, legacy, dryRun bool, nArgs int) error { + modes := 0 + for _, set := range []bool{all, missing, legacy} { + if set { + modes++ + } } - if all && nArgs > 0 { - return fmt.Errorf("--all cannot be combined with explicit paths") + if modes > 1 { + return fmt.Errorf("--all, --missing, and --legacy are mutually exclusive") } - if missing && nArgs > 0 { - return fmt.Errorf("--missing cannot be combined with explicit paths") + if modes > 0 && nArgs > 0 { + return fmt.Errorf("--all, --missing, and --legacy cannot be combined with explicit paths") } if dryRun && !missing { return fmt.Errorf("--dry-run is only valid with --missing") @@ -123,6 +139,11 @@ func validatePurgeFlags(all, missing, dryRun bool, nArgs int) error { return nil } +// purgeAll removes the entire lumen data directory. This is unconditional by +// design: --all must wipe everything regardless of whether individual indexes +// can be scanned or read. The pre-delete scan is best-effort and used only for +// per-index logging — its error is deliberately ignored, since a corrupt or +// unreadable index is exactly the kind of state --all exists to clear. func purgeAll(stderr io.Writer) error { dataDir := lumenDataDir() @@ -139,16 +160,18 @@ func purgeAll(stderr io.Writer) error { } // Log each index directory before wiping, matching the per-index logging - // used by the other purge modes. Legacy dirs without project_path metadata - // are logged by path alone. - indexMap, legacy, _ := scanIndexes(dataDir) + // used by the other purge modes. Dirs without project_path metadata and + // unreadable dirs are logged by path alone. 
+ indexMap, noMeta, unreadable, _ := scanIndexes(dataDir) for projectPath, hashDirs := range indexMap { for _, hashDir := range hashDirs { _, _ = fmt.Fprintf(stderr, "Removed %s (%s)\n", hashDir, projectPath) } } - for _, hashDir := range legacy { - _, _ = fmt.Fprintf(stderr, "Removed %s\n", hashDir) + for _, dirs := range [][]string{noMeta, unreadable} { + for _, hashDir := range dirs { + _, _ = fmt.Fprintf(stderr, "Removed %s\n", hashDir) + } } if err := os.RemoveAll(dataDir); err != nil { @@ -159,7 +182,7 @@ func purgeAll(stderr io.Writer) error { } func purgeProjects(stderr, stdout io.Writer, args []string) error { - indexMap, _, err := scanIndexes(lumenDataDir()) + indexMap, _, _, err := scanIndexes(lumenDataDir()) if err != nil { return err } @@ -178,7 +201,7 @@ func purgeProjects(stderr, stdout io.Writer, args []string) error { } func purgeMissing(stderr, stdout io.Writer, dryRun bool) error { - indexMap, _, err := scanIndexes(lumenDataDir()) + indexMap, _, unreadable, err := scanIndexes(lumenDataDir()) if err != nil { return err } @@ -188,6 +211,16 @@ func purgeMissing(stderr, stdout io.Writer, dryRun bool) error { verb = "Would remove" } + remove := func(hashDir, reason string) error { + if !dryRun { + if err := os.RemoveAll(hashDir); err != nil { + return fmt.Errorf("remove %s: %w", hashDir, err) + } + } + _, _ = fmt.Fprintf(stderr, "%s %s (%s)\n", verb, hashDir, reason) + return nil + } + removed := 0 for projectPath, hashDirs := range indexMap { if _, statErr := os.Stat(projectPath); statErr == nil { @@ -198,33 +231,74 @@ func purgeMissing(stderr, stdout io.Writer, dryRun bool) error { continue } for _, hashDir := range hashDirs { - if !dryRun { - if err := os.RemoveAll(hashDir); err != nil { - return fmt.Errorf("remove %s: %w", hashDir, err) - } + if err := remove(hashDir, projectPath); err != nil { + return err } - _, _ = fmt.Fprintf(stderr, "%s %s (%s)\n", verb, hashDir, projectPath) removed++ } } - _, _ = fmt.Fprintf(stdout, "%s %d index director%s 
whose folder no longer exists.\n", verb, removed, pluralY(removed)) + // Unreadable/corrupt index dirs have no folder mapping and can never be + // served or rebuilt in place, so --missing clears them too. + for _, hashDir := range unreadable { + if err := remove(hashDir, "unreadable"); err != nil { + return err + } + removed++ + } + + _, _ = fmt.Fprintf(stdout, "%s %d index director%s (missing folders and unreadable indexes).\n", verb, removed, pluralY(removed)) return nil } -// scanIndexes walks dataDir (one level deep) and returns a map of stored -// project_path → list of hash directories for that project, plus the hash -// directories that can't be read or lack project_path metadata (legacy -// indexes). Path-based purge modes ignore the legacy slice so a single broken -// index never blocks purging of others; --all uses it to log those dirs. -func scanIndexes(dataDir string) (indexMap map[string][]string, legacy []string, err error) { +// purgeLegacy removes legacy hash directories: readable DBs that do not record +// project_path (still usable by the system, located by path hash, but invisible +// to path- and --missing-based purge) plus unreadable/corrupt directories. +// --legacy is the explicit way to clear both. 
+func purgeLegacy(stderr, stdout io.Writer) error { + _, noMeta, unreadable, err := scanIndexes(lumenDataDir()) + if err != nil { + return err + } + + removed := 0 + for _, dirs := range [][]string{noMeta, unreadable} { + for _, hashDir := range dirs { + if err := os.RemoveAll(hashDir); err != nil { + return fmt.Errorf("remove %s: %w", hashDir, err) + } + _, _ = fmt.Fprintf(stderr, "Removed %s\n", hashDir) + removed++ + } + } + + _, _ = fmt.Fprintf(stdout, "Removed %d legacy index director%s.\n", removed, pluralY(removed)) + return nil +} + +// scanIndexes walks dataDir (one level deep) and classifies each hash directory +// into three buckets: +// +// - indexMap: stored project_path → hash directories (readable DB with +// project_path metadata). +// - noMeta: readable DBs that do not record project_path (created by older +// binaries). These remain usable by the system — they are located by path +// hash, not by metadata — so path- and --missing-based purge leave them +// alone; only --all and --legacy remove them. +// - unreadable: directories whose index.db is missing or corrupt. These can +// never be served or rebuilt in place, so the whole-dataset modes (--all, +// --missing, and --legacy) clear them. +// +// Path-based purge ignores the non-indexMap buckets so a single broken index +// never blocks purging of others. 
+func scanIndexes(dataDir string) (indexMap map[string][]string, noMeta, unreadable []string, err error) { indexMap = make(map[string][]string) entries, err := os.ReadDir(dataDir) if err != nil { if os.IsNotExist(err) { - return indexMap, nil, nil + return indexMap, nil, nil, nil } - return nil, nil, fmt.Errorf("read data dir: %w", err) + return nil, nil, nil, fmt.Errorf("read data dir: %w", err) } for _, entry := range entries { if !entry.IsDir() { @@ -232,14 +306,17 @@ func scanIndexes(dataDir string) (indexMap map[string][]string, legacy []string, } hashDir := filepath.Join(dataDir, entry.Name()) dbPath := filepath.Join(hashDir, "index.db") - stored, err := store.ReadMetaAt(dbPath, "project_path") - if err != nil || stored == "" { - legacy = append(legacy, hashDir) - continue + stored, readErr := store.ReadMetaAt(dbPath, "project_path") + switch { + case readErr != nil: + unreadable = append(unreadable, hashDir) + case stored == "": + noMeta = append(noMeta, hashDir) + default: + indexMap[stored] = append(indexMap[stored], hashDir) } - indexMap[stored] = append(indexMap[stored], hashDir) } - return indexMap, legacy, nil + return indexMap, noMeta, unreadable, nil } // purgeOneTarget resolves arg to a project root and removes every hash diff --git a/cmd/purge_test.go b/cmd/purge_test.go index 778255c..b475e56 100644 --- a/cmd/purge_test.go +++ b/cmd/purge_test.go @@ -29,20 +29,46 @@ import ( "github.com/stretchr/testify/require" ) -// seedIndex creates a real SQLite index DB at the hash-named directory for -// (projectPath, model) with project_path recorded in project_meta, so that -// tests exercise the same metadata-scan code path as production. -func seedIndex(t *testing.T, projectPath, model string) string { +// seedIndexWithMeta creates a real SQLite index DB at the hash-named directory +// for (projectPath, model). 
When recordPath is true it records project_path in +// project_meta (the modern layout exercised by the metadata-scan code path); +// when false it omits it, mimicking indexes written by older binaries. +func seedIndexWithMeta(t *testing.T, projectPath, model string, recordPath bool) string { t.Helper() dbPath := config.DBPathForProject(projectPath, model) require.NoError(t, os.MkdirAll(filepath.Dir(dbPath), 0o755)) s, err := store.New(dbPath, 4) require.NoError(t, err) - require.NoError(t, s.SetMeta("project_path", projectPath)) + if recordPath { + require.NoError(t, s.SetMeta("project_path", projectPath)) + } require.NoError(t, s.Close()) return filepath.Dir(dbPath) } +// seedIndex creates an index DB that records project_path. +func seedIndex(t *testing.T, projectPath, model string) string { + t.Helper() + return seedIndexWithMeta(t, projectPath, model, true) +} + +// seedLegacyIndex creates an index DB with NO project_path metadata. Such DBs +// are fully usable by the system (located by path hash) but invisible to +// project_path-based purge, so scanIndexes classifies them as no-metadata legacy. +func seedLegacyIndex(t *testing.T, projectPath, model string) string { + t.Helper() + return seedIndexWithMeta(t, projectPath, model, false) +} + +// seedCorruptDir creates a hash directory whose index.db is missing, mimicking +// an unreadable/corrupt index. scanIndexes classifies it as unreadable. +func seedCorruptDir(t *testing.T, name string) string { + t.Helper() + dir := filepath.Join(lumenDataDir(), name) + require.NoError(t, os.MkdirAll(dir, 0o755)) + return dir +} + // runPurgeCmd invokes runPurge with the provided args and optional flag names // (each set to "true") and returns captured stdout, stderr, and the error. 
func runPurgeCmd(t *testing.T, args []string, flags ...string) (stdout, stderr string, err error) { @@ -120,15 +146,6 @@ func TestPurge_All_RemovesEverything(t *testing.T) { assert.True(t, os.IsNotExist(err), "lumen data dir should be gone, got err=%v", err) } -func TestPurge_All_WithPaths_Errors(t *testing.T) { - tmp := resolvedTempDir(t) - t.Setenv("XDG_DATA_HOME", tmp) - - _, _, err := runPurgeCmd(t, []string{"/some/path"}, flagAll) - require.Error(t, err) - assert.Contains(t, err.Error(), "--all") -} - func TestPurge_SinglePath_RemovesOnlyThatProject(t *testing.T) { tmp := resolvedTempDir(t) t.Setenv("XDG_DATA_HOME", tmp) @@ -265,78 +282,163 @@ func TestPurge_MultiplePaths_MixedHitsAndMisses(t *testing.T) { assert.Contains(t, strings.ToLower(stderrOut), "no index found", "miss should be reported") } -func TestPurge_Missing_RemovesOnlyDeletedFolders(t *testing.T) { - tmp := resolvedTempDir(t) - t.Setenv("XDG_DATA_HOME", tmp) - - gone := filepath.Join(tmp, "gone") - alive := filepath.Join(tmp, "alive") - require.NoError(t, os.MkdirAll(gone, 0o755)) - require.NoError(t, os.MkdirAll(alive, 0o755)) - - hashGone := seedIndex(t, gone, embedder.DefaultModel) - hashAlive := seedIndex(t, alive, embedder.DefaultModel) - - // Delete one project's folder. 
- require.NoError(t, os.RemoveAll(gone)) - - _, stderrOut, err := runPurgeCmd(t, nil, flagMissing) - require.NoError(t, err) - assert.Contains(t, stderrOut, hashGone, "should log the removed index dir") - - _, err = os.Stat(hashGone) - assert.True(t, os.IsNotExist(err), "index for deleted folder should be removed") - _, err = os.Stat(hashAlive) - assert.NoError(t, err, "index for existing folder should be kept") -} - -func TestPurge_Missing_AllFoldersExist_RemovesNothing(t *testing.T) { - tmp := resolvedTempDir(t) - t.Setenv("XDG_DATA_HOME", tmp) - - alive := filepath.Join(tmp, "alive") - require.NoError(t, os.MkdirAll(alive, 0o755)) - hashAlive := seedIndex(t, alive, embedder.DefaultModel) - - _, _, err := runPurgeCmd(t, nil, flagMissing) - require.NoError(t, err) - - _, err = os.Stat(hashAlive) - assert.NoError(t, err, "index for existing folder should be kept") -} - -func TestPurge_Missing_WithPaths_Errors(t *testing.T) { - tmp := resolvedTempDir(t) - t.Setenv("XDG_DATA_HOME", tmp) - - _, _, err := runPurgeCmd(t, []string{"/some/path"}, flagMissing) - require.Error(t, err) - assert.Contains(t, err.Error(), "--missing") -} - -func TestPurge_DryRun_Missing_DeletesNothing(t *testing.T) { - tmp := resolvedTempDir(t) - t.Setenv("XDG_DATA_HOME", tmp) - - gone := filepath.Join(tmp, "gone") - require.NoError(t, os.MkdirAll(gone, 0o755)) - hashGone := seedIndex(t, gone, embedder.DefaultModel) - require.NoError(t, os.RemoveAll(gone)) - - _, stderrOut, err := runPurgeCmd(t, nil, flagMissing, flagDryRun) - require.NoError(t, err) - assert.Contains(t, stderrOut, "Would remove") - assert.Contains(t, stderrOut, hashGone) - - _, err = os.Stat(hashGone) - assert.NoError(t, err, "dry-run must not delete the index dir") -} - -func TestPurge_DryRun_WithoutMissing_Errors(t *testing.T) { - tmp := resolvedTempDir(t) - t.Setenv("XDG_DATA_HOME", tmp) +// TestPurge_FlagBehavior consolidates the flag-validation and --missing/--legacy +// behaviors into one table-driven test. 
Each case sets up its own data dir state +// via setup (returning named paths for postCheck) and asserts error, stderr, and +// resulting filesystem state. +func TestPurge_FlagBehavior(t *testing.T) { + cases := []struct { + name string + flags []string + args []string + setup func(t *testing.T, tmp string) map[string]string + wantErr bool + wantErrContains []string + wantStderrContains string + postCheck func(t *testing.T, paths map[string]string, stderrOut string) + }{ + { + name: "all with paths errors", + flags: []string{flagAll}, + args: []string{"/some/path"}, + wantErr: true, + wantErrContains: []string{"--all"}, + }, + { + name: "all and missing errors", + flags: []string{flagAll, flagMissing}, + wantErr: true, + wantErrContains: []string{"--all", "--missing"}, + }, + { + name: "missing with paths errors", + flags: []string{flagMissing}, + args: []string{"/some/path"}, + wantErr: true, + wantErrContains: []string{"--missing"}, + }, + { + name: "dry-run without missing errors", + flags: []string{flagDryRun}, + wantErr: true, + wantErrContains: []string{"--dry-run is only valid with --missing"}, + }, + { + name: "legacy with all errors", + flags: []string{flagLegacy, flagAll}, + wantErr: true, + wantErrContains: []string{"--legacy"}, + }, + { + name: "legacy with paths errors", + flags: []string{flagLegacy}, + args: []string{"/some/path"}, + wantErr: true, + wantErrContains: []string{"--legacy"}, + }, + { + name: "missing removes deleted folders and unreadable dirs", + flags: []string{flagMissing}, + setup: func(t *testing.T, tmp string) map[string]string { + gone := filepath.Join(tmp, "gone") + alive := filepath.Join(tmp, "alive") + require.NoError(t, os.MkdirAll(gone, 0o755)) + require.NoError(t, os.MkdirAll(alive, 0o755)) + hashGone := seedIndex(t, gone, embedder.DefaultModel) + hashAlive := seedIndex(t, alive, embedder.DefaultModel) + hashCorrupt := seedCorruptDir(t, "corrupthash") + require.NoError(t, os.RemoveAll(gone)) // delete one project's folder + 
return map[string]string{"gone": hashGone, "alive": hashAlive, "corrupt": hashCorrupt} + }, + postCheck: func(t *testing.T, paths map[string]string, stderrOut string) { + assert.Contains(t, stderrOut, paths["gone"], "should log the removed index dir") + _, err := os.Stat(paths["gone"]) + assert.True(t, os.IsNotExist(err), "index for deleted folder should be removed") + _, err = os.Stat(paths["corrupt"]) + assert.True(t, os.IsNotExist(err), "unreadable index dir should be removed") + _, err = os.Stat(paths["alive"]) + assert.NoError(t, err, "index for existing folder should be kept") + }, + }, + { + name: "missing keeps existing folders", + flags: []string{flagMissing}, + setup: func(t *testing.T, tmp string) map[string]string { + alive := filepath.Join(tmp, "alive") + require.NoError(t, os.MkdirAll(alive, 0o755)) + return map[string]string{"alive": seedIndex(t, alive, embedder.DefaultModel)} + }, + postCheck: func(t *testing.T, paths map[string]string, stderrOut string) { + _, err := os.Stat(paths["alive"]) + assert.NoError(t, err, "index for existing folder should be kept") + }, + }, + { + name: "dry-run missing deletes nothing", + flags: []string{flagMissing, flagDryRun}, + wantStderrContains: "Would remove", + setup: func(t *testing.T, tmp string) map[string]string { + gone := filepath.Join(tmp, "gone") + require.NoError(t, os.MkdirAll(gone, 0o755)) + hashGone := seedIndex(t, gone, embedder.DefaultModel) + require.NoError(t, os.RemoveAll(gone)) + return map[string]string{"gone": hashGone} + }, + postCheck: func(t *testing.T, paths map[string]string, stderrOut string) { + assert.Contains(t, stderrOut, paths["gone"]) + _, err := os.Stat(paths["gone"]) + assert.NoError(t, err, "dry-run must not delete the index dir") + }, + }, + { + name: "legacy removes metadata-less and unreadable indexes", + flags: []string{flagLegacy}, + setup: func(t *testing.T, tmp string) map[string]string { + alive := filepath.Join(tmp, "alive") + require.NoError(t, os.MkdirAll(alive, 
0o755)) + hashAlive := seedIndex(t, alive, embedder.DefaultModel) + hashLegacy := seedLegacyIndex(t, filepath.Join(tmp, "legacyproj"), embedder.DefaultModel) + hashCorrupt := seedCorruptDir(t, "corrupthash") + return map[string]string{"alive": hashAlive, "legacy": hashLegacy, "corrupt": hashCorrupt} + }, + postCheck: func(t *testing.T, paths map[string]string, stderrOut string) { + assert.Contains(t, stderrOut, paths["legacy"], "should log the removed legacy dir") + _, err := os.Stat(paths["legacy"]) + assert.True(t, os.IsNotExist(err), "legacy (no-metadata) index dir should be removed") + _, err = os.Stat(paths["corrupt"]) + assert.True(t, os.IsNotExist(err), "unreadable index dir should be removed") + _, err = os.Stat(paths["alive"]) + assert.NoError(t, err, "index with project_path metadata should be kept") + }, + }, + } - _, _, err := runPurgeCmd(t, nil, flagDryRun) - require.Error(t, err) - assert.Contains(t, err.Error(), "--dry-run is only valid with --missing") + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + tmp := resolvedTempDir(t) + t.Setenv("XDG_DATA_HOME", tmp) + + var paths map[string]string + if tc.setup != nil { + paths = tc.setup(t, tmp) + } + + _, stderrOut, err := runPurgeCmd(t, tc.args, tc.flags...) + + if tc.wantErr { + require.Error(t, err) + for _, want := range tc.wantErrContains { + assert.Contains(t, err.Error(), want) + } + return + } + require.NoError(t, err) + if tc.wantStderrContains != "" { + assert.Contains(t, stderrOut, tc.wantStderrContains) + } + if tc.postCheck != nil { + tc.postCheck(t, paths, stderrOut) + } + }) + } }