From e146428a0dbc0cbf57c7110fd899e87eddf2f717 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 19 Jun 2026 07:26:33 -0400 Subject: [PATCH] feat: add durable git share snapshots --- CHANGELOG.md | 2 + README.md | 9 +- SPEC.md | 2 + go.mod | 10 +- go.sum | 32 +++--- internal/cli/app.go | 37 +++++- internal/cli/app_test.go | 13 ++- internal/share/share.go | 217 +++++++++++++++++------------------ internal/share/share_test.go | 81 +++++++++++-- internal/store/store.go | 12 +- 10 files changed, 256 insertions(+), 159 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 724aab8..2d9d54a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Maintenance +- Added immutable Git-share snapshot tags and non-mutating historical restores with `update --ref`, using CrawlKit for shared Git history mechanics. +- Moved FTS5 query escaping onto CrawlKit and refreshed Go dependencies. - Updated crawlkit through 0.12.2 for shared runtime hardening, SQLite 1.52, and absolute Windows database paths. 
## 0.7.2 - 2026-06-10 diff --git a/README.md b/README.md index 64ab65a..77bf4c2 100644 --- a/README.md +++ b/README.md @@ -190,7 +190,7 @@ Choose the path that matches your setup: - `report` summarizes archive activity and git-share freshness without writing SQL - `publish` exports the local SQLite archive into a git repo as compressed JSONL shards plus a manifest - `subscribe` configures a git-backed reader that can run without Slack credentials -- `update` pulls and imports the latest git snapshot +- `update` pulls and imports the latest git snapshot, or restores a historical tag/ref without moving the share checkout - `sync` performs a one-shot crawl from bot/API, MCP connector, wiretap/desktop, or both - `import` imports a Slack export ZIP or extracted export directory - `purge` previews or deletes messages and message-owned records older than a cutoff @@ -380,10 +380,12 @@ stale_after = "15m" Behavior: - `publish` writes gzipped JSONL shards plus `manifest.json` into `repo_path` +- `publish --tag <name>` attaches an immutable lightweight tag to the committed snapshot - cached non-DM/non-private file media is included by default; use `--no-media` to omit it - `subscribe` writes a git-reader config, disables Slack API and desktop sources for that config, clones the repo, and imports the snapshot - pass `--db` to `subscribe` when you want the reader archive to land in a non-default SQLite path - `update` pulls and re-imports only when the manifest changes +- `update --ref <ref>` imports that historical snapshot without checking it out - `status`, `search`, `messages`, `mentions`, `sql`, `users`, `channels`, and `report` auto-refresh stale git snapshots before reading when `auto_update = true` - `sync --source bot` and `sync --source all` warm from the git snapshot before hitting Slack when a share remote is configured - `status` and `doctor` surface the current git-share repo, last import time, and whether the local snapshot is stale @@ -395,6 +397,7 @@ Behavior: ```bash go 
run ./cmd/slacrawl publish --remote /path/to/private/slacrawl-archive.git --push go run ./cmd/slacrawl publish --repo ~/.slacrawl/share --branch main --message "archive: daily refresh" --push +go run ./cmd/slacrawl publish --tag backup-2026-06-19 --push ``` Relevant flags: @@ -403,6 +406,7 @@ Relevant flags: - `--remote` sets or overrides the git remote used for publish - `--branch` chooses the target branch - `--message` sets the git commit message +- `--tag` creates an immutable snapshot tag and requires a commit - `--no-commit` exports files without creating a git commit - `--push` pushes the new commit to `origin` - `--no-media` omits cached media files from the snapshot @@ -435,8 +439,11 @@ Relevant flags: ```bash go run ./cmd/slacrawl update go run ./cmd/slacrawl update --repo ~/.slacrawl/share --branch main +go run ./cmd/slacrawl update --ref backup-2026-06-19 ``` +`--ref` accepts a tag, branch, or commit. Historical imports read Git objects directly and leave the share repo's current branch and working tree unchanged. + ### `report` `report` is the fastest human-readable archive summary and is especially handy in git-share mode because it shows the current archive footprint plus share freshness. 
diff --git a/SPEC.md b/SPEC.md index 2b60fd2..5a41baf 100644 --- a/SPEC.md +++ b/SPEC.md @@ -312,6 +312,8 @@ Share config: - `[share].repo_path` is the local clone / working repo path used for publish and update - `[share].branch` defaults to `main` - `[share].auto_update` controls whether read commands import stale git snapshots before querying +- `publish --tag <name>` creates an immutable tag for a committed snapshot +- `update --ref <ref>` restores a historical snapshot without changing the share checkout - `[share].stale_after` defines how old the last successful import can be before auto-refresh runs - share sync state should record both the last successful import time and the last imported manifest generation time diff --git a/go.mod b/go.mod index 2c685f7..89ac5e5 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.26.4 require ( github.com/alecthomas/kong v1.15.0 github.com/golang/snappy v1.0.0 - github.com/openclaw/crawlkit v0.12.2 + github.com/openclaw/crawlkit v0.12.3-0.20260619112528-82bf1826da3f github.com/slack-go/slack v0.26.0 github.com/stretchr/testify v1.11.1 github.com/syndtr/goleveldb v1.0.0 @@ -17,7 +17,7 @@ require ( github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/charmbracelet/bubbles v1.0.0 // indirect github.com/charmbracelet/bubbletea v1.3.10 // indirect - github.com/charmbracelet/colorprofile v0.4.1 // indirect + github.com/charmbracelet/colorprofile v0.4.3 // indirect github.com/charmbracelet/lipgloss v1.1.0 // indirect github.com/charmbracelet/x/ansi v0.11.7 // indirect github.com/charmbracelet/x/cellbuf v0.0.15 // indirect @@ -32,19 +32,19 @@ require ( github.com/lucasb-eyer/go-colorful v1.4.0 // indirect github.com/mattn/go-isatty v0.0.22 // indirect github.com/mattn/go-localereader v0.0.1 // indirect - github.com/mattn/go-runewidth v0.0.23 // indirect + github.com/mattn/go-runewidth v0.0.24 // indirect github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect github.com/muesli/cancelreader v0.2.2 // indirect 
github.com/muesli/termenv v0.16.0 // indirect github.com/ncruces/go-strftime v1.0.0 // indirect - github.com/pelletier/go-toml/v2 v2.3.1 // indirect + github.com/pelletier/go-toml/v2 v2.4.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect golang.org/x/net v0.53.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - modernc.org/libc v1.72.3 // indirect + modernc.org/libc v1.73.4 // indirect modernc.org/mathutil v1.7.1 // indirect modernc.org/memory v1.11.0 // indirect modernc.org/sqlite v1.52.0 // indirect diff --git a/go.sum b/go.sum index fc26032..25d99ea 100644 --- a/go.sum +++ b/go.sum @@ -10,8 +10,8 @@ github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5f github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E= github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw= github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4= -github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk= -github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk= +github.com/charmbracelet/colorprofile v0.4.3 h1:QPa1IWkYI+AOB+fE+mg/5/4HRMZcaXex9t5KX76i20Q= +github.com/charmbracelet/colorprofile v0.4.3/go.mod h1:/zT4BhpD5aGFpqQQqw7a+VtHCzu+zrQtt1zhMt9mR4Q= github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY= github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30= github.com/charmbracelet/x/ansi v0.11.7 h1:kzv1kJvjg2S3r9KHo8hDdHFQLEqn4RBCb39dAYC84jI= @@ -55,8 +55,8 @@ github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw github.com/mattn/go-isatty v0.0.22/go.mod 
h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4= github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= -github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3RybWcw= -github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= +github.com/mattn/go-runewidth v0.0.24 h1:cpokDiIn0MGnhdHwuWnJBITySJ20QyNGnY2kR/ay2DU= +github.com/mattn/go-runewidth v0.0.24/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI= github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo= github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= @@ -70,10 +70,10 @@ github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/openclaw/crawlkit v0.12.2 h1:KivYMOHfemLG9LrfKKI8A/FTDJpdFJyeOreCGbKCsXA= -github.com/openclaw/crawlkit v0.12.2/go.mod h1:+Z9vrCgH8BJ/+3MMoMfnDyhXC9ON7bEDduGvp5TmmuM= -github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc= -github.com/pelletier/go-toml/v2 v2.3.1/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/openclaw/crawlkit v0.12.3-0.20260619112528-82bf1826da3f h1:U3pEzAcN0SZK++4A/UgbdUX3X+iAPj/r+/CdqE6jLks= +github.com/openclaw/crawlkit v0.12.3-0.20260619112528-82bf1826da3f/go.mod h1:zOJv5WPWO1AuuXO7zW8NRTxb/ZTkIQXYPrx3StmnMUI= +github.com/pelletier/go-toml/v2 v2.4.0 h1:Mwu0mAkUKbittDs3/ADDWXqMmq3EOK2VHiuCkV00Row= +github.com/pelletier/go-toml/v2 v2.4.0/go.mod 
h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= @@ -117,20 +117,20 @@ gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -modernc.org/cc/v4 v4.28.2 h1:3tQ0lf2ADtoby2EtSP+J7IE2SHwEJdP8ioR59wx7XpY= -modernc.org/cc/v4 v4.28.2/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI= -modernc.org/ccgo/v4 v4.34.0 h1:yRLPFZieg532OT4rp4JFNIVcquwalMX26G95WQDqwCQ= -modernc.org/ccgo/v4 v4.34.0/go.mod h1:AS5WYMyBakQ+fhsHhtP8mWB82KTGPkNNJDGfGQCe0/A= +modernc.org/cc/v4 v4.28.4 h1:Hd/4Es+MBj+/7hSdZaisNyu6bv3V0Dp2MdllyfqaH+c= +modernc.org/cc/v4 v4.28.4/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI= +modernc.org/ccgo/v4 v4.34.4 h1:OVnSOWQjVKOYkFxoHYB+qQmSHK5gqMqARM+K9DpR/Ws= +modernc.org/ccgo/v4 v4.34.4/go.mod h1:qdKqE8FNIYyysougB1RX9MxCzp5oJOcQXSobANJ4TuE= modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM= modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU= modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito= -modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo= -modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= +modernc.org/gc/v3 v3.1.3 h1:6QAplYyVO+KdPW3pGnqmJDUxtkec8ooEWvks/hhU3lc= +modernc.org/gc/v3 v3.1.3/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks= 
modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI= -modernc.org/libc v1.72.3 h1:ZnDF4tXn4NBXFutMMQC4vtbTFSXhhKzR73fv0beZEAU= -modernc.org/libc v1.72.3/go.mod h1:dn0dZNnnn1clLyvRxLxYExxiKRZIRENOfqQ8XEeg4Qs= +modernc.org/libc v1.73.4 h1:+ra4Ui8ngyt8HDcO1FTDPWlkAh6yOdaO2yAoh8MddQA= +modernc.org/libc v1.73.4/go.mod h1:DXZ3eO8qMCNn2SnmTNCiC71nJ9Rcq3PsnpU6Vc4rWK8= modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= diff --git a/internal/cli/app.go b/internal/cli/app.go index fbeb761..80f0924 100644 --- a/internal/cli/app.go +++ b/internal/cli/app.go @@ -1839,6 +1839,7 @@ func (a *App) runPublish(ctx context.Context, configPath string, args []string, remote := fs.String("remote", cfg.Share.Remote, "git remote") branch := fs.String("branch", cfg.Share.Branch, "git branch") message := fs.String("message", "", "commit message") + tag := fs.String("tag", "", "immutable snapshot tag") noCommit := fs.Bool("no-commit", false, "skip git commit") push := fs.Bool("push", false, "push to origin") noMedia := fs.Bool("no-media", !cfg.ShareMediaEnabled(), "omit cached media files") @@ -1848,6 +1849,9 @@ func (a *App) runPublish(ctx context.Context, configPath string, args []string, if fs.NArg() != 0 { return errors.New("publish takes no positional arguments") } + if *noCommit && strings.TrimSpace(*tag) != "" { + return errors.New("publish --tag requires a commit") + } st, err := a.openStore(cfg) if err != nil { return err @@ -1858,6 +1862,10 @@ func (a *App) runPublish(ctx context.Context, configPath string, args []string, if err != nil { return err } + opts.Tag = strings.TrimSpace(*tag) + if err := share.ValidateTag(ctx, opts); err != nil { + return err + } manifest, err := share.Export(ctx, st, opts) if err != nil { return err @@ -1869,6 +1877,10 @@ func (a *App) 
runPublish(ctx context.Context, configPath string, args []string, return err } } + createdTag, err := share.CreateImmutableTag(ctx, opts) + if err != nil { + return err + } if *push { if err := share.Push(ctx, opts); err != nil { return err @@ -1884,6 +1896,7 @@ func (a *App) runPublish(ctx context.Context, configPath string, args []string, "tables": manifest.Tables, "media": manifest.Media, "committed": committed, + "tag": createdTag, "pushed": *push, }, format, true) } @@ -1977,6 +1990,7 @@ func (a *App) runUpdate(ctx context.Context, configPath string, args []string, f repoPath := fs.String("repo", cfg.Share.RepoPath, "local clone path") remote := fs.String("remote", cfg.Share.Remote, "git remote") branch := fs.String("branch", cfg.Share.Branch, "git branch") + ref := fs.String("ref", "", "historical git ref to import") noMedia := fs.Bool("no-media", !cfg.ShareMediaEnabled(), "skip restoring cached media") if err := fs.Parse(args); err != nil { return err @@ -1993,12 +2007,22 @@ func (a *App) runUpdate(ctx context.Context, configPath string, args []string, f if err != nil { return err } - if err := share.Pull(ctx, opts); err != nil { - return err - } - manifest, imported, err := share.ImportIfChanged(ctx, st, opts) - if err != nil { - return err + var manifest share.Manifest + var imported bool + if strings.TrimSpace(*ref) == "" { + if err := share.Pull(ctx, opts); err != nil { + return err + } + manifest, imported, err = share.ImportIfChanged(ctx, st, opts) + if err != nil { + return err + } + } else { + manifest, err = share.ImportAt(ctx, st, opts, *ref) + if err != nil { + return err + } + imported = true } return a.writeOutput("Update", map[string]any{ "repo_path": opts.RepoPath, @@ -2007,6 +2031,7 @@ func (a *App) runUpdate(ctx context.Context, configPath string, args []string, f "tables": manifest.Tables, "media": manifest.Media, "imported": imported, + "ref": strings.TrimSpace(*ref), }, format, true) } diff --git a/internal/cli/app_test.go 
b/internal/cli/app_test.go index eb8abff..0ead899 100644 --- a/internal/cli/app_test.go +++ b/internal/cli/app_test.go @@ -611,7 +611,12 @@ func TestPublishSubscribeAndSearchGitArchive(t *testing.T) { var stdout bytes.Buffer app := &App{Stdout: &stdout, Stderr: &stdout} - require.NoError(t, app.Run(ctx, []string{"--config", publisherCfgPath, "--json", "publish", "--push"})) + require.NoError(t, app.Run(ctx, []string{"--config", publisherCfgPath, "--json", "publish", "--tag", "test-snapshot", "--push"})) + var publish map[string]any + require.NoError(t, json.Unmarshal(stdout.Bytes(), &publish)) + require.Equal(t, "test-snapshot", publish["tag"]) + require.Equal(t, true, publish["pushed"]) + require.ErrorContains(t, app.Run(ctx, []string{"--config", publisherCfgPath, "publish", "--tag", "invalid", "--no-commit"}), "requires a commit") readerCfgPath := filepath.Join(dir, "reader.toml") stdout.Reset() @@ -633,6 +638,12 @@ func TestPublishSubscribeAndSearchGitArchive(t *testing.T) { require.NoError(t, json.Unmarshal(stdout.Bytes(), &rows)) require.Len(t, rows, 1) require.Equal(t, "archive seed message", rows[0]["text"]) + + stdout.Reset() + require.NoError(t, app.Run(ctx, []string{"--config", readerCfgPath, "--json", "update", "--ref", "test-snapshot"})) + var update map[string]any + require.NoError(t, json.Unmarshal(stdout.Bytes(), &update)) + require.Equal(t, "test-snapshot", update["ref"]) } func TestSubscribePersistsNoMedia(t *testing.T) { diff --git a/internal/share/share.go b/internal/share/share.go index a9d7c9e..651c906 100644 --- a/internal/share/share.go +++ b/internal/share/share.go @@ -11,12 +11,13 @@ import ( "fmt" "io" "os" - "os/exec" + "path" "path/filepath" "strconv" "strings" "time" + "github.com/openclaw/crawlkit/mirror" "github.com/openclaw/slacrawl/internal/media" "github.com/openclaw/slacrawl/internal/store" ) @@ -55,6 +56,7 @@ type Options struct { RepoPath string Remote string Branch string + Tag string CacheDir string IncludeMedia bool } @@ 
-93,114 +95,47 @@ type SyncState struct { } func EnsureRepo(ctx context.Context, opts Options) error { - if strings.TrimSpace(opts.RepoPath) == "" { - return errors.New("share repo path is empty") - } - if _, err := os.Stat(filepath.Join(opts.RepoPath, ".git")); err == nil { - return nil - } - if strings.TrimSpace(opts.Remote) != "" { - if err := os.MkdirAll(filepath.Dir(opts.RepoPath), 0o750); err != nil { - return fmt.Errorf("mkdir share parent: %w", err) - } - if err := gitRun(ctx, "", "clone", opts.Remote, opts.RepoPath); err != nil { - return err - } - if branch := normalizeBranch(opts.Branch); branch != "" { - remoteRef := "refs/remotes/origin/" + branch - if _, err := gitOutput(ctx, opts.RepoPath, "rev-parse", "--verify", remoteRef); err == nil { - return gitRun(ctx, opts.RepoPath, "checkout", "-B", branch, "origin/"+branch) - } - if err := gitRun(ctx, opts.RepoPath, "checkout", "-B", branch); err != nil { - return err - } - } - return nil - } - if err := os.MkdirAll(opts.RepoPath, 0o750); err != nil { - return fmt.Errorf("mkdir share repo: %w", err) - } - if err := gitRun(ctx, opts.RepoPath, "init"); err != nil { - return err - } - if branch := normalizeBranch(opts.Branch); branch != "" { - if err := gitRun(ctx, opts.RepoPath, "checkout", "-B", branch); err != nil { - return err - } - } - return nil + return mirror.EnsureRepo(ctx, mirrorOptions(opts)) } func Pull(ctx context.Context, opts Options) error { if strings.TrimSpace(opts.Remote) == "" { return EnsureRepo(ctx, opts) } - if err := EnsureRepo(ctx, opts); err != nil { - return err - } - if err := gitRun(ctx, opts.RepoPath, "fetch", "--prune", "origin"); err != nil { + if err := mirror.EnsureRemote(ctx, mirrorOptions(opts)); err != nil { return err } - branch := normalizeBranch(opts.Branch) - remoteRef := "refs/remotes/origin/" + branch - _, remoteErr := gitOutput(ctx, opts.RepoPath, "rev-parse", "--verify", remoteRef) - _, localErr := gitOutput(ctx, opts.RepoPath, "rev-parse", "--verify", 
"refs/heads/"+branch) - if localErr == nil { - if err := gitRun(ctx, opts.RepoPath, "checkout", branch); err != nil { - return err - } - if remoteErr != nil { - return nil - } - if err := gitRun(ctx, opts.RepoPath, "merge", "--ff-only", "origin/"+branch); err != nil { - return fmt.Errorf("fast-forward %s from origin/%s: %w", branch, branch, err) - } - return nil - } - if remoteErr != nil { - return gitRun(ctx, opts.RepoPath, "checkout", "-B", branch) - } - return gitRun(ctx, opts.RepoPath, "checkout", "-B", branch, "origin/"+branch) + pullOpts := mirrorOptions(opts) + pullOpts.Remote = "" + return mirror.PullCurrent(ctx, pullOpts) } func Commit(ctx context.Context, opts Options, message string) (bool, error) { - if err := gitRun(ctx, opts.RepoPath, "add", "."); err != nil { - return false, err - } - out, err := gitOutput(ctx, opts.RepoPath, "status", "--porcelain") - if err != nil { - return false, err - } - if strings.TrimSpace(out) == "" { - return false, nil - } if strings.TrimSpace(message) == "" { message = "sync: slack archive" } - if err := gitRun(ctx, opts.RepoPath, - "-c", "commit.gpgsign=false", - "-c", "user.name=slacrawl", - "-c", "user.email=slacrawl@example.invalid", - "commit", "-m", message, - ); err != nil { - return false, err - } - return true, nil + return mirror.CommitPaths(ctx, mirrorOptions(opts), message, []string{"."}) } func Push(ctx context.Context, opts Options) error { - branch := normalizeBranch(opts.Branch) - out, err := gitOutput(ctx, opts.RepoPath, "push", "-u", "origin", branch) - if err == nil { - return nil + if strings.TrimSpace(opts.Tag) == "" { + return mirror.Push(ctx, mirrorOptions(opts)) } - if !isNonFastForwardPush(out) { - return fmt.Errorf("git push -u origin %s: %w\n%s", branch, err, strings.TrimSpace(out)) + return mirror.PushAtomic(ctx, mirrorOptions(opts), "HEAD:refs/heads/"+normalizeBranch(opts.Branch), "refs/tags/"+strings.TrimSpace(opts.Tag)) +} + +func ValidateTag(ctx context.Context, opts Options) error { + if 
strings.TrimSpace(opts.Tag) == "" { + return nil } - if pullErr := gitRun(ctx, opts.RepoPath, "pull", "--rebase", "--autostash", "origin", branch); pullErr != nil { - return fmt.Errorf("rebase before push retry: %w", pullErr) + if err := Pull(ctx, opts); err != nil { + return err } - return gitRun(ctx, opts.RepoPath, "push", "-u", "origin", branch) + return mirror.ValidateTag(ctx, mirrorOptions(opts), opts.Tag) +} + +func CreateImmutableTag(ctx context.Context, opts Options) (string, error) { + return mirror.CreateImmutableTag(ctx, mirrorOptions(opts), opts.Tag) } func Export(ctx context.Context, s *store.Store, opts Options) (Manifest, error) { @@ -217,7 +152,12 @@ func Export(ctx context.Context, s *store.Store, opts Options) (Manifest, error) } func exportLocked(ctx context.Context, s *store.Store, opts Options) (Manifest, error) { - if err := EnsureRepo(ctx, opts); err != nil { + if strings.TrimSpace(opts.Remote) != "" { + if err := mirror.EnsureRemote(ctx, mirrorOptions(opts)); err != nil { + return Manifest{}, err + } + } + if err := mirror.SyncForWrite(ctx, mirrorOptions(opts)); err != nil { return Manifest{}, err } dataDir := filepath.Join(opts.RepoPath, "tables") @@ -489,6 +429,10 @@ func ReadManifest(repoPath string) (Manifest, error) { } return Manifest{}, fmt.Errorf("read share manifest: %w", err) } + return parseManifest(data) +} + +func parseManifest(data []byte) (Manifest, error) { var manifest Manifest if err := json.Unmarshal(data, &manifest); err != nil { return Manifest{}, fmt.Errorf("parse share manifest: %w", err) @@ -499,6 +443,71 @@ func ReadManifest(repoPath string) (Manifest, error) { return manifest, nil } +// ImportAt restores a snapshot from a Git ref without changing the share checkout. 
+func ImportAt(ctx context.Context, s *store.Store, opts Options, ref string) (Manifest, error) { + ref = strings.TrimSpace(ref) + if ref == "" { + return Import(ctx, s, opts) + } + if err := mirror.Fetch(ctx, mirrorOptions(opts)); err != nil { + return Manifest{}, err + } + manifestBody, commit, err := mirror.ReadFileAt(ctx, mirrorOptions(opts), ref, ManifestName) + if err != nil { + return Manifest{}, err + } + manifest, err := parseManifest(manifestBody) + if err != nil { + return Manifest{}, err + } + tempDir, err := os.MkdirTemp("", "slacrawl-share-ref-*") + if err != nil { + return Manifest{}, fmt.Errorf("create historical share directory: %w", err) + } + defer func() { _ = os.RemoveAll(tempDir) }() + if err := os.WriteFile(filepath.Join(tempDir, ManifestName), manifestBody, 0o600); err != nil { + return Manifest{}, fmt.Errorf("write historical manifest: %w", err) + } + for _, table := range manifest.Tables { + for _, file := range tableManifestFiles(table) { + if err := materializeRefFile(ctx, mirrorOptions(opts), commit, file, tempDir); err != nil { + return Manifest{}, err + } + } + } + if opts.IncludeMedia && manifest.Media != nil { + for _, item := range manifest.Media.Items { + if err := materializeRefFile(ctx, mirrorOptions(opts), commit, item.Path, tempDir); err != nil { + return Manifest{}, err + } + } + } + historicalOpts := opts + historicalOpts.RepoPath = tempDir + historicalOpts.Remote = "" + historicalOpts.Tag = "" + return Import(ctx, s, historicalOpts) +} + +func materializeRefFile(ctx context.Context, opts mirror.Options, ref, filePath, targetRoot string) error { + clean := path.Clean(filepath.ToSlash(strings.TrimSpace(filePath))) + if clean == "." || clean == ".." 
|| path.IsAbs(clean) || strings.HasPrefix(clean, "../") || strings.ContainsRune(clean, '\x00') { + return fmt.Errorf("invalid historical share path %q", filePath) + } + body, _, err := mirror.ReadFileAt(ctx, opts, ref, clean) + if err != nil { + return err + } + target := filepath.Join(targetRoot, filepath.FromSlash(clean)) + if err := os.MkdirAll(filepath.Dir(target), 0o750); err != nil { + return fmt.Errorf("create historical share directory: %w", err) + } + if err := os.WriteFile(target, body, 0o600); err != nil { + return fmt.Errorf("write historical share file %s: %w", clean, err) + } + return nil +} + func exportTable(ctx context.Context, db *sql.DB, dataDir, table string) (TableManifest, error) { rows, err := db.QueryContext(ctx, "select * from "+quoteIdent(table)) //nolint:gosec // Table names are emitted through quoteIdent from export metadata. if err != nil { @@ -1194,29 +1203,13 @@ func normalizeBranch(branch string) string { return strings.TrimSpace(branch) } -func gitRun(ctx context.Context, dir string, args ...string) error { - out, err := gitOutput(ctx, dir, args...) - if err != nil { - return fmt.Errorf("git %s: %w\n%s", strings.Join(args, " "), err, strings.TrimSpace(out)) - } - return nil -} - -func gitOutput(ctx context.Context, dir string, args ...string) (string, error) { - //nolint:gosec // This helper only invokes git with caller-controlled subcommands. - cmd := exec.CommandContext(ctx, "git", args...) 
- if dir != "" { - cmd.Dir = dir +func mirrorOptions(opts Options) mirror.Options { + return mirror.Options{ + RepoPath: strings.TrimSpace(opts.RepoPath), + Remote: strings.TrimSpace(opts.Remote), + Branch: normalizeBranch(opts.Branch), + DirMode: 0o750, } - body, err := cmd.CombinedOutput() - return string(body), err -} - -func isNonFastForwardPush(out string) bool { - lower := strings.ToLower(out) - return strings.Contains(lower, "non-fast-forward") || - strings.Contains(lower, "fetch first") || - strings.Contains(lower, "failed to push some refs") } func parseSyncTime(raw string) time.Time { diff --git a/internal/share/share_test.go b/internal/share/share_test.go index c2fa04b..c7af599 100644 --- a/internal/share/share_test.go +++ b/internal/share/share_test.go @@ -5,7 +5,9 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" + "fmt" "os" + "os/exec" "path/filepath" "strings" "testing" @@ -149,23 +151,23 @@ func TestPullPreservesLocalCommitsAheadOfOrigin(t *testing.T) { remoteRepo := filepath.Join(dir, "remote.git") shareRepo := filepath.Join(dir, "share") - require.NoError(t, gitRun(ctx, "", "init", "-b", "main", remoteWork)) + require.NoError(t, testGitRun(ctx, "", "init", "-b", "main", remoteWork)) require.NoError(t, os.WriteFile(filepath.Join(remoteWork, "manifest.json"), []byte("{}\n"), 0o600)) testGitCommit(t, ctx, remoteWork, "seed") - require.NoError(t, gitRun(ctx, "", "clone", "--bare", remoteWork, remoteRepo)) + require.NoError(t, testGitRun(ctx, "", "clone", "--bare", remoteWork, remoteRepo)) opts := Options{RepoPath: shareRepo, Remote: remoteRepo, Branch: "main"} require.NoError(t, Pull(ctx, opts)) require.NoError(t, os.WriteFile(filepath.Join(shareRepo, "local.txt"), []byte("local\n"), 0o600)) testGitCommit(t, ctx, shareRepo, "local") - localHead, err := gitOutput(ctx, shareRepo, "rev-parse", "HEAD") + localHead, err := testGitOutput(ctx, shareRepo, "rev-parse", "HEAD") require.NoError(t, err) - originHead, err := gitOutput(ctx, shareRepo, 
"rev-parse", "origin/main") + originHead, err := testGitOutput(ctx, shareRepo, "rev-parse", "origin/main") require.NoError(t, err) require.NotEqual(t, strings.TrimSpace(originHead), strings.TrimSpace(localHead)) require.NoError(t, Pull(ctx, opts)) - afterHead, err := gitOutput(ctx, shareRepo, "rev-parse", "HEAD") + afterHead, err := testGitOutput(ctx, shareRepo, "rev-parse", "HEAD") require.NoError(t, err) require.Equal(t, strings.TrimSpace(localHead), strings.TrimSpace(afterHead)) } @@ -177,13 +179,13 @@ func TestPullInitializesRequestedRemoteBranchOnClone(t *testing.T) { remoteRepo := filepath.Join(dir, "remote.git") shareRepo := filepath.Join(dir, "share") - require.NoError(t, gitRun(ctx, "", "init", "-b", "main", remoteWork)) + require.NoError(t, testGitRun(ctx, "", "init", "-b", "main", remoteWork)) require.NoError(t, os.WriteFile(filepath.Join(remoteWork, "manifest.json"), []byte("release\n"), 0o600)) testGitCommit(t, ctx, remoteWork, "release") - require.NoError(t, gitRun(ctx, remoteWork, "branch", "release")) + require.NoError(t, testGitRun(ctx, remoteWork, "branch", "release")) require.NoError(t, os.WriteFile(filepath.Join(remoteWork, "manifest.json"), []byte("main\n"), 0o600)) testGitCommit(t, ctx, remoteWork, "main") - require.NoError(t, gitRun(ctx, "", "clone", "--bare", remoteWork, remoteRepo)) + require.NoError(t, testGitRun(ctx, "", "clone", "--bare", remoteWork, remoteRepo)) opts := Options{RepoPath: shareRepo, Remote: remoteRepo, Branch: "release"} require.NoError(t, Pull(ctx, opts)) @@ -217,6 +219,48 @@ func TestImportIfChangedSkipsCurrentManifest(t *testing.T) { require.False(t, changed) } +func TestImportAtRestoresTaggedSnapshotWithoutMovingCheckout(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + source := seedStore(t, filepath.Join(dir, "source.db")) + defer func() { require.NoError(t, source.Close()) }() + + opts := Options{RepoPath: filepath.Join(dir, "share"), Branch: "main", Tag: "snapshot-old"} + _, err := Export(ctx, 
source, opts) + require.NoError(t, err) + committed, err := Commit(ctx, opts, "old snapshot") + require.NoError(t, err) + require.True(t, committed) + tag, err := CreateImmutableTag(ctx, opts) + require.NoError(t, err) + require.Equal(t, "snapshot-old", tag) + + _, err = source.DB().ExecContext(ctx, `update messages set text = 'new snapshot', normalized_text = 'new snapshot'`) + require.NoError(t, err) + opts.Tag = "" + _, err = Export(ctx, source, opts) + require.NoError(t, err) + committed, err = Commit(ctx, opts, "new snapshot") + require.NoError(t, err) + require.True(t, committed) + headBefore, err := testGitOutput(ctx, opts.RepoPath, "rev-parse", "HEAD") + require.NoError(t, err) + + reader, err := store.Open(filepath.Join(dir, "reader.db")) + require.NoError(t, err) + defer func() { require.NoError(t, reader.Close()) }() + manifest, err := ImportAt(ctx, reader, opts, "snapshot-old") + require.NoError(t, err) + require.False(t, manifest.GeneratedAt.IsZero()) + rows, err := reader.Search(ctx, "", "archive", 10) + require.NoError(t, err) + require.Len(t, rows, 1) + require.Equal(t, "git backed archive works", rows[0].Text) + headAfter, err := testGitOutput(ctx, opts.RepoPath, "rev-parse", "HEAD") + require.NoError(t, err) + require.Equal(t, strings.TrimSpace(headBefore), strings.TrimSpace(headAfter)) +} + func TestExportImportRestoresMediaFiles(t *testing.T) { ctx := context.Background() dir := t.TempDir() @@ -347,8 +391,8 @@ func assertArchiveStillPresent(t *testing.T, ctx context.Context, s *store.Store func testGitCommit(t *testing.T, ctx context.Context, repoPath string, message string) { t.Helper() - require.NoError(t, gitRun(ctx, repoPath, "add", ".")) - require.NoError(t, gitRun(ctx, repoPath, + require.NoError(t, testGitRun(ctx, repoPath, "add", ".")) + require.NoError(t, testGitRun(ctx, repoPath, "-c", "commit.gpgsign=false", "-c", "user.name=slacrawl-test", "-c", "user.email=slacrawl-test@example.invalid", @@ -356,6 +400,23 @@ func testGitCommit(t 
*testing.T, ctx context.Context, repoPath string, message s )) } +func testGitRun(ctx context.Context, dir string, args ...string) error { + _, err := testGitOutput(ctx, dir, args...) + return err +} + +func testGitOutput(ctx context.Context, dir string, args ...string) (string, error) { + cmd := exec.CommandContext(ctx, "git", args...) + if dir != "" { + cmd.Dir = dir + } + body, err := cmd.CombinedOutput() + if err != nil { + return string(body), fmt.Errorf("git %s: %w: %s", strings.Join(args, " "), err, strings.TrimSpace(string(body))) + } + return string(body), nil +} + func seedStore(t *testing.T, path string) *store.Store { t.Helper() s, err := store.Open(path) diff --git a/internal/store/store.go b/internal/store/store.go index 24c37fa..fd978ce 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -1318,7 +1318,7 @@ func (s *Store) SearchMessages(ctx context.Context, opts SearchOptions) ([]Messa case SearchModeRawFTS: return s.searchFTS(ctx, opts.WorkspaceID, query, opts.Limit) case SearchModePhrase: - return s.searchFTS(ctx, opts.WorkspaceID, quoteFTS5Phrase(query), opts.Limit) + return s.searchFTS(ctx, opts.WorkspaceID, crawlstore.FTS5Phrase(query), opts.Limit) case SearchModeTerms: return s.searchFTS(ctx, opts.WorkspaceID, termsFTS5Query(query), opts.Limit) case SearchModeAuto: @@ -1329,7 +1329,7 @@ func (s *Store) SearchMessages(ctx context.Context, opts SearchOptions) ([]Messa } func (s *Store) searchAuto(ctx context.Context, workspaceID string, query string, limit int) ([]MessageRow, error) { - candidates := []string{quoteFTS5Phrase(query)} + candidates := []string{crawlstore.FTS5Phrase(query)} if terms := termsFTS5Query(query); terms != "" && terms != candidates[0] { candidates = append(candidates, terms) } @@ -1400,18 +1400,14 @@ limit ? 
return out, s.resolveMessageRowMentions(ctx, out) } -func quoteFTS5Phrase(query string) string { - return `"` + strings.ReplaceAll(strings.TrimSpace(query), `"`, `""`) + `"` -} - func termsFTS5Query(query string) string { terms := searchTerms(query) if len(terms) == 0 { - return quoteFTS5Phrase(query) + return crawlstore.FTS5Phrase(query) } quoted := make([]string, 0, len(terms)) for _, term := range terms { - quoted = append(quoted, quoteFTS5Phrase(term)) + quoted = append(quoted, crawlstore.FTS5Phrase(term)) } return strings.Join(quoted, " AND ") }