diff --git a/CLAUDE.md b/CLAUDE.md index 769f4c8e6..4b3e3d0a4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -437,6 +437,7 @@ The manual-commit strategy (`manual_commit*.go`) does not modify the active bran - **Shadow branch migration** - if user does stash/pull/rebase (HEAD changes without commit), shadow branch is automatically moved to new base commit - **Orphaned branch cleanup** - if a shadow branch exists without a corresponding session state file, it is automatically reset when a new session starts - PrePush hook can push `entire/checkpoints/v1` branch alongside user pushes +- **OPF (OpenAI Privacy Filter) runs at pre-push, not post-commit**: when `redaction.openai_privacy_filter.enabled` is true, the PrePush hook re-redacts unpushed `entire/checkpoints/v1` commits with the OPF 8th layer, builds new commits carrying an `Entire-OPF-Applied: true` trailer, and atomically updates the local v1 ref before pushing. Per-commit condensation stays on the fast 7-layer pipeline. See `strategy/manual_commit_opf_rewrite.go` and `docs/security-and-privacy.md` for the full flow, including divergence detection, bootstrap caps, and CAS-on-conflict semantics. - Safe to use on main/master since it never modifies commit history #### Key Files @@ -444,6 +445,7 @@ The manual-commit strategy (`manual_commit*.go`) does not modify the active bran - `strategy.go` - Interface definition and context structs (`StepContext`, `TaskStepContext`, `RewindPoint`, etc.) - `common.go` - Helpers for metadata extraction, tree building, rewind validation, `ListCheckpoints()` - `manual_commit*.go` - Manual-commit strategy: main impl, types, session state, condensation, rewind, git ops, logs, hook handlers (prepare-commit-msg, post-commit, post-rewrite, pre-push), reset +- `manual_commit_opf_rewrite.go` - Pre-push OPF re-redaction: walks unpushed v1 commits, runs OPF over their blobs, rebuilds commits with `Entire-OPF-Applied: true` trailer, CAS-updates the local ref. 
Sentinel error types (use `errors.As`): `V1DivergedError`, `BootstrapTooLargeError`, `V1RefMovedError`, `OPFRuntimeFailedError`. - `cleanup.go` - Cleanup discovery/deletion for shadow branches, session states, and checkpoint metadata - `session_state.go` - Package-level session state functions - `hooks.go` - Git hook installation diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index 5bf71e886..6fb79bf3b 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -221,7 +221,9 @@ type WriteCommittedOptions struct { // Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources). Transcript redact.RedactedBytes - // Prompts contains user prompts from the session + // Prompts contains the raw user prompts from the session. Run through + // redactedJoinedPrompts before persisting — the writer does this + // inside writeSessionToSubdirectory. Prompts []string // FilesTouched are files modified during the session @@ -360,7 +362,8 @@ type UpdateCommittedOptions struct { // Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources). Transcript redact.RedactedBytes - // Prompts contains all user prompts (replaces existing) + // Prompts contains the raw user prompts (replaces existing). + // See WriteCommittedOptions.Prompts. 
Prompts []string // Agent identifies the agent type (needed for transcript chunking) diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index d97220c5f..a6629b47f 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -81,7 +81,7 @@ func TestCopyMetadataDir_SkipsSymlinks(t *testing.T) { store := NewGitStore(repo) entries := make(map[string]object.TreeEntry) - err = store.copyMetadataDir(metadataDir, "checkpoint/", entries) + err = store.copyMetadataDir(context.Background(), metadataDir, "checkpoint/", entries) if err != nil { t.Fatalf("copyMetadataDir failed: %v", err) } @@ -3409,7 +3409,7 @@ func TestCopyMetadataDir_RedactsSecrets(t *testing.T) { store := NewGitStore(repo) entries := make(map[string]object.TreeEntry) - if err := store.copyMetadataDir(metadataDir, "cp/", entries); err != nil { + if err := store.copyMetadataDir(context.Background(), metadataDir, "cp/", entries); err != nil { t.Fatalf("copyMetadataDir() error = %v", err) } @@ -4410,6 +4410,45 @@ func TestCheckpointSummary_HasReview(t *testing.T) { } } +// TestRedactBlobBytes_JSONMetadata pins the .json branch of RedactBlobBytes: +// checkpoint metadata files (metadata.json) carry free-form fields like +// Summary.Intent and ReviewPrompt that previously bypassed redaction because +// the dispatcher only matched .jsonl. The PR 1236 fix extended the JSON-aware +// branch to .json. We assert via a low-entropy AWS-key shaped secret (catches +// the 7-layer pipeline) so the test stays deterministic without the OPF binary. 
+func TestRedactBlobBytes_JSONMetadata(t *testing.T) { + t.Parallel() + + meta := CommittedMetadata{ + Kind: "agent_review", + ReviewPrompt: "credential leak: key=AKIAYRWQG5EJLPZLBYNP", + Summary: &Summary{ + Intent: "leak: key=AKIAYRWQG5EJLPZLBYNP", + }, + } + b, err := json.Marshal(meta) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + got := RedactBlobBytes(context.Background(), b, "metadata.json", false) + if strings.Contains(string(got), "AKIAYRWQG5EJLPZLBYNP") { + t.Errorf("expected AWS key redacted in metadata.json blob, got %s", string(got)) + } + if !strings.Contains(string(got), "REDACTED") { + t.Errorf("expected REDACTED placeholder in metadata.json blob, got %s", string(got)) + } + // JSON structure must survive — Kind is not redactable content, so it + // should round-trip through the JSON-aware redactor. + var roundTripped map[string]any + if err := json.Unmarshal(got, &roundTripped); err != nil { + t.Errorf("redacted .json blob must remain valid JSON, got parse err %v (content: %s)", err, string(got)) + } + if roundTripped["kind"] != "agent_review" { + t.Errorf(`expected "kind":"agent_review" preserved after redaction, got %v`, roundTripped["kind"]) + } +} + // TestCheckpointSummary_HasInvestigation pins the JSON wire format for the // HasInvestigation umbrella flag on CheckpointSummary. 
Mirrors the // HasReview test: callers depend on the on-disk shape, so this asserts on diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 68f7bfb87..387598b60 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -355,7 +355,7 @@ func (s *GitStore) writeStandardCheckpointEntries(ctx context.Context, opts Writ // Copy additional metadata files from directory if specified (to session subdirectory) if opts.MetadataDir != "" { - if err := s.copyMetadataDir(opts.MetadataDir, sessionPath, entries); err != nil { + if err := s.copyMetadataDir(ctx, opts.MetadataDir, sessionPath, entries); err != nil { return fmt.Errorf("failed to copy metadata directory: %w", err) } } @@ -418,9 +418,10 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom filePaths.ContentHash = "/" + sessionPath + paths.ContentHashFileName } - // Write prompts + // Write prompts via the 7-layer pipeline. OPF runs only in the + // pre-push rewrite path (manual_commit_opf_rewrite.go). if len(opts.Prompts) > 0 { - promptContent := redact.String(JoinPrompts(opts.Prompts)) + promptContent := redactedJoinedPrompts(opts.Prompts) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return filePaths, err @@ -1521,9 +1522,9 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti } } - // Replace prompts (apply redaction as safety net) + // Replace prompts with 7-layer-redacted content. 
if len(opts.Prompts) > 0 { - promptContent := redact.String(JoinPrompts(opts.Prompts)) + promptContent := redactedJoinedPrompts(opts.Prompts) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return fmt.Errorf("failed to create prompt blob: %w", err) @@ -1843,7 +1844,7 @@ func CreateBlobFromContent(repo *git.Repository, content []byte) (plumbing.Hash, // copyMetadataDir copies all files from a directory to the checkpoint path. // Used to include additional metadata files like task checkpoints, subagent transcripts, etc. -func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[string]object.TreeEntry) error { +func (s *GitStore) copyMetadataDir(ctx context.Context, metadataDir, basePath string, entries map[string]object.TreeEntry) error { err := filepath.Walk(metadataDir, func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -1882,7 +1883,13 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str return fmt.Errorf("path traversal detected: %s", relPath) } - // Create blob from file with secrets redaction + // Create blob from file with 7-layer secrets redaction. + // Post-commit emits 7-layer-only blobs; the pre-push rewrite + // (strategy/manual_commit_opf_rewrite.go) walks the resulting + // tree, re-redacts these blobs with OPF when enabled, and + // rewrites entire/checkpoints/v1 into 8-layer commits before + // they leave the local machine. + _ = ctx // ctx not needed by the 7-layer path; kept on caller signature for future use blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath) if err != nil { return fmt.Errorf("failed to create blob for %s: %w", path, err) @@ -1904,8 +1911,13 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str return nil } -// createRedactedBlobFromFile reads a file, applies secrets redaction, and creates a git blob. 
-// JSONL files get JSONL-aware redaction; all other files get plain string redaction. +// createRedactedBlobFromFile reads a file, applies the 7-layer redaction +// pipeline, and creates a git blob. Used by committed-checkpoint writes +// at post-commit time. The OpenAI Privacy Filter is intentionally NOT +// run here — OPF lives in the pre-push rewrite path +// (strategy/manual_commit_opf_rewrite.go), which re-redacts the 7-layer +// blobs into 8-layer commits before they leave the local machine. +// JSON-shaped files (.jsonl, .json) get JSON-aware redaction via RedactBlobBytes; all other files get plain byte redaction. func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string) (plumbing.Hash, filemode.FileMode, error) { info, err := os.Stat(filePath) if err != nil { @@ -1933,16 +1945,7 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string) return hash, mode, nil } - if strings.HasSuffix(treePath, ".jsonl") { - redacted, jsonlErr := redact.JSONLBytes(content) - if jsonlErr != nil { - content = redact.Bytes(content) - } else { - content = redacted.Bytes() - } - } else { - content = redact.Bytes(content) - } + content = RedactBlobBytes(context.Background(), content, treePath, false) hash, err := CreateBlobFromContent(repo, content) if err != nil { @@ -1951,6 +1954,44 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string) return hash, mode, nil } +// RedactBlobBytes redacts a single blob's content given its tree path. +// JSON-shaped files (.jsonl or .json) get JSON-aware redaction (falling +// back to plain bytes on parse failure so regex/credential layers +// still apply); other files get plain byte redaction. When +// usePrivacyFilter is true the full 8-layer pipeline (including OPF) +// runs; otherwise the 7-layer pipeline. 
+// +// .json is handled alongside .jsonl because checkpoint metadata files +// (metadata.json, per-session metadata.json) carry free-form fields +// like Summary.Intent / Summary.Outcome / ReviewPrompt that can +// contain PII the regex layers miss. The JSON-aware redactor extracts +// string leaves and applies OPF only to those, preserving the JSON +// structure. +// +// Post-commit condensation uses false (fast path). The pre-push rewrite +// (strategy/manual_commit_opf_rewrite.go) uses true. +func RedactBlobBytes(ctx context.Context, content []byte, treePath string, usePrivacyFilter bool) []byte { + if strings.HasSuffix(treePath, ".jsonl") || strings.HasSuffix(treePath, ".json") { + var ( + redacted redact.RedactedBytes + err error + ) + if usePrivacyFilter { + redacted, err = redact.JSONLBytesWithPrivacyFilter(ctx, content) + } else { + redacted, err = redact.JSONLBytes(content) + } + if err == nil { + return redacted.Bytes() + } + // JSONL parse failed — fall through to plain bytes. + } + if usePrivacyFilter { + return redact.BytesWithPrivacyFilter(ctx, content) + } + return redact.Bytes(content) +} + // GetGitAuthorFromRepo retrieves the git user.name and user.email, // checking both the repository-local config and the global ~/.gitconfig. 
func GetGitAuthorFromRepo(repo *git.Repository) (name, email string) { diff --git a/cmd/entire/cli/checkpoint/committed_opf_trailer_test.go b/cmd/entire/cli/checkpoint/committed_opf_trailer_test.go new file mode 100644 index 000000000..b73145537 --- /dev/null +++ b/cmd/entire/cli/checkpoint/committed_opf_trailer_test.go @@ -0,0 +1,70 @@ +package checkpoint + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/testutil" + "github.com/entireio/cli/cmd/entire/cli/trailers" + "github.com/entireio/cli/redact" + "github.com/go-git/go-git/v6" + "github.com/go-git/go-git/v6/plumbing" + "github.com/go-git/go-git/v6/plumbing/object" + "github.com/stretchr/testify/require" +) + +// TestWriteCommitted_DoesNotEmitOPFAppliedTrailer is the regression guard +// for the architectural promise: standard post-commit condensation writes +// 7-layer-only blobs and MUST NOT mark them with the Entire-OPF-Applied +// trailer. The trailer is emitted exclusively by the pre-push rewrite +// path; if a future change accidentally added it to the standard writer, +// the pre-push rewrite would skip those commits (HasOPFApplied true → +// reparent-only, no actual OPF run) and ship 7-layer content as if it +// were 8-layer. This test pins down that contract. 
+func TestWriteCommitted_DoesNotEmitOPFAppliedTrailer(t *testing.T) { + t.Parallel() + + tempDir := t.TempDir() + testutil.InitRepo(t, tempDir) + repo, err := git.PlainOpen(tempDir) + require.NoError(t, err) + + wt, err := repo.Worktree() + require.NoError(t, err) + readmeFile := filepath.Join(tempDir, "README.md") + require.NoError(t, os.WriteFile(readmeFile, []byte("# Test"), 0o644)) + _, err = wt.Add("README.md") + require.NoError(t, err) + _, err = wt.Commit("Initial commit", &git.CommitOptions{ + Author: &object.Signature{Name: "Test", Email: "test@test.com"}, + }) + require.NoError(t, err) + + store := NewGitStore(repo) + cpID := id.MustCheckpointID("a1b2c3d4e5f6") + + err = store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "regression-no-opf-trailer", + Strategy: "manual-commit", + Transcript: redact.AlreadyRedacted([]byte(`{"role":"user","content":"hello"}` + "\n")), + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + require.NoError(t, err) + + // Read the latest commit message on entire/checkpoints/v1 and assert + // HasOPFApplied is false. We resolve via the ref then walk back the + // single commit the writer just produced. 
+ ref, err := repo.Reference(plumbing.NewBranchReferenceName("entire/checkpoints/v1"), true) + require.NoError(t, err, "writer should have created entire/checkpoints/v1") + commit, err := repo.CommitObject(ref.Hash()) + require.NoError(t, err) + + if trailers.HasOPFApplied(commit.Message) { + t.Errorf("standard WriteCommitted emitted Entire-OPF-Applied trailer; commit message:\n%s", commit.Message) + } +} diff --git a/cmd/entire/cli/checkpoint/prompts.go b/cmd/entire/cli/checkpoint/prompts.go index fc8d26d35..6df096ce3 100644 --- a/cmd/entire/cli/checkpoint/prompts.go +++ b/cmd/entire/cli/checkpoint/prompts.go @@ -1,6 +1,10 @@ package checkpoint -import "strings" +import ( + "strings" + + "github.com/entireio/cli/redact" +) // PromptSeparator is the canonical separator used in prompt.txt when multiple // prompts are stored in a single file. @@ -23,3 +27,10 @@ func SplitPromptContent(content string) []string { } return prompts } + +// redactedJoinedPrompts joins prompts and runs the 7-layer redaction +// pipeline. OPF runs exclusively in the pre-push rewrite (not here), +// so the writer's hot path stays predictable. 
+func redactedJoinedPrompts(prompts []string) string { + return redact.String(strings.Join(prompts, PromptSeparator)) +} diff --git a/cmd/entire/cli/checkpoint/prompts_test.go b/cmd/entire/cli/checkpoint/prompts_test.go index 4b1119625..93f4f3179 100644 --- a/cmd/entire/cli/checkpoint/prompts_test.go +++ b/cmd/entire/cli/checkpoint/prompts_test.go @@ -14,7 +14,6 @@ func TestJoinAndSplitPrompts_RoundTrip(t *testing.T) { "first line\nwith newline", "second prompt", } - joined := JoinPrompts(original) split := SplitPromptContent(joined) @@ -24,6 +23,15 @@ func TestJoinAndSplitPrompts_RoundTrip(t *testing.T) { func TestSplitPromptContent_EmptyContent(t *testing.T) { t.Parallel() - assert.Nil(t, SplitPromptContent("")) } + +// TestRedactedJoinedPrompts_AppliesSafetyNet verifies the helper joins +// prompts with the canonical separator and runs them through the 7-layer +// pipeline. OPF runs only in the pre-push rewrite path, never here. +func TestRedactedJoinedPrompts_AppliesSafetyNet(t *testing.T) { + t.Parallel() + got := redactedJoinedPrompts([]string{"hello", "world"}) + assert.NotEmpty(t, got) + assert.Contains(t, got, PromptSeparator) +} diff --git a/cmd/entire/cli/hooks_git_cmd.go b/cmd/entire/cli/hooks_git_cmd.go index 27ef45f28..f8729b94c 100644 --- a/cmd/entire/cli/hooks_git_cmd.go +++ b/cmd/entire/cli/hooks_git_cmd.go @@ -2,6 +2,7 @@ package cli import ( "context" + "fmt" "log/slog" "time" @@ -231,6 +232,13 @@ func newHooksGitPrePushCmd() *cobra.Command { Use: "pre-push ", Short: "Handle pre-push git hook", Args: cobra.ExactArgs(1), + // SilenceUsage only (SilenceErrors stays false) so non-zero exits from privacy-critical + // failures (OPF rewrite errors) print only the error message, + // not cobra's usage banner. The error message itself already + // includes user guidance (see V1DivergedError / BootstrapTooLargeError / + // V1RefMovedError in strategy/manual_commit_opf_rewrite.go). 
+ SilenceUsage: true, + SilenceErrors: false, RunE: func(cmd *cobra.Command, args []string) error { if gitHooksDisabled { return nil @@ -245,7 +253,20 @@ func newHooksGitPrePushCmd() *cobra.Command { hookErr := g.strategy.PrePush(g.ctx, remote) g.logCompleted(hookErr) - return nil + // Propagate the error so the hook script exits non-zero and + // git push aborts the entire batch. PrePush itself only + // returns errors for privacy-critical failures (OPF rewrite — + // e.g., V1DivergedError, BootstrapTooLargeError, + // V1RefMovedError, OPFRuntimeFailedError); transient + // checkpoint-push failures are logged and swallowed before + // reaching this point. See strategy/manual_commit_push.go + // for the contract. We wrap with a short "pre-push:" prefix + // so the user sees the source of the abort without losing + // the underlying type (errors.As still finds the sentinels). + if hookErr == nil { + return nil + } + return fmt.Errorf("pre-push: %w", hookErr) }, } } diff --git a/cmd/entire/cli/settings/settings.go b/cmd/entire/cli/settings/settings.go index fe2bef0a9..ddc44f125 100644 --- a/cmd/entire/cli/settings/settings.go +++ b/cmd/entire/cli/settings/settings.go @@ -19,6 +19,7 @@ import ( "github.com/entireio/cli/cmd/entire/cli/logging" "github.com/entireio/cli/cmd/entire/cli/paths" "github.com/entireio/cli/cmd/entire/cli/session" + "github.com/entireio/cli/redact" ) const ( @@ -204,6 +205,10 @@ type RedactionSettings struct { // "[REDACTED_