Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
083292d
feat(redact): add OpenAI Privacy Filter as opt-in 8th layer
peyton-alt May 14, 2026
8123aef
feat(checkpoint): wire OPF into committed-checkpoint writes only
peyton-alt May 14, 2026
6008464
docs(redact): document Optional OpenAI Privacy Filter setup
peyton-alt May 14, 2026
672044c
fix(checkpoint): drop unused strings import in prompts_test.go
peyton-alt May 14, 2026
295d7be
refactor(redact): type-wrap pre-redacted prompts; move OPF test helpers
peyton-alt May 15, 2026
cd4d873
fix(review): make manifest_test session-state tests time-relative
peyton-alt May 15, 2026
42a0927
feat(redact): OPF runs at pre-push, not commit time
peyton-alt May 15, 2026
36ced06
feat(redact): interactive prompt + ENTIRE_OPF env var before pre-push…
peyton-alt May 15, 2026
6d54ea4
perf(redact): scope OPF rewrite to the commit's own shard
peyton-alt May 15, 2026
e1a0ce0
feat(strategy): delete shadow branches once their checkpoints are pushed
peyton-alt May 19, 2026
337f1e1
refactor(redact): remove dead OPF abstractions
peyton-alt May 19, 2026
e4a8e46
fix(redact): address PR 1236 review comments
peyton-alt May 20, 2026
8151b3b
fix(redact): address remaining PR 1236 review feedback
peyton-alt May 20, 2026
83b589d
fix(redact): close privacy holes flagged in PR 1236 final review
peyton-alt May 21, 2026
2453cde
Merge remote-tracking branch 'origin/main' into feat/opf-prepush
peyton-alt May 28, 2026
2bc0ee3
fix(checkpoint): add nolint:ireturn to NewCommittedReader
peyton-alt May 28, 2026
658a935
Merge remote-tracking branch 'origin/main' into feat/openai-privacy-f…
peyton-alt Jun 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -437,13 +437,15 @@ The manual-commit strategy (`manual_commit*.go`) does not modify the active bran
- **Shadow branch migration** - if user does stash/pull/rebase (HEAD changes without commit), shadow branch is automatically moved to new base commit
- **Orphaned branch cleanup** - if a shadow branch exists without a corresponding session state file, it is automatically reset when a new session starts
- PrePush hook can push `entire/checkpoints/v1` branch alongside user pushes
- **OPF (OpenAI Privacy Filter) runs at pre-push, not post-commit**: when `redaction.openai_privacy_filter.enabled` is true, the PrePush hook re-redacts unpushed `entire/checkpoints/v1` commits with the OPF 8th layer, builds new commits carrying an `Entire-OPF-Applied: true` trailer, and atomically updates the local v1 ref before pushing. Per-commit condensation stays on the fast 7-layer pipeline. See `strategy/manual_commit_opf_rewrite.go` and `docs/security-and-privacy.md` for the full flow, including divergence detection, bootstrap caps, and CAS-on-conflict semantics.
- Safe to use on main/master since it never modifies commit history

#### Key Files

- `strategy.go` - Interface definition and context structs (`StepContext`, `TaskStepContext`, `RewindPoint`, etc.)
- `common.go` - Helpers for metadata extraction, tree building, rewind validation, `ListCheckpoints()`
- `manual_commit*.go` - Manual-commit strategy: main impl, types, session state, condensation, rewind, git ops, logs, hook handlers (prepare-commit-msg, post-commit, post-rewrite, pre-push), reset
- `manual_commit_opf_rewrite.go` - Pre-push OPF re-redaction: walks unpushed v1 commits, runs OPF over their blobs, rebuilds commits with `Entire-OPF-Applied: true` trailer, CAS-updates the local ref. Sentinel error types (use `errors.As`): `V1DivergedError`, `BootstrapTooLargeError`, `V1RefMovedError`, `OPFRuntimeFailedError`.
- `cleanup.go` - Cleanup discovery/deletion for shadow branches, session states, and checkpoint metadata
- `session_state.go` - Package-level session state functions
- `hooks.go` - Git hook installation
Expand Down
7 changes: 5 additions & 2 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,9 @@ type WriteCommittedOptions struct {
// Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources).
Transcript redact.RedactedBytes

// Prompts contains user prompts from the session
// Prompts contains the raw user prompts from the session. Run through
// redactedJoinedPrompts before persisting — the writer does this
// inside writeSessionToSubdirectory.
Prompts []string

// FilesTouched are files modified during the session
Expand Down Expand Up @@ -360,7 +362,8 @@ type UpdateCommittedOptions struct {
// Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources).
Transcript redact.RedactedBytes

// Prompts contains all user prompts (replaces existing)
// Prompts contains the raw user prompts (replaces existing).
// See WriteCommittedOptions.Prompts.
Prompts []string

// Agent identifies the agent type (needed for transcript chunking)
Expand Down
43 changes: 41 additions & 2 deletions cmd/entire/cli/checkpoint/checkpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func TestCopyMetadataDir_SkipsSymlinks(t *testing.T) {
store := NewGitStore(repo)
entries := make(map[string]object.TreeEntry)

err = store.copyMetadataDir(metadataDir, "checkpoint/", entries)
err = store.copyMetadataDir(context.Background(), metadataDir, "checkpoint/", entries)
if err != nil {
t.Fatalf("copyMetadataDir failed: %v", err)
}
Expand Down Expand Up @@ -3409,7 +3409,7 @@ func TestCopyMetadataDir_RedactsSecrets(t *testing.T) {
store := NewGitStore(repo)
entries := make(map[string]object.TreeEntry)

if err := store.copyMetadataDir(metadataDir, "cp/", entries); err != nil {
if err := store.copyMetadataDir(context.Background(), metadataDir, "cp/", entries); err != nil {
t.Fatalf("copyMetadataDir() error = %v", err)
}

Expand Down Expand Up @@ -4410,6 +4410,45 @@ func TestCheckpointSummary_HasReview(t *testing.T) {
}
}

// TestRedactBlobBytes_JSONMetadata exercises RedactBlobBytes on a tree path
// ending in .json. It marshals a CommittedMetadata whose ReviewPrompt and
// Summary.Intent both embed an AWS-key-shaped secret, redacts the bytes with
// usePrivacyFilter=false (so no OPF binary is required), and then checks
// three things: the secret no longer appears, a REDACTED placeholder does,
// and the output still parses as JSON with its "kind" field untouched.
func TestRedactBlobBytes_JSONMetadata(t *testing.T) {
	t.Parallel()

	// One shared constant keeps the planted secret and the leak check in sync.
	const awsKey = "AKIAYRWQG5EJLPZLBYNP"

	raw, err := json.Marshal(CommittedMetadata{
		Kind:         "agent_review",
		ReviewPrompt: "credential leak: key=" + awsKey,
		Summary:      &Summary{Intent: "leak: key=" + awsKey},
	})
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}

	redacted := RedactBlobBytes(context.Background(), raw, "metadata.json", false)
	text := string(redacted)

	if strings.Contains(text, awsKey) {
		t.Errorf("expected AWS key redacted in metadata.json blob, got %s", text)
	}
	if !strings.Contains(text, "REDACTED") {
		t.Errorf("expected REDACTED placeholder in metadata.json blob, got %s", text)
	}

	// The redactor must leave the document structurally intact: it has to
	// parse, and the non-secret "kind" value has to come back unchanged.
	var decoded map[string]any
	if parseErr := json.Unmarshal(redacted, &decoded); parseErr != nil {
		t.Errorf("redacted .json blob must remain valid JSON, got parse err %v (content: %s)", parseErr, text)
	}
	if kind := decoded["kind"]; kind != "agent_review" {
		t.Errorf(`expected "kind":"agent_review" preserved after redaction, got %v`, kind)
	}
}

// TestCheckpointSummary_HasInvestigation pins the JSON wire format for the
// HasInvestigation umbrella flag on CheckpointSummary. Mirrors the
// HasReview test: callers depend on the on-disk shape, so this asserts on
Expand Down
79 changes: 60 additions & 19 deletions cmd/entire/cli/checkpoint/committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ func (s *GitStore) writeStandardCheckpointEntries(ctx context.Context, opts Writ

// Copy additional metadata files from directory if specified (to session subdirectory)
if opts.MetadataDir != "" {
if err := s.copyMetadataDir(opts.MetadataDir, sessionPath, entries); err != nil {
if err := s.copyMetadataDir(ctx, opts.MetadataDir, sessionPath, entries); err != nil {
return fmt.Errorf("failed to copy metadata directory: %w", err)
}
}
Expand Down Expand Up @@ -418,9 +418,10 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom
filePaths.ContentHash = "/" + sessionPath + paths.ContentHashFileName
}

// Write prompts
// Write prompts via the 7-layer pipeline. OPF runs only in the
// pre-push rewrite path (manual_commit_opf_rewrite.go).
if len(opts.Prompts) > 0 {
promptContent := redact.String(JoinPrompts(opts.Prompts))
promptContent := redactedJoinedPrompts(opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return filePaths, err
Expand Down Expand Up @@ -1521,9 +1522,9 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti
}
}

// Replace prompts (apply redaction as safety net)
// Replace prompts with 7-layer-redacted content.
if len(opts.Prompts) > 0 {
promptContent := redact.String(JoinPrompts(opts.Prompts))
promptContent := redactedJoinedPrompts(opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return fmt.Errorf("failed to create prompt blob: %w", err)
Expand Down Expand Up @@ -1843,7 +1844,7 @@ func CreateBlobFromContent(repo *git.Repository, content []byte) (plumbing.Hash,

// copyMetadataDir copies all files from a directory to the checkpoint path.
// Used to include additional metadata files like task checkpoints, subagent transcripts, etc.
func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[string]object.TreeEntry) error {
func (s *GitStore) copyMetadataDir(ctx context.Context, metadataDir, basePath string, entries map[string]object.TreeEntry) error {
err := filepath.Walk(metadataDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
Expand Down Expand Up @@ -1882,7 +1883,13 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str
return fmt.Errorf("path traversal detected: %s", relPath)
}

// Create blob from file with secrets redaction
// Create blob from file with 7-layer secrets redaction.
// Post-commit emits 7-layer-only blobs; the pre-push rewrite
// (strategy/manual_commit_opf_rewrite.go) walks the resulting
// tree, re-redacts these blobs with OPF when enabled, and
// rewrites entire/checkpoints/v1 into 8-layer commits before
// they leave the local machine.
_ = ctx // ctx not needed by the 7-layer path; kept on caller signature for future use
blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath)
if err != nil {
return fmt.Errorf("failed to create blob for %s: %w", path, err)
Expand All @@ -1904,8 +1911,13 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str
return nil
}

// createRedactedBlobFromFile reads a file, applies secrets redaction, and creates a git blob.
// JSONL files get JSONL-aware redaction; all other files get plain string redaction.
// createRedactedBlobFromFile reads a file, applies the 7-layer redaction
// pipeline, and creates a git blob. Used by committed-checkpoint writes
// at post-commit time. The OpenAI Privacy Filter is intentionally NOT
// run here — OPF lives in the pre-push rewrite path
// (strategy/manual_commit_opf_rewrite.go), which re-redacts the 7-layer
// blobs into 8-layer commits before they leave the local machine.
// JSONL files get JSONL-aware redaction; all other files get plain byte redaction.
func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string) (plumbing.Hash, filemode.FileMode, error) {
info, err := os.Stat(filePath)
if err != nil {
Expand Down Expand Up @@ -1933,16 +1945,7 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string)
return hash, mode, nil
}

if strings.HasSuffix(treePath, ".jsonl") {
redacted, jsonlErr := redact.JSONLBytes(content)
if jsonlErr != nil {
content = redact.Bytes(content)
} else {
content = redacted.Bytes()
}
} else {
content = redact.Bytes(content)
}
content = RedactBlobBytes(context.Background(), content, treePath, false)

hash, err := CreateBlobFromContent(repo, content)
if err != nil {
Expand All @@ -1951,6 +1954,44 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string)
return hash, mode, nil
}

// RedactBlobBytes returns a redacted copy of one blob's content, choosing
// the redactor from the blob's tree path and the usePrivacyFilter flag.
//
// Paths ending in .jsonl or .json are first handed to the JSON-aware
// redactor (redact.JSONLBytes, or redact.JSONLBytesWithPrivacyFilter when
// usePrivacyFilter is true). If that redactor returns an error, the content
// is redacted as plain bytes instead, so a malformed file is still scrubbed.
// Every other path goes straight to plain byte redaction (redact.Bytes, or
// redact.BytesWithPrivacyFilter when usePrivacyFilter is true).
//
// .json is treated like .jsonl because checkpoint metadata files
// (metadata.json) carry free-form fields such as Summary.Intent and
// ReviewPrompt that may hold sensitive text.
//
// ctx is only consulted by the privacy-filter variants. Post-commit
// condensation passes usePrivacyFilter=false (fast path); the pre-push
// rewrite in strategy/manual_commit_opf_rewrite.go passes true.
func RedactBlobBytes(ctx context.Context, content []byte, treePath string, usePrivacyFilter bool) []byte {
	jsonShaped := strings.HasSuffix(treePath, ".jsonl") || strings.HasSuffix(treePath, ".json")

	switch {
	case jsonShaped && usePrivacyFilter:
		if out, err := redact.JSONLBytesWithPrivacyFilter(ctx, content); err == nil {
			return out.Bytes()
		}
	case jsonShaped:
		if out, err := redact.JSONLBytes(content); err == nil {
			return out.Bytes()
		}
	}

	// Either the path is not JSON-shaped or the JSON-aware redactor
	// rejected the content; redact it as opaque bytes.
	if !usePrivacyFilter {
		return redact.Bytes(content)
	}
	return redact.BytesWithPrivacyFilter(ctx, content)
}

// GetGitAuthorFromRepo retrieves the git user.name and user.email,
// checking both the repository-local config and the global ~/.gitconfig.
func GetGitAuthorFromRepo(repo *git.Repository) (name, email string) {
Expand Down
70 changes: 70 additions & 0 deletions cmd/entire/cli/checkpoint/committed_opf_trailer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package checkpoint

import (
"context"
"os"
"path/filepath"
"testing"

"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
"github.com/entireio/cli/cmd/entire/cli/testutil"
"github.com/entireio/cli/cmd/entire/cli/trailers"
"github.com/entireio/cli/redact"
"github.com/go-git/go-git/v6"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/object"
"github.com/stretchr/testify/require"
)

// TestWriteCommitted_DoesNotEmitOPFAppliedTrailer guards the contract that
// the standard WriteCommitted path never stamps its commits with the
// Entire-OPF-Applied trailer. Per the design notes for this change, that
// trailer belongs to the pre-push rewrite alone: a commit carrying it is
// treated as already OPF-processed, so a writer that added it by mistake
// would let 7-layer content ship as though it were 8-layer.
//
// The test creates a throwaway repository with one ordinary commit, writes a
// single checkpoint, and inspects the message of the commit at the tip of
// entire/checkpoints/v1.
func TestWriteCommitted_DoesNotEmitOPFAppliedTrailer(t *testing.T) {
	t.Parallel()

	repoDir := t.TempDir()
	testutil.InitRepo(t, repoDir)
	repo, err := git.PlainOpen(repoDir)
	require.NoError(t, err)

	// Seed the repository with an initial commit on the working branch.
	worktree, err := repo.Worktree()
	require.NoError(t, err)
	require.NoError(t, os.WriteFile(filepath.Join(repoDir, "README.md"), []byte("# Test"), 0o644))
	_, err = worktree.Add("README.md")
	require.NoError(t, err)
	author := &object.Signature{Name: "Test", Email: "test@test.com"}
	_, err = worktree.Commit("Initial commit", &git.CommitOptions{Author: author})
	require.NoError(t, err)

	// Write one checkpoint through the standard (non-OPF) writer.
	writeOpts := WriteCommittedOptions{
		CheckpointID: id.MustCheckpointID("a1b2c3d4e5f6"),
		SessionID:    "regression-no-opf-trailer",
		Strategy:     "manual-commit",
		Transcript:   redact.AlreadyRedacted([]byte(`{"role":"user","content":"hello"}` + "\n")),
		AuthorName:   "Test",
		AuthorEmail:  "test@test.com",
	}
	require.NoError(t, NewGitStore(repo).WriteCommitted(context.Background(), writeOpts))

	// The branch tip is the commit the writer just produced; its message
	// must not carry the OPF-applied trailer.
	branch := plumbing.NewBranchReferenceName("entire/checkpoints/v1")
	ref, err := repo.Reference(branch, true)
	require.NoError(t, err, "writer should have created entire/checkpoints/v1")
	tip, err := repo.CommitObject(ref.Hash())
	require.NoError(t, err)

	if msg := tip.Message; trailers.HasOPFApplied(msg) {
		t.Errorf("standard WriteCommitted emitted Entire-OPF-Applied trailer; commit message:\n%s", msg)
	}
}
13 changes: 12 additions & 1 deletion cmd/entire/cli/checkpoint/prompts.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package checkpoint

import "strings"
import (
"strings"

"github.com/entireio/cli/redact"
)

// PromptSeparator is the canonical separator used in prompt.txt when multiple
// prompts are stored in a single file.
Expand All @@ -23,3 +27,10 @@ func SplitPromptContent(content string) []string {
}
return prompts
}

// redactedJoinedPrompts joins prompts and runs the 7-layer redaction
// pipeline. OPF runs exclusively in the pre-push rewrite (not here),
// so the writer's hot path stays predictable.
func redactedJoinedPrompts(prompts []string) string {
return redact.String(strings.Join(prompts, PromptSeparator))
}
12 changes: 10 additions & 2 deletions cmd/entire/cli/checkpoint/prompts_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ func TestJoinAndSplitPrompts_RoundTrip(t *testing.T) {
"first line\nwith newline",
"second prompt",
}

joined := JoinPrompts(original)
split := SplitPromptContent(joined)

Expand All @@ -24,6 +23,15 @@ func TestJoinAndSplitPrompts_RoundTrip(t *testing.T) {

func TestSplitPromptContent_EmptyContent(t *testing.T) {
t.Parallel()

assert.Nil(t, SplitPromptContent(""))
}

// TestRedactedJoinedPrompts_AppliesSafetyNet verifies both halves of the
// helper's contract: prompts are joined with the canonical separator, and
// the joined text is actually redacted. An AWS-key-shaped secret is planted
// in one prompt and must not appear in the output; without that check the
// test would still pass if the redaction call were removed from the helper.
// OPF runs only in the pre-push rewrite path, never here, so the test needs
// no external binary.
func TestRedactedJoinedPrompts_AppliesSafetyNet(t *testing.T) {
	t.Parallel()

	// AWS-access-key-shaped value that the pattern-based layers are
	// expected to catch.
	const secret = "AKIAYRWQG5EJLPZLBYNP"

	got := redactedJoinedPrompts([]string{"hello", "leak: key=" + secret})

	assert.NotEmpty(t, got)
	assert.Contains(t, got, PromptSeparator)
	// Benign prompt text must pass through untouched.
	assert.Contains(t, got, "hello")
	// The safety net itself: the planted secret must not survive.
	assert.NotContains(t, got, secret)
}
Loading
Loading