Skip to content
2 changes: 2 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ The manual-commit strategy (`manual_commit*.go`) does not modify the active bran
- **Shadow branch migration** - if user does stash/pull/rebase (HEAD changes without commit), shadow branch is automatically moved to new base commit
- **Orphaned branch cleanup** - if a shadow branch exists without a corresponding session state file, it is automatically reset when a new session starts
- PrePush hook can push `entire/checkpoints/v1` branch alongside user pushes
- **OPF (OpenAI Privacy Filter) runs at pre-push, not post-commit**: when `redaction.openai_privacy_filter.enabled` is true, the PrePush hook re-redacts unpushed `entire/checkpoints/v1` commits with the OPF 8th layer, builds new commits carrying an `Entire-OPF-Applied: true` trailer, and atomically updates the local v1 ref before pushing. Per-commit condensation stays on the fast 7-layer pipeline. See `strategy/manual_commit_opf_rewrite.go` and `docs/security-and-privacy.md` for the full flow, including divergence detection, bootstrap caps, and CAS-on-conflict semantics.
- Safe to use on main/master since it never modifies commit history

#### Key Files
Expand All @@ -450,6 +451,7 @@ The manual-commit strategy (`manual_commit*.go`) does not modify the active bran
- `common.go` - Helpers for metadata extraction, tree building, rewind validation, `ListCheckpoints()`
- `session.go` - Session/checkpoint data structures
- `push_common.go` - PrePush logic for pushing `entire/checkpoints/v1` branch
- `manual_commit_opf_rewrite.go` - Pre-push OPF re-redaction: walks unpushed v1 commits, runs OPF over their blobs, rebuilds commits with `Entire-OPF-Applied: true` trailer, CAS-updates the local ref. Sentinel errors: `ErrV1Diverged`, `ErrBootstrapTooLarge`, `ErrV1RefMoved`.
Comment thread
peyton-alt marked this conversation as resolved.
Outdated
- `manual_commit.go` - Manual-commit strategy main implementation
- `manual_commit_types.go` - Type definitions: `SessionState`, `CheckpointInfo`, `CondenseResult`
- `manual_commit_session.go` - Session state management (load/save/list session states)
Expand Down
27 changes: 5 additions & 22 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,22 +222,11 @@ type WriteCommittedOptions struct {
// Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources).
Transcript redact.RedactedBytes

// Prompts contains the raw user prompts from the session. These are NOT
// guaranteed to be redacted on entry — the writer always emits the typed
// PromptsRedacted blob below (running the safety-net pipeline if it is
// the zero value). Do not read Prompts independently for persistence; go
// through redactJoinedPrompts so the redaction guarantee is preserved.
// Prompts contains the raw user prompts from the session. The writer
// runs them through redactedJoinedPrompts inside
// writeSessionToSubdirectory before persisting them.
Prompts []string

// PromptsRedacted, when set, is the pre-redacted joined-prompts blob the
// writer uses verbatim instead of re-running the safety-net pipeline.
// Used by finalizeAllTurnCheckpoints to avoid running the OpenAI
// Privacy Filter once per checkpoint over identical joined-prompt
// strings. The typed wrapper makes the "this content was produced by
// the redaction pipeline" claim a compile-time invariant — callers
// cannot assign an arbitrary string.
PromptsRedacted redact.RedactedJoinedPrompts

// FilesTouched are files modified during the session
FilesTouched []string

Expand Down Expand Up @@ -366,16 +355,10 @@ type UpdateCommittedOptions struct {
// Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources).
Transcript redact.RedactedBytes

// Prompts contains the raw user prompts (replaces existing). NOT
// guaranteed to be redacted on entry — see WriteCommittedOptions.Prompts
// for the relationship to PromptsRedacted.
// Prompts contains the raw user prompts (replaces existing).
// See WriteCommittedOptions.Prompts.
Prompts []string

// PromptsRedacted, when set, is the pre-redacted joined-prompts blob
// the writer uses verbatim instead of re-running the safety-net
// pipeline. See WriteCommittedOptions.PromptsRedacted for rationale.
PromptsRedacted redact.RedactedJoinedPrompts

// Agent identifies the agent type (needed for transcript chunking)
Agent types.AgentType

Expand Down
66 changes: 28 additions & 38 deletions cmd/entire/cli/checkpoint/committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,12 +417,10 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom
filePaths.ContentHash = "/" + sessionPath + paths.ContentHashFileName
}

// Write prompts. Uses the full 8-layer pipeline (including OPF) via
// redactedJoinedPrompts; the helper unwraps opts.PromptsRedacted when
// set so callers (finalizeAllTurnCheckpoints) that pre-redact once
// across multiple checkpoint writes don't pay OPF per checkpoint.
// Write prompts via the 7-layer pipeline. OPF runs only in the
// pre-push rewrite path (manual_commit_opf_rewrite.go).
if len(opts.Prompts) > 0 {
promptContent := redactedJoinedPrompts(ctx, opts.Prompts, opts.PromptsRedacted)
promptContent := redactedJoinedPrompts(opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return filePaths, err
Expand Down Expand Up @@ -1403,10 +1401,9 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti
}
}

// Replace prompts (apply redaction as safety net; unwraps
// opts.PromptsRedacted when set).
// Replace prompts with 7-layer-redacted content.
if len(opts.Prompts) > 0 {
promptContent := redactedJoinedPrompts(ctx, opts.Prompts, opts.PromptsRedacted)
promptContent := redactedJoinedPrompts(opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return fmt.Errorf("failed to create prompt blob: %w", err)
Expand Down Expand Up @@ -1725,11 +1722,13 @@ func (s *GitStore) copyMetadataDir(ctx context.Context, metadataDir, basePath st
return fmt.Errorf("path traversal detected: %s", relPath)
}

// Create blob from file with secrets redaction
// Committed-checkpoint write — run the full 8-layer pipeline
// including OPF. The per-turn temp-write path stays on plain
// redactors via the sibling createRedactedBlobFromFile.
blobHash, mode, err := createRedactedBlobFromFileWithPrivacyFilter(ctx, s.repo, path, relPath)
// Create blob from file with 7-layer secrets redaction.
// Post-commit emits 7-layer-only blobs; the OPF-capable path
// (RedactBlobBytes with usePrivacyFilter=true) is used later by
// the pre-push rewrite, which re-redacts these blobs into
// 8-layer commits before they leave the local machine.
Comment thread
peyton-alt marked this conversation as resolved.
Outdated
_ = ctx // ctx not needed by the 7-layer path; kept on caller signature for future use
blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath)
if err != nil {
return fmt.Errorf("failed to create blob for %s: %w", path, err)
}
Expand All @@ -1751,22 +1750,13 @@ func (s *GitStore) copyMetadataDir(ctx context.Context, metadataDir, basePath st
}

// createRedactedBlobFromFile reads a file, applies the 7-layer redaction
// pipeline, and creates a git blob. Used by per-turn temporary-checkpoint
// writes — the OpenAI Privacy Filter is intentionally NOT run here to
// keep per-turn latency inside the agent loop's budget.
// pipeline, and creates a git blob. Used by committed-checkpoint writes
// at post-commit time. The OpenAI Privacy Filter is intentionally NOT
// run here — OPF lives in the pre-push rewrite path
// (strategy/manual_commit_opf_rewrite.go), which re-redacts the 7-layer
// blobs into 8-layer commits before they leave the local machine.
// JSONL files get JSONL-aware redaction; all other files get plain byte redaction.
func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string) (plumbing.Hash, filemode.FileMode, error) {
return createRedactedBlobFromFileImpl(context.Background(), repo, filePath, treePath, false)
}

// createRedactedBlobFromFileWithPrivacyFilter reads a file, applies the full
// 8-layer pipeline (including the OpenAI Privacy Filter), and creates a git
// blob. Used by committed-checkpoint writes — slower but more thorough.
func createRedactedBlobFromFileWithPrivacyFilter(ctx context.Context, repo *git.Repository, filePath, treePath string) (plumbing.Hash, filemode.FileMode, error) {
return createRedactedBlobFromFileImpl(ctx, repo, filePath, treePath, true)
}

func createRedactedBlobFromFileImpl(ctx context.Context, repo *git.Repository, filePath, treePath string, usePrivacyFilter bool) (plumbing.Hash, filemode.FileMode, error) {
info, err := os.Stat(filePath)
if err != nil {
return plumbing.ZeroHash, 0, fmt.Errorf("failed to stat file: %w", err)
Expand All @@ -1793,7 +1783,7 @@ func createRedactedBlobFromFileImpl(ctx context.Context, repo *git.Repository, f
return hash, mode, nil
}

content = redactBytesForBlob(ctx, content, treePath, usePrivacyFilter)
content = RedactBlobBytes(context.Background(), content, treePath, false)

hash, err := CreateBlobFromContent(repo, content)
if err != nil {
Expand All @@ -1802,14 +1792,15 @@ func createRedactedBlobFromFileImpl(ctx context.Context, repo *git.Repository, f
return hash, mode, nil
}

// redactBytesForBlob applies the appropriate redaction pipeline to file
// content for a checkpoint blob. JSONL files get JSONL-aware redaction
// (falling back to plain byte redaction on parse failure so the regex
// layers still apply); other files get plain byte redaction.
// usePrivacyFilter selects the lighter 7-layer pipeline (per-turn temp
// writes) versus the full 8-layer pipeline including OPF (committed
// writes).
func redactBytesForBlob(ctx context.Context, content []byte, treePath string, usePrivacyFilter bool) []byte {
// RedactBlobBytes redacts a single blob's content given its tree path.
// JSONL files get JSONL-aware redaction (falling back to plain bytes on
// parse failure so regex/credential layers still apply); other files
// get plain byte redaction. When usePrivacyFilter is true the full
// 8-layer pipeline (including OPF) runs; otherwise the 7-layer pipeline.
//
// Post-commit condensation uses false (fast path). The pre-push rewrite
// (strategy/manual_commit_opf_rewrite.go) uses true.
func RedactBlobBytes(ctx context.Context, content []byte, treePath string, usePrivacyFilter bool) []byte {
if strings.HasSuffix(treePath, ".jsonl") {
var (
redacted redact.RedactedBytes
Expand All @@ -1823,8 +1814,7 @@ func redactBytesForBlob(ctx context.Context, content []byte, treePath string, us
if err == nil {
return redacted.Bytes()
}
// JSONL parse failed — fall through so regex/credential layers
// still apply via the plain byte path.
// JSONL parse failed — fall through to plain bytes.
}
if usePrivacyFilter {
return redact.BytesWithPrivacyFilter(ctx, content)
Expand Down
70 changes: 70 additions & 0 deletions cmd/entire/cli/checkpoint/committed_opf_trailer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package checkpoint

import (
"context"
"os"
"path/filepath"
"testing"

"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
"github.com/entireio/cli/cmd/entire/cli/testutil"
"github.com/entireio/cli/cmd/entire/cli/trailers"
"github.com/entireio/cli/redact"
"github.com/go-git/go-git/v6"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/object"
"github.com/stretchr/testify/require"
)

// TestWriteCommitted_DoesNotEmitOPFAppliedTrailer pins the contract that a
// plain WriteCommitted call never stamps its commit with the
// Entire-OPF-Applied trailer. Only the pre-push rewrite path is supposed to
// emit that trailer; were the standard writer to add it, the rewrite would
// treat the commit as already OPF-processed (HasOPFApplied true →
// reparent-only) and 7-layer content would be pushed as though it were
// 8-layer.
func TestWriteCommitted_DoesNotEmitOPFAppliedTrailer(t *testing.T) {
	t.Parallel()

	// Fresh repository on disk.
	repoDir := t.TempDir()
	testutil.InitRepo(t, repoDir)
	repo, err := git.PlainOpen(repoDir)
	require.NoError(t, err)

	// Seed it with a single ordinary commit.
	worktree, err := repo.Worktree()
	require.NoError(t, err)
	readmePath := filepath.Join(repoDir, "README.md")
	err = os.WriteFile(readmePath, []byte("# Test"), 0o644)
	require.NoError(t, err)
	_, err = worktree.Add("README.md")
	require.NoError(t, err)
	author := &object.Signature{Name: "Test", Email: "test@test.com"}
	_, err = worktree.Commit("Initial commit", &git.CommitOptions{Author: author})
	require.NoError(t, err)

	// Write one committed checkpoint through the standard writer.
	writeOpts := WriteCommittedOptions{
		CheckpointID: id.MustCheckpointID("a1b2c3d4e5f6"),
		SessionID:    "regression-no-opf-trailer",
		Strategy:     "manual-commit",
		Transcript:   redact.AlreadyRedacted([]byte(`{"role":"user","content":"hello"}` + "\n")),
		AuthorName:   "Test",
		AuthorEmail:  "test@test.com",
	}
	err = NewGitStore(repo).WriteCommitted(context.Background(), writeOpts)
	require.NoError(t, err)

	// Resolve the tip of entire/checkpoints/v1 and inspect the message of
	// the commit it points at.
	branch := plumbing.NewBranchReferenceName("entire/checkpoints/v1")
	v1Ref, err := repo.Reference(branch, true)
	require.NoError(t, err, "writer should have created entire/checkpoints/v1")
	tip, err := repo.CommitObject(v1Ref.Hash())
	require.NoError(t, err)

	if !trailers.HasOPFApplied(tip.Message) {
		return
	}
	t.Errorf("standard WriteCommitted emitted Entire-OPF-Applied trailer; commit message:\n%s", tip.Message)
}
18 changes: 5 additions & 13 deletions cmd/entire/cli/checkpoint/prompts.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package checkpoint

import (
"context"
"strings"

"github.com/entireio/cli/redact"
Expand Down Expand Up @@ -29,16 +28,9 @@ func SplitPromptContent(content string) []string {
return prompts
}

// redactedJoinedPrompts returns the redacted prompt-blob content for the
// supplied prompts. When preRedacted is set it is unwrapped verbatim;
// otherwise the prompts are joined and run through the full 8-layer
// pipeline as a safety net. Callers that share the same prompts across
// multiple checkpoint writes (finalizeAllTurnCheckpoints) should compute
// the redacted blob once via redact.JoinedPrompts and pass it through to
// avoid running OPF repeatedly over identical input.
func redactedJoinedPrompts(ctx context.Context, prompts []string, preRedacted redact.RedactedJoinedPrompts) string {
if preRedacted.IsSet() {
return preRedacted.String()
}
return redact.JoinedPrompts(ctx, prompts, PromptSeparator).String()
// redactedJoinedPrompts joins prompts and runs the 7-layer redaction
// pipeline. OPF runs exclusively in the pre-push rewrite (not here),
// so the writer's hot path stays predictable.
func redactedJoinedPrompts(prompts []string) string {
return redact.String(strings.Join(prompts, PromptSeparator))
}
36 changes: 7 additions & 29 deletions cmd/entire/cli/checkpoint/prompts_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
package checkpoint

import (
"context"
"testing"

"github.com/entireio/cli/redact"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand All @@ -16,7 +14,6 @@ func TestJoinAndSplitPrompts_RoundTrip(t *testing.T) {
"first line\nwith newline",
"second prompt",
}

joined := JoinPrompts(original)
split := SplitPromptContent(joined)

Expand All @@ -26,34 +23,15 @@ func TestJoinAndSplitPrompts_RoundTrip(t *testing.T) {

func TestSplitPromptContent_EmptyContent(t *testing.T) {
t.Parallel()

assert.Nil(t, SplitPromptContent(""))
}

// TestRedactedJoinedPrompts_PreRedactedIsTrustedVerbatim verifies that when
// the caller supplies a set RedactedJoinedPrompts the helper unwraps it
// untouched and never re-invokes the redaction pipeline. The pre-redacted
// path is what finalizeAllTurnCheckpoints relies on to avoid running OPF
// once per checkpoint over identical joined-prompt strings.
func TestRedactedJoinedPrompts_PreRedactedIsTrustedVerbatim(t *testing.T) {
// TestRedactedJoinedPrompts_AppliesSafetyNet verifies the helper joins
// prompts with the canonical separator and returns non-empty output after
// the 7-layer pipeline. OPF runs only in the pre-push rewrite path, never here.
func TestRedactedJoinedPrompts_AppliesSafetyNet(t *testing.T) {
t.Parallel()

const preRedacted = "[REDACTED_PERSON] asked about [REDACTED_EMAIL]"
got := redactedJoinedPrompts(
context.Background(),
[]string{"raw prompt text"},
redact.AlreadyRedactedJoinedPrompts(preRedacted),
)
assert.Equal(t, preRedacted, got, "preRedacted should pass through verbatim")
}

// TestRedactedJoinedPrompts_ZeroValueFallsBackToRedaction verifies that
// when the typed preRedacted is the zero value the helper joins the
// prompts and runs the full pipeline as a safety net.
func TestRedactedJoinedPrompts_ZeroValueFallsBackToRedaction(t *testing.T) {
t.Parallel()

got := redactedJoinedPrompts(context.Background(), []string{"hello", "world"}, redact.RedactedJoinedPrompts{})
assert.NotEmpty(t, got, "zero-value preRedacted should fall back to running the redaction pipeline")
assert.Contains(t, got, PromptSeparator, "fallback output should preserve the prompt separator")
got := redactedJoinedPrompts([]string{"hello", "world"})
assert.NotEmpty(t, got)
assert.Contains(t, got, PromptSeparator)
}
6 changes: 3 additions & 3 deletions cmd/entire/cli/checkpoint/v2_committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,7 @@ func (s *V2GitStore) updateCommittedMain(ctx context.Context, opts UpdateCommitt
sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex)

if len(opts.Prompts) > 0 {
promptContent := redactedJoinedPrompts(ctx, opts.Prompts, opts.PromptsRedacted)
promptContent := redactedJoinedPrompts(opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return 0, fmt.Errorf("failed to create prompt blob: %w", err)
Expand Down Expand Up @@ -891,7 +891,7 @@ func (s *V2GitStore) writeMainCheckpointEntries(ctx context.Context, opts WriteC
// and compact transcript to a session subdirectory (0/, 1/, 2/, … indexed by
// session order within the checkpoint). The raw transcript (raw_transcript) and its
// content hash (raw_transcript_hash.txt) go to /full/current, not here.
func (s *V2GitStore) writeMainSessionToSubdirectory(ctx context.Context, opts WriteCommittedOptions, sessionPath string, entries map[string]object.TreeEntry) (SessionFilePaths, error) {
func (s *V2GitStore) writeMainSessionToSubdirectory(_ context.Context, opts WriteCommittedOptions, sessionPath string, entries map[string]object.TreeEntry) (SessionFilePaths, error) {
filePaths := SessionFilePaths{}

// Clear existing entries at this session path
Expand All @@ -903,7 +903,7 @@ func (s *V2GitStore) writeMainSessionToSubdirectory(ctx context.Context, opts Wr

// Write prompts
if len(opts.Prompts) > 0 {
promptContent := redactedJoinedPrompts(ctx, opts.Prompts, opts.PromptsRedacted)
promptContent := redactedJoinedPrompts(opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return filePaths, err
Expand Down
Loading
Loading