Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,22 @@ type WriteCommittedOptions struct {
// Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources).
Transcript redact.RedactedBytes

// Prompts contains user prompts from the session
// Prompts contains the raw user prompts from the session. These are NOT
// guaranteed to be redacted on entry — the writer always emits the typed
// PromptsRedacted blob below (running the safety-net pipeline if it is
// the zero value). Do not read Prompts independently for persistence; go
// through redactedJoinedPrompts so the redaction guarantee is preserved.
Prompts []string

// PromptsRedacted, when set, is the pre-redacted joined-prompts blob the
// writer uses verbatim instead of re-running the safety-net pipeline.
// Used by finalizeAllTurnCheckpoints to avoid running the OpenAI
// Privacy Filter once per checkpoint over identical joined-prompt
// strings. The typed wrapper makes the "this content was produced by
// the redaction pipeline" claim a compile-time invariant — callers
// cannot assign an arbitrary string.
PromptsRedacted redact.RedactedJoinedPrompts

// FilesTouched are files modified during the session
FilesTouched []string

Expand Down Expand Up @@ -353,9 +366,16 @@ type UpdateCommittedOptions struct {
// Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources).
Transcript redact.RedactedBytes

// Prompts contains all user prompts (replaces existing)
// Prompts contains the raw user prompts (replaces existing). NOT
// guaranteed to be redacted on entry — see WriteCommittedOptions.Prompts
// for the relationship to PromptsRedacted.
Prompts []string

// PromptsRedacted, when set, is the pre-redacted joined-prompts blob
// the writer uses verbatim instead of re-running the safety-net
// pipeline. See WriteCommittedOptions.PromptsRedacted for rationale.
PromptsRedacted redact.RedactedJoinedPrompts

// Agent identifies the agent type (needed for transcript chunking)
Agent types.AgentType

Expand Down
4 changes: 2 additions & 2 deletions cmd/entire/cli/checkpoint/checkpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func TestCopyMetadataDir_SkipsSymlinks(t *testing.T) {
store := NewGitStore(repo)
entries := make(map[string]object.TreeEntry)

err = store.copyMetadataDir(metadataDir, "checkpoint/", entries)
err = store.copyMetadataDir(context.Background(), metadataDir, "checkpoint/", entries)
if err != nil {
t.Fatalf("copyMetadataDir failed: %v", err)
}
Expand Down Expand Up @@ -3406,7 +3406,7 @@ func TestCopyMetadataDir_RedactsSecrets(t *testing.T) {
store := NewGitStore(repo)
entries := make(map[string]object.TreeEntry)

if err := store.copyMetadataDir(metadataDir, "cp/", entries); err != nil {
if err := store.copyMetadataDir(context.Background(), metadataDir, "cp/", entries); err != nil {
t.Fatalf("copyMetadataDir() error = %v", err)
}

Expand Down
80 changes: 61 additions & 19 deletions cmd/entire/cli/checkpoint/committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ func (s *GitStore) writeStandardCheckpointEntries(ctx context.Context, opts Writ

// Copy additional metadata files from directory if specified (to session subdirectory)
if opts.MetadataDir != "" {
if err := s.copyMetadataDir(opts.MetadataDir, sessionPath, entries); err != nil {
if err := s.copyMetadataDir(ctx, opts.MetadataDir, sessionPath, entries); err != nil {
return fmt.Errorf("failed to copy metadata directory: %w", err)
}
}
Expand Down Expand Up @@ -417,9 +417,12 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom
filePaths.ContentHash = "/" + sessionPath + paths.ContentHashFileName
}

// Write prompts
// Write prompts. Uses the full 8-layer pipeline (including OPF) via
// redactedJoinedPrompts; the helper unwraps opts.PromptsRedacted when
// set so callers (finalizeAllTurnCheckpoints) that pre-redact once
// across multiple checkpoint writes don't pay OPF per checkpoint.
if len(opts.Prompts) > 0 {
promptContent := redact.String(JoinPrompts(opts.Prompts))
promptContent := redactedJoinedPrompts(ctx, opts.Prompts, opts.PromptsRedacted)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return filePaths, err
Expand Down Expand Up @@ -1400,9 +1403,10 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti
}
}

// Replace prompts (apply redaction as safety net)
// Replace prompts (apply redaction as safety net; unwraps
// opts.PromptsRedacted when set).
if len(opts.Prompts) > 0 {
promptContent := redact.String(JoinPrompts(opts.Prompts))
promptContent := redactedJoinedPrompts(ctx, opts.Prompts, opts.PromptsRedacted)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return fmt.Errorf("failed to create prompt blob: %w", err)
Expand Down Expand Up @@ -1682,7 +1686,7 @@ func CreateBlobFromContent(repo *git.Repository, content []byte) (plumbing.Hash,

// copyMetadataDir copies all files from a directory to the checkpoint path.
// Used to include additional metadata files like task checkpoints, subagent transcripts, etc.
func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[string]object.TreeEntry) error {
func (s *GitStore) copyMetadataDir(ctx context.Context, metadataDir, basePath string, entries map[string]object.TreeEntry) error {
err := filepath.Walk(metadataDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
Expand Down Expand Up @@ -1722,7 +1726,10 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str
}

// Create blob from file with secrets redaction
blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath)
// Committed-checkpoint write — run the full 8-layer pipeline
// including OPF. The per-turn temp-write path stays on plain
// redactors via the sibling createRedactedBlobFromFile.
blobHash, mode, err := createRedactedBlobFromFileWithPrivacyFilter(ctx, s.repo, path, relPath)
if err != nil {
return fmt.Errorf("failed to create blob for %s: %w", path, err)
}
Expand All @@ -1743,9 +1750,23 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str
return nil
}

// createRedactedBlobFromFile reads a file, applies secrets redaction, and creates a git blob.
// JSONL files get JSONL-aware redaction; all other files get plain string redaction.
// createRedactedBlobFromFile builds a git blob from the file at filePath
// after passing its content through the 7-layer redaction pipeline.
// It serves the per-turn temporary-checkpoint write path, where the
// OpenAI Privacy Filter is deliberately skipped so per-turn latency stays
// within the agent loop's budget.
// Files whose tree path ends in .jsonl receive JSONL-aware redaction;
// everything else receives plain byte redaction.
func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string) (plumbing.Hash, filemode.FileMode, error) {
	// The privacy filter is off on this path; a background context is
	// passed to satisfy the shared implementation's signature.
	const usePrivacyFilter = false
	ctx := context.Background()
	return createRedactedBlobFromFileImpl(ctx, repo, filePath, treePath, usePrivacyFilter)
}

// createRedactedBlobFromFileWithPrivacyFilter builds a git blob from the
// file at filePath after passing its content through the full 8-layer
// redaction pipeline, which includes the OpenAI Privacy Filter.
// It serves the committed-checkpoint write path: slower than
// createRedactedBlobFromFile, but more thorough. ctx is forwarded to the
// shared implementation.
func createRedactedBlobFromFileWithPrivacyFilter(ctx context.Context, repo *git.Repository, filePath, treePath string) (plumbing.Hash, filemode.FileMode, error) {
	const usePrivacyFilter = true
	return createRedactedBlobFromFileImpl(ctx, repo, filePath, treePath, usePrivacyFilter)
}

func createRedactedBlobFromFileImpl(ctx context.Context, repo *git.Repository, filePath, treePath string, usePrivacyFilter bool) (plumbing.Hash, filemode.FileMode, error) {
info, err := os.Stat(filePath)
if err != nil {
return plumbing.ZeroHash, 0, fmt.Errorf("failed to stat file: %w", err)
Expand All @@ -1772,16 +1793,7 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string)
return hash, mode, nil
}

if strings.HasSuffix(treePath, ".jsonl") {
redacted, jsonlErr := redact.JSONLBytes(content)
if jsonlErr != nil {
content = redact.Bytes(content)
} else {
content = redacted.Bytes()
}
} else {
content = redact.Bytes(content)
}
content = redactBytesForBlob(ctx, content, treePath, usePrivacyFilter)

hash, err := CreateBlobFromContent(repo, content)
if err != nil {
Expand All @@ -1790,6 +1802,36 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string)
return hash, mode, nil
}

// redactBytesForBlob returns content after running the redaction pipeline
// suited to a checkpoint blob stored at treePath.
//
// Pipeline selection:
//   - usePrivacyFilter == true: the full 8-layer pipeline including OPF
//     (committed-checkpoint writes); ctx is forwarded to the filter.
//   - usePrivacyFilter == false: the lighter 7-layer pipeline
//     (per-turn temporary writes); ctx is not used.
//
// Format selection: a treePath ending in ".jsonl" is first given
// JSONL-aware redaction. If that returns an error, the content is redacted
// with the plain byte pipeline instead, so the regex/credential layers
// still apply. All other paths go straight to plain byte redaction.
func redactBytesForBlob(ctx context.Context, content []byte, treePath string, usePrivacyFilter bool) []byte {
	isJSONL := strings.HasSuffix(treePath, ".jsonl")

	switch {
	case isJSONL && usePrivacyFilter:
		if out, err := redact.JSONLBytesWithPrivacyFilter(ctx, content); err == nil {
			return out.Bytes()
		}
	case isJSONL:
		if out, err := redact.JSONLBytes(content); err == nil {
			return out.Bytes()
		}
	}

	// Reached for non-JSONL files, and for JSONL files whose JSONL-aware
	// redaction returned an error: use the plain byte path.
	if !usePrivacyFilter {
		return redact.Bytes(content)
	}
	return redact.BytesWithPrivacyFilter(ctx, content)
}

// GetGitAuthorFromRepo retrieves the git user.name and user.email,
// checking both the repository-local config and the global ~/.gitconfig.
func GetGitAuthorFromRepo(repo *git.Repository) (name, email string) {
Expand Down
21 changes: 20 additions & 1 deletion cmd/entire/cli/checkpoint/prompts.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
package checkpoint

import "strings"
import (
"context"
"strings"

"github.com/entireio/cli/redact"
)

// PromptSeparator is the canonical separator used in prompt.txt when multiple
// prompts are stored in a single file.
Expand All @@ -23,3 +28,17 @@ func SplitPromptContent(content string) []string {
}
return prompts
}

// redactedJoinedPrompts produces the redacted text that is stored as the
// prompt blob.
//
// If preRedacted is set, its content is returned verbatim and prompts is
// not consulted. Otherwise prompts are joined with PromptSeparator and run
// through the full 8-layer pipeline via redact.JoinedPrompts as a safety
// net.
//
// Callers that write the same prompts into several checkpoints
// (finalizeAllTurnCheckpoints) should compute the redacted blob once with
// redact.JoinedPrompts and pass it in, so OPF does not run repeatedly over
// identical input.
func redactedJoinedPrompts(ctx context.Context, prompts []string, preRedacted redact.RedactedJoinedPrompts) string {
	if !preRedacted.IsSet() {
		fresh := redact.JoinedPrompts(ctx, prompts, PromptSeparator)
		return fresh.String()
	}
	return preRedacted.String()
}
30 changes: 30 additions & 0 deletions cmd/entire/cli/checkpoint/prompts_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package checkpoint

import (
"context"
"testing"

"github.com/entireio/cli/redact"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand All @@ -27,3 +29,31 @@ func TestSplitPromptContent_EmptyContent(t *testing.T) {

assert.Nil(t, SplitPromptContent(""))
}

// TestRedactedJoinedPrompts_PreRedactedIsTrustedVerbatim verifies that when
// the caller supplies a set RedactedJoinedPrompts the helper unwraps it
// untouched and never re-invokes the redaction pipeline. The pre-redacted
// path is what finalizeAllTurnCheckpoints relies on to avoid running OPF
// once per checkpoint over identical joined-prompt strings.
func TestRedactedJoinedPrompts_PreRedactedIsTrustedVerbatim(t *testing.T) {
t.Parallel()

const preRedacted = "[REDACTED_PERSON] asked about [REDACTED_EMAIL]"
got := redactedJoinedPrompts(
context.Background(),
[]string{"raw prompt text"},
redact.AlreadyRedactedJoinedPrompts(preRedacted),
)
assert.Equal(t, preRedacted, got, "preRedacted should pass through verbatim")
}

// TestRedactedJoinedPrompts_ZeroValueFallsBackToRedaction verifies that
// when the typed preRedacted is the zero value the helper joins the
// prompts and runs the full pipeline as a safety net.
func TestRedactedJoinedPrompts_ZeroValueFallsBackToRedaction(t *testing.T) {
t.Parallel()

got := redactedJoinedPrompts(context.Background(), []string{"hello", "world"}, redact.RedactedJoinedPrompts{})
assert.NotEmpty(t, got, "zero-value preRedacted should fall back to running the redaction pipeline")
assert.Contains(t, got, PromptSeparator, "fallback output should preserve the prompt separator")
}
12 changes: 6 additions & 6 deletions cmd/entire/cli/checkpoint/v2_committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ func (s *V2GitStore) buildFreshMainBatchGroupTree(ctx context.Context, cpID id.C

for sessionIndex, opts := range groupOpts {
sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex)
filePaths, err := s.writeMainSessionToSubdirectory(opts, sessionPath, entries)
filePaths, err := s.writeMainSessionToSubdirectory(ctx, opts, sessionPath, entries)
if err != nil {
return plumbing.ZeroHash, err
}
Expand Down Expand Up @@ -283,7 +283,7 @@ func (s *V2GitStore) buildMainBatchGroupTree(ctx context.Context, rootTreeHash p
}

sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex)
filePaths, err := s.writeMainSessionToSubdirectory(opts, sessionPath, entries)
filePaths, err := s.writeMainSessionToSubdirectory(ctx, opts, sessionPath, entries)
if err != nil {
return plumbing.ZeroHash, err
}
Expand Down Expand Up @@ -609,7 +609,7 @@ func (s *V2GitStore) updateCommittedMain(ctx context.Context, opts UpdateCommitt
sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex)

if len(opts.Prompts) > 0 {
promptContent := redact.String(JoinPrompts(opts.Prompts))
promptContent := redactedJoinedPrompts(ctx, opts.Prompts, opts.PromptsRedacted)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return 0, fmt.Errorf("failed to create prompt blob: %w", err)
Expand Down Expand Up @@ -865,7 +865,7 @@ func (s *V2GitStore) writeMainCheckpointEntries(ctx context.Context, opts WriteC

// Write session files (metadata and prompts — no transcript or content hash)
sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex)
sessionFilePaths, err := s.writeMainSessionToSubdirectory(opts, sessionPath, entries)
sessionFilePaths, err := s.writeMainSessionToSubdirectory(ctx, opts, sessionPath, entries)
if err != nil {
return 0, err
}
Expand All @@ -891,7 +891,7 @@ func (s *V2GitStore) writeMainCheckpointEntries(ctx context.Context, opts WriteC
// and compact transcript to a session subdirectory (0/, 1/, 2/, … indexed by
// session order within the checkpoint). The raw transcript (raw_transcript) and its
// content hash (raw_transcript_hash.txt) go to /full/current, not here.
func (s *V2GitStore) writeMainSessionToSubdirectory(opts WriteCommittedOptions, sessionPath string, entries map[string]object.TreeEntry) (SessionFilePaths, error) {
func (s *V2GitStore) writeMainSessionToSubdirectory(ctx context.Context, opts WriteCommittedOptions, sessionPath string, entries map[string]object.TreeEntry) (SessionFilePaths, error) {
filePaths := SessionFilePaths{}

// Clear existing entries at this session path
Expand All @@ -903,7 +903,7 @@ func (s *V2GitStore) writeMainSessionToSubdirectory(opts WriteCommittedOptions,

// Write prompts
if len(opts.Prompts) > 0 {
promptContent := redact.String(JoinPrompts(opts.Prompts))
promptContent := redactedJoinedPrompts(ctx, opts.Prompts, opts.PromptsRedacted)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return filePaths, err
Expand Down
24 changes: 21 additions & 3 deletions cmd/entire/cli/review/manifest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@ const manifestTokenTestAgentType agenttypes.AgentType = "Review Token Test"

func TestHydrateReviewSummaryTokensFromStates_PopulatesTokensFromSessionState(t *testing.T) {
t.Parallel()
started := time.Date(2026, 5, 8, 10, 0, 0, 0, time.UTC)
// Time-relative so this test doesn't go stale: session.StateStore.Load
// auto-deletes sessions whose StartedAt is older than 7 days
// (StaleSessionThreshold), and a hardcoded fixed date silently starts
// failing once the calendar clock crosses that threshold. Use "an hour
// ago" so we exercise the 5-second jitter check inside
// matchReviewSessionState while staying well inside the staleness window.
started := time.Now().UTC().Add(-time.Hour)
summary := reviewtypes.RunSummary{
StartedAt: started,
AgentRuns: []reviewtypes.AgentRun{
Expand Down Expand Up @@ -67,7 +73,13 @@ func TestHydrateReviewSummaryTokensFromStates_FallsBackToTranscript(t *testing.T
return manifestTokenTestAgent{}, nil
}

started := time.Date(2026, 5, 8, 10, 0, 0, 0, time.UTC)
// Time-relative so this test doesn't go stale: session.StateStore.Load
// auto-deletes sessions whose StartedAt is older than 7 days
// (StaleSessionThreshold), and a hardcoded fixed date silently starts
// failing once the calendar clock crosses that threshold. Use "an hour
// ago" so we exercise the 5-second jitter check inside
// matchReviewSessionState while staying well inside the staleness window.
started := time.Now().UTC().Add(-time.Hour)
tmp := t.TempDir()
transcriptPath := filepath.Join(tmp, "review.jsonl")
transcript := "review transcript\n"
Expand Down Expand Up @@ -112,7 +124,13 @@ func TestReviewSummaryTokenEnricher_LoadsCurrentSessionState(t *testing.T) {
if err != nil {
t.Fatalf("NewStateStore: %v", err)
}
started := time.Date(2026, 5, 8, 10, 0, 0, 0, time.UTC)
// Time-relative so this test doesn't go stale: session.StateStore.Load
// auto-deletes sessions whose StartedAt is older than 7 days
// (StaleSessionThreshold), and a hardcoded fixed date silently starts
// failing once the calendar clock crosses that threshold. Use "an hour
// ago" so we exercise the 5-second jitter check inside
// matchReviewSessionState while staying well inside the staleness window.
started := time.Now().UTC().Add(-time.Hour)
if err := store.Save(ctx, &session.State{
SessionID: "codex-session-token",
Kind: session.KindAgentReview,
Expand Down
Loading
Loading