diff --git a/e2e/entire/entire.go b/e2e/entire/entire.go index 2a807412d3..d5d524660e 100644 --- a/e2e/entire/entire.go +++ b/e2e/entire/entire.go @@ -52,6 +52,14 @@ func Doctor(t *testing.T, dir string) string { return run(t, dir, "doctor", "--force") } +// DoctorOutput runs `entire doctor --force` and returns the captured +// output, ignoring the exit code. Useful for preflight scans where the +// caller wants to grep for specific findings without failing on +// unrelated non-zero exits (e.g. v2 generation health drift). +func DoctorOutput(dir string) (string, error) { + return runOutput(dir, "doctor", "--force") +} + // CleanDryRun runs `entire clean --dry-run` and returns the output. func CleanDryRun(t *testing.T, dir string) string { t.Helper() diff --git a/e2e/testutil/preflight_test.go b/e2e/testutil/preflight_test.go new file mode 100644 index 0000000000..70a568ce23 --- /dev/null +++ b/e2e/testutil/preflight_test.go @@ -0,0 +1,57 @@ +package testutil + +import "testing" + +func TestFindHookDrift(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + out string + wantOK bool + wantMatch string + }{ + { + name: "clean output passes", + out: "āœ“ Metadata branches: OK\nāœ“ Codex hook trust: OK\nNo stuck sessions found.\n", + wantOK: false, + }, + { + name: "stale hooks file flagged", + out: "Codex hooks: OUT OF DATE\n 1 hook(s) the CLI installs today aren't declared in .codex/hooks.json:\n", + wantOK: true, + wantMatch: "OUT OF DATE", + }, + { + name: "trust review flagged", + out: "Codex hook trust: REVIEW NEEDED\n 1 hook(s) declared in .codex/hooks.json have no trusted_hash entry yet:\n", + wantOK: true, + wantMatch: "REVIEW NEEDED", + }, + { + name: "unrelated v2 generation issues do not trip preflight", + out: "v2 generations: 3 issue(s) found\nError: v2 generation health check failed\n", + wantOK: false, + }, + { + name: "first matching marker wins", + out: "Codex hooks: OUT OF DATE\n...\nCodex hook trust: REVIEW NEEDED\n", + wantOK: true, + wantMatch: "OUT OF DATE", + }, + } + + for _, tc := range cases { + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + marker, ok := findHookDrift(tc.out) + if ok != tc.wantOK { + t.Fatalf("findHookDrift returned ok=%v, want %v\noutput: %q", ok, tc.wantOK, tc.out) + } + if marker != tc.wantMatch { + t.Fatalf("findHookDrift returned %q, want %q", marker, tc.wantMatch) + } + }) + } +} diff --git a/e2e/testutil/repo.go b/e2e/testutil/repo.go index 1f363a9eea..e091b4cf9f 100644 --- a/e2e/testutil/repo.go +++ b/e2e/testutil/repo.go @@ -179,9 +179,49 @@ func SetupRepo(t *testing.T, agent agents.Agent) *RepoState { } }) + verifyDoctorClean(t, dir, artDir) + return state } +// hookDriftMarkers are doctor output substrings that signal an +// actionable hook-installation problem. Scoped narrowly so unrelated +// findings (e.g. v2 generation health on dev machines) don't trip +// preflight. +var hookDriftMarkers = []string{ + "OUT OF DATE", // codex hooks.json missing a canonical event + "REVIEW NEEDED", // codex hook trust gap +} + +// verifyDoctorClean runs `entire doctor --force` and fails the test if +// the output contains any hook-drift marker. The full output is dumped +// to /doctor-preflight.log either way so failure traces include +// the diagnosis. Run after `entire enable` and any agent-specific +// post-enable tweaks — drift here means the test would fail later in +// confusing ways. +func verifyDoctorClean(t *testing.T, dir, artDir string) { + t.Helper() + out, _ := entire.DoctorOutput(dir) + if artDir != "" { + _ = os.WriteFile(filepath.Join(artDir, "doctor-preflight.log"), []byte(out), 0o644) + } + if marker, ok := findHookDrift(out); ok { + t.Fatalf("preflight: entire doctor reports hook drift (%q):\n%s", marker, out) + } +} + +// findHookDrift returns the first hookDriftMarkers substring present in +// out, or "" + false when none match. Pulled out so the marker contract +// is unit-testable without spawning the entire binary. +func findHookDrift(out string) (string, bool) { + for _, marker := range hookDriftMarkers { + if strings.Contains(out, marker) { + return marker, true + } + } + return "", false +} + // ApplySuiteCheckpointsMode configures an arbitrary repo for the current // suite-wide E2E checkpoints mode. Useful for repos created outside SetupRepo, // such as fresh clones in remote-resume scenarios.