diff --git a/cmd/entire/cli/checkpoint/remote/git.go b/cmd/entire/cli/checkpoint/remote/git.go index e24cdc938..ec30e0eb8 100644 --- a/cmd/entire/cli/checkpoint/remote/git.go +++ b/cmd/entire/cli/checkpoint/remote/git.go @@ -309,6 +309,22 @@ func ResolveFetchTarget(ctx context.Context, target string) (string, error) { return url, nil } +// ResolveFilteredFetchTarget returns a fetch target suitable for an explicit +// filtered fetch. Remote names are resolved to URLs even when the repo-level +// filtered_fetches setting is disabled, because callers that pass +// --filter=blob:none must not let git persist promisor settings onto a named +// remote like origin. +func ResolveFilteredFetchTarget(ctx context.Context, target string) (string, error) { + if target == "" || IsURL(target) || isLocalPath(target) { + return target, nil + } + url, err := GetRemoteURL(ctx, target) + if err != nil { + return "", fmt.Errorf("get remote URL: %w", err) + } + return url, nil +} + // newCommand creates an exec.Cmd for a git operation that may need // checkpoint token authentication. If ENTIRE_CHECKPOINT_TOKEN is set: // - if the target in args is (or resolves to) an SSH remote, the target is diff --git a/cmd/entire/cli/checkpoint/remote/git_test.go b/cmd/entire/cli/checkpoint/remote/git_test.go index a6a995822..7bb585afc 100644 --- a/cmd/entire/cli/checkpoint/remote/git_test.go +++ b/cmd/entire/cli/checkpoint/remote/git_test.go @@ -256,6 +256,36 @@ func TestResolveFetchTarget(t *testing.T) { }) } +// Not parallel: uses t.Chdir() +func TestResolveFilteredFetchTarget_AlwaysResolvesRemoteName(t *testing.T) { + ctx := context.Background() + + tmpDir := t.TempDir() + testutil.InitRepo(t, tmpDir) + testutil.WriteFile(t, tmpDir, "f.txt", "init") + testutil.GitAdd(t, tmpDir, "f.txt") + testutil.GitCommit(t, tmpDir, "init") + + cmd := exec.CommandContext(ctx, "git", "remote", "add", "origin", "https://github.com/org/repo.git") + cmd.Dir = tmpDir + cmd.Env = testutil.GitIsolatedEnv() + require.NoError(t, cmd.Run()) + + t.Chdir(tmpDir) + + target, err := ResolveFilteredFetchTarget(ctx, "origin") + require.NoError(t, err) + assert.Equal(t, "https://github.com/org/repo.git", target) + + target, err = ResolveFilteredFetchTarget(ctx, "https://github.com/org/repo.git") + require.NoError(t, err) + assert.Equal(t, "https://github.com/org/repo.git", target) + + target, err = ResolveFilteredFetchTarget(ctx, "../repo.git") + require.NoError(t, err) + assert.Equal(t, "../repo.git", target) +} + func TestAppendCheckpointTokenEnv(t *testing.T) { t.Parallel() diff --git a/cmd/entire/cli/strategy/push_v2.go b/cmd/entire/cli/strategy/push_v2.go index 8faf55168..701824061 100644 --- a/cmd/entire/cli/strategy/push_v2.go +++ b/cmd/entire/cli/strategy/push_v2.go @@ -8,6 +8,7 @@ import ( "io" "log/slog" "os" + "os/exec" "slices" "sort" "strings" @@ -26,6 +27,8 @@ import ( ) // tryPushRef attempts to push a custom ref using an explicit refspec. +var errNoRemoteRotationRefs = errors.New("no remote rotation archive refs") + func tryPushRef(ctx context.Context, target string, refName plumbing.ReferenceName) error { ctx, cancel := context.WithTimeout(ctx, 2*time.Minute) defer cancel() @@ -392,25 +395,14 @@ func fetchAndMergeRef(ctx context.Context, target string, refName plumbing.Refer ctx, cancel := context.WithTimeout(ctx, 2*time.Minute) defer cancel() - fetchTarget, err := remote.ResolveFetchTarget(ctx, target) - if err != nil { - return fmt.Errorf("resolve fetch target: %w", err) - } - // Fetch to a temp ref tmpRefSuffix := strings.ReplaceAll(string(refName), "/", "-") tmpRefName := plumbing.ReferenceName("refs/entire-fetch-tmp/" + tmpRefSuffix) refSpec := fmt.Sprintf("+%s:%s", refName, tmpRefName) - // Recovery flattens fetched trees recursively, so it needs a complete object - // graph instead of the normal blobless sync fetch. - if output, err := remote.Fetch(ctx, remote.FetchOptions{ - Remote: fetchTarget, - RefSpecs: []string{refSpec}, - NoTags: true, - NoFilter: true, - ExtraArgs: []string{"--no-write-fetch-head"}, - }); err != nil { + // Recovery only needs commits and trees to merge checkpoint refs. Blobless + // fetches avoid pulling every transcript blob across large v2 refs. + if output, err := fetchTreeMergeRefs(ctx, target, []string{refSpec}); err != nil { return fmt.Errorf("fetch failed: %s", output) } @@ -424,9 +416,10 @@ func fetchAndMergeRef(ctx context.Context, target string, refName plumbing.Refer // Check for rotation conflict on /full/current if refName == plumbing.ReferenceName(paths.V2FullCurrentRefName) { - remoteRotationArchives, detectErr := detectRemoteRotationArchives(ctx, target, repo) - if detectErr == nil && len(remoteRotationArchives) > 0 { - return handleRotationConflict(ctx, target, fetchTarget, repo, refName, tmpRefName, remoteRotationArchives) + if err := handleRotationConflict(ctx, target, repo, refName, tmpRefName); err == nil { + return nil + } else if !errors.Is(err, errNoRemoteRotationRefs) { + return err } } @@ -485,44 +478,28 @@ func fetchAndMergeRef(ctx context.Context, target string, refName plumbing.Refer return nil } -// detectRemoteRotationArchives discovers archived generation refs on the remote -// that are missing locally or whose local ref hash differs from the remote ref -// hash. Returns them sorted ascending (oldest first). -func detectRemoteRotationArchives(ctx context.Context, target string, repo *git.Repository) ([]string, error) { - ctx, cancel := context.WithTimeout(ctx, 30*time.Second) - defer cancel() - - output, err := remote.LsRemote(ctx, target, paths.V2FullRefPrefix+"*") +func fetchTreeMergeRefs(ctx context.Context, target string, refSpecs []string) ([]byte, error) { + fetchTarget, err := remote.ResolveFilteredFetchTarget(ctx, target) if err != nil { - return nil, fmt.Errorf("ls-remote failed: %w", err) + return nil, fmt.Errorf("resolve fetch target: %w", err) } - var remoteRotationArchives []string - for line := range strings.SplitSeq(strings.TrimSpace(string(output)), "\n") { - if line == "" { - continue - } - parts := strings.Fields(line) - if len(parts) < 2 { - continue - } - refName := parts[1] - suffix := strings.TrimPrefix(refName, paths.V2FullRefPrefix) - if suffix == "current" || !checkpoint.GenerationRefPattern.MatchString(suffix) { - continue - } - if len(parts[0]) != 40 { - return nil, fmt.Errorf("invalid remote archive hash %q for %s", parts[0], refName) - } - remoteHash := plumbing.NewHash(parts[0]) - localRef, err := repo.Reference(plumbing.ReferenceName(refName), true) - if err != nil || localRef.Hash() != remoteHash { - remoteRotationArchives = append(remoteRotationArchives, suffix) - } + fetch := func(fetchCtx context.Context, extra ...string) ([]byte, error) { + args := append([]string{"--no-write-fetch-head"}, extra...) + return remote.Fetch(fetchCtx, remote.FetchOptions{ + Remote: fetchTarget, + RefSpecs: refSpecs, + NoTags: true, + NoFilter: true, + ExtraArgs: args, + }) } - sort.Strings(remoteRotationArchives) - return remoteRotationArchives, nil + output, fetchErr := fetch(ctx, "--filter=blob:none") + if fetchErr != nil { + output, fetchErr = fetch(ctx) + } + return output, fetchErr } type fetchedRemoteRotationArchive struct { @@ -558,28 +535,32 @@ func readFetchedRemoteRotationArchive(repo *git.Repository, archive string) (fet }, nil } -func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, archives []string, localCurrentHash plumbing.Hash) (fetchedRemoteRotationArchive, error) { - refSpecs := make([]string, 0, len(archives)) - archiveTmpRefs := make([]plumbing.ReferenceName, 0, len(archives)) +func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, localCurrentHash plumbing.Hash) (fetchedRemoteRotationArchive, error) { + refSpec := fmt.Sprintf("+%s*:%s*", paths.V2FullRefPrefix, archiveTmpRefPrefix()) - for _, archive := range archives { - archiveRefName := plumbing.ReferenceName(paths.V2FullRefPrefix + archive) - archiveTmpRef := archiveTmpRefName(archive) - refSpecs = append(refSpecs, fmt.Sprintf("+%s:%s", archiveRefName, archiveTmpRef)) - archiveTmpRefs = append(archiveTmpRefs, archiveTmpRef) - } - - // These archive commits are read immediately through go-git for tree - // flattening, so fetch the complete refs rather than blobless packfiles. - if output, fetchErr := remote.Fetch(ctx, remote.FetchOptions{ - Remote: fetchTarget, - RefSpecs: refSpecs, - NoTags: true, - NoFilter: true, - ExtraArgs: []string{"--no-write-fetch-head"}, - }); fetchErr != nil { + if repo, openErr := OpenRepository(ctx); openErr == nil { + cleanupFetchedArchiveTmpRefs(repo, archiveTmpRefs(repo)) + } + + // Probe with --filter=blob:none; rotation recovery only needs commit and + // tree objects. Fall back to an unfiltered fetch if the server refuses. + fetch := func(fetchCtx context.Context, extra ...string) ([]byte, error) { + args := append([]string{"--no-write-fetch-head"}, extra...) + return remote.Fetch(fetchCtx, remote.FetchOptions{ + Remote: fetchTarget, + RefSpecs: []string{refSpec}, + NoTags: true, + NoFilter: true, + ExtraArgs: args, + }) + } + output, fetchErr := fetch(ctx, "--filter=blob:none") + if fetchErr != nil { + output, fetchErr = fetch(ctx) + } + if fetchErr != nil { if repo, openErr := OpenRepository(ctx); openErr == nil { - cleanupFetchedArchiveTmpRefs(repo, archiveTmpRefs) + cleanupFetchedArchiveTmpRefs(repo, archiveTmpRefs(repo)) } return fetchedRemoteRotationArchive{}, fmt.Errorf("fetch archived generations failed: %s", output) } @@ -588,106 +569,124 @@ func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, if err != nil { return fetchedRemoteRotationArchive{}, fmt.Errorf("reopen repository after fetching archived generations: %w", err) } - tmpRefsToCleanup := archiveTmpRefs + tmpRefsToCleanup := archiveTmpRefs(repo) defer func() { cleanupFetchedArchiveTmpRefs(repo, tmpRefsToCleanup) }() - localCurrentAncestors, ok := currentGenerationAncestors(ctx, repo, localCurrentHash) - if !ok { - return fetchedRemoteRotationArchive{}, errors.New("failed to read local /full/current history") + archives := fetchedArchiveSuffixes(repo, tmpRefsToCleanup) + if len(archives) == 0 { + return fetchedRemoteRotationArchive{}, errNoRemoteRotationRefs } + + // Bound the ancestry walk so disjoint-history repos fail fast instead of + // scanning every archive. A future /full/root anchor replaces this. + walkCtx, cancelWalk := context.WithTimeout(ctx, rotationAncestryWalkBudget) + defer cancelWalk() + walked := 0 for _, archive := range archives { + if walkCtx.Err() != nil { + break + } + walked++ fetched, err := readFetchedRemoteRotationArchive(repo, archive) if err != nil { return fetchedRemoteRotationArchive{}, err } - if archiveSharesHistoryWithCurrentGeneration(ctx, repo, localCurrentAncestors, fetched.ref.Hash()) { + if commitsShareHistory(walkCtx, localCurrentHash, fetched.ref.Hash()) { tmpRefsToCleanup = removeRef(tmpRefsToCleanup, fetched.tmpRefName) return fetched, nil } } - return fetchedRemoteRotationArchive{}, errors.New("no remote archive shares history with local /full/current") + err = errors.New("no remote archive shares history with local /full/current") + if errors.Is(walkCtx.Err(), context.DeadlineExceeded) { + err = fmt.Errorf("%w (walk budget exhausted after %d/%d archives)", err, walked, len(archives)) + } else if walkCtx.Err() != nil { + err = fmt.Errorf("%w: %w", err, walkCtx.Err()) + } + return fetchedRemoteRotationArchive{}, err } +// rotationAncestryWalkBudget caps the per-archive ancestry walk. var so tests can lower it. +var rotationAncestryWalkBudget = 1 * time.Second //nolint:gochecknoglobals // test override + func archiveTmpRefName(archive string) plumbing.ReferenceName { - return plumbing.ReferenceName("refs/entire-fetch-tmp/archive-" + archive) + return plumbing.ReferenceName(archiveTmpRefPrefix() + archive) } -func cleanupFetchedArchiveTmpRefs(repo *git.Repository, tmpRefs []plumbing.ReferenceName) { - for _, tmpRef := range tmpRefs { - _ = repo.Storer.RemoveReference(tmpRef) //nolint:errcheck // cleanup is best-effort - } +func archiveTmpRefPrefix() string { + return "refs/entire-fetch-tmp/archive-" } -func currentGenerationAncestors(ctx context.Context, repo *git.Repository, currentHash plumbing.Hash) (map[plumbing.Hash]struct{}, bool) { - ancestors := make(map[plumbing.Hash]struct{}) - iter, err := repo.Log(&git.LogOptions{From: currentHash}) +func archiveTmpRefs(repo *git.Repository) []plumbing.ReferenceName { + iter, err := repo.References() if err != nil { - return nil, false + return nil } defer iter.Close() - count := 0 - _ = iter.ForEach(func(c *object.Commit) error { //nolint:errcheck // Best-effort search, errors are non-fatal - if err := ctx.Err(); err != nil { - return err //nolint:wrapcheck // Propagating context cancellation - } - count++ - if count > MaxCommitTraversalDepth { - return errStop + var refs []plumbing.ReferenceName + prefix := archiveTmpRefPrefix() + _ = iter.ForEach(func(ref *plumbing.Reference) error { //nolint:errcheck // Best-effort cleanup/listing + if strings.HasPrefix(ref.Name().String(), prefix) { + refs = append(refs, ref.Name()) } - ancestors[c.Hash] = struct{}{} return nil }) - return ancestors, true + return refs } -func archiveSharesHistoryWithCurrentGeneration(ctx context.Context, repo *git.Repository, currentAncestors map[plumbing.Hash]struct{}, archiveHash plumbing.Hash) bool { - if _, ok := currentAncestors[archiveHash]; ok { - return true +func fetchedArchiveSuffixes(repo *git.Repository, tmpRefs []plumbing.ReferenceName) []string { + archives := make([]string, 0, len(tmpRefs)) + prefix := archiveTmpRefPrefix() + for _, tmpRef := range tmpRefs { + suffix, ok := strings.CutPrefix(tmpRef.String(), prefix) + if !ok || suffix == "current" || !checkpoint.GenerationRefPattern.MatchString(suffix) { + continue + } + if _, err := repo.Reference(tmpRef, true); err != nil { + continue + } + archives = append(archives, suffix) } + sort.Sort(sort.Reverse(sort.StringSlice(archives))) + return archives +} - iter, err := repo.Log(&git.LogOptions{From: archiveHash}) - if err != nil { - return false +func cleanupFetchedArchiveTmpRefs(repo *git.Repository, tmpRefs []plumbing.ReferenceName) { + for _, tmpRef := range tmpRefs { + _ = repo.Storer.RemoveReference(tmpRef) //nolint:errcheck // cleanup is best-effort } - defer iter.Close() +} - found := false - count := 0 - _ = iter.ForEach(func(c *object.Commit) error { //nolint:errcheck // Best-effort search, errors are non-fatal - if err := ctx.Err(); err != nil { - return err //nolint:wrapcheck // Propagating context cancellation - } - count++ - if count > MaxCommitTraversalDepth { - return errStop - } - if _, ok := currentAncestors[c.Hash]; ok { - found = true - return errStop - } - return nil - }) - return found +func commitsShareHistory(ctx context.Context, a, b plumbing.Hash) bool { + if a == b { + return true + } + + cmd := exec.CommandContext(ctx, "git", "merge-base", a.String(), b.String()) + return cmd.Run() == nil } // handleRotationConflict handles the case where remote /full/current was rotated. // Merges local /full/current into the related remote archived generation to avoid // duplicating checkpoint data, then adopts remote's /full/current as local. -func handleRotationConflict(ctx context.Context, target, fetchTarget string, repo *git.Repository, refName, tmpRefName plumbing.ReferenceName, remoteRotationArchives []string) error { +func handleRotationConflict(ctx context.Context, target string, repo *git.Repository, refName, tmpRefName plumbing.ReferenceName) error { localRef, err := repo.Reference(refName, true) if err != nil { return fmt.Errorf("failed to get local ref: %w", err) } - archive, err := fetchRelatedRemoteRotationArchive(ctx, fetchTarget, remoteRotationArchives, localRef.Hash()) + archiveFetchTarget, err := remote.ResolveFilteredFetchTarget(ctx, target) + if err != nil { + return fmt.Errorf("resolve archive fetch target: %w", err) + } + archive, err := fetchRelatedRemoteRotationArchive(ctx, archiveFetchTarget, localRef.Hash()) if err != nil { return fmt.Errorf("failed to find related archived generation: %w", err) } - // fetchRelatedRemoteRotationArchive fetches via git CLI, so continue with - // the fresh go-git handle it used to avoid stale pack indexes. + // fetchRelatedRemoteRotationArchive fetches via git CLI, so continue with a + // go-git handle opened after that fetch. repo = archive.repo defer func() { _ = repo.Storer.RemoveReference(archive.tmpRefName) //nolint:errcheck // cleanup is best-effort diff --git a/cmd/entire/cli/strategy/push_v2_test.go b/cmd/entire/cli/strategy/push_v2_test.go index 59722c1bb..8e4ce8ab9 100644 --- a/cmd/entire/cli/strategy/push_v2_test.go +++ b/cmd/entire/cli/strategy/push_v2_test.go @@ -202,12 +202,14 @@ func refContainsV2Checkpoint(t *testing.T, repo *git.Repository, refName plumbin func TestFetchAndMergeRef_MergesTrees(t *testing.T) { ctx := context.Background() refName := plumbing.ReferenceName(paths.V2MainRefName) + srcCP := id.MustCheckpointID("aabbccddeeff") + localCP := id.MustCheckpointID("112233445566") // Create source repo with a v2 /main ref containing one checkpoint srcDir := setupRepoWithV2Ref(t) srcRepo, err := git.PlainOpen(srcDir) require.NoError(t, err) - writeV2Checkpoint(t, srcRepo, id.MustCheckpointID("aabbccddeeff"), "session-src") + writeV2Checkpoint(t, srcRepo, srcCP, "session-src") // Create a bare "remote" and push src to it bareDir := t.TempDir() @@ -215,22 +217,33 @@ func TestFetchAndMergeRef_MergesTrees(t *testing.T) { initCmd.Dir = bareDir initCmd.Env = testutil.GitIsolatedEnv() require.NoError(t, initCmd.Run()) + enableFilteredFetchServingForTest(t, bareDir) + bareURL := "file://" + bareDir pushCmd := exec.CommandContext(ctx, "git", "push", bareDir, string(refName)+":"+string(refName)) pushCmd.Dir = srcDir require.NoError(t, pushCmd.Run()) + bareRepo, err := git.PlainOpen(bareDir) + require.NoError(t, err) + remoteBlobHash := firstBlobInV2CheckpointShard(t, bareRepo, refName, srcCP) + // Create a local repo that also has the ref but with a different checkpoint localDir := setupRepoWithV2Ref(t) + addOrigin := exec.CommandContext(ctx, "git", "remote", "add", "origin", bareURL) + addOrigin.Dir = localDir + addOrigin.Env = testutil.GitIsolatedEnv() + out, err := addOrigin.CombinedOutput() + require.NoError(t, err, "add origin failed: %s", out) localRepo, err := git.PlainOpen(localDir) require.NoError(t, err) - writeV2Checkpoint(t, localRepo, id.MustCheckpointID("112233445566"), "session-local") + writeV2Checkpoint(t, localRepo, localCP, "session-local") t.Chdir(localDir) // Fetch and merge — should combine both checkpoints - err = fetchAndMergeRef(ctx, bareDir, refName) + err = fetchAndMergeRef(ctx, "origin", refName) require.NoError(t, err) // Verify merged tree contains both checkpoints on /main @@ -259,6 +272,16 @@ func TestFetchAndMergeRef_MergesTrees(t *testing.T) { } assert.True(t, hasAA, "merged tree should contain checkpoint aabbccddeeff") assert.True(t, has11, "merged tree should contain checkpoint 112233445566") + require.Error(t, mergedRepo.Storer.HasEncodedObject(remoteBlobHash), + "remote checkpoint blob must NOT be locally available; recovery should avoid blob downloads") + assert.Empty(t, gitConfigValueForStrategyTest(t, localDir, "remote.origin.promisor")) + assert.Empty(t, gitConfigValueForStrategyTest(t, localDir, "remote.origin.partialclonefilter")) + + require.NoError(t, tryPushRef(ctx, bareURL, refName)) + bareRepo, err = git.PlainOpen(bareDir) + require.NoError(t, err) + assert.True(t, refContainsV2Checkpoint(t, bareRepo, refName, srcCP)) + assert.True(t, refContainsV2Checkpoint(t, bareRepo, refName, localCP)) } // TestPushV2Refs_SkipsUnrecordedArchiveRefs verifies that pushV2Refs pushes @@ -809,40 +832,6 @@ func TestPushV2Refs_RepeatedLocalRotationsBeforePushPublishesAllArchives(t *test "current should contain current checkpoint") } -func TestDetectRemoteRotationArchives_IncludesSameNameDifferentHash(t *testing.T) { - t.Parallel() - ctx := context.Background() - archiveRef := plumbing.ReferenceName(paths.V2FullRefPrefix + "0000000000001") - - localDir := setupRepoWithV2Ref(t) - localRepo, err := git.PlainOpen(localDir) - require.NoError(t, err) - localHash := writeV2ArchiveRef(t, localRepo, archiveRef, "local archive") - - remoteDir := setupRepoWithV2Ref(t) - remoteRepo, err := git.PlainOpen(remoteDir) - require.NoError(t, err) - remoteHash := writeV2ArchiveRef(t, remoteRepo, archiveRef, "remote archive") - require.NotEqual(t, localHash, remoteHash) - - bareDir := t.TempDir() - initCmd := exec.CommandContext(ctx, "git", "init", "--bare") - initCmd.Dir = bareDir - initCmd.Env = testutil.GitIsolatedEnv() - out, err := initCmd.CombinedOutput() - require.NoError(t, err, "git init --bare failed: %s", out) - - pushArchive := exec.CommandContext(ctx, "git", "push", bareDir, - string(archiveRef)+":"+string(archiveRef)) - pushArchive.Dir = remoteDir - out, err = pushArchive.CombinedOutput() - require.NoError(t, err, "archive push failed: %s", out) - - archives, err := detectRemoteRotationArchives(ctx, bareDir, localRepo) - require.NoError(t, err) - assert.Contains(t, archives, "0000000000001") -} - func TestPrintV2PartialPushResult(t *testing.T) { t.Parallel() @@ -1206,6 +1195,8 @@ func TestFetchAndMergeRef_RemoteRotatedMultipleTimesUsesRelatedArchive(t *testin require.Error(t, err, "selected archive temp ref should be removed after rotation recovery") _, err = localRepo.Reference(archiveTmpRefName("0000000000002"), true) require.Error(t, err, "unselected archive temp ref should be removed after rotation recovery") + _, err = localRepo.Reference(archiveTmpRefName("current"), true) + require.Error(t, err, "wildcard current temp ref should be removed after rotation recovery") assert.True(t, refContainsV2Checkpoint(t, localRepo, archive1Ref, localOnlyCP), "local checkpoint from the first generation should be merged into archive 1") @@ -1219,3 +1210,137 @@ func TestFetchAndMergeRef_RemoteRotatedMultipleTimesUsesRelatedArchive(t *testin assert.True(t, refContainsV2Checkpoint(t, bareRepo, archive2Ref, remoteGen2CP), "remote generation 2 checkpoint should remain in archive 2") } + +// Not parallel: uses t.Chdir() +func TestFetchAndMergeRef_RotationConflict_BloblessProbeAvoidsArchiveBlobDownloads(t *testing.T) { + ctx := context.Background() + fullCurrentRef := plumbing.ReferenceName(paths.V2FullCurrentRefName) + archive1Ref := plumbing.ReferenceName(paths.V2FullRefPrefix + "0000000000001") + archive2Ref := plumbing.ReferenceName(paths.V2FullRefPrefix + "0000000000002") + sharedCP := id.MustCheckpointID("aabbccddeeff") + remoteGen1CP := id.MustCheckpointID("112233445566") + remoteGen2CP := id.MustCheckpointID("223344556677") + localOnlyCP := id.MustCheckpointID("ffeeddccbbaa") + + bareDir := t.TempDir() + initCmd := exec.CommandContext(ctx, "git", "init", "--bare") + initCmd.Dir = bareDir + initCmd.Env = testutil.GitIsolatedEnv() + require.NoError(t, initCmd.Run()) + enableFilteredFetchServingForTest(t, bareDir) + bareURL := "file://" + bareDir + + localDir := t.TempDir() + testutil.InitRepo(t, localDir) + testutil.WriteFile(t, localDir, "f.txt", "init") + testutil.GitAdd(t, localDir, "f.txt") + testutil.GitCommit(t, localDir, "init") + addOrigin := exec.CommandContext(ctx, "git", "remote", "add", "origin", bareURL) + addOrigin.Dir = localDir + addOrigin.Env = testutil.GitIsolatedEnv() + out, err := addOrigin.CombinedOutput() + require.NoError(t, err, "add origin failed: %s", out) + localRepo, err := git.PlainOpen(localDir) + require.NoError(t, err) + writeV2Checkpoint(t, localRepo, sharedCP, "shared-session") + + pushCurrent := exec.CommandContext(ctx, "git", "push", bareDir, + string(fullCurrentRef)+":"+string(fullCurrentRef)) + pushCurrent.Dir = localDir + require.NoError(t, pushCurrent.Run()) + + remoteDir := t.TempDir() + testutil.InitRepo(t, remoteDir) + testutil.WriteFile(t, remoteDir, "f.txt", "init") + testutil.GitAdd(t, remoteDir, "f.txt") + testutil.GitCommit(t, remoteDir, "init") + fetchCurrent := exec.CommandContext(ctx, "git", "fetch", bareDir, + "+"+string(fullCurrentRef)+":"+string(fullCurrentRef)) + fetchCurrent.Dir = remoteDir + require.NoError(t, fetchCurrent.Run()) + + remoteRepo, err := git.PlainOpen(remoteDir) + require.NoError(t, err) + writeV2Checkpoint(t, remoteRepo, remoteGen1CP, "remote-gen-1") + rotateV2CurrentForTest(t, remoteRepo, archive1Ref) + writeV2Checkpoint(t, remoteRepo, remoteGen2CP, "remote-gen-2") + rotateV2CurrentForTest(t, remoteRepo, archive2Ref) + + pushRotated := exec.CommandContext(ctx, "git", "push", "--force", bareDir, + string(fullCurrentRef)+":"+string(fullCurrentRef), + string(archive1Ref)+":"+string(archive1Ref), + string(archive2Ref)+":"+string(archive2Ref)) + pushRotated.Dir = remoteDir + out, pushErr := pushRotated.CombinedOutput() + require.NoError(t, pushErr, "push rotated state failed: %s", out) + + bareRepo, err := git.PlainOpen(bareDir) + require.NoError(t, err) + matchedBlobHash := blobAtTopLevel(t, bareRepo, archive1Ref, paths.GenerationFileName) + unmatchedBlobHash := firstBlobInV2CheckpointShard(t, bareRepo, archive2Ref, remoteGen2CP) + + writeV2Checkpoint(t, localRepo, localOnlyCP, "local-session") + + t.Chdir(localDir) + err = fetchAndMergeRef(ctx, "origin", fullCurrentRef) + require.NoError(t, err) + + localRepo, err = git.PlainOpen(localDir) + require.NoError(t, err) + + require.Error(t, localRepo.Storer.HasEncodedObject(matchedBlobHash), + "matched archive generation.json must NOT be locally available; recovery should avoid full archive top-up") + require.Error(t, localRepo.Storer.HasEncodedObject(unmatchedBlobHash), + "unmatched archive shard blob must NOT be locally available; blobless probe over-fetched") + assert.Empty(t, gitConfigValueForStrategyTest(t, localDir, "remote.origin.promisor")) + assert.Empty(t, gitConfigValueForStrategyTest(t, localDir, "remote.origin.partialclonefilter")) + + bareRepo, err = git.PlainOpen(bareDir) + require.NoError(t, err) + assert.True(t, refContainsV2Checkpoint(t, bareRepo, archive1Ref, sharedCP)) + assert.True(t, refContainsV2Checkpoint(t, bareRepo, archive1Ref, localOnlyCP)) +} + +func blobAtTopLevel(t *testing.T, repo *git.Repository, refName plumbing.ReferenceName, filename string) plumbing.Hash { + t.Helper() + ref, err := repo.Reference(refName, true) + require.NoError(t, err) + commit, err := repo.CommitObject(ref.Hash()) + require.NoError(t, err) + tree, err := commit.Tree() + require.NoError(t, err) + entry, err := tree.FindEntry(filename) + require.NoError(t, err) + return entry.Hash +} + +func firstBlobInV2CheckpointShard(t *testing.T, repo *git.Repository, refName plumbing.ReferenceName, cpID id.CheckpointID) plumbing.Hash { + t.Helper() + ref, err := repo.Reference(refName, true) + require.NoError(t, err) + commit, err := repo.CommitObject(ref.Hash()) + require.NoError(t, err) + tree, err := commit.Tree() + require.NoError(t, err) + shard, err := tree.Tree(cpID.Path()) + require.NoError(t, err) + entries := make(map[string]object.TreeEntry) + require.NoError(t, checkpoint.FlattenTree(repo, shard, "", entries)) + require.NotEmpty(t, entries, "expected at least one blob in shard %s", cpID.Path()) + for _, entry := range entries { + return entry.Hash + } + return plumbing.ZeroHash +} + +func gitConfigValueForStrategyTest(t *testing.T, dir, key string) string { + t.Helper() + cmd := exec.CommandContext(t.Context(), "git", "config", "--local", "--get", key) + cmd.Dir = dir + cmd.Env = testutil.GitIsolatedEnv() + output, err := cmd.Output() + if err != nil { + return "" + } + return strings.TrimSpace(string(output)) +}