From f1c15a1ae99b2ad169236e9b9684fa1765cbb573 Mon Sep 17 00:00:00 2001
From: Fabian Meumertzheim <fabian@meumertzhe.im>
Date: Tue, 14 Oct 2025 13:12:49 -0700
Subject: [PATCH] Make naming scheme for repo contents cache entries more
 reliable

Use a UUID to name entries rather than a counter file. This has a number of advantages:

* It's simpler.
* Moving to the cache no longer requires exclusive locking on the predeclared inputs hash.
* Addresses potential repo staleness in case a counter ends up being reused after the contents cache has been removed. Consider the following situation:
    - Repo foo is fetched for the first time and moved into the repo contents cache under <cache>/<hash>/1 since there is no `counter` file.
    - The entire <cache> directory is deleted while the Bazel server is still alive.
    - On the next invocation, Skyframe FS diffing notices that <cache>/<hash>/1 is missing and marks it as non-existent.
    - Repo foo is refetched and moved into the repo contents cache under <cache>/<hash>/1 since there is again no `counter` file.
    - The `FileStateValue` for that path continues to mark it as non-existent, which causes all Skyframe dependents to still consider it to be missing even after it has been restored.
  Using a UUID prevents this since the refetched repo would symlink to a directory for which `FileStateFunction` hasn't been evaluated yet.

Work towards #26450

Closes #26683.

PiperOrigin-RevId: 819365781
Change-Id: I357246747632a36f190c8748f87511e3980bf2e4
(cherry picked from commit e66fe55aa187e8eaacbbd949d7e02e2a8cc56ce3)
---
 .../repository/cache/RepoContentsCache.java   | 78 +++++++------------
 1 file changed, 27 insertions(+), 51 deletions(-)
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java
index 2f72a2cf9ae6e5..d7faafe70bb9fb 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java
@@ -31,10 +31,10 @@
 import com.google.devtools.build.lib.vfs.Path;
 import com.google.devtools.build.lib.vfs.Symlinks;
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 import java.time.Duration;
 import java.time.Instant;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.UUID;
 import javax.annotation.Nullable;
@@ -47,12 +47,9 @@
  * transitive bzl digest, repo attrs, starlark semantics, etc). Each distinct predeclared inputs
  * hash is its own entry directory in the first layer.
  *
- * <p>Inside each entry directory are pairs of directories and files {@code <N, N.recorded_inputs>}
- * where {@code N} is an integer. The file {@code N.recorded_inputs} contains the recorded inputs
- * and their values of a cached repo, and the directory {@code N} contains the cached repo contents.
- * There is also a file named {@code counter} that stores the next available {@code N} for this
- * entry directory, and a file named {@code lock} to ensure exclusive access to the {@code counter}
- * file.
+ * <p>Inside each entry directory are pairs of directories and files {@code <UUID,
+ * UUID.recorded_inputs>}. The file {@code UUID.recorded_inputs} contains the recorded inputs and
+ * their values of a cached repo, and the directory {@code UUID} contains the cached repo contents.
  *
  * <p>On a cache hit (that is, the predeclared inputs hash matches, and recorded inputs are
  * up-to-date), the recorded inputs file has its mtime updated. Cached repos whose recorded inputs
@@ -124,17 +121,19 @@ public ImmutableList<CandidateRepo> getCandidateRepos(String predeclaredInputHas
     Preconditions.checkState(path != null);
     Path entryDir = path.getRelative(predeclaredInputHash);
     try {
+      // Prefer more recently used cache entries over older ones. They're more likely to be
+      // up-to-date; plus, if a repo is force-fetched, we want to use the new repo instead of always
+      // being stuck with the old one. Since the inputs file is touched on use, we can just sort by
+      // mtime. This is slightly more complex than in runGc below as the files may be touched
+      // concurrently and we need to ensure that the equality relation is consistent.
+      var mtimes = new HashMap<Path, Long>();
       return entryDir.getDirectoryEntries().stream()
           .filter(path -> path.getBaseName().endsWith(RECORDED_INPUTS_SUFFIX))
-          // Prefer newer cache entries over older ones. They're more likely to be up-to-date; plus,
-          // if a repo is force-fetched, we want to use the new repo instead of always being stuck
-          // with the old one.
-          // To "prefer newer cache entries", we sort the entry file names by length DESC and then
-          // lexicographically DESC. This approximates sorting by converting to int and then DESC,
-          // but is defensive against non-numerically named entries.
           .sorted(
-              Comparator.comparing((Path path) -> path.getBaseName().length())
-                  .thenComparing(Path::getBaseName)
+              Comparator.comparingLong(
+                      (Path path) ->
+                          mtimes.computeIfAbsent(
+                              path, RepoContentsCache::getLastModifiedTimeOrZero))
                   .reversed())
           .map(CandidateRepo::fromRecordedInputsFile)
           .collect(toImmutableList());
@@ -163,9 +162,9 @@ public Path moveToCache(
     Preconditions.checkState(path != null);
 
     Path entryDir = path.getRelative(predeclaredInputHash);
-    String counter = getNextCounterInDir(entryDir);
-    Path cacheRecordedInputsFile = entryDir.getChild(counter + RECORDED_INPUTS_SUFFIX);
-    Path cacheRepoDir = entryDir.getChild(counter);
+    String uniqueEntryName = UUID.randomUUID().toString();
+    Path cacheRecordedInputsFile = entryDir.getChild(uniqueEntryName + RECORDED_INPUTS_SUFFIX);
+    Path cacheRepoDir = entryDir.getChild(uniqueEntryName);
 
     cacheRepoDir.deleteTree();
     cacheRepoDir.getParentDirectory().createDirectoryAndParents();
@@ -187,27 +186,6 @@ public Path moveToCache(
     return cacheRepoDir;
   }
 
-  private static String getNextCounterInDir(Path entryDir)
-      throws IOException, InterruptedException {
-    Path counterFile = entryDir.getRelative("counter");
-    // This use of FileSystemLock.get is safe since the predeclared input hash is part of entryDir's
-    // path and in particular includes the canonical repository name. This ensures that the same
-    // lock file will not be acquired concurrently by multiple threads, which isn't supported.
-    try (var lock = FileSystemLock.get(entryDir.getRelative("lock"), LockMode.EXCLUSIVE)) {
-      int c = 0;
-      if (counterFile.exists()) {
-        try {
-          c = Integer.parseInt(FileSystemUtils.readContent(counterFile, StandardCharsets.UTF_8));
-        } catch (NumberFormatException e) {
-          // ignored
-        }
-      }
-      String counter = Integer.toString(c + 1);
-      FileSystemUtils.writeContent(counterFile, StandardCharsets.UTF_8, counter);
-      return counter;
-    }
-  }
-
   public void acquireSharedLock() throws IOException, InterruptedException {
     Preconditions.checkState(path != null);
     Preconditions.checkState(sharedLock == null, "this process already has the shared lock");
@@ -273,18 +251,7 @@ private void runGc(Duration maxAge) throws InterruptedException, IOException {
       var recordedInputsFiles =
           path.getChild(dirent.getName()).getDirectoryEntries().stream()
               .filter(file -> file.getBaseName().endsWith(RECORDED_INPUTS_SUFFIX))
-              .sorted(
-                  comparingLong(
-                          (Path path) -> {
-                            try {
-                              return path.getLastModifiedTime();
-                            } catch (IOException e) {
-                              // If we can't read the mtime from the entry, it's broken and treated
-                              // as outdated.
-                              return 0;
-                            }
-                          })
-                      .reversed())
+              .sorted(comparingLong(RepoContentsCache::getLastModifiedTimeOrZero).reversed())
               .collect(toImmutableList());
       var seen = new HashSet<HashCode>();
       for (Path recordedInputsFile : recordedInputsFiles) {
@@ -305,4 +272,13 @@ private void runGc(Duration maxAge) throws InterruptedException, IOException {
       }
     }
   }
+
+  private static long getLastModifiedTimeOrZero(Path path) {
+    try {
+      return path.getLastModifiedTime();
+    } catch (IOException e) {
+      // If we can't read the mtime from the entry, it's broken and treated as outdated.
+      return 0L;
+    }
+  }
 }