From f1c15a1ae99b2ad169236e9b9684fa1765cbb573 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Tue, 14 Oct 2025 13:12:49 -0700 Subject: [PATCH] Make naming scheme for repo contents cache entries more reliable Use a UUID to name entries rather than a counter file. This has a number of advantages: * It's simpler. * Moving to the cache no longer requires exclusive locking on the predeclared inputs hash. * Addresses potential repo staleness in case a counter ends up being reused after the contents cache has been removed. Consider the following situation: - Repo foo is fetched for the first time and moved into the repo contents cache under //1 since there is no `counter` file. - The entire directory is deleted while the Bazel server is still alive. - On the next invocation, Skyframe FS diffing notices that //1 is missing and marks it as non-existent. - Repo foo is refetched and moved into the repo contents cache under //1 since there is again no `counter` file. - The `FileStateValue` for that path continues to mark it as non-existent, which causes all Skyframe dependents to still consider it to be missing even after it has been restored. Using a UUID prevents this since the refetched repo would symlink to a directory for which `FileStateFunction` hasn't been evaluated yet. Work towards #26450 Closes #26683. PiperOrigin-RevId: 819365781 Change-Id: I357246747632a36f190c8748f87511e3980bf2e4 (cherry picked from commit e66fe55aa187e8eaacbbd949d7e02e2a8cc56ce3) --- .../repository/cache/RepoContentsCache.java | 78 +++++++------------ 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java index 2f72a2cf9ae6e5..d7faafe70bb9fb 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java @@ -31,10 +31,10 @@ import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.Symlinks; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.UUID; import javax.annotation.Nullable; @@ -47,12 +47,9 @@ * transitive bzl digest, repo attrs, starlark semantics, etc). Each distinct predeclared inputs * hash is its own entry directory in the first layer. * - *

Inside each entry directory are pairs of directories and files {@code } - * where {@code N} is an integer. The file {@code N.recorded_inputs} contains the recorded inputs - * and their values of a cached repo, and the directory {@code N} contains the cached repo contents. - * There is also a file named {@code counter} that stores the next available {@code N} for this - * entry directory, and a file named {@code lock} to ensure exclusive access to the {@code counter} - * file. + *

Inside each entry directory are pairs of directories and files {@code }. The file {@code UUID.recorded_inputs} contains the recorded inputs and + * their values of a cached repo, and the directory {@code UUID} contains the cached repo contents. * *

On a cache hit (that is, the predeclared inputs hash matches, and recorded inputs are * up-to-date), the recorded inputs file has its mtime updated. Cached repos whose recorded inputs @@ -124,17 +121,19 @@ public ImmutableList getCandidateRepos(String predeclaredInputHas Preconditions.checkState(path != null); Path entryDir = path.getRelative(predeclaredInputHash); try { + // Prefer more recently used cache entries over older ones. They're more likely to be + // up-to-date; plus, if a repo is force-fetched, we want to use the new repo instead of always + // being stuck with the old one. Since the inputs file is touched on use, we can just sort by + // mtime. This is slightly more complex than in runGc below as the files may be touched + // concurrently and we need to ensure that the equality relation is consistent. + var mtimes = new HashMap(); return entryDir.getDirectoryEntries().stream() .filter(path -> path.getBaseName().endsWith(RECORDED_INPUTS_SUFFIX)) - // Prefer newer cache entries over older ones. They're more likely to be up-to-date; plus, - // if a repo is force-fetched, we want to use the new repo instead of always being stuck - // with the old one. - // To "prefer newer cache entries", we sort the entry file names by length DESC and then - // lexicographically DESC. This approximates sorting by converting to int and then DESC, - // but is defensive against non-numerically named entries. .sorted( - Comparator.comparing((Path path) -> path.getBaseName().length()) - .thenComparing(Path::getBaseName) + Comparator.comparingLong( + (Path path) -> + mtimes.computeIfAbsent( + path, RepoContentsCache::getLastModifiedTimeOrZero)) .reversed()) .map(CandidateRepo::fromRecordedInputsFile) .collect(toImmutableList()); @@ -163,9 +162,9 @@ public Path moveToCache( Preconditions.checkState(path != null); Path entryDir = path.getRelative(predeclaredInputHash); - String counter = getNextCounterInDir(entryDir); - Path cacheRecordedInputsFile = entryDir.getChild(counter + RECORDED_INPUTS_SUFFIX); - Path cacheRepoDir = entryDir.getChild(counter); + String uniqueEntryName = UUID.randomUUID().toString(); + Path cacheRecordedInputsFile = entryDir.getChild(uniqueEntryName + RECORDED_INPUTS_SUFFIX); + Path cacheRepoDir = entryDir.getChild(uniqueEntryName); cacheRepoDir.deleteTree(); cacheRepoDir.getParentDirectory().createDirectoryAndParents(); @@ -187,27 +186,6 @@ public Path moveToCache( return cacheRepoDir; } - private static String getNextCounterInDir(Path entryDir) - throws IOException, InterruptedException { - Path counterFile = entryDir.getRelative("counter"); - // This use of FileSystemLock.get is safe since the predeclared input hash is part of entryDir's - // path and in particular includes the canonical repository name. This ensures that the same - // lock file will not be acquired concurrently by multiple threads, which isn't supported. - try (var lock = FileSystemLock.get(entryDir.getRelative("lock"), LockMode.EXCLUSIVE)) { - int c = 0; - if (counterFile.exists()) { - try { - c = Integer.parseInt(FileSystemUtils.readContent(counterFile, StandardCharsets.UTF_8)); - } catch (NumberFormatException e) { - // ignored - } - } - String counter = Integer.toString(c + 1); - FileSystemUtils.writeContent(counterFile, StandardCharsets.UTF_8, counter); - return counter; - } - } - public void acquireSharedLock() throws IOException, InterruptedException { Preconditions.checkState(path != null); Preconditions.checkState(sharedLock == null, "this process already has the shared lock"); @@ -273,18 +251,7 @@ private void runGc(Duration maxAge) throws InterruptedException, IOException { var recordedInputsFiles = path.getChild(dirent.getName()).getDirectoryEntries().stream() .filter(file -> file.getBaseName().endsWith(RECORDED_INPUTS_SUFFIX)) - .sorted( - comparingLong( - (Path path) -> { - try { - return path.getLastModifiedTime(); - } catch (IOException e) { - // If we can't read the mtime from the entry, it's broken and treated - // as outdated. - return 0; - } - }) - .reversed()) + .sorted(comparingLong(RepoContentsCache::getLastModifiedTimeOrZero).reversed()) .collect(toImmutableList()); var seen = new HashSet(); for (Path recordedInputsFile : recordedInputsFiles) { @@ -305,4 +272,13 @@ private void runGc(Duration maxAge) throws InterruptedException, IOException { } } } + + private static long getLastModifiedTimeOrZero(Path path) { + try { + return path.getLastModifiedTime(); + } catch (IOException e) { + // If we can't read the mtime from the entry, it's broken and treated as outdated. + return 0L; + } + } }