diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java index 7e52ddd22f8477..5b081891b99a6b 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java @@ -1740,7 +1740,11 @@ UploadManifest buildUploadManifest(RemoteAction action, SpawnResult spawnResult) spawnResult.exitCode(), spawnResult.getStartTime(), spawnResult.getWallTimeInMs(), - /* preserveExecutableBit= */ false); + /* preserveExecutableBit= */ false, + // Preserve the historical (and somewhat inconsistent) handling of + // absolute symlinks for spawn outputs: resolve them to the file or + // directory they point to. See {@link UploadManifest#addFiles}. + /* preserveAbsoluteSymlinks= */ false); } } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java index 7111f7c2457e48..53bd2e8594b8b9 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java @@ -64,6 +64,7 @@ import java.time.Instant; import java.util.ArrayList; import java.util.Collection; +import java.util.LinkedHashSet; import java.util.List; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -317,6 +318,27 @@ private void doMaterialize(RepositoryName repo, ExtendedEventHandler reporter) // target. prefetch(walkResult.symlinks()); + // Materialize any other repos whose contents are referenced by absolute symlinks in this + // repo. The repo rule's impl is not re-executed on a cache hit, so any sibling repos that + // the original execution materialized through `rctx.path(label)` (typical for "subproject" + // style repos that mirror entries of a sibling) are not materialized again here unless we + // explicitly request it. Without this, the symlinks would dangle at the OS level. + var referencedRepoNames = new LinkedHashSet(); + for (var symlinkPath : walkResult.symlinks()) { + var target = externalFs.getPath(symlinkPath).readSymbolicLink(); + if (target.isAbsolute() + && target.startsWith(externalDirectory) + && target.segmentCount() > externalDirectorySegmentCount) { + var referencedRepoName = target.getSegment(externalDirectorySegmentCount); + if (!referencedRepoName.equals(repo.getName())) { + referencedRepoNames.add(referencedRepoName); + } + } + } + for (var referencedRepoName : referencedRepoNames) { + ensureMaterialized(RepositoryName.createUnvalidated(referencedRepoName), reporter); + } + // After the repo has been copied, atomically materialize the marker file. This ensures that the // repo doesn't have to be refetched after the next server restart. var markerFile = nativeFs.getPath(externalDirectory.getChild(repo.getMarkerFileName())); diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java index 06bc24c8b8f0f1..e7a12733bf26db 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java @@ -211,7 +211,15 @@ public void addToCache( /* exitCode= */ 0, /* startTime= */ Instant.now(), /* wallTimeInMs= */ 0, - /* preserveExecutableBit= */ true) + /* preserveExecutableBit= */ true, + // Upload absolute symlinks (e.g. those created by + // `rctx.symlink(Label(...), ...)`) as symlinks instead of + // resolving them to the file or directory they point to. + // Resolving them pulls in content from a sibling repo, and + // any relative symlinks inside that content would then be + // materialized at a location where their `..` targets no + // longer resolve. + /* preserveAbsoluteSymlinks= */ true) .upload(context, cache, reporter); } catch (ExecException | IOException e) { reporter.handle( diff --git a/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java b/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java index 7ce13a4cc4b073..33a3bd568cdf54 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java +++ b/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java @@ -94,6 +94,7 @@ public class UploadManifest { private final ActionResult.Builder result; private final boolean allowAbsoluteSymlinks; private final boolean preserveExecutableBit; + private final boolean preserveAbsoluteSymlinks; private final ConcurrentHashMap digestToFile = new ConcurrentHashMap<>(); private final ConcurrentHashMap digestToBlobs = new ConcurrentHashMap<>(); @Nullable private ActionKey actionKey; @@ -112,7 +113,8 @@ public static UploadManifest create( int exitCode, Instant startTime, int wallTimeInMs, - boolean preserveExecutableBit) + boolean preserveExecutableBit, + boolean preserveAbsoluteSymlinks) throws ExecException, IOException, InterruptedException { ActionResult.Builder result = ActionResult.newBuilder(); result.setExitCode(exitCode); @@ -125,7 +127,8 @@ public static UploadManifest create( /* allowAbsoluteSymlinks= */ cacheCapabilities .getSymlinkAbsolutePathStrategy() .equals(SymlinkAbsolutePathStrategy.Value.ALLOWED), - preserveExecutableBit); + preserveExecutableBit, + preserveAbsoluteSymlinks); manifest.addFiles(outputFiles); if (outErr != null) { manifest.setStdoutStderr(outErr); @@ -177,20 +180,39 @@ public UploadManifest( remotePathResolver, result, allowAbsoluteSymlinks, - /* preserveExecutableBit= */ false); + /* preserveExecutableBit= */ false, + /* preserveAbsoluteSymlinks= */ false); } + @VisibleForTesting public UploadManifest( DigestUtil digestUtil, RemotePathResolver remotePathResolver, ActionResult.Builder result, boolean allowAbsoluteSymlinks, boolean preserveExecutableBit) { + this( + digestUtil, + remotePathResolver, + result, + allowAbsoluteSymlinks, + preserveExecutableBit, + /* preserveAbsoluteSymlinks= */ false); + } + + public UploadManifest( + DigestUtil digestUtil, + RemotePathResolver remotePathResolver, + ActionResult.Builder result, + boolean allowAbsoluteSymlinks, + boolean preserveExecutableBit, + boolean preserveAbsoluteSymlinks) { this.digestUtil = digestUtil; this.remotePathResolver = remotePathResolver; this.result = result; this.allowAbsoluteSymlinks = allowAbsoluteSymlinks; this.preserveExecutableBit = preserveExecutableBit; + this.preserveAbsoluteSymlinks = preserveAbsoluteSymlinks; } private void setStdoutStderr(FileOutErr outErr) throws IOException { @@ -224,7 +246,9 @@ private void setStdoutStderr(FileOutErr outErr) throws IOException { * *

For historical reasons, non-dangling absolute symlinks are uploaded as the file or directory * they point to. This is inconsistent with the treatment of non-dangling relative symlinks, which - * are uploaded as such, but fixing it would now require an incompatible change. For the purposes + * are uploaded as such, but fixing it for spawn outputs would now require an incompatible change. + * Callers that need consistent symlink handling (such as the remote repo contents cache) can set + * {@code preserveAbsoluteSymlinks} to upload absolute symlinks as symlinks too. For the purposes * of this check, a looping symlink is considered dangling. * *

All files are uploaded with the executable bit set, in accordance with input Merkle trees. @@ -270,7 +294,7 @@ void addFiles(Collection files) throws ExecException, IOException, Interru continue; } if (statFollow.isFile() && !statFollow.isSpecialFile()) { - if (target.isAbsolute()) { + if (target.isAbsolute() && !preserveAbsoluteSymlinks) { // Symlink to file uploaded as a file. addFile(digestUtil.compute(file, statFollow), file, statNoFollow); } else { @@ -280,7 +304,7 @@ void addFiles(Collection files) throws ExecException, IOException, Interru continue; } if (statFollow.isDirectory()) { - if (target.isAbsolute()) { + if (target.isAbsolute() && !preserveAbsoluteSymlinks) { // Symlink to directory uploaded as a directory. addDirectory(file); } else { @@ -487,7 +511,7 @@ private void visit(Path path, Dirent.Type type) { } catch (FileSymlinkLoopException e) { // Treat a looping symlink as a dangling symlink. } - if (statFollow == null || !target.isAbsolute()) { + if (statFollow == null || !target.isAbsolute() || preserveAbsoluteSymlinks) { // Symlink uploaded as a symlink. if (target.isAbsolute()) { checkAbsoluteSymlinkAllowed(path, target); diff --git a/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java b/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java index 4cff75fd81968f..3bd758f722a6d3 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java @@ -822,7 +822,8 @@ private ActionResult upload( /* exitCode= */ 0, /* startTime= */ null, /* wallTimeInMs= */ 0, - /* preserveExecutableBit= */ false); + /* preserveExecutableBit= */ false, + /* preserveAbsoluteSymlinks= */ false); return uploadManifest.upload(context, combinedCache, NullEventHandler.INSTANCE); } diff --git a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py index 4f54dd04dff67b..cbd8f529418571 100644 --- a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py @@ -1058,6 +1058,132 @@ def testMaterializationWithDanglingExternalSymlink(self): expect_symlinks=True, watch_dep_file=False ) + def testMaterializationWithRelativeSymlinkEscapingMirroredSubdir(self): + # Mirrors the layout used by toolchains_llvm_bootstrapped where libcxx is + # created by rctx.symlink-ing each top-level entry of an inner subdirectory + # of a "raw" archive repo. The inner archive contains a relative symlink + # whose target escapes that subdirectory and points into a sibling + # directory of the raw archive (e.g. ../../../llvm/include/...). + # + # Without RRCC, accesses through the top-level absolute symlink resolve the + # deep relative symlink against the raw archive layout, so it stays valid. + # With RRCC, the top-level absolute symlinks are uploaded as resolved + # directories, so the materialized subproject has the deep relative symlink + # at a new location where its target no longer resolves. + if self.IsWindows(): + self.ScratchFile( + '.bazelrc', + [ + 'startup --windows_enable_symlinks', + ], + mode='a', + ) + self.ScratchFile( + 'MODULE.bazel', + [ + 'raw_repo_rule = use_repo_rule("//:raw_repo.bzl", "raw_repo_rule")', + 'raw_repo_rule(name = "raw_repo")', + 'my_repo_rule = use_repo_rule("//:my_repo.bzl", "my_repo_rule")', + 'my_repo_rule(name = "my_repo")', + ( + 'other_repo_rule =' + ' use_repo_rule("//:other_repo.bzl", "other_repo_rule")' + ), + 'other_repo_rule(name = "other")', + ], + ) + self.ScratchFile('BUILD.bazel') + # raw_repo contains "subproject/sub/inner_link.txt", a relative symlink + # that escapes the "subproject" directory. + self.ScratchFile( + 'raw_repo.bzl', + [ + 'def _raw_repo_impl(rctx):', + ' rctx.file("BUILD", "")', + ' rctx.file("subproject/BUILD",' + ' "exports_files([\'sub/regular.txt\',' + ' \'sub/inner_link.txt\'])")', + ' rctx.file("subproject/sub/regular.txt", "regular")', + ' rctx.file("other_dir/data.txt", "hello")', + ' res = rctx.execute(["ln", "-s",' + ' "../../other_dir/data.txt",' + ' "subproject/sub/inner_link.txt"])', + ' if res.return_code != 0:', + ' fail("ln failed: " + res.stderr)', + ' print("JUST FETCHED RAW_REPO")', + ' return rctx.repo_metadata(reproducible=True)', + 'raw_repo_rule = repository_rule(_raw_repo_impl)', + ], + ) + # my_repo mirrors raw_repo/subproject by symlinking each of its top-level + # entries. The symlinks are absolute, so when RRCC uploads my_repo they + # get resolved into the actual directory contents. + self.ScratchFile( + 'my_repo.bzl', + [ + 'def _my_repo_impl(rctx):', + ( + ' src_dir =' + ' rctx.path(Label("@raw_repo//:BUILD")).dirname.get_child(' + '"subproject")' + ), + ' for entry in src_dir.readdir():', + ' rctx.symlink(entry, entry.basename)', + ' print("JUST FETCHED MY_REPO")', + ' return rctx.repo_metadata(reproducible=True)', + 'my_repo_rule = repository_rule(_my_repo_impl)', + ], + ) + # other_repo reads the deep file through my_repo, which triggers + # materialization. It is not reproducible so it always re-runs and + # therefore always exercises the materialization path. + self.ScratchFile( + 'other_repo.bzl', + [ + 'def _other_impl(rctx):', + ' content = rctx.read(rctx.attr.deep_file)', + ' rctx.file("BUILD", "filegroup(name = \'haha\')")', + ' rctx.file("contents.txt", content)', + ' return rctx.repo_metadata()', + 'other_repo_rule = repository_rule(', + ' _other_impl,', + ' attrs = {', + ' "deep_file": attr.label(default =' + ' "@my_repo//:sub/inner_link.txt"),', + ' },', + ')', + ], + ) + + repo_dir = self.RepoDir('my_repo') + inner_link = os.path.join(repo_dir, 'sub/inner_link.txt') + + # First fetch: not cached. The raw_repo on-disk layout makes the deep + # symlink resolve through my_repo's top-level absolute symlink. + _, _, stderr = self.RunBazel(['build', '@other//:haha']) + stderr_text = '\n'.join(stderr) + self.assertIn('JUST FETCHED RAW_REPO', stderr_text) + self.assertIn('JUST FETCHED MY_REPO', stderr_text) + self.assertTrue(os.path.exists(inner_link)) + with open(inner_link) as f: + self.assertEqual(f.read(), 'hello') + + # After expunge: my_repo is cached in RRCC, not materialized yet. + self.RunBazel(['clean', '--expunge']) + + # Rebuild: my_repo is loaded from RRCC and materialized when other_repo + # resolves its deep_file label. The deep relative symlink must still + # resolve to readable content. + _, _, stderr = self.RunBazel(['build', '@other//:haha']) + stderr_text = '\n'.join(stderr) + self.assertNotIn('JUST FETCHED MY_REPO', stderr_text) + self.assertTrue( + os.path.exists(inner_link), + '%s should exist after RRCC materialization' % inner_link, + ) + with open(inner_link) as f: + self.assertEqual(f.read(), 'hello') + def testBzlFilePrefetching(self): self.ScratchFile( 'MODULE.bazel', diff --git a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py index 204f676710c940..c2eec02db2ec63 100644 --- a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py @@ -153,6 +153,112 @@ def testCachedAfterCleanExpunge(self): ) self.assertIn('JUST FETCHED', '\n'.join(stderr)) + def testMaterializationWithRelativeSymlinkEscapingMirroredSubdir(self): + # Mirrors the (RRCC-equivalent) test + # remote_repo_contents_cache_test.testMaterializationWithRelativeSymlinkEscapingMirroredSubdir. + # Unlike RRCC, LRCC sidesteps the entire problem class: `replantSymlinks` + # in `RepositoryUtils` marks a repo with cross-repo symlinks as not safe + # for local cache reuse, and `RepositoryFetchFunction` then skips + # `moveToCache` entirely. As a result the repo's impl runs on every + # invocation and the symlinks are recreated against the current output + # base, so there are never dangling references. + if self.IsWindows(): + self.skipTest('Test uses ln -s which is Unix-specific') + self.ScratchFile( + 'MODULE.bazel', + [ + 'raw_repo_rule = use_repo_rule("//:raw_repo.bzl", "raw_repo_rule")', + 'raw_repo_rule(name = "raw_repo")', + 'my_repo_rule = use_repo_rule("//:my_repo.bzl", "my_repo_rule")', + 'my_repo_rule(name = "my_repo")', + ( + 'other_repo_rule =' + ' use_repo_rule("//:other_repo.bzl", "other_repo_rule")' + ), + 'other_repo_rule(name = "other")', + ], + ) + self.ScratchFile('BUILD.bazel') + self.ScratchFile( + 'raw_repo.bzl', + [ + 'def _raw_repo_impl(rctx):', + ' rctx.file("BUILD", "")', + ' rctx.file("subproject/BUILD",' + ' "exports_files([\'sub/regular.txt\',' + ' \'sub/inner_link.txt\'])")', + ' rctx.file("subproject/sub/regular.txt", "regular")', + ' rctx.file("other_dir/data.txt", "hello")', + ' res = rctx.execute(["ln", "-s",' + ' "../../other_dir/data.txt",' + ' "subproject/sub/inner_link.txt"])', + ' if res.return_code != 0:', + ' fail("ln failed: " + res.stderr)', + ' print("JUST FETCHED RAW_REPO")', + ' return rctx.repo_metadata(reproducible=True)', + 'raw_repo_rule = repository_rule(_raw_repo_impl)', + ], + ) + self.ScratchFile( + 'my_repo.bzl', + [ + 'def _my_repo_impl(rctx):', + ' src_dir =' + ' rctx.path(Label("@raw_repo//:BUILD")).dirname.get_child(' + '"subproject")', + ' for entry in src_dir.readdir():', + ' rctx.symlink(entry, entry.basename)', + ' print("JUST FETCHED MY_REPO")', + ' return rctx.repo_metadata(reproducible=True)', + 'my_repo_rule = repository_rule(_my_repo_impl)', + ], + ) + self.ScratchFile( + 'other_repo.bzl', + [ + 'def _other_impl(rctx):', + ' content = rctx.read(rctx.attr.deep_file)', + ' rctx.file("BUILD", "filegroup(name = \'haha\')")', + ' rctx.file("contents.txt", content)', + ' return rctx.repo_metadata()', + 'other_repo_rule = repository_rule(', + ' _other_impl,', + ' attrs = {', + ' "deep_file": attr.label(default =' + ' "@my_repo//:sub/inner_link.txt"),', + ' },', + ')', + ], + ) + + repo_dir = self.repoDir('my_repo') + inner_link = os.path.join(repo_dir, 'sub/inner_link.txt') + + # First fetch: not cached. + _, _, stderr = self.RunBazel(['build', '@other//:haha']) + stderr_text = '\n'.join(stderr) + self.assertIn('JUST FETCHED RAW_REPO', stderr_text) + self.assertIn('JUST FETCHED MY_REPO', stderr_text) + self.assertTrue(os.path.exists(inner_link)) + with open(inner_link) as f: + self.assertEqual(f.read(), 'hello') + + # After expunge: raw_repo is cached (only same-repo symlinks, portable), + # but my_repo is intentionally NOT cached because its cross-repo symlinks + # are not safe to reuse from a shared cache location. The impl runs + # again, recreating the symlinks against the current external root. + self.RunBazel(['clean', '--expunge']) + _, _, stderr = self.RunBazel(['build', '@other//:haha']) + stderr_text = '\n'.join(stderr) + self.assertNotIn('JUST FETCHED RAW_REPO', stderr_text) + self.assertIn('JUST FETCHED MY_REPO', stderr_text) + self.assertTrue( + os.path.exists(inner_link), + '%s should exist after rebuild' % inner_link, + ) + with open(inner_link) as f: + self.assertEqual(f.read(), 'hello') + def testNotCachedWhenPredeclaredInputsChange(self): self.ScratchFile( 'MODULE.bazel', diff --git a/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java b/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java index fafc3f0e2635eb..7bd41839d40f70 100644 --- a/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java +++ b/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java @@ -421,7 +421,8 @@ private ActionResult execute( exitCode, startTime, (int) wallTime.toMillis(), - /* preserveExecutableBit= */ false); + /* preserveExecutableBit= */ false, + /* preserveAbsoluteSymlinks= */ false); result = manifest.upload(context, cache, NullEventHandler.INSTANCE); } catch (ExecException e) { if (errStatus == null) {