From 76e558734e2c6222f5f03dd46f2d9a624744d34a Mon Sep 17 00:00:00 2001 From: Sam Estep Date: Mon, 25 May 2026 09:27:11 -0400 Subject: [PATCH 1/6] List releases via Git instead of S3 --- src/main.rs | 172 +++++++++++++++++++++++----------------------------- 1 file changed, 77 insertions(+), 95 deletions(-) diff --git a/src/main.rs b/src/main.rs index 279bc91..a51bbf2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -437,6 +437,14 @@ impl Cache { cmd } + /// Like `git`, but with lazy fetch enabled. Only safe to call from the `fetch` subcommand, + /// which is the one place we allow Git to touch the network. + fn git_lazy(&self) -> Command { + let mut cmd = git(); + cmd.arg("-C").arg(self.path(CacheKey::Git)); + cmd + } + fn sha(&self, rev: &str) -> anyhow::Result { let output = self .git() @@ -531,40 +539,6 @@ impl Cache { } } -/// Query the nix-channels bucket to determine the next release. -async fn prerelease() -> anyhow::Result { - let config = aws_config::defaults(BehaviorVersion::latest()) - .no_credentials() - .region("us-east-1") - .load() - .await; - let s3 = s3::Client::new(&config); - let unstable = [ - Branch::NixpkgsUnstable, - Branch::NixosUnstableSmall, - Branch::NixosUnstable, - ]; - let outputs = try_join_all(unstable.map(|channel| { - s3.get_object() - .bucket("nix-channels") - .key(channel.to_string()) - .send() - })) - .await?; - let mut release = None; - let re = Regex::new(r"(\d\d\.\d\d)pre\d+\.\w+$").unwrap(); - for (output, channel) in outputs.into_iter().zip(unstable) { - let Some(url) = output.website_redirect_location else { - bail!("no redirect URL found for {channel}"); - }; - let Some(caps) = re.captures(&url) else { - bail!("failed to find prerelease version in {url}"); - }; - release = release.max(Some(caps[1].parse().unwrap())); - } - Ok(release.unwrap()) -} - struct PrefixId { re: Regex, } @@ -600,52 +574,44 @@ impl Remote { Self { cache, s3 } } - async fn releases(&self) -> anyhow::Result> { + fn releases(&self) -> anyhow::Result> { + // Stable releases are derived from the `release-*` branches in Nixpkgs, and the current + // prerelease is the version in `lib/.version` on `master`. Both come from Git rather than + // S3 so that we can recognize a new release as soon as branch-off happens, without having + // to wait for the corresponding beta channel to publish its first build. + let oldest = Release { year: 16, month: 9 }; let mut releases = BTreeSet::new(); - for (start, re) in [ - ( - "nixos/", - Regex::new(r"^nixos/(\d\d\.\d\d)(-small)?/$").unwrap(), - ), - ( - "nixpkgs/", - Regex::new(r"^nixpkgs/(\d\d\.\d\d)-darwin/$").unwrap(), - ), - ] { - let mut continuation_token = None; - loop { - let output = self - .s3 - .list_objects_v2() - .bucket(BUCKET) - .prefix(start) - .delimiter("/") - .set_continuation_token(continuation_token) - .send() - .await?; - let prefixes = output - .common_prefixes - .unwrap_or_default() - .into_iter() - .map(|item| item.prefix.ok_or_else(|| anyhow!("missing prefix"))) - .collect::>>()?; - for prefix in prefixes { - if let Some(caps) = re.captures(&prefix) { - let release: Release = caps[1].parse().unwrap(); - // Omit earlier releases: the bucket has no `git-revision` objects for them. - let oldest = Release { year: 16, month: 9 }; - if release >= oldest { - releases.insert(release); - } - } - } - match output.next_continuation_token { - Some(token) => continuation_token = Some(token), - None => break, - }; + let output = self + .cache + .git() + .args([ + "for-each-ref", + "--format=%(refname:strip=2)", + "refs/heads/release-*", + ]) + .output()?; + if !output.status.success() { + bail!("failed to list Nixpkgs release branches"); + } + let re = Regex::new(r"^release-(\d\d\.\d\d)$").unwrap(); + for line in String::from_utf8(output.stdout)?.lines() { + if let Some(caps) = re.captures(line) + && let Ok(release) = caps[1].parse::() + && release >= oldest + { + releases.insert(release); } } - releases.insert(prerelease().await?); + let output = self + .cache + .git_lazy() + .args(["show", "master:lib/.version"]) + .output()?; + if !output.status.success() { + bail!("failed to read lib/.version from Nixpkgs master"); + } + let prerelease: Release = String::from_utf8(output.stdout)?.trim().parse()?; + releases.insert(prerelease); Ok(releases.into_iter().collect()) } @@ -670,14 +636,28 @@ impl Remote { bail!("unexpected extra Git output"); }; let sha = match line.parse() { - Ok(sha) => sha, + Ok(sha) => Some(sha), Err(_) => { let key = format!("{prefix}git-revision"); let output = self.s3.get_object().bucket(BUCKET).key(key).send().await?; - String::from_utf8(output.body.collect().await?.to_vec())?.parse()? + let sha: Sha = + String::from_utf8(output.body.collect().await?.to_vec())?.parse()?; + // If the full SHA isn't in our local Git clone, this S3 build references a + // commit that landed after our `git fetch`. Discard it so the cache stays + // consistent with the local Git mirror and isn't ahead of it. + let exists = self + .cache + .git() + .args(["cat-file", "-e", &sha.to_string()]) + .stderr(Stdio::null()) + .status()? + .success(); + exists.then_some(sha) } }; - callback(sha, prefix); + if let Some(sha) = sha { + callback(sha, prefix); + } } } Ok(()) @@ -1216,8 +1196,24 @@ async fn main() -> anyhow::Result<()> { let mut remote = Remote::new(cache).await; + // Fetch from Git before reading anything else so that every subsequent step + // (release detection, channel fetching, listing `master` commits) sees a single + // point-in-time view of Nixpkgs. Any commits S3 publishes after this point get + // discarded by `Remote::git_revisions` to keep the cache consistent with the local + // Git mirror. + let status = remote + .cache + .git() + .args(["fetch", "--no-show-forced-updates"]) + .status()?; + // Without the `--no-show-forced-updates` flag, Git spends a lot of time figuring out + // that all the updates to refs/pull/*/head and refs/pull/*/merge were forced. + if !status.success() { + bail!("failed to fetch from Git"); + } + remote.cache.releases = { - let releases = remote.releases().await?; + let releases = remote.releases()?; let mut lines = String::new(); for release in &releases { writeln!(&mut lines, "{release}")?; @@ -1233,20 +1229,6 @@ async fn main() -> anyhow::Result<()> { ) .await?; - // We fetch from Git after fetching from S3 so that, once we're done, all the commit - // hashes we got from S3 should also be in our local Git clone. - let status = remote - .cache - .git() - .args(["fetch", "--no-show-forced-updates"]) - .status()?; - // Without the `--no-show-forced-updates` flag, Git prints spends a lot of time figuring - // out that all the updates to refs/pull/*/head and refs/pull/*/merge were forced. - if !status.success() { - bail!("failed to fetch from Git"); - } - - // List `master` commits only after `git fetch` so that we don't miss any new ones. let output = remote .cache .git() From 983253708972e5197f3d6fe405919a66d18cc37d Mon Sep 17 00:00:00 2001 From: Sam Estep Date: Mon, 25 May 2026 10:43:19 -0400 Subject: [PATCH 2/6] Don't `git fetch` after `git clone` --- src/main.rs | 61 +++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/src/main.rs b/src/main.rs index a51bbf2..5b64efa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1169,26 +1169,43 @@ async fn main() -> anyhow::Result<()> { let last_fetched = now(); let cache = match cache_result { - Ok(mut cache) => { - cache.last_fetched = last_fetched; - cache - } - Err(PartialCache { dir, missing_git }) => { + Err(PartialCache { dir, missing_git }) if missing_git => { let cache = Cache { dir, last_fetched, releases: Vec::new(), }; - if missing_git { - let repo = "https://github.com/NixOS/nixpkgs.git"; - // We shouldn't need any trees or blobs, only history information. - let status = git() - .args(["clone", "--mirror", "--filter=tree:0", repo]) - .arg(cache.path(CacheKey::Git)) - .status()?; - if !status.success() { - bail!("failed to clone {repo}"); + let repo = "https://github.com/NixOS/nixpkgs.git"; + // Other than `.lib/version`, we don't need any trees or blobs. + let status = git() + .args(["clone", "--mirror", "--filter=tree:0", repo]) + .arg(cache.path(CacheKey::Git)) + .status()?; + if !status.success() { + bail!("failed to clone {repo}"); + } + cache + } + _ => { + let cache = match cache_result { + Ok(mut cache) => { + cache.last_fetched = last_fetched; + cache } + Err(PartialCache { dir, .. }) => Cache { + dir, + last_fetched, + releases: Vec::new(), + }, + }; + let status = cache + .git() + .args(["fetch", "--no-show-forced-updates"]) + .status()?; + // Without `--no-show-forced-updates`, Git spends a lot of time figuring out + // that all the updates to refs/pull/*/head and refs/pull/*/merge were forced. + if !status.success() { + bail!("failed to fetch from Git"); } cache } @@ -1196,22 +1213,6 @@ async fn main() -> anyhow::Result<()> { let mut remote = Remote::new(cache).await; - // Fetch from Git before reading anything else so that every subsequent step - // (release detection, channel fetching, listing `master` commits) sees a single - // point-in-time view of Nixpkgs. Any commits S3 publishes after this point get - // discarded by `Remote::git_revisions` to keep the cache consistent with the local - // Git mirror. - let status = remote - .cache - .git() - .args(["fetch", "--no-show-forced-updates"]) - .status()?; - // Without the `--no-show-forced-updates` flag, Git spends a lot of time figuring out - // that all the updates to refs/pull/*/head and refs/pull/*/merge were forced. - if !status.success() { - bail!("failed to fetch from Git"); - } - remote.cache.releases = { let releases = remote.releases()?; let mut lines = String::new(); From c0ec904038f8531ed5afb86319d99b637456c81a Mon Sep 17 00:00:00 2001 From: Sam Estep Date: Mon, 25 May 2026 10:49:36 -0400 Subject: [PATCH 3/6] Deduplicate Git command methods --- src/main.rs | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/main.rs b/src/main.rs index 5b64efa..34ea426 100644 --- a/src/main.rs +++ b/src/main.rs @@ -425,23 +425,20 @@ impl Cache { self.dir.join(key.name()) } - fn git(&self) -> Command { + fn git_allow_lazy_fetch(&self) -> Command { let mut cmd = git(); + cmd.arg("-C").arg(self.path(CacheKey::Git)); + cmd + } + + fn git(&self) -> Command { + let mut cmd = self.git_allow_lazy_fetch(); // Because we did a blobless clone, some commands that wouldn't normally need network access // might try to lazily fetch objects. We consider it a bug for subcommands other than // `fetch` to access the network (modulo `nix flake update` as used by the `checkout` and // `bisect` subcommands), so here we disallow that. Unfortunately this seems to cause Git to // hang rather than simply exiting with an error, but it's better than nothing. - cmd.args(["--no-lazy-fetch", "-C"]) - .arg(self.path(CacheKey::Git)); - cmd - } - - /// Like `git`, but with lazy fetch enabled. Only safe to call from the `fetch` subcommand, - /// which is the one place we allow Git to touch the network. - fn git_lazy(&self) -> Command { - let mut cmd = git(); - cmd.arg("-C").arg(self.path(CacheKey::Git)); + cmd.arg("--no-lazy-fetch"); cmd } @@ -604,7 +601,7 @@ impl Remote { } let output = self .cache - .git_lazy() + .git_allow_lazy_fetch() .args(["show", "master:lib/.version"]) .output()?; if !output.status.success() { From 894cb46378512570b14946697d0e901f92b72567 Mon Sep 17 00:00:00 2001 From: Sam Estep Date: Mon, 25 May 2026 11:05:50 -0400 Subject: [PATCH 4/6] Fix typo --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 34ea426..0ce5905 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1173,7 +1173,7 @@ async fn main() -> anyhow::Result<()> { releases: Vec::new(), }; let repo = "https://github.com/NixOS/nixpkgs.git"; - // Other than `.lib/version`, we don't need any trees or blobs. + // Other than `lib/.version`, we don't need any trees or blobs. let status = git() .args(["clone", "--mirror", "--filter=tree:0", repo]) .arg(cache.path(CacheKey::Git)) From 8a51f0a4cb0018c7784cdb8682e8d0ac973db3e7 Mon Sep 17 00:00:00 2001 From: Sam Estep Date: Mon, 25 May 2026 11:06:05 -0400 Subject: [PATCH 5/6] Clean up `Remote::releases` implementation --- src/main.rs | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/main.rs b/src/main.rs index 0ce5905..f857b7d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -572,43 +572,40 @@ impl Remote { } fn releases(&self) -> anyhow::Result> { - // Stable releases are derived from the `release-*` branches in Nixpkgs, and the current - // prerelease is the version in `lib/.version` on `master`. Both come from Git rather than - // S3 so that we can recognize a new release as soon as branch-off happens, without having - // to wait for the corresponding beta channel to publish its first build. - let oldest = Release { year: 16, month: 9 }; - let mut releases = BTreeSet::new(); - let output = self + let branches = self .cache .git() .args([ "for-each-ref", - "--format=%(refname:strip=2)", + "--format=%(refname)", "refs/heads/release-*", ]) .output()?; - if !output.status.success() { + if !branches.status.success() { bail!("failed to list Nixpkgs release branches"); } - let re = Regex::new(r"^release-(\d\d\.\d\d)$").unwrap(); - for line in String::from_utf8(output.stdout)?.lines() { - if let Some(caps) = re.captures(line) - && let Ok(release) = caps[1].parse::() - && release >= oldest - { + let mut releases = BTreeSet::new(); + let re = Regex::new(r"^refs/heads/release-(\d\d\.\d\d)$").unwrap(); + for line in String::from_utf8(branches.stdout)?.lines() { + let Some(caps) = re.captures(line) else { + bail!("unexpected release branch name {line}") + }; + let release = caps[1].parse().unwrap(); + // We omit earlier releases: the bucket has no `git-revision` objects for them. + let oldest = Release { year: 16, month: 9 }; + if release >= oldest { releases.insert(release); } } - let output = self + let prerelease = self .cache .git_allow_lazy_fetch() .args(["show", "master:lib/.version"]) .output()?; - if !output.status.success() { - bail!("failed to read lib/.version from Nixpkgs master"); + if !prerelease.status.success() { + bail!("failed to check Nixpkgs prerelease version"); } - let prerelease: Release = String::from_utf8(output.stdout)?.trim().parse()?; - releases.insert(prerelease); + releases.insert(String::from_utf8(prerelease.stdout)?.parse()?); Ok(releases.into_iter().collect()) } From 8ccdd200efe263544e42f17ea8b9d9aca0cf258a Mon Sep 17 00:00:00 2001 From: Sam Estep Date: Mon, 25 May 2026 11:16:08 -0400 Subject: [PATCH 6/6] Clean up `Remote::git_revisions` --- src/main.rs | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/main.rs b/src/main.rs index f857b7d..4b3da9a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -629,28 +629,25 @@ impl Remote { let Some(prefix) = prefixes.pop_front() else { bail!("unexpected extra Git output"); }; - let sha = match line.parse() { - Ok(sha) => Some(sha), + match line.parse() { + Ok(sha) => callback(sha, prefix), Err(_) => { let key = format!("{prefix}git-revision"); let output = self.s3.get_object().bucket(BUCKET).key(key).send().await?; let sha: Sha = String::from_utf8(output.body.collect().await?.to_vec())?.parse()?; - // If the full SHA isn't in our local Git clone, this S3 build references a - // commit that landed after our `git fetch`. Discard it so the cache stays - // consistent with the local Git mirror and isn't ahead of it. - let exists = self + // Discard commits that are more recent than our `git fetch` since we won't + // be able to do everything we need to with them. + if self .cache .git() .args(["cat-file", "-e", &sha.to_string()]) - .stderr(Stdio::null()) .status()? - .success(); - exists.then_some(sha) + .success() + { + callback(sha, prefix); + } } - }; - if let Some(sha) = sha { - callback(sha, prefix); } } }