From 283bac3688112993771def397b170c3a27d41c50 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Mon, 11 May 2026 14:57:33 -0400 Subject: [PATCH 1/2] Copy opaque.rs for forked implementation of FileEncoder --- compiler/rustc_metadata/src/rmeta/opaque.rs | 460 ++++++++++++++++++++ 1 file changed, 460 insertions(+) create mode 100644 compiler/rustc_metadata/src/rmeta/opaque.rs diff --git a/compiler/rustc_metadata/src/rmeta/opaque.rs b/compiler/rustc_metadata/src/rmeta/opaque.rs new file mode 100644 index 0000000000000..4242642c6643f --- /dev/null +++ b/compiler/rustc_metadata/src/rmeta/opaque.rs @@ -0,0 +1,460 @@ +use std::fs::File; +use std::io::{self, Write}; +use std::marker::PhantomData; +use std::ops::Range; +use std::path::{Path, PathBuf}; + +// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance. +// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727 +use crate::int_overflow::DebugStrictAdd; +use crate::leb128; +use crate::serialize::{Decodable, Decoder, Encodable, Encoder}; + +pub mod mem_encoder; + +// ----------------------------------------------------------------------------- +// Encoder +// ----------------------------------------------------------------------------- + +pub type FileEncodeResult = Result; + +pub const MAGIC_END_BYTES: &[u8] = b"rust-end-file"; + +/// The size of the buffer in `FileEncoder`. +const BUF_SIZE: usize = 64 * 1024; + +/// `FileEncoder` encodes data to file via fixed-size buffer. +/// +/// There used to be a `MemEncoder` type that encoded all the data into a +/// `Vec`. `FileEncoder` is better because its memory use is determined by the +/// size of the buffer, rather than the full length of the encoded data, and +/// because it doesn't need to reallocate memory along the way. +pub struct FileEncoder { + // The input buffer. For adequate performance, we need to be able to write + // directly to the unwritten region of the buffer, without calling copy_from_slice. 
+ // Note that our buffer is always initialized so that we can do that direct access + // without unsafe code. Users of this type write many more than BUF_SIZE bytes, so the + // initialization is approximately free. + buf: Box<[u8; BUF_SIZE]>, + buffered: usize, + flushed: usize, + file: File, + // This is used to implement delayed error handling, as described in the + // comment on `trait Encoder`. + res: Result<(), io::Error>, + path: PathBuf, + #[cfg(debug_assertions)] + finished: bool, +} + +impl FileEncoder { + pub fn new>(path: P) -> io::Result { + // File::create opens the file for writing only. When -Zmeta-stats is enabled, the metadata + // encoder rewinds the file to inspect what was written. So we need to always open the file + // for reading and writing. + let file = + File::options().read(true).write(true).create(true).truncate(true).open(&path)?; + + Ok(FileEncoder { + buf: vec![0u8; BUF_SIZE].into_boxed_slice().try_into().unwrap(), + path: path.as_ref().into(), + buffered: 0, + flushed: 0, + file, + res: Ok(()), + #[cfg(debug_assertions)] + finished: false, + }) + } + + #[inline] + pub fn position(&self) -> usize { + // Tracking position this way instead of having a `self.position` field + // means that we only need to update `self.buffered` on a write call, + // as opposed to updating `self.position` and `self.buffered`. + self.flushed.debug_strict_add(self.buffered) + } + + #[cold] + #[inline(never)] + pub fn flush(&mut self) { + #[cfg(debug_assertions)] + { + self.finished = false; + } + if self.res.is_ok() { + self.res = self.file.write_all(&self.buf[..self.buffered]); + } + self.flushed += self.buffered; + self.buffered = 0; + } + + #[inline] + pub fn file(&self) -> &File { + &self.file + } + + #[inline] + pub fn path(&self) -> &Path { + &self.path + } + + #[inline] + fn buffer_empty(&mut self) -> &mut [u8] { + // SAFETY: self.buffered is inbounds as an invariant of the type + unsafe { self.buf.get_unchecked_mut(self.buffered..) 
} + } + + #[cold] + #[inline(never)] + fn write_all_cold_path(&mut self, buf: &[u8]) { + self.flush(); + if let Some(dest) = self.buf.get_mut(..buf.len()) { + dest.copy_from_slice(buf); + self.buffered += buf.len(); + } else { + if self.res.is_ok() { + self.res = self.file.write_all(buf); + } + self.flushed += buf.len(); + } + } + + #[inline] + fn write_all(&mut self, buf: &[u8]) { + #[cfg(debug_assertions)] + { + self.finished = false; + } + if let Some(dest) = self.buffer_empty().get_mut(..buf.len()) { + dest.copy_from_slice(buf); + self.buffered = self.buffered.debug_strict_add(buf.len()); + } else { + self.write_all_cold_path(buf); + } + } + + /// Write up to `N` bytes to this encoder. + /// + /// This function can be used to avoid the overhead of calling memcpy for writes that + /// have runtime-variable length, but are small and have a small fixed upper bound. + /// + /// This can be used to do in-place encoding as is done for leb128 (without this function + /// we would need to write to a temporary buffer then memcpy into the encoder), and it can + /// also be used to implement the varint scheme we use for rmeta and dep graph encoding, + /// where we only want to encode the first few bytes of an integer. Copying in the whole + /// integer then only advancing the encoder state for the few bytes we care about is more + /// efficient than calling [`FileEncoder::write_all`], because variable-size copies are + /// always lowered to `memcpy`, which has overhead and contains a lot of logic we can bypass + /// with this function. Note that common architectures support fixed-size writes up to 8 bytes + /// with one instruction, so while this does in some sense do wasted work, we come out ahead. 
+ #[inline] + pub fn write_with(&mut self, visitor: impl FnOnce(&mut [u8; N]) -> usize) { + #[cfg(debug_assertions)] + { + self.finished = false; + } + let flush_threshold = const { BUF_SIZE.checked_sub(N).unwrap() }; + if std::intrinsics::unlikely(self.buffered > flush_threshold) { + self.flush(); + } + // SAFETY: We checked above that N < self.buffer_empty().len(), + // and if isn't, flush ensures that our empty buffer is now BUF_SIZE. + // We produce a post-mono error if N > BUF_SIZE. + let buf = unsafe { self.buffer_empty().first_chunk_mut::().unwrap_unchecked() }; + let written = visitor(buf); + // We have to ensure that an errant visitor cannot cause self.buffered to exceed BUF_SIZE. + if written > N { + Self::panic_invalid_write::(written); + } + self.buffered = self.buffered.debug_strict_add(written); + } + + #[cold] + #[inline(never)] + fn panic_invalid_write(written: usize) { + panic!("FileEncoder::write_with::<{N}> cannot be used to write {written} bytes"); + } + + /// Helper for calls where [`FileEncoder::write_with`] always writes the whole array. + #[inline] + pub fn write_array(&mut self, buf: [u8; N]) { + self.write_with(|dest| { + *dest = buf; + N + }) + } + + pub fn finish(&mut self) -> FileEncodeResult { + self.write_all(MAGIC_END_BYTES); + self.flush(); + #[cfg(debug_assertions)] + { + self.finished = true; + } + match std::mem::replace(&mut self.res, Ok(())) { + Ok(()) => Ok(self.position()), + Err(e) => Err((self.path.clone(), e)), + } + } +} + +#[cfg(debug_assertions)] +impl Drop for FileEncoder { + fn drop(&mut self) { + if !std::thread::panicking() { + assert!(self.finished); + } + } +} + +macro_rules! 
write_leb128 { + ($this_fn:ident, $int_ty:ty, $write_leb_fn:ident) => { + #[inline] + fn $this_fn(&mut self, v: $int_ty) { + self.write_with(|buf| leb128::$write_leb_fn(buf, v)) + } + }; +} + +impl Encoder for FileEncoder { + write_leb128!(emit_usize, usize, write_usize_leb128); + write_leb128!(emit_u128, u128, write_u128_leb128); + write_leb128!(emit_u64, u64, write_u64_leb128); + write_leb128!(emit_u32, u32, write_u32_leb128); + + #[inline] + fn emit_u16(&mut self, v: u16) { + self.write_array(v.to_le_bytes()); + } + + #[inline] + fn emit_u8(&mut self, v: u8) { + self.write_array([v]); + } + + write_leb128!(emit_isize, isize, write_isize_leb128); + write_leb128!(emit_i128, i128, write_i128_leb128); + write_leb128!(emit_i64, i64, write_i64_leb128); + write_leb128!(emit_i32, i32, write_i32_leb128); + + #[inline] + fn emit_i16(&mut self, v: i16) { + self.write_array(v.to_le_bytes()); + } + + #[inline] + fn emit_raw_bytes(&mut self, s: &[u8]) { + self.write_all(s); + } +} + +// ----------------------------------------------------------------------------- +// Decoder +// ----------------------------------------------------------------------------- + +// Conceptually, `MemDecoder` wraps a `&[u8]` with a cursor into it that is always valid. +// This is implemented with three pointers, two which represent the original slice and a +// third that is our cursor. +// It is an invariant of this type that start <= current <= end. +// Additionally, the implementation of this type never modifies start and end. 
+pub struct MemDecoder<'a> { + start: *const u8, + current: *const u8, + end: *const u8, + _marker: PhantomData<&'a u8>, +} + +impl<'a> MemDecoder<'a> { + #[inline] + pub fn new(data: &'a [u8], position: usize) -> Result, ()> { + let data = data.strip_suffix(MAGIC_END_BYTES).ok_or(())?; + let Range { start, end } = data.as_ptr_range(); + Ok(MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData }) + } + + #[inline] + pub fn split_at(&self, position: usize) -> MemDecoder<'a> { + assert!(position <= self.len()); + // SAFETY: We checked above that this offset is within the original slice + let current = unsafe { self.start.add(position) }; + MemDecoder { start: self.start, current, end: self.end, _marker: PhantomData } + } + + #[inline] + pub fn len(&self) -> usize { + // SAFETY: This recovers the length of the original slice, only using members we never modify. + unsafe { self.end.offset_from_unsigned(self.start) } + } + + #[inline] + pub fn remaining(&self) -> usize { + // SAFETY: This type guarantees current <= end. + unsafe { self.end.offset_from_unsigned(self.current) } + } + + #[cold] + #[inline(never)] + fn decoder_exhausted() -> ! { + panic!("MemDecoder exhausted") + } + + #[inline] + pub fn read_array(&mut self) -> [u8; N] { + self.read_raw_bytes(N).try_into().unwrap() + } + + /// While we could manually expose manipulation of the decoder position, + /// all current users of that method would need to reset the position later, + /// incurring the bounds check of set_position twice. 
+ #[inline] + pub fn with_position(&mut self, pos: usize, func: F) -> T + where + F: Fn(&mut MemDecoder<'a>) -> T, + { + struct SetOnDrop<'a, 'guarded> { + decoder: &'guarded mut MemDecoder<'a>, + current: *const u8, + } + impl Drop for SetOnDrop<'_, '_> { + fn drop(&mut self) { + self.decoder.current = self.current; + } + } + + if pos >= self.len() { + Self::decoder_exhausted(); + } + let previous = self.current; + // SAFETY: We just checked if this add is in-bounds above. + unsafe { + self.current = self.start.add(pos); + } + let guard = SetOnDrop { current: previous, decoder: self }; + func(guard.decoder) + } +} + +macro_rules! read_leb128 { + ($this_fn:ident, $int_ty:ty, $read_leb_fn:ident) => { + #[inline] + fn $this_fn(&mut self) -> $int_ty { + leb128::$read_leb_fn(self) + } + }; +} + +impl<'a> Decoder for MemDecoder<'a> { + read_leb128!(read_usize, usize, read_usize_leb128); + read_leb128!(read_u128, u128, read_u128_leb128); + read_leb128!(read_u64, u64, read_u64_leb128); + read_leb128!(read_u32, u32, read_u32_leb128); + + #[inline] + fn read_u16(&mut self) -> u16 { + u16::from_le_bytes(self.read_array()) + } + + #[inline] + fn read_u8(&mut self) -> u8 { + if self.current == self.end { + Self::decoder_exhausted(); + } + // SAFETY: This type guarantees current <= end, and we just checked current == end. + unsafe { + let byte = *self.current; + self.current = self.current.add(1); + byte + } + } + + read_leb128!(read_isize, isize, read_isize_leb128); + read_leb128!(read_i128, i128, read_i128_leb128); + read_leb128!(read_i64, i64, read_i64_leb128); + read_leb128!(read_i32, i32, read_i32_leb128); + + #[inline] + fn read_i16(&mut self) -> i16 { + i16::from_le_bytes(self.read_array()) + } + + #[inline] + fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] { + if bytes > self.remaining() { + Self::decoder_exhausted(); + } + // SAFETY: We just checked if this range is in-bounds above. 
+ unsafe { + let slice = std::slice::from_raw_parts(self.current, bytes); + self.current = self.current.add(bytes); + slice + } + } + + #[inline] + fn peek_byte(&self) -> u8 { + if self.current == self.end { + Self::decoder_exhausted(); + } + // SAFETY: This type guarantees current is inbounds or one-past-the-end, which is end. + // Since we just checked current == end, the current pointer must be inbounds. + unsafe { *self.current } + } + + #[inline] + fn position(&self) -> usize { + // SAFETY: This type guarantees start <= current + unsafe { self.current.offset_from_unsigned(self.start) } + } +} + +// Specializations for contiguous byte sequences follow. The default implementations for slices +// encode and decode each element individually. This isn't necessary for `u8` slices when using +// opaque encoders and decoders, because each `u8` is unchanged by encoding and decoding. +// Therefore, we can use more efficient implementations that process the entire sequence at once. + +// Specialize encoding byte slices. This specialization also applies to encoding `Vec`s, etc., +// since the default implementations call `encode` on their slices internally. +impl Encodable for [u8] { + fn encode(&self, e: &mut FileEncoder) { + Encoder::emit_usize(e, self.len()); + e.emit_raw_bytes(self); + } +} + +// Specialize decoding `Vec`. This specialization also applies to decoding `Box<[u8]>`s, etc., +// since the default implementations call `decode` to produce a `Vec` internally. +impl<'a> Decodable> for Vec { + fn decode(d: &mut MemDecoder<'a>) -> Self { + let len = Decoder::read_usize(d); + d.read_raw_bytes(len).to_owned() + } +} + +/// An integer that will always encode to 8 bytes. 
+pub struct IntEncodedWithFixedSize(pub u64); + +impl IntEncodedWithFixedSize { + pub const ENCODED_SIZE: usize = 8; +} + +impl Encodable for IntEncodedWithFixedSize { + #[inline] + fn encode(&self, e: &mut FileEncoder) { + let start_pos = e.position(); + e.write_array(self.0.to_le_bytes()); + let end_pos = e.position(); + debug_assert_eq!((end_pos - start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); + } +} + +impl<'a> Decodable> for IntEncodedWithFixedSize { + #[inline] + fn decode(decoder: &mut MemDecoder<'a>) -> IntEncodedWithFixedSize { + let bytes = decoder.read_array::<{ IntEncodedWithFixedSize::ENCODED_SIZE }>(); + IntEncodedWithFixedSize(u64::from_le_bytes(bytes)) + } +} + +#[cfg(test)] +mod tests; From a5ada4ff23511ed421b3cc4add1319463c4a0240 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Mon, 11 May 2026 15:31:38 -0400 Subject: [PATCH 2/2] Convert crate hash to use metadata instead of HIR --- compiler/rustc_ast_lowering/src/lib.rs | 26 +- compiler/rustc_driver_impl/src/lib.rs | 8 +- compiler/rustc_hir/src/hir.rs | 13 + compiler/rustc_interface/src/passes.rs | 9 +- compiler/rustc_metadata/src/lib.rs | 1 + compiler/rustc_metadata/src/rmeta/encoder.rs | 81 ++++-- compiler/rustc_metadata/src/rmeta/mod.rs | 4 +- compiler/rustc_metadata/src/rmeta/opaque.rs | 262 +++--------------- compiler/rustc_metadata/src/rmeta/table.rs | 2 +- compiler/rustc_middle/src/hir/map.rs | 98 ++----- compiler/rustc_session/src/cstore.rs | 4 + .../foo.rs | 16 ++ .../rmake.rs | 41 +++ .../v1.rs | 12 + .../v2.rs | 13 + .../changing_macro.rs | 15 + .../proc-macro-env-changes-crate-hash/foo.rs | 9 + .../rmake.rs | 54 ++++ .../changing_macro.rs | 18 ++ .../foo.rs | 9 + .../rmake.rs | 62 +++++ 21 files changed, 410 insertions(+), 347 deletions(-) create mode 100644 tests/run-make/proc-macro-dep-source-changes-crate-hash/foo.rs create mode 100644 tests/run-make/proc-macro-dep-source-changes-crate-hash/rmake.rs create mode 100644 
tests/run-make/proc-macro-dep-source-changes-crate-hash/v1.rs create mode 100644 tests/run-make/proc-macro-dep-source-changes-crate-hash/v2.rs create mode 100644 tests/run-make/proc-macro-env-changes-crate-hash/changing_macro.rs create mode 100644 tests/run-make/proc-macro-env-changes-crate-hash/foo.rs create mode 100644 tests/run-make/proc-macro-env-changes-crate-hash/rmake.rs create mode 100644 tests/run-make/proc-macro-global-asm-changes-crate-hash/changing_macro.rs create mode 100644 tests/run-make/proc-macro-global-asm-changes-crate-hash/foo.rs create mode 100644 tests/run-make/proc-macro-global-asm-changes-crate-hash/rmake.rs diff --git a/compiler/rustc_ast_lowering/src/lib.rs b/compiler/rustc_ast_lowering/src/lib.rs index 71ce3d3b585f6..fbe293c1a851d 100644 --- a/compiler/rustc_ast_lowering/src/lib.rs +++ b/compiler/rustc_ast_lowering/src/lib.rs @@ -45,7 +45,6 @@ use rustc_attr_parsing::{AttributeParser, OmitDoc, Recovery, ShouldEmit}; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::FxIndexSet; use rustc_data_structures::sorted_map::SortedMap; -use rustc_data_structures::stable_hash::{StableHash, StableHasher}; use rustc_data_structures::steal::Steal; use rustc_data_structures::tagged_ptr::TaggedRef; use rustc_errors::{DiagArgFromDisplay, DiagCtxtHandle}; @@ -506,25 +505,15 @@ fn index_crate<'a, 'b>( /// Compute the hash for the HIR of the full crate. /// This hash will then be part of the crate_hash which is stored in the metadata. 
-fn compute_hir_hash( - tcx: TyCtxt<'_>, - owners: &IndexSlice>, -) -> Fingerprint { - let mut hir_body_nodes: Vec<_> = owners +fn compute_hir_hash(owners: &IndexSlice>) -> Fingerprint { + owners .iter_enumerated() - .filter_map(|(def_id, info)| { + .filter_map(|(_, info)| { let info = info.as_owner()?; - let def_path_hash = tcx.hir_def_path_hash(def_id); - Some((def_path_hash, info)) + Some(info.fingerprint()) }) - .collect(); - hir_body_nodes.sort_unstable_by_key(|bn| bn.0); - - tcx.with_stable_hashing_context(|mut hcx| { - let mut stable_hasher = StableHasher::new(); - hir_body_nodes.stable_hash(&mut hcx, &mut stable_hasher); - stable_hasher.finish() - }) + .reduce(Fingerprint::combine_commutative) + .expect("HIR hash requested without any content") } pub fn lower_to_hir(tcx: TyCtxt<'_>, (): ()) -> mid_hir::Crate<'_> { @@ -561,8 +550,7 @@ pub fn lower_to_hir(tcx: TyCtxt<'_>, (): ()) -> mid_hir::Crate<'_> { } // Don't hash unless necessary, because it's expensive. - let opt_hir_hash = - if tcx.needs_hir_hash() { Some(compute_hir_hash(tcx, &owners)) } else { None }; + let opt_hir_hash = if tcx.needs_hir_hash() { Some(compute_hir_hash(&owners)) } else { None }; let delayed_resolver = Steal::new((resolver, krate)); mid_hir::Crate::new(owners, delayed_ids, delayed_resolver, opt_hir_hash) diff --git a/compiler/rustc_driver_impl/src/lib.rs b/compiler/rustc_driver_impl/src/lib.rs index aaac65721dfab..3e1d095acfcc0 100644 --- a/compiler/rustc_driver_impl/src/lib.rs +++ b/compiler/rustc_driver_impl/src/lib.rs @@ -324,10 +324,6 @@ pub fn run_compiler(at_args: &[String], callbacks: &mut (dyn Callbacks + Send)) tcx.ensure_ok().analysis(()); - if let Some(metrics_dir) = &sess.opts.unstable_opts.metrics_dir { - dump_feature_usage_metrics(tcx, metrics_dir); - } - if callbacks.after_analysis(compiler, tcx) == Compilation::Stop { return early_exit(); } @@ -340,6 +336,10 @@ pub fn run_compiler(at_args: &[String], callbacks: &mut (dyn Callbacks + Send)) let linker = 
Linker::codegen_and_build_linker(tcx, &*compiler.codegen_backend); + if let Some(metrics_dir) = &sess.opts.unstable_opts.metrics_dir { + dump_feature_usage_metrics(tcx, metrics_dir); + } + tcx.report_unused_features(); Some(linker) diff --git a/compiler/rustc_hir/src/hir.rs b/compiler/rustc_hir/src/hir.rs index 2211e717a8b50..aac5c563e507d 100644 --- a/compiler/rustc_hir/src/hir.rs +++ b/compiler/rustc_hir/src/hir.rs @@ -1659,6 +1659,19 @@ impl<'tcx> OwnerInfo<'tcx> { pub fn node(&self) -> OwnerNode<'tcx> { self.nodes.node() } + + // A fingerprint that identifies the contents of the OwnerInfo. + // It only depends on `nodes` and `attrs` because `parenting` and `trait_map` are + // deterministically calculated from `nodes` and `attrs`. + #[inline] + pub fn fingerprint(&self) -> Fingerprint { + let body = self + .nodes + .opt_hash_including_bodies + .expect("HIR hash requested without needs_hir_hash"); + let attrs = self.attrs.opt_hash.expect("HIR hash requested without needs_hir_hash"); + body.combine(attrs) + } } #[derive(Copy, Clone, Debug, StableHash)] diff --git a/compiler/rustc_interface/src/passes.rs b/compiler/rustc_interface/src/passes.rs index bcd1a52ce9dcd..626a48e96bac2 100644 --- a/compiler/rustc_interface/src/passes.rs +++ b/compiler/rustc_interface/src/passes.rs @@ -948,8 +948,13 @@ pub fn create_and_enter_global_ctxt FnOnce(TyCtxt<'tcx>) -> T>( let definitions = FreezeLock::new(Definitions::new(stable_crate_id)); let stable_crate_ids = FreezeLock::new(StableCrateIdMap::default()); - let untracked = - Untracked { cstore, source_span: AppendOnlyIndexVec::new(), definitions, stable_crate_ids }; + let untracked = Untracked { + cstore, + source_span: AppendOnlyIndexVec::new(), + definitions, + stable_crate_ids, + local_crate_hash: OnceLock::new(), + }; // We're constructing the HIR here; we don't care what we will // read, since we haven't even constructed the *input* to diff --git a/compiler/rustc_metadata/src/lib.rs b/compiler/rustc_metadata/src/lib.rs 
index 1dff5740ab3bc..f76e975620d99 100644 --- a/compiler/rustc_metadata/src/lib.rs +++ b/compiler/rustc_metadata/src/lib.rs @@ -1,5 +1,6 @@ // tidy-alphabetical-start #![allow(internal_features)] +#![feature(core_intrinsics)] #![feature(error_iter)] #![feature(file_buffered)] #![feature(gen_blocks)] diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index a0db004b7f4c4..33f59d0764c0b 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -2,11 +2,14 @@ use std::borrow::Borrow; use std::collections::hash_map::Entry; use std::fs::File; use std::io::{Read, Seek, Write}; +use std::ops::Deref; use std::path::{Path, PathBuf}; use std::sync::Arc; use rustc_data_structures::fx::{FxIndexMap, FxIndexSet}; use rustc_data_structures::memmap::{Mmap, MmapMut}; +use rustc_data_structures::owned_slice::slice_owned; +use rustc_data_structures::stable_hash::{StableHash, StableHasher}; use rustc_data_structures::sync::{par_for_each_in, par_join}; use rustc_data_structures::temp_dir::MaybeTempDir; use rustc_data_structures::thousands::usize_with_underscores; @@ -25,7 +28,7 @@ use rustc_middle::ty::AssocContainer; use rustc_middle::ty::codec::TyEncoder; use rustc_middle::ty::fast_reject::{self, TreatParams}; use rustc_middle::{bug, span_bug}; -use rustc_serialize::{Decodable, Decoder, Encodable, Encoder, opaque}; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use rustc_session::config::mitigation_coverage::DeniedPartialMitigation; use rustc_session::config::{CrateType, OptLevel, TargetModifier}; use rustc_span::hygiene::HygieneEncodeContext; @@ -40,7 +43,7 @@ use crate::errors::{FailCreateFileEncoder, FailWriteFile}; use crate::rmeta::*; pub(super) struct EncodeContext<'a, 'tcx> { - opaque: opaque::FileEncoder, + opaque: opaque::FileEncoder<'a>, tcx: TyCtxt<'tcx>, feat: &'tcx rustc_feature::Features, tables: TableBuilders, @@ -718,13 +721,16 @@ impl<'a, 'tcx> 
EncodeContext<'a, 'tcx> { let denied_partial_mitigations = stat!("denied-partial-mitigations", || self .encode_enabled_denied_partial_mitigations()); + let hash = Svh::new(self.opaque.hash()); + tcx.untracked().local_crate_hash.set(hash).expect("local_crate_hash set twice"); + let root = stat!("final", || { let attrs = tcx.hir_krate_attrs(); self.lazy(CrateRoot { header: CrateHeader { name: tcx.crate_name(LOCAL_CRATE), triple: tcx.sess.opts.target_triple.clone(), - hash: tcx.crate_hash(LOCAL_CRATE), + hash, is_proc_macro_crate: proc_macro_data.is_some(), is_stub: false, }, @@ -2428,22 +2434,6 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) { // there's no need to do dep-graph tracking for any of it. tcx.dep_graph.assert_ignored(); - // Generate the metadata stub manually, as that is a small file compared to full metadata. - if let Some(ref_path) = ref_path { - let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata_stub"); - - with_encode_metadata_header(tcx, ref_path, |ecx| { - let header: LazyValue = ecx.lazy(CrateHeader { - name: tcx.crate_name(LOCAL_CRATE), - triple: tcx.sess.opts.target_triple.clone(), - hash: tcx.crate_hash(LOCAL_CRATE), - is_proc_macro_crate: false, - is_stub: true, - }); - header.position.get() - }) - } - let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata"); let dep_node = tcx.metadata_dep_node(); @@ -2462,6 +2452,31 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) { Ok(_) => {} Err(err) => tcx.dcx().emit_fatal(FailCreateFileEncoder { err }), }; + + // Read the SVH from the old metadata header. 
+ let file = std::fs::File::open(&source_file).unwrap(); + let mmap = unsafe { Mmap::map(file) }.unwrap(); + let owned = slice_owned(mmap, Deref::deref); + let blob = MetadataBlob::new(owned); + let header = blob.expect("file already created").get_header(); + tcx.untracked().local_crate_hash.set(header.hash).expect("local_crate_hash set twice"); + + // Generate the metadata stub manually, as that is a small file compared to full metadata. + if let Some(ref_path) = ref_path { + let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata_stub"); + + with_encode_metadata_header(tcx, ref_path, |ecx| { + let header: LazyValue = ecx.lazy(CrateHeader { + name: tcx.crate_name(LOCAL_CRATE), + triple: tcx.sess.opts.target_triple.clone(), + hash: tcx.crate_hash(LOCAL_CRATE), + is_proc_macro_crate: false, + is_stub: true, + }); + header.position.get() + }) + } + return; }; @@ -2503,6 +2518,22 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) { }, None, ); + + // Generate the metadata stub manually, as that is a small file compared to full metadata. 
+ if let Some(ref_path) = ref_path { + let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata_stub"); + + with_encode_metadata_header(tcx, ref_path, |ecx| { + let header: LazyValue = ecx.lazy(CrateHeader { + name: tcx.crate_name(LOCAL_CRATE), + triple: tcx.sess.opts.target_triple.clone(), + hash: tcx.crate_hash(LOCAL_CRATE), + is_proc_macro_crate: false, + is_stub: true, + }); + header.position.get() + }) + } } fn with_encode_metadata_header( @@ -2510,7 +2541,17 @@ fn with_encode_metadata_header( path: &Path, f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize, ) { - let mut encoder = opaque::FileEncoder::new(path) + let mut stable_hasher = StableHasher::new(); + let krate = tcx.hir_crate(()); + let hir_body_hash = krate.opt_hir_hash.expect("HIR hash missing while computing crate hash"); + tcx.with_stable_hashing_context(|mut hcx| { + // Add dep_tracking_hash to ensure the SVH changes when any tracked flag changes. + tcx.sess.opts.dep_tracking_hash(true).stable_hash(&mut hcx, &mut stable_hasher); + // Add HIR hash for untracked elements, e.g. DefKind::GlobalAsm. 
+ hir_body_hash.stable_hash(&mut hcx, &mut stable_hasher); + }); + + let mut encoder = opaque::FileEncoder::new(path, &mut stable_hasher) .unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err })); encoder.emit_raw_bytes(METADATA_HEADER); diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index a3645a5556bf3..003eae359b4f1 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -35,7 +35,6 @@ use rustc_middle::mir::ConstValue; use rustc_middle::ty::fast_reject::SimplifiedType; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::util::Providers; -use rustc_serialize::opaque::FileEncoder; use rustc_session::config::mitigation_coverage::DeniedPartialMitigation; use rustc_session::config::{SymbolManglingVersion, TargetModifier}; use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib}; @@ -50,6 +49,7 @@ use crate::eii::EiiMapEncodedKeyValue; mod decoder; mod def_path_hash_map; mod encoder; +mod opaque; mod parameterized; mod table; @@ -364,7 +364,7 @@ macro_rules! define_tables { } impl TableBuilders { - fn encode(&self, buf: &mut FileEncoder) -> LazyTables { + fn encode(&self, buf: &mut opaque::FileEncoder<'_>) -> LazyTables { LazyTables { $($name1: self.$name1.encode(buf),)+ $($name2: self.$name2.encode(buf),)+ diff --git a/compiler/rustc_metadata/src/rmeta/opaque.rs b/compiler/rustc_metadata/src/rmeta/opaque.rs index 4242642c6643f..c946f5750aa0d 100644 --- a/compiler/rustc_metadata/src/rmeta/opaque.rs +++ b/compiler/rustc_metadata/src/rmeta/opaque.rs @@ -1,24 +1,20 @@ use std::fs::File; +use std::hash::Hasher; use std::io::{self, Write}; -use std::marker::PhantomData; -use std::ops::Range; use std::path::{Path, PathBuf}; -// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance. 
-// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727 -use crate::int_overflow::DebugStrictAdd; -use crate::leb128; -use crate::serialize::{Decodable, Decoder, Encodable, Encoder}; - -pub mod mem_encoder; +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::stable_hash::StableHasher; +use rustc_serialize::int_overflow::DebugStrictAdd; +use rustc_serialize::{Encodable, Encoder, leb128}; // ----------------------------------------------------------------------------- // Encoder // ----------------------------------------------------------------------------- -pub type FileEncodeResult = Result; +pub(super) type FileEncodeResult = Result; -pub const MAGIC_END_BYTES: &[u8] = b"rust-end-file"; +pub(super) const MAGIC_END_BYTES: &[u8] = b"rust-end-file"; /// The size of the buffer in `FileEncoder`. const BUF_SIZE: usize = 64 * 1024; @@ -29,7 +25,7 @@ const BUF_SIZE: usize = 64 * 1024; /// `Vec`. `FileEncoder` is better because its memory use is determined by the /// size of the buffer, rather than the full length of the encoded data, and /// because it doesn't need to reallocate memory along the way. -pub struct FileEncoder { +pub(super) struct FileEncoder<'a> { // The input buffer. For adequate performance, we need to be able to write // directly to the unwritten region of the buffer, without calling copy_from_slice. // Note that our buffer is always initialized so that we can do that direct access @@ -43,12 +39,16 @@ pub struct FileEncoder { // comment on `trait Encoder`. res: Result<(), io::Error>, path: PathBuf, + stable_hasher: &'a mut StableHasher, #[cfg(debug_assertions)] finished: bool, } -impl FileEncoder { - pub fn new>(path: P) -> io::Result { +impl<'a> FileEncoder<'a> { + pub(super) fn new>( + path: P, + stable_hasher: &'a mut StableHasher, + ) -> io::Result { // File::create opens the file for writing only. When -Zmeta-stats is enabled, the metadata // encoder rewinds the file to inspect what was written. 
So we need to always open the file // for reading and writing. @@ -62,13 +62,14 @@ impl FileEncoder { flushed: 0, file, res: Ok(()), + stable_hasher, #[cfg(debug_assertions)] finished: false, }) } #[inline] - pub fn position(&self) -> usize { + pub(super) fn position(&self) -> usize { // Tracking position this way instead of having a `self.position` field // means that we only need to update `self.buffered` on a write call, // as opposed to updating `self.position` and `self.buffered`. @@ -77,7 +78,7 @@ impl FileEncoder { #[cold] #[inline(never)] - pub fn flush(&mut self) { + pub(super) fn flush(&mut self) { #[cfg(debug_assertions)] { self.finished = false; @@ -86,16 +87,17 @@ impl FileEncoder { self.res = self.file.write_all(&self.buf[..self.buffered]); } self.flushed += self.buffered; + self.stable_hasher.write(&self.buf[..self.buffered]); self.buffered = 0; } #[inline] - pub fn file(&self) -> &File { + pub(super) fn file(&self) -> &File { &self.file } #[inline] - pub fn path(&self) -> &Path { + pub(super) fn path(&self) -> &Path { &self.path } @@ -114,6 +116,7 @@ impl FileEncoder { self.buffered += buf.len(); } else { if self.res.is_ok() { + self.stable_hasher.write(buf); self.res = self.file.write_all(buf); } self.flushed += buf.len(); @@ -149,7 +152,10 @@ impl FileEncoder { /// with this function. Note that common architectures support fixed-size writes up to 8 bytes /// with one instruction, so while this does in some sense do wasted work, we come out ahead. #[inline] - pub fn write_with(&mut self, visitor: impl FnOnce(&mut [u8; N]) -> usize) { + pub(super) fn write_with( + &mut self, + visitor: impl FnOnce(&mut [u8; N]) -> usize, + ) { #[cfg(debug_assertions)] { self.finished = false; @@ -178,14 +184,14 @@ impl FileEncoder { /// Helper for calls where [`FileEncoder::write_with`] always writes the whole array. 
#[inline] - pub fn write_array(&mut self, buf: [u8; N]) { + pub(super) fn write_array(&mut self, buf: [u8; N]) { self.write_with(|dest| { *dest = buf; N }) } - pub fn finish(&mut self) -> FileEncodeResult { + pub(super) fn finish(&mut self) -> FileEncodeResult { self.write_all(MAGIC_END_BYTES); self.flush(); #[cfg(debug_assertions)] @@ -197,10 +203,15 @@ impl FileEncoder { Err(e) => Err((self.path.clone(), e)), } } + + pub(super) fn hash(&mut self) -> Fingerprint { + self.flush(); + self.stable_hasher.clone().finish() + } } #[cfg(debug_assertions)] -impl Drop for FileEncoder { +impl<'a> Drop for FileEncoder<'a> { fn drop(&mut self) { if !std::thread::panicking() { assert!(self.finished); @@ -217,7 +228,7 @@ macro_rules! write_leb128 { }; } -impl Encoder for FileEncoder { +impl Encoder for FileEncoder<'_> { write_leb128!(emit_usize, usize, write_usize_leb128); write_leb128!(emit_u128, u128, write_u128_leb128); write_leb128!(emit_u64, u64, write_u64_leb128); @@ -249,212 +260,11 @@ impl Encoder for FileEncoder { } } -// ----------------------------------------------------------------------------- -// Decoder -// ----------------------------------------------------------------------------- - -// Conceptually, `MemDecoder` wraps a `&[u8]` with a cursor into it that is always valid. -// This is implemented with three pointers, two which represent the original slice and a -// third that is our cursor. -// It is an invariant of this type that start <= current <= end. -// Additionally, the implementation of this type never modifies start and end. 
-pub struct MemDecoder<'a> { - start: *const u8, - current: *const u8, - end: *const u8, - _marker: PhantomData<&'a u8>, -} - -impl<'a> MemDecoder<'a> { - #[inline] - pub fn new(data: &'a [u8], position: usize) -> Result, ()> { - let data = data.strip_suffix(MAGIC_END_BYTES).ok_or(())?; - let Range { start, end } = data.as_ptr_range(); - Ok(MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData }) - } - - #[inline] - pub fn split_at(&self, position: usize) -> MemDecoder<'a> { - assert!(position <= self.len()); - // SAFETY: We checked above that this offset is within the original slice - let current = unsafe { self.start.add(position) }; - MemDecoder { start: self.start, current, end: self.end, _marker: PhantomData } - } - - #[inline] - pub fn len(&self) -> usize { - // SAFETY: This recovers the length of the original slice, only using members we never modify. - unsafe { self.end.offset_from_unsigned(self.start) } - } - - #[inline] - pub fn remaining(&self) -> usize { - // SAFETY: This type guarantees current <= end. - unsafe { self.end.offset_from_unsigned(self.current) } - } - - #[cold] - #[inline(never)] - fn decoder_exhausted() -> ! { - panic!("MemDecoder exhausted") - } - - #[inline] - pub fn read_array(&mut self) -> [u8; N] { - self.read_raw_bytes(N).try_into().unwrap() - } - - /// While we could manually expose manipulation of the decoder position, - /// all current users of that method would need to reset the position later, - /// incurring the bounds check of set_position twice. 
- #[inline] - pub fn with_position(&mut self, pos: usize, func: F) -> T - where - F: Fn(&mut MemDecoder<'a>) -> T, - { - struct SetOnDrop<'a, 'guarded> { - decoder: &'guarded mut MemDecoder<'a>, - current: *const u8, - } - impl Drop for SetOnDrop<'_, '_> { - fn drop(&mut self) { - self.decoder.current = self.current; - } - } - - if pos >= self.len() { - Self::decoder_exhausted(); - } - let previous = self.current; - // SAFETY: We just checked if this add is in-bounds above. - unsafe { - self.current = self.start.add(pos); - } - let guard = SetOnDrop { current: previous, decoder: self }; - func(guard.decoder) - } -} - -macro_rules! read_leb128 { - ($this_fn:ident, $int_ty:ty, $read_leb_fn:ident) => { - #[inline] - fn $this_fn(&mut self) -> $int_ty { - leb128::$read_leb_fn(self) - } - }; -} - -impl<'a> Decoder for MemDecoder<'a> { - read_leb128!(read_usize, usize, read_usize_leb128); - read_leb128!(read_u128, u128, read_u128_leb128); - read_leb128!(read_u64, u64, read_u64_leb128); - read_leb128!(read_u32, u32, read_u32_leb128); - - #[inline] - fn read_u16(&mut self) -> u16 { - u16::from_le_bytes(self.read_array()) - } - - #[inline] - fn read_u8(&mut self) -> u8 { - if self.current == self.end { - Self::decoder_exhausted(); - } - // SAFETY: This type guarantees current <= end, and we just checked current == end. - unsafe { - let byte = *self.current; - self.current = self.current.add(1); - byte - } - } - - read_leb128!(read_isize, isize, read_isize_leb128); - read_leb128!(read_i128, i128, read_i128_leb128); - read_leb128!(read_i64, i64, read_i64_leb128); - read_leb128!(read_i32, i32, read_i32_leb128); - - #[inline] - fn read_i16(&mut self) -> i16 { - i16::from_le_bytes(self.read_array()) - } - - #[inline] - fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] { - if bytes > self.remaining() { - Self::decoder_exhausted(); - } - // SAFETY: We just checked if this range is in-bounds above. 
- unsafe { - let slice = std::slice::from_raw_parts(self.current, bytes); - self.current = self.current.add(bytes); - slice - } - } - - #[inline] - fn peek_byte(&self) -> u8 { - if self.current == self.end { - Self::decoder_exhausted(); - } - // SAFETY: This type guarantees current is inbounds or one-past-the-end, which is end. - // Since we just checked current == end, the current pointer must be inbounds. - unsafe { *self.current } - } - - #[inline] - fn position(&self) -> usize { - // SAFETY: This type guarantees start <= current - unsafe { self.current.offset_from_unsigned(self.start) } - } -} - -// Specializations for contiguous byte sequences follow. The default implementations for slices -// encode and decode each element individually. This isn't necessary for `u8` slices when using -// opaque encoders and decoders, because each `u8` is unchanged by encoding and decoding. -// Therefore, we can use more efficient implementations that process the entire sequence at once. - // Specialize encoding byte slices. This specialization also applies to encoding `Vec`s, etc., // since the default implementations call `encode` on their slices internally. -impl Encodable for [u8] { - fn encode(&self, e: &mut FileEncoder) { +impl Encodable> for [u8] { + fn encode(&self, e: &mut FileEncoder<'_>) { Encoder::emit_usize(e, self.len()); e.emit_raw_bytes(self); } } - -// Specialize decoding `Vec`. This specialization also applies to decoding `Box<[u8]>`s, etc., -// since the default implementations call `decode` to produce a `Vec` internally. -impl<'a> Decodable> for Vec { - fn decode(d: &mut MemDecoder<'a>) -> Self { - let len = Decoder::read_usize(d); - d.read_raw_bytes(len).to_owned() - } -} - -/// An integer that will always encode to 8 bytes. 
-pub struct IntEncodedWithFixedSize(pub u64); - -impl IntEncodedWithFixedSize { - pub const ENCODED_SIZE: usize = 8; -} - -impl Encodable for IntEncodedWithFixedSize { - #[inline] - fn encode(&self, e: &mut FileEncoder) { - let start_pos = e.position(); - e.write_array(self.0.to_le_bytes()); - let end_pos = e.position(); - debug_assert_eq!((end_pos - start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); - } -} - -impl<'a> Decodable> for IntEncodedWithFixedSize { - #[inline] - fn decode(decoder: &mut MemDecoder<'a>) -> IntEncodedWithFixedSize { - let bytes = decoder.read_array::<{ IntEncodedWithFixedSize::ENCODED_SIZE }>(); - IntEncodedWithFixedSize(u64::from_le_bytes(bytes)) - } -} - -#[cfg(test)] -mod tests; diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs index 26c5908563777..2172979cc77be 100644 --- a/compiler/rustc_metadata/src/rmeta/table.rs +++ b/compiler/rustc_metadata/src/rmeta/table.rs @@ -486,7 +486,7 @@ impl> TableBui } } - pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable { + pub(crate) fn encode(&self, buf: &mut opaque::FileEncoder<'_>) -> LazyTable { let pos = buf.position(); let width = self.width; diff --git a/compiler/rustc_middle/src/hir/map.rs b/compiler/rustc_middle/src/hir/map.rs index e914489acadff..d6394983f2bd7 100644 --- a/compiler/rustc_middle/src/hir/map.rs +++ b/compiler/rustc_middle/src/hir/map.rs @@ -9,16 +9,16 @@ use rustc_data_structures::stable_hash::{StableHash, StableHasher}; use rustc_data_structures::svh::Svh; use rustc_data_structures::sync::{DynSend, DynSync, par_for_each_in, spawn, try_par_for_each_in}; use rustc_hir::def::{DefKind, Res}; -use rustc_hir::def_id::{DefId, LOCAL_CRATE, LocalDefId, LocalModDefId}; +use rustc_hir::def_id::{DefId, LocalDefId, LocalModDefId}; use rustc_hir::definitions::{DefKey, DefPath, DefPathHash}; use rustc_hir::intravisit::Visitor; use rustc_hir::*; use rustc_hir_pretty as pprust_hir; use rustc_span::def_id::StableCrateId; -use 
rustc_span::{ErrorGuaranteed, Ident, Span, Symbol, kw, with_metavar_spans}; +use rustc_span::{ErrorGuaranteed, Ident, Span, Symbol, kw}; +use crate::hir::def_id::LOCAL_CRATE; use crate::hir::{ModuleItems, nested_filter}; -use crate::middle::debugger_visualizer::DebuggerVisualizerFile; use crate::query::LocalCrate; use crate::ty::TyCtxt; @@ -1122,78 +1122,30 @@ impl<'tcx> pprust_hir::PpAnn for TyCtxt<'tcx> { } pub(super) fn crate_hash(tcx: TyCtxt<'_>, _: LocalCrate) -> Svh { - let krate = tcx.hir_crate(()); - let hir_body_hash = krate.opt_hir_hash.expect("HIR hash missing while computing crate hash"); - - let upstream_crates = upstream_crates(tcx); - - let resolutions = tcx.resolutions(()); - - // We hash the final, remapped names of all local source files so we - // don't have to include the path prefix remapping commandline args. - // If we included the full mapping in the SVH, we could only have - // reproducible builds by compiling from the same directory. So we just - // hash the result of the mapping instead of the mapping itself. - let mut source_file_names: Vec<_> = tcx - .sess - .source_map() - .files() - .iter() - .filter(|source_file| source_file.cnum == LOCAL_CRATE) - .map(|source_file| source_file.stable_id) - .collect(); - - source_file_names.sort_unstable(); - - // We have to take care of debugger visualizers explicitly. The HIR (and - // thus `hir_body_hash`) contains the #[debugger_visualizer] attributes but - // these attributes only store the file path to the visualizer file, not - // their content. Yet that content is exported into crate metadata, so any - // changes to it need to be reflected in the crate hash. - let debugger_visualizers: Vec<_> = tcx - .debugger_visualizers(LOCAL_CRATE) - .iter() - // We ignore the path to the visualizer file since it's not going to be - // encoded in crate metadata and we already hash the full contents of - // the file. 
- .map(DebuggerVisualizerFile::path_erased) - .collect(); - - let crate_hash: Fingerprint = tcx.with_stable_hashing_context(|mut hcx| { - let mut stable_hasher = StableHasher::new(); - hir_body_hash.stable_hash(&mut hcx, &mut stable_hasher); - upstream_crates.stable_hash(&mut hcx, &mut stable_hasher); - source_file_names.stable_hash(&mut hcx, &mut stable_hasher); - debugger_visualizers.stable_hash(&mut hcx, &mut stable_hasher); - if tcx.sess.opts.incremental.is_some() { - let definitions = tcx.untracked().definitions.freeze(); - let mut owner_spans: Vec<_> = tcx - .hir_crate_items(()) - .definitions() - .map(|def_id| { - let def_path_hash = definitions.def_path_hash(def_id); - let span = tcx.source_span(def_id); - debug_assert_eq!(span.parent(), None); - (def_path_hash, span) - }) - .collect(); - owner_spans.sort_unstable_by_key(|bn| bn.0); - owner_spans.stable_hash(&mut hcx, &mut stable_hasher); - } - tcx.sess.opts.dep_tracking_hash(true).stable_hash(&mut hcx, &mut stable_hasher); - tcx.stable_crate_id(LOCAL_CRATE).stable_hash(&mut hcx, &mut stable_hasher); - // Hash visibility information since it does not appear in HIR. - // FIXME: Figure out how to remove `visibilities_for_hashing` by hashing visibilities on - // the fly in the resolver, storing only their accumulated hash in `ResolverGlobalCtxt`, - // and combining it with other hashes here. 
- resolutions.visibilities_for_hashing.stable_hash(&mut hcx, &mut stable_hasher); - with_metavar_spans(|mspans| { - mspans.freeze_and_get_read_spans().stable_hash(&mut hcx, &mut stable_hasher); + if tcx.needs_metadata() { + *tcx.untracked() + .local_crate_hash + .get() + .expect("crate_hash(LOCAL_CRATE) called before metadata encoding") + } else { + let krate = tcx.hir_crate(()); + let hir_body_hash = + krate.opt_hir_hash.expect("HIR hash missing while computing crate hash"); + + let upstream_crates = upstream_crates(tcx); + + let crate_hash: Fingerprint = tcx.with_stable_hashing_context(|mut hcx| { + let mut stable_hasher = StableHasher::new(); + hir_body_hash.stable_hash(&mut hcx, &mut stable_hasher); + upstream_crates.stable_hash(&mut hcx, &mut stable_hasher); + tcx.sess.opts.dep_tracking_hash(true).stable_hash(&mut hcx, &mut stable_hasher); + tcx.stable_crate_id(LOCAL_CRATE).stable_hash(&mut hcx, &mut stable_hasher); + + stable_hasher.finish() }); - stable_hasher.finish() - }); - Svh::new(crate_hash) + Svh::new(crate_hash) + } } fn upstream_crates(tcx: TyCtxt<'_>) -> Vec<(StableCrateId, Svh)> { diff --git a/compiler/rustc_session/src/cstore.rs b/compiler/rustc_session/src/cstore.rs index 39fe9c80923ec..bf64db91ce346 100644 --- a/compiler/rustc_session/src/cstore.rs +++ b/compiler/rustc_session/src/cstore.rs @@ -4,8 +4,10 @@ use std::any::Any; use std::path::PathBuf; +use std::sync::OnceLock; use rustc_abi::ExternAbi; +use rustc_data_structures::svh::Svh; use rustc_data_structures::sync::{self, AppendOnlyIndexVec, FreezeLock}; use rustc_hir::attrs::{CfgEntry, NativeLibKind, PeImportNameType}; use rustc_hir::def_id::{ @@ -223,4 +225,6 @@ pub struct Untracked { pub definitions: FreezeLock, /// The interned [StableCrateId]s. pub stable_crate_ids: FreezeLock, + /// The hash of the local crate as computed in metadata encoding. 
+ pub local_crate_hash: OnceLock, } diff --git a/tests/run-make/proc-macro-dep-source-changes-crate-hash/foo.rs b/tests/run-make/proc-macro-dep-source-changes-crate-hash/foo.rs new file mode 100644 index 0000000000000..d645ab8680949 --- /dev/null +++ b/tests/run-make/proc-macro-dep-source-changes-crate-hash/foo.rs @@ -0,0 +1,16 @@ +// Consumer crate. Byte-identical across all invocations of the test; +// only the tokens spliced in by `#[derive(ChangingDerive)]` change between +// builds, driven by which version of the proc-macro is on disk. + +#![crate_type = "rlib"] + +extern crate changing_macro; + +use changing_macro::ChangingDerive; + +#[derive(ChangingDerive)] +pub struct Foo; + +pub fn answer() -> u32 { + ANSWER +} diff --git a/tests/run-make/proc-macro-dep-source-changes-crate-hash/rmake.rs b/tests/run-make/proc-macro-dep-source-changes-crate-hash/rmake.rs new file mode 100644 index 0000000000000..1d6f285464043 --- /dev/null +++ b/tests/run-make/proc-macro-dep-source-changes-crate-hash/rmake.rs @@ -0,0 +1,41 @@ +// Verifies that when the *source* of a proc-macro dependency changes (so the +// tokens it emits in the consumer crate change), the consumer crate's +// crate_hash / SVH changes. +// See https://github.com/rust-lang/rust/issues/94878 and PR #154724. + +//@ needs-crate-type: proc-macro + +use run_make_support::{diff, rfs, rustc}; + +fn build_in(dir: &str, macro_src: &str) -> String { + // Build the proc-macro and the consumer into `dir`, then dump the + // consumer's crate metadata root (which includes the SVH). + rustc() + .input(macro_src) + .crate_name("changing_macro") + .crate_type("proc-macro") + .out_dir(dir) + .run(); + rustc().input("foo.rs").library_search_path(dir).out_dir(dir).run(); + rustc().arg("-Zls=root").input(format!("{dir}/libfoo.rlib")).run().stdout_utf8() +} + +fn main() { + rfs::create_dir("v1"); + rfs::create_dir("v2"); + rfs::create_dir("v1_again"); + + // Build the consumer against proc-macro v1, then against v2. 
foo.rs is + // byte-identical across builds; only the tokens spliced in by the derive + // differ. + let v1 = build_in("v1", "v1.rs"); + let v2 = build_in("v2", "v2.rs"); + // The SVH (printed by `-Zls=root`) must differ between the two builds. + diff().expected_text("v1", &v1).actual_text("v2", v2).run_fail(); + + // Sanity: rebuilding against v1 reproduces the original dump, so the + // difference above is genuinely caused by the proc-macro source change + // and not by non-determinism in metadata encoding. + let v1_again = build_in("v1_again", "v1.rs"); + diff().expected_text("v1", &v1).actual_text("v1_again", v1_again).run(); +} diff --git a/tests/run-make/proc-macro-dep-source-changes-crate-hash/v1.rs b/tests/run-make/proc-macro-dep-source-changes-crate-hash/v1.rs new file mode 100644 index 0000000000000..056e3f142212c --- /dev/null +++ b/tests/run-make/proc-macro-dep-source-changes-crate-hash/v1.rs @@ -0,0 +1,12 @@ +// First version of the proc-macro. Emits `pub const ANSWER: u32 = 1;`. + +#![crate_type = "proc-macro"] + +extern crate proc_macro; + +use proc_macro::TokenStream; + +#[proc_macro_derive(ChangingDerive)] +pub fn changing_derive(_input: TokenStream) -> TokenStream { + "pub const ANSWER: u32 = 1;".parse().unwrap() +} diff --git a/tests/run-make/proc-macro-dep-source-changes-crate-hash/v2.rs b/tests/run-make/proc-macro-dep-source-changes-crate-hash/v2.rs new file mode 100644 index 0000000000000..c739934013967 --- /dev/null +++ b/tests/run-make/proc-macro-dep-source-changes-crate-hash/v2.rs @@ -0,0 +1,13 @@ +// Second version of the proc-macro. Source has changed: it now emits +// `pub const ANSWER: u32 = 2;`. Crate name and exported macro name match v1. 
+ +#![crate_type = "proc-macro"] + +extern crate proc_macro; + +use proc_macro::TokenStream; + +#[proc_macro_derive(ChangingDerive)] +pub fn changing_derive(_input: TokenStream) -> TokenStream { + "pub const ANSWER: u32 = 2;".parse().unwrap() +} diff --git a/tests/run-make/proc-macro-env-changes-crate-hash/changing_macro.rs b/tests/run-make/proc-macro-env-changes-crate-hash/changing_macro.rs new file mode 100644 index 0000000000000..53c6e17bafad2 --- /dev/null +++ b/tests/run-make/proc-macro-env-changes-crate-hash/changing_macro.rs @@ -0,0 +1,15 @@ +// A proc-macro whose output depends on the value of `PROC_MACRO_DEP_TOKEN` +// at the time the *consumer* crate is compiled. The source of this crate is +// stable across the test; only the env var differs between runs. + +extern crate proc_macro; + +use proc_macro::TokenStream; + +#[proc_macro] +pub fn emit_token(_input: TokenStream) -> TokenStream { + let value = std::env::var("PROC_MACRO_DEP_TOKEN").unwrap(); + // Emit a constant whose value embeds the env var, so the tokens the + // consumer ends up with depend on the env var. 
+ format!("pub const TOKEN: &str = {value:?};").parse().unwrap() +} diff --git a/tests/run-make/proc-macro-env-changes-crate-hash/foo.rs b/tests/run-make/proc-macro-env-changes-crate-hash/foo.rs new file mode 100644 index 0000000000000..04279a659397e --- /dev/null +++ b/tests/run-make/proc-macro-env-changes-crate-hash/foo.rs @@ -0,0 +1,9 @@ +#![crate_type = "rlib"] + +extern crate changing_macro; + +changing_macro::emit_token!(); + +pub fn get() -> &'static str { + TOKEN +} diff --git a/tests/run-make/proc-macro-env-changes-crate-hash/rmake.rs b/tests/run-make/proc-macro-env-changes-crate-hash/rmake.rs new file mode 100644 index 0000000000000..410bbce0562a9 --- /dev/null +++ b/tests/run-make/proc-macro-env-changes-crate-hash/rmake.rs @@ -0,0 +1,54 @@ +// Verifies that when a proc-macro's *output* changes without its source +// changing — here, because the proc-macro reads an environment variable at +// expansion time and we vary that variable between consumer builds — the +// consumer crate's crate_hash / SVH changes. +// +// Companion to `proc-macro-dep-source-changes-crate-hash`: that test covers +// the case where the proc-macro source (and therefore its compiled metadata) +// changes; this test covers the case where only the tokens produced during +// expansion change. Both must invalidate the consumer's crate_hash. +// +// Note: the env var is read at consumer-compile time (when the proc-macro +// runs), so we set it on the `rustc` invocation that builds `foo.rs`, not on +// the one that builds the proc-macro itself. +// See https://github.com/rust-lang/rust/issues/94878 and PR #154724. + +//@ needs-crate-type: proc-macro + +use run_make_support::{diff, rfs, rustc}; + +const ENV_VAR: &str = "PROC_MACRO_DEP_TOKEN"; + +fn build_in(dir: &str, value: &str) -> String { + // The proc-macro is built once per build, but its source is identical; + // what differs is the value of ENV_VAR seen during expansion in the + // consumer build. 
+ rustc() + .input("changing_macro.rs") + .crate_name("changing_macro") + .crate_type("proc-macro") + .out_dir(dir) + .run(); + rustc().input("foo.rs").library_search_path(dir).out_dir(dir).env(ENV_VAR, value).run(); + rustc().arg("-Zls=root").input(format!("{dir}/libfoo.rlib")).run().stdout_utf8() +} + +fn main() { + rfs::create_dir("a"); + rfs::create_dir("b"); + rfs::create_dir("a_again"); + + // Build the consumer twice with the same proc-macro source but different + // values of ENV_VAR. foo.rs is byte-identical; only the tokens spliced in + // by `emit_token!` differ. + let a = build_in("a", "first"); + let b = build_in("b", "second"); + // The SVH (printed by `-Zls=root`) must differ between the two builds. + diff().expected_text("a", &a).actual_text("b", b).run_fail(); + + // Sanity: rebuilding with the original env value reproduces the original + // dump, so the difference above is genuinely caused by the env change and + // not by non-determinism in metadata encoding. + let a_again = build_in("a_again", "first"); + diff().expected_text("a", &a).actual_text("a_again", a_again).run(); +} diff --git a/tests/run-make/proc-macro-global-asm-changes-crate-hash/changing_macro.rs b/tests/run-make/proc-macro-global-asm-changes-crate-hash/changing_macro.rs new file mode 100644 index 0000000000000..7949ffb7d189d --- /dev/null +++ b/tests/run-make/proc-macro-global-asm-changes-crate-hash/changing_macro.rs @@ -0,0 +1,18 @@ +// A proc-macro that emits a `core::arch::global_asm!` block whose template +// depends on `PROC_MACRO_ASM_TOKEN`, read at expansion time. The source of +// this crate is stable across the test; only the env var differs between +// runs, so the only thing that changes in the consumer is the asm template +// spliced in by the macro. +// +// The body is a pure assembler comment, so it assembles to nothing on every +// target we test on. 
+ +extern crate proc_macro; + +use proc_macro::TokenStream; + +#[proc_macro] +pub fn emit_global_asm(_input: TokenStream) -> TokenStream { + let value = std::env::var("PROC_MACRO_ASM_TOKEN").unwrap(); + format!(r##"core::arch::global_asm!("# {}");"##, value).parse().unwrap() +} diff --git a/tests/run-make/proc-macro-global-asm-changes-crate-hash/foo.rs b/tests/run-make/proc-macro-global-asm-changes-crate-hash/foo.rs new file mode 100644 index 0000000000000..f2c6a4905e182 --- /dev/null +++ b/tests/run-make/proc-macro-global-asm-changes-crate-hash/foo.rs @@ -0,0 +1,9 @@ +// Consumer crate. Byte-identical across all invocations of the test; +// only the asm template inside the `global_asm!` block spliced in by +// `changing_macro::emit_global_asm!` differs between builds. + +#![crate_type = "rlib"] + +extern crate changing_macro; + +changing_macro::emit_global_asm!(); diff --git a/tests/run-make/proc-macro-global-asm-changes-crate-hash/rmake.rs b/tests/run-make/proc-macro-global-asm-changes-crate-hash/rmake.rs new file mode 100644 index 0000000000000..805ceb31df096 --- /dev/null +++ b/tests/run-make/proc-macro-global-asm-changes-crate-hash/rmake.rs @@ -0,0 +1,62 @@ +// Verifies that when a proc-macro emits a `global_asm!` block whose template +// changes between builds (here driven by an env var read at expansion time), +// the consumer crate's crate_hash / SVH changes. +// +// This exercises an item kind (`DefKind::GlobalAsm`) whose body lives only +// in HIR and is *not* recorded in any way by the metadata encoder: +// `should_encode_span`, `should_encode_attrs`, `should_encode_visibility`, +// `should_encode_generics`, `should_encode_type` and `should_encode_mir` are +// all false for `GlobalAsm`, and `def_kind.has_codegen_attrs()` is false too +// (see `compiler/rustc_metadata/src/rmeta/encoder.rs`). 
All that ends up in +// the rmeta byte stream for a `global_asm!` invocation is the fixed-size +// `DefKind::GlobalAsm` enum discriminant in the def_kind table, which is +// identical regardless of the asm template's contents. The asm template +// itself is only read out of HIR later by `MonoItem::GlobalAsm` codegen in +// `rustc_monomorphize::collector`. +// +// Companion to `proc-macro-dep-source-changes-crate-hash` and +// `proc-macro-env-changes-crate-hash`. +// See https://github.com/rust-lang/rust/issues/94878 and PR #154724. + +//@ needs-crate-type: proc-macro + +use run_make_support::{diff, rfs, rustc}; + +const ENV_VAR: &str = "PROC_MACRO_ASM_TOKEN"; + +fn build_in(dir: &str, value: &str) -> String { + // The proc-macro is built once per build, but its source is identical; + // what differs is the value of ENV_VAR seen during expansion in the + // consumer build. + rustc() + .input("changing_macro.rs") + .crate_name("changing_macro") + .crate_type("proc-macro") + .out_dir(dir) + .run(); + rustc().input("foo.rs").library_search_path(dir).out_dir(dir).env(ENV_VAR, value).run(); + rustc().arg("-Zls=root").input(format!("{dir}/libfoo.rlib")).run().stdout_utf8() +} + +fn main() { + rfs::create_dir("a"); + rfs::create_dir("b"); + rfs::create_dir("a_again"); + + // Build the consumer twice with the same proc-macro source but different + // values of ENV_VAR. foo.rs is byte-identical; only the asm template + // spliced in by `emit_global_asm!` differs. + let a = build_in("a", "first"); + let b = build_in("b", "second"); + // The SVH (printed by `-Zls=root`) must differ between the two builds. + // Under PR #154724 without an HIR-hash contribution, the rmeta encoder + // writes no bytes that depend on the asm template, so the two SVHs are + // identical and this `run_fail` fails (the dumps match). 
+ diff().expected_text("a", &a).actual_text("b", b).run_fail(); + + // Sanity: rebuilding with the original env value reproduces the original + // dump, so the difference above is genuinely caused by the env change + // and not by non-determinism in metadata encoding. + let a_again = build_in("a_again", "first"); + diff().expected_text("a", &a).actual_text("a_again", a_again).run(); +}