From ddc49d4f949375afc39c8fa175eb82e51bddc516 Mon Sep 17 00:00:00 2001 From: Sacha Ayoun Date: Mon, 25 May 2026 15:12:41 +0100 Subject: [PATCH 1/2] Cache file_id by SourceFile stable id in translate_raw_span MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spans are translated once per statement, terminator, local, and item, and each call walked path components and hashed a `FileName` to dedup the file registration. Almost every hit lands on a handful of source files, so caching by rustc's `StableSourceFileId` lets us skip the work after the first miss per file. Measured on signalapp/SparsePostQuantumRatchet, hyperfine 10 runs: 6.355 s → 6.061 s (1.05× faster). --- .../obol-driver/translate/translate_crate.rs | 1 + src/bin/obol-driver/translate/translate_ctx.rs | 7 +++++++ .../obol-driver/translate/translate_meta.rs | 18 +++++++++++++++--- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/bin/obol-driver/translate/translate_crate.rs b/src/bin/obol-driver/translate/translate_crate.rs index 9334940..d6f5a6c 100644 --- a/src/bin/obol-driver/translate/translate_crate.rs +++ b/src/bin/obol-driver/translate/translate_crate.rs @@ -567,6 +567,7 @@ pub fn translate<'tcx, 'ctx>( id_map: Default::default(), reverse_id_map: Default::default(), file_to_id: Default::default(), + source_file_to_id: Default::default(), items_to_translate: Default::default(), processed: Default::default(), cached_item_metas: Default::default(), diff --git a/src/bin/obol-driver/translate/translate_ctx.rs b/src/bin/obol-driver/translate/translate_ctx.rs index 2a1d37c..6878fa9 100644 --- a/src/bin/obol-driver/translate/translate_ctx.rs +++ b/src/bin/obol-driver/translate/translate_ctx.rs @@ -2,12 +2,14 @@ extern crate rustc_hir; extern crate rustc_middle; extern crate rustc_public; +extern crate rustc_span; use super::translate_crate::TransItemSource; use charon_lib::ast::*; use charon_lib::formatter::{FmtCtx, IntoFormatter}; use 
charon_lib::options::TranslateOptions; use rustc_middle::ty::TyCtxt; +use rustc_span::StableSourceFileId; use std::cell::RefCell; use std::collections::{BTreeSet, HashMap, HashSet}; use std::path::PathBuf; @@ -34,6 +36,11 @@ pub struct TranslateCtx<'tcx> { pub reverse_id_map: HashMap, /// The reverse filename map. pub file_to_id: HashMap, + /// Direct cache: rustc `SourceFile` stable id → `FileId`. + /// `translate_raw_span` is called once per statement/terminator/local/item, and almost all + /// hits land on a handful of source files. This avoids the per-span filename normalization, + /// path component walks, and `FileName` hashing that `file_to_id` would otherwise re-do. + pub source_file_to_id: HashMap<StableSourceFileId, FileId>, /// Cache of StableMir type IDs to our translated types. pub type_trans_cache: HashMap, diff --git a/src/bin/obol-driver/translate/translate_meta.rs b/src/bin/obol-driver/translate/translate_meta.rs index a154223..206f0a5 100644 --- a/src/bin/obol-driver/translate/translate_meta.rs +++ b/src/bin/obol-driver/translate/translate_meta.rs @@ -119,9 +119,21 @@ impl<'tcx, 'ctx> TranslateCtx<'tcx> { let span = rustc_public::rustc_internal::internal(self.tcx, *span); let span = span.source_callsite(); let smap: &rustc_span::source_map::SourceMap = self.tcx.sess.psess.source_map(); - let filename = smap.span_to_filename(span); - let filename = self.translate_filename(&filename); - let file_id = self.register_file(filename, span); + // Resolving the file via `span_to_filename` + `translate_filename` walks path components + // and hashes a `FileName` on every call. Spans are translated once per statement / + // terminator / local / item, so caching by the rustc `SourceFile` stable id skips that + // work after the first hit.
+ let source_file = smap.lookup_source_file(span.lo()); + let file_id = match self.source_file_to_id.get(&source_file.stable_id) { + Some(id) => *id, + None => { + let filename = smap.span_to_filename(span); + let filename = self.translate_filename(&filename); + let id = self.register_file(filename, span); + self.source_file_to_id.insert(source_file.stable_id, id); + id + } + }; let convert_loc = |pos: rustc_span::BytePos| -> Loc { let loc = smap.lookup_char_pos(pos); From a9bbacf362d877178ba1cf93fd9d20d1244bcf65 Mon Sep 17 00:00:00 2001 From: Sacha Ayoun Date: Mon, 25 May 2026 15:23:37 +0100 Subject: [PATCH 2/2] Memoize translate_raw_span by ty::Span Many MIR statements share the same expansion span (especially within inlined or macro-generated code), and each call ran `lookup_char_pos` twice and re-resolved the source file. Caching `ty::Span -> SpanData` short-circuits that. Profile: `translate_span_from_smir` inclusive drops from 1.02% to 0.43%. Hyperfine (20 runs, SpQR): 5.919 s -> 5.885 s. 
--- .../obol-driver/translate/translate_crate.rs | 1 + src/bin/obol-driver/translate/translate_ctx.rs | 3 +++ .../obol-driver/translate/translate_meta.rs | 18 +++++++++++------- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/bin/obol-driver/translate/translate_crate.rs b/src/bin/obol-driver/translate/translate_crate.rs index d6f5a6c..9cd6759 100644 --- a/src/bin/obol-driver/translate/translate_crate.rs +++ b/src/bin/obol-driver/translate/translate_crate.rs @@ -568,6 +568,7 @@ pub fn translate<'tcx, 'ctx>( reverse_id_map: Default::default(), file_to_id: Default::default(), source_file_to_id: Default::default(), + span_cache: Default::default(), items_to_translate: Default::default(), processed: Default::default(), cached_item_metas: Default::default(), diff --git a/src/bin/obol-driver/translate/translate_ctx.rs b/src/bin/obol-driver/translate/translate_ctx.rs index 6878fa9..8459934 100644 --- a/src/bin/obol-driver/translate/translate_ctx.rs +++ b/src/bin/obol-driver/translate/translate_ctx.rs @@ -41,6 +41,9 @@ pub struct TranslateCtx<'tcx> { /// hits land on a handful of source files. This avoids the per-span filename normalization, /// path component walks, and `FileName` hashing that `file_to_id` would otherwise re-do. pub source_file_to_id: HashMap<StableSourceFileId, FileId>, + /// Per-span cache: `ty::Span` → translated `SpanData`. The same span is translated repeatedly + /// when statements share a macro call site or were generated from the same MIR node. + pub span_cache: HashMap<ty::Span, meta::SpanData>, /// Cache of StableMir type IDs to our translated types.
pub type_trans_cache: HashMap, diff --git a/src/bin/obol-driver/translate/translate_meta.rs b/src/bin/obol-driver/translate/translate_meta.rs index 206f0a5..2940c40 100644 --- a/src/bin/obol-driver/translate/translate_meta.rs +++ b/src/bin/obol-driver/translate/translate_meta.rs @@ -115,14 +115,17 @@ impl<'tcx, 'ctx> TranslateCtx<'tcx> { } } - pub fn translate_raw_span(&mut self, span: &ty::Span) -> meta::SpanData { - let span = rustc_public::rustc_internal::internal(self.tcx, *span); + pub fn translate_raw_span(&mut self, rspan: &ty::Span) -> meta::SpanData { + // Top-level cache by `ty::Span`: many MIR statements share the same expansion span. + if let Some(cached) = self.span_cache.get(rspan) { + return *cached; + } + + let span = rustc_public::rustc_internal::internal(self.tcx, *rspan); let span = span.source_callsite(); let smap: &rustc_span::source_map::SourceMap = self.tcx.sess.psess.source_map(); // Resolving the file via `span_to_filename` + `translate_filename` walks path components - // and hashes a `FileName` on every call. Spans are translated once per statement / - // terminator / local / item, so caching by the rustc `SourceFile` stable id skips that - // work after the first hit. + // and hashes a `FileName` on every call, so cache by the rustc `SourceFile` stable id. let source_file = smap.lookup_source_file(span.lo()); let file_id = match self.source_file_to_id.get(&source_file.stable_id) { Some(id) => *id, @@ -145,8 +148,9 @@ impl<'tcx, 'ctx> TranslateCtx<'tcx> { let beg = convert_loc(span.lo()); let end = convert_loc(span.hi()); - // Put together - meta::SpanData { file_id, beg, end } + let data = meta::SpanData { file_id, beg, end }; + self.span_cache.insert(*rspan, data); + data } pub(crate) fn translate_span_from_smir(&mut self, span: &ty::Span) -> Span {