From 955d1d98e8a98e635f226f5941c6f20389532eb6 Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Mon, 18 May 2026 11:55:15 +0200 Subject: [PATCH 01/14] feat(driver): add per-phase pipeline timing instrumentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a small RAII timer (`PhaseTimer`) that records wall-clock time for each stage of `BuildPipeline` and for every participant invocation (`pre_index`, `post_index`, `pre_annotate`, `post_annotate`). The inner `ParsedProject::index` and `IndexedProject::annotate` methods self-time, so when a participant triggers an implicit whole-project re-index or re-annotate from inside one of its hooks, the cost shows up nested under the participant that caused it. The instrumentation is gated on `PLC_TIMING=1`. With the env var unset (the default, including CI), the timers compile to a near-noop and emit nothing — no behavioural change to existing tests, no diagnostic drift, no stdout/stderr noise. `PhaseTimer::new` takes a `&'static str` for static labels, `PhaseTimer::new_with(|| format!(...))` defers label allocation for dynamic labels so the instrumentation doesn't perturb the disabled-state baseline. `PipelineParticipant` and `PipelineParticipantMut` gain a `name()` method, used as the timer label. The default returns the implementing type's short name (generics stripped); participants can override if needed. Documented in `book/src/development/phase_timing.md`. `scripts/oscat_multi_split.py` is bundled for benchmarking against real multi-unit libraries. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 5 + book/src/SUMMARY.md | 2 + book/src/development/phase_timing.md | 90 +++++++++++++++++ compiler/plc_driver/src/pipelines.rs | 74 ++++++++++---- .../plc_driver/src/pipelines/participant.rs | 10 ++ compiler/plc_driver/src/pipelines/timing.rs | 96 +++++++++++++++++++ scripts/oscat_multi_split.py | 94 ++++++++++++++++++ 7 files changed, 350 insertions(+), 21 deletions(-) create mode 100644 book/src/development/phase_timing.md create mode 100644 compiler/plc_driver/src/pipelines/timing.rs create mode 100644 scripts/oscat_multi_split.py diff --git a/.gitignore b/.gitignore index 79ef2e904c3..1106f792d48 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,8 @@ tests/lit/**/*.txt tests/lit/**/Output/ tests/lit/**/header_generator/**/*.h !tests/lit/**/header_generator/**/dependencies.plc.h + +# Local-only baseline / test corpora (not committed) +/.baseline/ +/benchmarks/local/ + diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index 30cf6f43164..21621a7d406 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -25,3 +25,5 @@ - [Codegen](./arch/codegen.md) - [CFC](./cfc/cfc.md) - [Model-to-Model Conversion](./cfc/m2m.md) +- [Development]() + - [Profiling Build Phases](./development/phase_timing.md) diff --git a/book/src/development/phase_timing.md b/book/src/development/phase_timing.md new file mode 100644 index 00000000000..270292ab132 --- /dev/null +++ b/book/src/development/phase_timing.md @@ -0,0 +1,90 @@ +# Profiling Build Phases + +The compiler driver has a built-in phase timer that records wall-clock +time for each stage of `BuildPipeline` and for every participant +invocation. It is intended for ad-hoc performance work — for example, +when investigating why a particular project compiles slower than +expected, or when measuring the impact of a change to the lowering +pipeline. + +## Enabling + +Set the environment variable `PLC_TIMING=1` before invoking the +compiler: + +```sh +PLC_TIMING=1 plc --check my_project.st +``` + +When the variable is unset (or set to `0` / empty), the timers compile +to a no-op and emit nothing. There is no behavioural difference and no +test impact, so it is safe to leave the code path always present. + +## Reading the output + +Each timed scope writes one line to stderr on completion, indented by +nesting depth. Children appear *before* their parent's log line (the +parent prints when it drops, i.e. at end-of-scope), which matches the +standard flame-graph convention. + +A condensed example from compiling a small project: + +```text +[plc-timing] parse: 25.7ms +[plc-timing] pre_index (participants): 12.6ms +[plc-timing] pre_index/LoopDesugarer: 6.6ms +[plc-timing] pre_index/ControlStatementParticipant: 3.0ms +[plc-timing] ParsedProject::index: 7.4ms +[plc-timing] post_index (participants): 27.6ms +[plc-timing] post_index/PolymorphismLowerer: 11.7ms +[plc-timing] ParsedProject::index: 9.6ms +[plc-timing] post_index/RetainParticipant: 15.8ms +[plc-timing] ParsedProject::index: 5.8ms +[plc-timing] index (driver): 47.6ms +[plc-timing] annotate (driver): 615.2ms +``` + +A few things to note from this trace: + +- The outer `parse`, `index (driver)`, `annotate (driver)`, and + `generate (driver)` scopes correspond to the four top-level phases. +- Each participant invocation is timed individually with a label of the + form `/`, e.g. `post_index/PolymorphismLowerer`. +- `ParsedProject::index` and `IndexedProject::annotate` self-time, so + any participant that re-invokes them appears with a nested + `ParsedProject::index` (or `IndexedProject::annotate`) child line. + Those nested re-passes are the main thing to look for when + investigating slow builds. + +## What to look for + +The trace is most useful for spotting **redundant whole-project +re-passes**: cases where a participant mutates the AST and then re-runs +indexing or annotation against the entire project, even though only a +few units were touched. A nested `ParsedProject::index` or +`IndexedProject::annotate` under a participant hook is a visible +indicator of one of those re-passes. + +## Adding new timed scopes + +To time a new scope, construct a `PhaseTimer` and let it drop at the +end of the scope you want to measure: + +```rust +use crate::pipelines::timing::PhaseTimer; + +fn expensive_thing() { + let _t = PhaseTimer::new("expensive_thing"); + // ... work ... +} +``` + +For participant-style instrumentation, prefer wrapping the *callee* +(the inner method that does the work) rather than each call site. That +way a re-entrant call from a participant gets timed automatically, +without having to thread timer code through every place that might +call into the method. + +The timer label is the only argument and accepts any +`Into`. Use a stable, easily-greppable string — these strings +end up in the trace output and may be parsed by tooling. diff --git a/compiler/plc_driver/src/pipelines.rs b/compiler/plc_driver/src/pipelines.rs index 833978504d4..c2533d9f25c 100644 --- a/compiler/plc_driver/src/pipelines.rs +++ b/compiler/plc_driver/src/pipelines.rs @@ -58,6 +58,9 @@ use serde_json; use toml; pub mod participant; +pub mod timing; + +use timing::PhaseTimer; pub mod property; pub struct BuildPipeline { @@ -448,39 +451,65 @@ impl Pipeline for BuildPipeline { } fn parse(&mut self) -> Result { + let _t = PhaseTimer::new("parse"); let project = ParsedProject::parse(&self.context, &self.project, &mut self.diagnostician)?; Ok(project) } fn index(&mut self, project: ParsedProject) -> Result { - self.participants.iter_mut().for_each(|p| { - p.pre_index(&project); - }); - let project = self.mutable_participants.iter_mut().fold(project, |project, p| p.pre_index(project)); + let _t = PhaseTimer::new("index (driver)"); + let project = { + let _t = PhaseTimer::new("pre_index (participants)"); + self.participants.iter_mut().for_each(|p| { + let _t = PhaseTimer::new_with(|| format!("pre_index/{}", p.name())); + p.pre_index(&project); + }); + self.mutable_participants.iter_mut().fold(project, |project, p| { + let _t = PhaseTimer::new_with(|| format!("pre_index/{}", p.name())); + p.pre_index(project) + }) + }; let indexed_project = project.index(self.context.provider()); - self.participants.iter().for_each(|p| { - p.post_index(&indexed_project); - }); - let project = - self.mutable_participants.iter_mut().fold(indexed_project, |project, p| p.post_index(project)); + let project = { + let _t = PhaseTimer::new("post_index (participants)"); + self.participants.iter().for_each(|p| { + let _t = PhaseTimer::new_with(|| format!("post_index/{}", p.name())); + p.post_index(&indexed_project); + }); + self.mutable_participants.iter_mut().fold(indexed_project, |project, p| { + let _t = PhaseTimer::new_with(|| format!("post_index/{}", p.name())); + p.post_index(project) + }) + }; Ok(project) } fn annotate(&mut self, project: IndexedProject) -> Result { - self.participants.iter().for_each(|p| { - p.pre_annotate(&project); - }); - let project = - self.mutable_participants.iter_mut().fold(project, |project, p| p.pre_annotate(project)); + let _t = PhaseTimer::new("annotate (driver)"); + let project = { + let _t = PhaseTimer::new("pre_annotate (participants)"); + self.participants.iter().for_each(|p| { + let _t = PhaseTimer::new_with(|| format!("pre_annotate/{}", p.name())); + p.pre_annotate(&project); + }); + self.mutable_participants.iter_mut().fold(project, |project, p| { + let _t = PhaseTimer::new_with(|| format!("pre_annotate/{}", p.name())); + p.pre_annotate(project) + }) + }; let annotated_project = project.annotate(self.context.provider()); - self.participants.iter().for_each(|p| { - p.post_annotate(&annotated_project); - }); - let mut annotated_project = self - .mutable_participants - .iter_mut() - .fold(annotated_project, |project, p| p.post_annotate(project)); + let mut annotated_project = { + let _t = PhaseTimer::new("post_annotate (participants)"); + self.participants.iter().for_each(|p| { + let _t = PhaseTimer::new_with(|| format!("post_annotate/{}", p.name())); + p.post_annotate(&annotated_project); + }); + self.mutable_participants.iter_mut().fold(annotated_project, |project, p| { + let _t = PhaseTimer::new_with(|| format!("post_annotate/{}", p.name())); + p.post_annotate(project) + }) + }; // Collect diagnostics from participants that validated during lowering. for p in &mut self.mutable_participants { @@ -491,6 +520,7 @@ impl Pipeline for BuildPipeline { } fn generate(&mut self, _context: &CodegenContext, project: AnnotatedProject) -> Result<(), Diagnostic> { + let _t = PhaseTimer::new("generate (driver)"); self.participants.iter_mut().try_fold((), |_, participant| participant.pre_generate(&project))?; let Some(compile_options) = self.get_compile_options() else { log::debug!("No compile options provided"); @@ -686,6 +716,7 @@ impl ParsedProject { /// Creates an index out of a pased project. The index could then be used to query datatypes pub fn index(self, id_provider: IdProvider) -> IndexedProject { + let _t = PhaseTimer::new("ParsedProject::index"); let indexed_units = self .units .into_par_iter() @@ -736,6 +767,7 @@ pub struct IndexedProject { impl IndexedProject { /// Creates annotations on the project in order to facilitate codegen and validation pub fn annotate(self, mut id_provider: IdProvider) -> AnnotatedProject { + let _t = PhaseTimer::new("IndexedProject::annotate"); //Create and call the annotator let mut annotated_units = Vec::new(); let mut all_annotations = AnnotationMapImpl::default(); diff --git a/compiler/plc_driver/src/pipelines/participant.rs b/compiler/plc_driver/src/pipelines/participant.rs index 22a9764cc9d..966d3851c72 100644 --- a/compiler/plc_driver/src/pipelines/participant.rs +++ b/compiler/plc_driver/src/pipelines/participant.rs @@ -34,6 +34,11 @@ use super::{AnnotatedProject, AnnotatedUnit, GeneratedProject, IndexedProject, P /// Implementors can decide parse the Ast and project information /// to do actions like validation or logging pub trait PipelineParticipant: Sync + Send { + /// Short label for this participant, used by the phase-timing + /// instrumentation. Default returns the implementing type's name. + fn name(&self) -> &'static str { + super::timing::short_type_name(std::any::type_name::()) + } /// Implement this to access the project before it gets indexed /// This happens directly after parsing fn pre_index(&mut self, _parsed_project: &ParsedProject) {} @@ -69,6 +74,11 @@ pub trait PipelineParticipant: Sync + Send { /// If a previous step is being modified, such as the AST or index, /// the caller is responsible for calling the previous steps pub trait PipelineParticipantMut { + /// Short label for this participant, used by the phase-timing + /// instrumentation. Default returns the implementing type's name. + fn name(&self) -> &'static str { + super::timing::short_type_name(std::any::type_name::()) + } /// Implement this to access the project before it gets indexed /// This happens directly after parsing fn pre_index(&mut self, parsed_project: ParsedProject) -> ParsedProject { diff --git a/compiler/plc_driver/src/pipelines/timing.rs b/compiler/plc_driver/src/pipelines/timing.rs new file mode 100644 index 00000000000..4192547ba1e --- /dev/null +++ b/compiler/plc_driver/src/pipelines/timing.rs @@ -0,0 +1,96 @@ +//! Pipeline phase timing. +//! +//! Enabled by setting `PLC_TIMING=1` in the environment. When enabled, +//! each timed scope logs its elapsed wall-clock time to stderr on drop, +//! indented by nesting depth so re-entrant work (e.g. a participant that +//! triggers a project-wide re-index) is visible. + +use std::cell::Cell; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Once; +use std::time::Instant; + +static ENABLED: AtomicBool = AtomicBool::new(false); +static INIT: Once = Once::new(); + +thread_local! { + static DEPTH: Cell = const { Cell::new(0) }; +} + +fn enabled() -> bool { + INIT.call_once(|| { + let on = std::env::var("PLC_TIMING").map(|v| !v.is_empty() && v != "0").unwrap_or(false); + ENABLED.store(on, Ordering::Relaxed); + }); + ENABLED.load(Ordering::Relaxed) +} + +/// RAII guard that records a labelled phase timing. Logs to stderr on drop +/// when `PLC_TIMING` is enabled. No-op otherwise. +pub struct PhaseTimer { + label: String, + start: Instant, + depth: usize, + active: bool, +} + +impl PhaseTimer { + /// Constructs a timer with a label known at compile time. When timing is + /// disabled, the only work done is reading the `ENABLED` flag. + pub fn new(label: &'static str) -> Self { + Self::new_inner(active_depth(), label, enabled()) + } + + /// Constructs a timer whose label requires runtime formatting. The closure + /// is only invoked when timing is enabled — `format!`/allocation is + /// skipped on the hot path when `PLC_TIMING` is unset. + pub fn new_with(label_fn: F) -> Self + where + F: FnOnce() -> String, + { + let active = enabled(); + let label: String = if active { label_fn() } else { String::new() }; + Self::new_inner(active_depth(), label, active) + } + + fn new_inner(depth: usize, label: impl Into, active: bool) -> Self { + Self { label: label.into(), start: Instant::now(), depth, active } + } +} + +fn active_depth() -> usize { + if enabled() { + DEPTH.with(|d| { + let cur = d.get(); + d.set(cur + 1); + cur + }) + } else { + 0 + } +} + +impl Drop for PhaseTimer { + fn drop(&mut self) { + if !self.active { + return; + } + let elapsed = self.start.elapsed(); + let indent = " ".repeat(self.depth); + eprintln!("[plc-timing] {indent}{}: {:.3?}", self.label, elapsed); + DEPTH.with(|d| d.set(d.get().saturating_sub(1))); + } +} + +/// Strip the leading module path and any generic-parameter suffix from a +/// `std::any::type_name` result so the emitted label is just the type's +/// short name. Examples: +/// `foo::bar::Baz` -> `Baz` +/// `foo::Baz` -> `Baz` +pub fn short_type_name(full: &'static str) -> &'static str { + let base = match full.find('<') { + Some(idx) => &full[..idx], + None => full, + }; + base.rsplit("::").next().unwrap_or(base) +} diff --git a/scripts/oscat_multi_split.py b/scripts/oscat_multi_split.py new file mode 100644 index 00000000000..c5a7f55bd8f --- /dev/null +++ b/scripts/oscat_multi_split.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +"""Split a single-file `oscat.st` corpus into one POU per file. + +Each `FUNCTION` / `FUNCTION_BLOCK` / `PROGRAM` / `CLASS` / `TYPE` +declaration is emitted to its own `.st` file under +`.baseline/oscat-multi/src/`. Mirrors the "one POU per unit" style of +real PLC projects and gives `cargo xtask metrics`-style measurements a +representative multi-unit corpus. + +Run from the repo root after cloning oscat into `.baseline/oscat/`: + + git clone --depth 1 https://github.com/plc-lang/oscat .baseline/oscat + python3 scripts/oscat_multi_split.py + +The output directory is gitignored. Pass `--out` to override. +""" + +import argparse +import os +import re +import shutil + + +END_KINDS = {"FUNCTION_BLOCK", "FUNCTION", "PROGRAM", "CLASS", "TYPE"} +START = re.compile(r"^(FUNCTION_BLOCK|FUNCTION|PROGRAM|CLASS|TYPE)\s+(\S+)") +END = re.compile(r"^END_(FUNCTION_BLOCK|FUNCTION|PROGRAM|CLASS|TYPE|VAR_GLOBAL|VAR_CONFIG)\b") + + +def sanitize(name: str) -> str: + return re.sub(r"[^A-Za-z0-9_]", "_", name) + + +def split(src_path: str, out_dir: str) -> int: + with open(src_path) as f: + lines = f.read().split("\n") + if os.path.isdir(out_dir): + shutil.rmtree(out_dir) + os.makedirs(out_dir, exist_ok=True) + + preamble: list[str] = [] + i = 0 + while i < len(lines) and not START.match(lines[i]): + preamble.append(lines[i]) + i += 1 + if preamble: + with open(os.path.join(out_dir, "_preamble.st"), "w") as f: + f.write("\n".join(preamble) + "\n") + + counts: dict[str, int] = {} + cur_name: str | None = None + buf: list[str] = [] + + while i < len(lines): + line = lines[i] + m = START.match(line) + if m and cur_name is None: + cur_name = m.group(2) + buf = [line] + elif cur_name is not None: + buf.append(line) + m_end = END.match(line) + if m_end and m_end.group(1) in END_KINDS: + sane = sanitize(cur_name) + counts[sane] = counts.get(sane, 0) + 1 + suffix = f"_{counts[sane]}" if counts[sane] > 1 else "" + fname = f"{sane}{suffix}.st" + with open(os.path.join(out_dir, fname), "w") as f: + f.write("\n".join(buf) + "\n") + cur_name = None + buf = [] + i += 1 + + if buf: + with open(os.path.join(out_dir, "_tail.st"), "w") as f: + f.write("\n".join(buf) + "\n") + + return len([f for f in os.listdir(out_dir) if f.endswith(".st")]) + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--src", default=".baseline/oscat/oscat.st", help="path to the single-file oscat source" + ) + parser.add_argument( + "--out", default=".baseline/oscat-multi/src", help="output directory for per-POU files" + ) + args = parser.parse_args() + written = split(args.src, args.out) + print(f"wrote {written} files to {args.out}") + + +if __name__ == "__main__": + main() From 35558643b455dec3aa56ea82f61e49625ff34083 Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Mon, 18 May 2026 11:57:14 +0200 Subject: [PATCH 02/14] feat(index): track per-unit symbol ownership Adds the infrastructure that lets the index drop a single source unit's contributions without rebuilding the whole project. - `UnitId` newtype (with `BUILTIN`, `SYNTHETIC`, `UNTAGGED` sentinels) tags every entry imported into the global `Index`. The typed `UnitId::source_index() -> Option` accessor encapsulates the conversion back to a positional index so non-source ids can't slip through. - `Index::import_with_unit(other, unit_id)` is the new primary import path; it records each entry's `(SymbolKind, map_key, identifier)` in an `Index.unit_symbols` side-table. The existing `Index::import` becomes a thin wrapper that passes `UnitId::UNTAGGED`. - `Index::remove_unit(unit_id)` walks the side-table and drops only the entries that unit contributed, leaving other units' entries under shared map keys (e.g. an enum variant name appearing in two unrelated enums) untouched. - Shared entries (auto-generated POUs and implementations whose names collide across compilation units) are reference-counted via `UnitSymbolIndex::has_owner_other_than`: every importer records a stake, and `remove_unit` only drops the underlying entry when the last owner is removed. Without this, the first removal of a shared entry deletes it and orphans the remaining units' ownership rows across reindex cycles. - `SymbolMap` gets `retain_at_key` so removals can filter entries precisely instead of dropping a whole key. - The driver's index pipeline tags source units with `UnitId::source(i)`, built-in functions with `UnitId::BUILTIN`, and the resolver's `new_index` with `UnitId::SYNTHETIC`. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 1 + compiler/plc_driver/src/pipelines.rs | 13 +- compiler/plc_source/Cargo.toml | 1 + compiler/plc_source/src/lib.rs | 24 +++ src/index.rs | 133 +++++++++++++++- src/index/symbol.rs | 16 ++ src/index/tests.rs | 1 + src/index/tests/unit_id_tests.rs | 187 +++++++++++++++++++++++ src/index/unit_id.rs | 219 +++++++++++++++++++++++++++ src/test_utils.rs | 10 +- 10 files changed, 593 insertions(+), 12 deletions(-) create mode 100644 src/index/tests/unit_id_tests.rs create mode 100644 src/index/unit_id.rs diff --git a/Cargo.lock b/Cargo.lock index 0a828ac85ba..e0d840349d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2487,6 +2487,7 @@ dependencies = [ "insta", "serde", "serde_json", + "siphasher", ] [[package]] diff --git a/compiler/plc_driver/src/pipelines.rs b/compiler/plc_driver/src/pipelines.rs index c2533d9f25c..69270074977 100644 --- a/compiler/plc_driver/src/pipelines.rs +++ b/compiler/plc_driver/src/pipelines.rs @@ -21,7 +21,7 @@ use itertools::Itertools; use participant::{PipelineParticipant, PipelineParticipantMut}; use plc::{ codegen::{CodegenContext, GeneratedModule}, - index::{indexer, FxIndexSet, Index}, + index::{indexer, FxIndexSet, Index, UnitId}, linker::LinkerType, lowering::{calls::AggregateTypeLowerer, polymorphism::PolymorphismLowerer, property::PropertyLowerer}, output::{FormatOption, RelocationPreference}, @@ -732,9 +732,9 @@ impl ParsedProject { let mut global_index = Index::default(); let mut units = vec![]; - for (index, unit) in indexed_units { + for (idx, (index, unit)) in indexed_units.into_iter().enumerate() { units.push(unit); - global_index.import(index); + global_index.import_with_unit(index, UnitId::source(idx)); } // import built-in types like INT, BOOL, etc. @@ -744,7 +744,7 @@ impl ParsedProject { // import builtin functions let builtins = plc::builtins::parse_built_ins(id_provider); - global_index.import(indexer::index(&builtins)); + global_index.import_with_unit(indexer::index(&builtins), UnitId::BUILTIN); //TODO: evaluate constants should probably be a participant let (index, _unresolvables) = plc::resolver::const_evaluator::evaluate_constants(global_index); @@ -788,7 +788,10 @@ impl IndexedProject { } let mut index = self.index; - index.import(std::mem::take(&mut all_annotations.new_index)); + // The resolver's `new_index` carries generic instantiations and + // synthetic types created during annotation; tag them as synthetic so + // a later per-unit invalidation doesn't accidentally drop them. + index.import_with_unit(std::mem::take(&mut all_annotations.new_index), UnitId::SYNTHETIC); let annotations = AstAnnotations::new(all_annotations, id_provider.next_id()); diff --git a/compiler/plc_source/Cargo.toml b/compiler/plc_source/Cargo.toml index 49db1af11c0..4375d0aca99 100644 --- a/compiler/plc_source/Cargo.toml +++ b/compiler/plc_source/Cargo.toml @@ -10,6 +10,7 @@ encoding_rs.workspace = true encoding_rs_io.workspace = true serde_json.workspace = true serde.workspace = true +siphasher = "1" [dev-dependencies] insta = "1.31.0" diff --git a/compiler/plc_source/src/lib.rs b/compiler/plc_source/src/lib.rs index d77bc85e6c5..ce2ea2ff805 100644 --- a/compiler/plc_source/src/lib.rs +++ b/compiler/plc_source/src/lib.rs @@ -1,5 +1,6 @@ use std::{ fs::File, + hash::Hasher, io::Read, path::{Path, PathBuf}, }; @@ -7,8 +8,25 @@ use std::{ use encoding_rs::Encoding; use encoding_rs_io::DecodeReaderBytesBuilder; use serde::{Deserialize, Serialize}; +use siphasher::sip::SipHasher13; pub mod source_location; + +/// A content-addressed hash of a source unit. Two `SourceCode` values with the +/// same `source` string produce the same `SourceHash` regardless of path or +/// load order; the hash is used by incremental compilation to decide whether a +/// unit has actually changed. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct SourceHash(pub u64); + +impl SourceHash { + /// Computes a stable hash of the given source content. + pub fn of(content: &str) -> Self { + let mut h = SipHasher13::new(); + h.write(content.as_bytes()); + SourceHash(h.finish()) + } +} /// Represents the type of source a SourceContainer holds #[derive(Clone, Copy, Debug)] pub enum SourceType { @@ -138,6 +156,12 @@ impl SourceCode { self.path = Some(name.into()); self } + + /// Returns a stable content hash of the source. Recomputed on every call; + /// callers that want to cache it should keep their own copy. + pub fn content_hash(&self) -> SourceHash { + SourceHash::of(&self.source) + } } pub trait Compilable { diff --git a/src/index.rs b/src/index.rs index ee772f10631..c26bf16e54b 100644 --- a/src/index.rs +++ b/src/index.rs @@ -30,6 +30,9 @@ pub mod const_expressions; pub mod indexer; mod instance_iterator; pub mod symbol; +mod unit_id; + +pub use unit_id::{OwnedSymbol, SymbolKind, UnitId, UnitSymbolIndex}; #[cfg(test)] mod tests; @@ -1327,6 +1330,14 @@ pub struct Index { labels: FxIndexMap>, config_variables: Vec, + + /// Reverse map from owning [`UnitId`] to the set of entries that unit + /// contributed. Populated by [`Index::import_with_unit`] and consumed by + /// [`Index::remove_unit`] so a unit's contributions can be dropped without + /// scanning every map. Skipped over by serde so existing snapshots stay + /// stable. + #[serde(skip)] + unit_symbols: UnitSymbolIndex, } impl Index { @@ -1335,7 +1346,16 @@ impl Index { /// imports all global_variables, types and implementations /// # Arguments /// - `other` the other index. The elements are drained from the given index and moved into the current one - pub fn import(&mut self, mut other: Index) { + pub fn import(&mut self, other: Index) { + self.import_with_unit(other, UnitId::UNTAGGED); + } + + /// Like [`Index::import`] but records every entry's `unit_id` in the + /// internal reverse map. [`Index::remove_unit`] uses that record to drop a + /// single unit's contributions later. Pass [`UnitId::BUILTIN`] for + /// built-in symbols and [`UnitId::SYNTHETIC`] for symbols created by + /// lowering or generic-instantiation passes. + pub fn import_with_unit(&mut self, mut other: Index, unit_id: UnitId) { //global variables for (name, e) in other.global_variables.drain(..) { // Synthetic hardware-backing globals (`__PI_*` / `__M_*` / `__G_*`) are @@ -1350,6 +1370,12 @@ impl Index { .into_iter() .map(|it| self.transfer_constants(it, &mut other.constant_expressions)) .collect::>(); + for entry in &entries { + self.unit_symbols.record( + unit_id, + OwnedSymbol::new(SymbolKind::GlobalVariable, &name, entry.get_qualified_name()), + ); + } self.global_variables.insert_many(name, entries); } @@ -1359,6 +1385,12 @@ impl Index { .into_iter() .map(|e| self.transfer_constants(e, &mut other.constant_expressions)) .collect::>(); + for entry in &elements { + self.unit_symbols.record( + unit_id, + OwnedSymbol::new(SymbolKind::GlobalInitializer, &name, entry.get_qualified_name()), + ); + } self.global_initializers.insert_many(name, elements); } @@ -1402,7 +1434,16 @@ impl Index { .collect::>(); for e in variables.iter() { - self.enum_global_variables.insert(e.get_name().to_lowercase(), e.clone()); + let map_key = e.get_name().to_lowercase(); + self.unit_symbols.record( + unit_id, + OwnedSymbol::new( + SymbolKind::EnumGlobalVariable, + &map_key, + e.get_qualified_name(), + ), + ); + self.enum_global_variables.insert(map_key, e.clone()); } variants.append(&mut variables); } @@ -1416,6 +1457,8 @@ impl Index { _ => {} } + self.unit_symbols + .record(unit_id, OwnedSymbol::new(SymbolKind::Type, &name, e.get_name())); self.type_index.types.insert(name.clone(), e) } } @@ -1434,22 +1477,42 @@ impl Index { members.append(&mut variables); } }); + for entry in &elements { + self.unit_symbols + .record(unit_id, OwnedSymbol::new(SymbolKind::PouType, &name, entry.get_name())); + } self.type_index.pou_types.insert_many(name, elements) } //implementations + for (name, entry) in &other.implementations { + self.unit_symbols + .record(unit_id, OwnedSymbol::new(SymbolKind::Implementation, name, entry.get_call_name())); + } self.implementations.extend(other.implementations); // interfaces + for name in other.interfaces.keys() { + self.unit_symbols.record(unit_id, OwnedSymbol::unique(SymbolKind::Interface, name)); + } self.interfaces.extend(other.interfaces); // properties + for name in other.properties.keys() { + self.unit_symbols.record(unit_id, OwnedSymbol::unique(SymbolKind::Property, name)); + } self.properties.extend(other.properties); //pous for (name, elements) in other.pous.drain(..) { for ele in elements { - // skip automatically generated pou's if they are already in the target index + // Always record this unit's stake in the entry — even when we + // skip the actual `pous.insert` for a colliding auto-gen, so + // that `remove_unit` knows another owner remains and won't + // drop a shared entry on the first removal. + self.unit_symbols.record(unit_id, OwnedSymbol::new(SymbolKind::Pou, &name, ele.get_name())); + // Skip automatically generated POUs that another unit already + // contributed under the same name. if !ele.is_auto_generated_function() || !self.pous.contains_key(&name) { self.pous.insert(name.clone(), ele); } @@ -1465,6 +1528,70 @@ impl Index { // self.constant_expressions.import(other.constant_expressions) } + /// Drops every entry recorded for `unit_id` from this index. Pairs with + /// [`Index::import_with_unit`]: only entries imported under that exact + /// `unit_id` are touched, so other units' contributions under the same + /// map key (e.g. enum variants from a different enum) are preserved. + /// + /// Does nothing if `unit_id` has no recorded contributions. After a call + /// the `unit_id` is removed from the reverse map, so re-importing the + /// unit later with the same id starts from a clean slate. + pub fn remove_unit(&mut self, unit_id: UnitId) { + let symbols = self.unit_symbols.take(unit_id); + for sym in symbols { + // If any other unit still claims ownership of this exact symbol + // (kind + map_key + identifier), the underlying entry is shared. + // Drop only this unit's stake; leave the entry for the remaining + // owner(s). Without this guard, the first removal of a shared + // entry (e.g. an auto-generated POU contributed by several units, + // or an implementation whose key collides across units) would + // delete the entry and orphan the other units' ownership rows. + if self.unit_symbols.has_owner_other_than(unit_id, sym.kind, &sym.map_key, &sym.identifier) { + continue; + } + match sym.kind { + SymbolKind::GlobalVariable => { + self.global_variables + .retain_at_key(sym.map_key.as_str(), |e| e.get_qualified_name() != sym.identifier); + } + SymbolKind::GlobalInitializer => { + self.global_initializers + .retain_at_key(sym.map_key.as_str(), |e| e.get_qualified_name() != sym.identifier); + } + SymbolKind::EnumGlobalVariable => { + self.enum_global_variables + .retain_at_key(sym.map_key.as_str(), |e| e.get_qualified_name() != sym.identifier); + } + SymbolKind::Pou => { + self.pous.retain_at_key(sym.map_key.as_str(), |e| e.get_name() != sym.identifier); + } + SymbolKind::Interface => { + self.interfaces.retain_at_key(sym.map_key.as_str(), |_| false); + } + SymbolKind::Property => { + self.properties.retain_at_key(sym.map_key.as_str(), |_| false); + } + SymbolKind::Implementation => { + if let Some(entry) = self.implementations.get(sym.map_key.as_str()) { + if entry.get_call_name() == sym.identifier { + self.implementations.shift_remove(sym.map_key.as_str()); + } + } + } + SymbolKind::Type => { + self.type_index + .types + .retain_at_key(sym.map_key.as_str(), |e| e.get_name() != sym.identifier); + } + SymbolKind::PouType => { + self.type_index + .pou_types + .retain_at_key(sym.map_key.as_str(), |e| e.get_name() != sym.identifier); + } + } + } + } + fn transfer_constants( &mut self, mut variable: VariableIndexEntry, diff --git a/src/index/symbol.rs b/src/index/symbol.rs index 947fb90b910..529ba37bd59 100644 --- a/src/index/symbol.rs +++ b/src/index/symbol.rs @@ -99,6 +99,22 @@ where pub fn contains_key(&self, key: &K) -> bool { self.inner_map.contains_key(key) } + + /// Removes entries at `key` that don't satisfy `predicate`. If the key has + /// no remaining entries after the filter, the key itself is dropped (so + /// `contains_key` returns `false` afterwards). Preserves the insertion + /// order of the remaining keys. + pub fn retain_at_key(&mut self, key: &Q, mut predicate: F) + where + Q: Hash + Equivalent + ?Sized, + F: FnMut(&V) -> bool, + { + let Some(values) = self.inner_map.get_mut(key) else { return }; + values.retain(|v| predicate(v)); + if values.is_empty() { + self.inner_map.shift_remove(key); + } + } } #[cfg(test)] diff --git a/src/index/tests.rs b/src/index/tests.rs index f0bad025b5b..a1f52eaebfe 100644 --- a/src/index/tests.rs +++ b/src/index/tests.rs @@ -4,3 +4,4 @@ mod generic_tests; mod index_tests; mod instance_resolver_tests; mod interface_tests; +mod unit_id_tests; diff --git a/src/index/tests/unit_id_tests.rs b/src/index/tests/unit_id_tests.rs new file mode 100644 index 00000000000..8de218137f2 --- /dev/null +++ b/src/index/tests/unit_id_tests.rs @@ -0,0 +1,187 @@ +//! Tests for the per-unit symbol-ownership plumbing: `Index::import_with_unit` +//! records contributions, and `Index::remove_unit` drops exactly those +//! contributions even when multiple units share a `SymbolMap` key. + +use plc_ast::ast::LinkageType; +use plc_source::source_location::SourceLocation; + +use crate::index::{ImplementationType, Index, PouIndexEntry, UnitId}; +use crate::test_utils::tests::index; + +#[test] +fn remove_unit_drops_source_globals() { + let source = r#" + VAR_GLOBAL + a : INT; + b : DINT; + END_VAR + "#; + let (_, mut global) = index(source); + + assert!(global.find_global_variable("a").is_some()); + assert!(global.find_global_variable("b").is_some()); + + global.remove_unit(UnitId::source(0)); + + assert!(global.find_global_variable("a").is_none()); + assert!(global.find_global_variable("b").is_none()); + // Built-in types are untouched. + assert!(global.find_effective_type_by_name("INT").is_some()); +} + +#[test] +fn remove_unit_drops_source_pous() { + let (_, mut global) = index( + r#" + FUNCTION foo : INT + END_FUNCTION + PROGRAM Main + VAR + x : INT; + END_VAR + END_PROGRAM + "#, + ); + + assert!(global.find_pou("foo").is_some()); + assert!(global.find_pou("Main").is_some()); + + global.remove_unit(UnitId::source(0)); + + assert!(global.find_pou("foo").is_none()); + assert!(global.find_pou("Main").is_none()); +} + +#[test] +fn remove_unit_preserves_other_units_enum_variant_with_same_short_name() { + // Two enums in different units that both expose a variant named `RED`. + // The variants land in `enum_global_variables` under the lowercase short + // name `red`, so the map key is shared but the qualified names differ. + let e1 = build_enum_only_index("E1"); + let e2 = build_enum_only_index("E2"); + + let mut combined = Index::default(); + combined.import_with_unit(e1, UnitId::source(0)); + combined.import_with_unit(e2, UnitId::source(1)); + + // Both qualified variants are reachable before removal. + assert!(combined.find_enum_variant("E1", "RED").is_some()); + assert!(combined.find_enum_variant("E2", "RED").is_some()); + + // Drop only the first unit's contributions. + combined.remove_unit(UnitId::source(0)); + + assert!(combined.find_enum_variant("E1", "RED").is_none(), "E1.RED must be gone"); + assert!(combined.find_enum_variant("E2", "RED").is_some(), "E2.RED must survive"); +} + +fn build_enum_only_index(enum_name: &str) -> Index { + let source = format!( + r#" + TYPE {enum_name} : (RED, GREEN, BLUE); END_TYPE + "# + ); + let (_, index) = index(source.as_str()); + index +} + +fn unit_with_auto_gen_function(name: &str) -> Index { + let mut idx = Index::default(); + idx.register_pou(PouIndexEntry::create_generated_function_entry( + name, + "DINT", + &[], + LinkageType::Internal, + false, + SourceLocation::internal(), + false, + )); + idx +} + +fn unit_with_implementation(call_name: &str, type_name: &str) -> Index { + let mut idx = Index::default(); + idx.register_implementation( + call_name, + type_name, + None, + ImplementationType::Function, + false, + SourceLocation::internal(), + ); + idx +} + +#[test] +fn remove_unit_preserves_auto_generated_pou_shared_with_other_unit() { + // Two source units that both register the same auto-generated POU. Dropping + // the first must not remove the entry while the second still owns it; the + // entry only vanishes once the last owner is removed. + let u1 = unit_with_auto_gen_function("__init_foo"); + let u2 = unit_with_auto_gen_function("__init_foo"); + + let mut combined = Index::default(); + combined.import_with_unit(u1, UnitId::source(0)); + combined.import_with_unit(u2, UnitId::source(1)); + + assert!(combined.find_pou("__init_foo").is_some(), "shared auto-gen must be present after both imports"); + + combined.remove_unit(UnitId::source(0)); + assert!( + combined.find_pou("__init_foo").is_some(), + "auto-gen must survive removal of one owner while another still claims it" + ); + + combined.remove_unit(UnitId::source(1)); + assert!(combined.find_pou("__init_foo").is_none(), "auto-gen is gone once the last owner is removed"); +} + +#[test] +fn reindex_unit_with_shared_auto_gen_pou_is_idempotent() { + // Re-importing a unit that contributes an auto-gen POU shared with another + // unit must leave the index byte-equivalent (in observable shape) to the + // pre-removal state. + let u1 = unit_with_auto_gen_function("__init_bar"); + let u2 = unit_with_auto_gen_function("__init_bar"); + + let mut combined = Index::default(); + combined.import_with_unit(u1, UnitId::source(0)); + combined.import_with_unit(u2, UnitId::source(1)); + + combined.remove_unit(UnitId::source(0)); + combined.import_with_unit(unit_with_auto_gen_function("__init_bar"), UnitId::source(0)); + + assert!( + combined.find_pou("__init_bar").is_some(), + "auto-gen must still be present after reindexing one owner" + ); + + // Now drop both owners; the entry should vanish. + combined.remove_unit(UnitId::source(0)); + combined.remove_unit(UnitId::source(1)); + assert!(combined.find_pou("__init_bar").is_none()); +} + +#[test] +fn remove_unit_preserves_implementation_shared_with_other_unit() { + // Two units that register implementations under the same call name. The + // import path overwrites the IndexMap entry, but both owners are recorded; + // dropping one must leave the entry standing for the other. + let u1 = unit_with_implementation("shared_impl", "shared_impl"); + let u2 = unit_with_implementation("shared_impl", "shared_impl"); + + let mut combined = Index::default(); + combined.import_with_unit(u1, UnitId::source(0)); + combined.import_with_unit(u2, UnitId::source(1)); + + assert!(combined.find_implementation_by_name("shared_impl").is_some()); + + combined.remove_unit(UnitId::source(0)); + assert!( + combined.find_implementation_by_name("shared_impl").is_some(), + "implementation must survive removal of one owner while another still claims it" + ); + + combined.remove_unit(UnitId::source(1)); + assert!(combined.find_implementation_by_name("shared_impl").is_none()); +} diff --git a/src/index/unit_id.rs b/src/index/unit_id.rs new file mode 100644 index 00000000000..f20b48172d0 --- /dev/null +++ b/src/index/unit_id.rs @@ -0,0 +1,219 @@ +//! Identification of which compilation unit owns an entry in the global +//! [`Index`](super::Index). +//! +//! Every entry merged into the global index is associated with a [`UnitId`]. +//! For source-derived entries the id is the unit's position in the project's +//! source list; built-in and synthetic symbols use reserved sentinels. +//! Tracking ownership lets the pipeline drop a single unit's contributions +//! from the global index when that unit is rebuilt, without rebuilding the +//! whole index from scratch. + +use rustc_hash::FxHashMap; +use serde::{Deserialize, Serialize}; + +/// Identifies the compilation unit that contributed an entry to the global +/// index. Source units get sequentially-numbered ids; the high values are +/// reserved sentinels. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] +pub struct UnitId(u32); + +impl UnitId { + /// Reserved for built-in symbols (primitive types, built-in functions) + /// registered before any source unit is indexed. + pub const BUILTIN: UnitId = UnitId(u32::MAX); + + /// Reserved for symbols produced by lowering passes or generic- + /// instantiation registration that don't belong to a single source unit. + pub const SYNTHETIC: UnitId = UnitId(u32::MAX - 1); + + /// Used for entries imported through the legacy [`Index::import`] path + /// that have not yet been migrated to provide an explicit owning unit. + /// Existing tests and call sites that don't care about ownership see this + /// value. + pub const UNTAGGED: UnitId = UnitId(u32::MAX - 2); + + /// Constructs a [`UnitId`] for the source unit at position `idx` in the + /// project's source list. + /// + /// # Panics + /// Panics if `idx` collides with a reserved sentinel value (i.e. there + /// are more than `u32::MAX - 3` source units). + pub fn source(idx: usize) -> Self { + let raw = u32::try_from(idx).expect("source unit count exceeds u32"); + assert!(raw < Self::UNTAGGED.0, "source-unit index collides with reserved sentinel"); + UnitId(raw) + } + + pub fn raw(self) -> u32 { + self.0 + } + + pub fn is_source(self) -> bool { + self.0 < Self::UNTAGGED.0 + } + + /// Returns the source-list index if this is a source unit, or `None` + /// for built-in, synthetic, or untagged ids. Prefer this over + /// `is_source()` + `raw() as usize`: the conversion is encapsulated and + /// non-source ids can't slip through silently. + pub fn source_index(self) -> Option { + if self.is_source() { + Some(self.0 as usize) + } else { + None + } + } + + pub fn is_builtin(self) -> bool { + self == Self::BUILTIN + } + + pub fn is_synthetic(self) -> bool { + self == Self::SYNTHETIC + } +} + +/// The kind of symbol an [`OwnedSymbol`] refers to. Used to dispatch removal +/// to the correct map inside the index. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum SymbolKind { + GlobalVariable, + GlobalInitializer, + EnumGlobalVariable, + Pou, + Interface, + Property, + Implementation, + Type, + PouType, +} + +/// A single symbol contributed by a unit, used by [`UnitSymbolIndex`]. +/// +/// `map_key` is the key under which the entry lives in its target `SymbolMap`; +/// `identifier` is a distinguishing string (typically the entry's qualified +/// name) that the removal path uses to tell entries apart when multiple units +/// have contributed under the same `map_key`. When `identifier` and `map_key` +/// are the same string the caller can pass either through both fields. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct OwnedSymbol { + pub kind: SymbolKind, + pub map_key: String, + pub identifier: String, +} + +impl OwnedSymbol { + pub fn new(kind: SymbolKind, map_key: impl Into, identifier: impl Into) -> Self { + OwnedSymbol { kind, map_key: map_key.into(), identifier: identifier.into() } + } + + /// Convenience for entries whose map key already uniquely identifies them + /// (no cross-unit overlap is possible). The `identifier` mirrors the + /// `map_key`. + pub fn unique(kind: SymbolKind, name: impl Into) -> Self { + let name = name.into(); + OwnedSymbol { kind, map_key: name.clone(), identifier: name } + } +} + +/// The set of symbols contributed by each [`UnitId`], inverse of the per-entry +/// `unit_id` tags. Populated by `Index::import_with_unit` and used by +/// `Index::remove_unit` to know which keys in which maps to revisit. +#[derive(Default, Debug, Serialize, Deserialize)] +pub struct UnitSymbolIndex { + entries: FxHashMap>, +} + +impl UnitSymbolIndex { + /// Records that `unit` contributed `symbol`. Multiple calls with the same + /// pair are tolerated (the symbol will be listed multiple times); callers + /// don't need to dedup. + pub fn record(&mut self, unit: UnitId, symbol: OwnedSymbol) { + self.entries.entry(unit).or_default().push(symbol); + } + + /// Returns all symbols recorded for `unit`, or an empty slice if `unit` + /// has no recorded contributions. + pub fn for_unit(&self, unit: UnitId) -> &[OwnedSymbol] { + self.entries.get(&unit).map(Vec::as_slice).unwrap_or(&[]) + } + + /// Drops and returns the recorded symbols for `unit`. + pub fn take(&mut self, unit: UnitId) -> Vec { + self.entries.remove(&unit).unwrap_or_default() + } + + /// Returns true if no unit has any recorded symbols. + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Returns true if any unit other than `exclude` claims ownership of a + /// symbol with the same `kind`/`map_key`/`identifier`. Used by + /// `Index::remove_unit` to keep entries that are co-owned by other units + /// (auto-generated POUs and implementations whose names collide across + /// compilation units): only the *last* owner being dropped should + /// actually remove the entry from the index. + pub fn has_owner_other_than( + &self, + exclude: UnitId, + kind: SymbolKind, + map_key: &str, + identifier: &str, + ) -> bool { + self.entries.iter().any(|(uid, syms)| { + *uid != exclude + && syms.iter().any(|s| s.kind == kind && s.map_key == map_key && s.identifier == identifier) + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn source_ids_round_trip() { + let id = UnitId::source(0); + assert!(id.is_source()); + assert!(!id.is_builtin()); + assert!(!id.is_synthetic()); + assert_eq!(id.raw(), 0); + + let id2 = UnitId::source(42); + assert_eq!(id2.raw(), 42); + assert_ne!(id, id2); + } + + #[test] + fn sentinels_are_distinct() { + assert_ne!(UnitId::BUILTIN, UnitId::SYNTHETIC); + assert_ne!(UnitId::BUILTIN, UnitId::UNTAGGED); + assert_ne!(UnitId::SYNTHETIC, UnitId::UNTAGGED); + assert!(UnitId::BUILTIN.is_builtin()); + assert!(UnitId::SYNTHETIC.is_synthetic()); + assert!(!UnitId::BUILTIN.is_source()); + assert!(!UnitId::SYNTHETIC.is_source()); + assert!(!UnitId::UNTAGGED.is_source()); + } + + #[test] + #[should_panic(expected = "reserved sentinel")] + fn source_panics_for_reserved_index() { + // UnitId::UNTAGGED is u32::MAX - 2; anything >= that is reserved. + let _ = UnitId::source((u32::MAX - 2) as usize); + } + + #[test] + fn unit_symbol_index_records_and_takes() { + let mut idx = UnitSymbolIndex::default(); + let u = UnitId::source(0); + idx.record(u, OwnedSymbol::unique(SymbolKind::Pou, "FOO")); + idx.record(u, OwnedSymbol::unique(SymbolKind::Type, "TBar")); + assert_eq!(idx.for_unit(u).len(), 2); + + let taken = idx.take(u); + assert_eq!(taken.len(), 2); + assert!(idx.for_unit(u).is_empty()); + } +} diff --git a/src/test_utils.rs b/src/test_utils.rs index 9802eb9ca54..d31aaf50685 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -71,6 +71,7 @@ pub mod tests { src: T, id_provider: IdProvider, ) -> (CompilationUnit, Index, Vec) { + use crate::index::UnitId; let source = src.into(); let source_str = &source.source; let source_path = source.get_location_str(); @@ -78,7 +79,7 @@ pub mod tests { //Import builtins let builtins = builtins::parse_built_ins(id_provider.clone()); - index.import(index::indexer::index(&builtins)); + index.import_with_unit(index::indexer::index(&builtins), UnitId::BUILTIN); // import built-in types like INT, BOOL, etc. for data_type in get_builtin_types() { index.register_type(data_type); @@ -92,22 +93,23 @@ pub mod tests { ); pre_process(&mut unit, id_provider); - index.import(index::indexer::index(&unit)); + index.import_with_unit(index::indexer::index(&unit), UnitId::source(0)); (unit, index, diagnostics) } pub fn index_unit_with_id(unit: &CompilationUnit, id_provider: IdProvider) -> Index { + use crate::index::UnitId; let mut index = Index::default(); //Import builtins let builtins = builtins::parse_built_ins(id_provider.clone()); - index.import(index::indexer::index(&builtins)); + index.import_with_unit(index::indexer::index(&builtins), UnitId::BUILTIN); // import built-in types like INT, BOOL, etc. for data_type in get_builtin_types() { index.register_type(data_type); } - index.import(index::indexer::index(unit)); + index.import_with_unit(index::indexer::index(unit), UnitId::source(0)); index } From 10a601b659ed16c7b5af4d66e33509a5e50bc25e Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Mon, 18 May 2026 11:58:49 +0200 Subject: [PATCH 03/14] feat(index): add reverse-dependency graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ReverseDependencyGraph` inverts each unit's `FxIndexSet` that the resolver already collects into `symbol_name → Set`. Lets partial re-annotation compute the "who used the symbol before its signature changed?" closure: when a lowerer rewrites a unit's signature, the units that must be re-annotated are exactly the union of dependents of every changed symbol, plus the mutated units themselves. The closure is correct only if the resolver records a `Dependency` for every cross-unit reference — including implicit ones through interface dispatch and vtable indirection. The contract on which resolver paths must record a `Dependency` (and which paths are still open risks — cross-unit generic monomorphization is the documented gap) lives in `book/src/development/reverse_dependency_graph.md`. `cross_unit_interface_dispatch_signature` exercises the interface-dispatch path: an interface method with aggregate (STRING[16]) return type, implemented in a second compilation unit and dispatched from a third. AggregateTypeLowerer rewrites all three units' view of the signature; if the closure misses any, the call site stays on the pre-rewrite calling convention and breaks at runtime. Co-Authored-By: Claude Opus 4.7 (1M context) --- book/src/SUMMARY.md | 1 + .../development/reverse_dependency_graph.md | 113 ++++++++++++++++++ compiler/plc_driver/src/pipelines.rs | 62 ++++++++++ compiler/plc_driver/src/tests.rs | 1 + compiler/plc_driver/src/tests/reverse_deps.rs | 105 ++++++++++++++++ .../main.st | 17 +++ .../plc.json | 12 ++ .../run.test | 1 + .../src/dispatcher.st | 26 ++++ .../src/iface.st | 13 ++ .../src/impl.st | 19 +++ 11 files changed, 370 insertions(+) create mode 100644 book/src/development/reverse_dependency_graph.md create mode 100644 compiler/plc_driver/src/tests/reverse_deps.rs create mode 100644 tests/lit/multi/cross_unit_interface_dispatch_signature/main.st create mode 100644 tests/lit/multi/cross_unit_interface_dispatch_signature/plc.json create mode 100644 tests/lit/multi/cross_unit_interface_dispatch_signature/run.test create mode 100644 tests/lit/multi/cross_unit_interface_dispatch_signature/src/dispatcher.st create mode 100644 tests/lit/multi/cross_unit_interface_dispatch_signature/src/iface.st create mode 100644 tests/lit/multi/cross_unit_interface_dispatch_signature/src/impl.st diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index 21621a7d406..d0a86834fa4 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -27,3 +27,4 @@ - [Model-to-Model Conversion](./cfc/m2m.md) - [Development]() - [Profiling Build Phases](./development/phase_timing.md) + - [Reverse Dependency Graph](./development/reverse_dependency_graph.md) diff --git a/book/src/development/reverse_dependency_graph.md b/book/src/development/reverse_dependency_graph.md new file mode 100644 index 00000000000..685cb8b8f7c --- /dev/null +++ b/book/src/development/reverse_dependency_graph.md @@ -0,0 +1,113 @@ +# Reverse Dependency Graph + +The reverse-dependency graph maps each named symbol to the set of +compilation units that reference it. It is consumed by the lowering +framework to compute partial re-annotation closures: when a lowerer +changes a unit's signature, the units that must be re-annotated are +exactly the union of dependents of every changed signature name, plus +the mutated units themselves. + +## Where the code lives + +`compiler/plc_driver/src/pipelines.rs`. Public surface: + +```rust +pub struct ReverseDependencyGraph { /* private */ } + +impl ReverseDependencyGraph { + pub fn dependents(&self, symbol: &str) -> Option<&HashSet>; + pub fn is_empty_for(&self, symbol: &str) -> bool; + pub fn len(&self) -> usize; + pub fn is_empty(&self) -> bool; +} + +impl AnnotatedProject { + pub fn reverse_dependencies(&self) -> ReverseDependencyGraph; +} +``` + +Edges are built from each unit's `Dependency` set, which the type +annotator records in three flavors: + +- `Dependency::Call(name)` — direct call sites by callee name. +- `Dependency::Variable(name)` — global variable references by + qualified name. +- `Dependency::Datatype(name)` — type references by name. + +`reverse_dependencies()` walks `self.units[i].dependencies()` and +indexes each entry by name → `UnitId::source(i)`. + +## Consumers + +- `AggregateTypeLowerer::post_annotate` + (`compiler/plc_driver/src/pipelines/participant.rs`) captures the + graph before its STRING-aggregate rewrites so it can ask "who + used these signatures before the rewrite?". +- `AutoLowerer::post_annotate` + (`compiler/plc_driver/src/pipelines/unit_lowerer.rs`) uses the + graph for any `UnitLowerer` registered at post-annotate stage. +- `LoweringBookkeeper::apply_to_annotated` + (`compiler/plc_driver/src/pipelines/bookkeeping.rs`) computes the + re-annotation closure: `mutated_units ∪ dependents(changed_sig)`. + +## Closure-coverage contract + +The closure is correct only if every cross-unit symbol reference +produces a `Dependency` entry under the callee/referent's name. The +resolver paths that currently uphold this contract: + +- [x] Direct function/method calls — `resolve_call` records + `Dependency::Call` under the resolved callee's qualified name. + Verified by `cross_unit_aggregate_signature.st` lit test. +- [x] Interface dispatch (vtable indirection) — records + `Dependency::Datatype` for the interface type and + `Dependency::Call` for each method invoked. Verified by + `cross_unit_polymorphism_dispatch.st` lit test for the + base→derived case. +- [x] Implementing-method signature change reaches the dispatcher — + verified by `cross_unit_interface_dispatch_signature.st` + (added with this PR; see Required regression tests below). +- [ ] Generic monomorphization — a unit that instantiates a generic + function on another unit's type may not record a `Dependency` + under the specialized name. Not yet verified; closure may + under-invalidate. **If you land a feature that depends on + cross-unit generics, add a regression test before relying on + partial re-annotate.** + +The first three are required for current consumers. The generic +monomorphization gap is documented as a known risk; no current +consumer exercises it across units. + +## Required regression tests + +Each new resolver path that contributes to the closure must come with +a lit test that demonstrates the path: + +1. **Direct call across units.** A signature change on the callee + invalidates the caller. Covered by + `cross_unit_aggregate_signature.st`. + +2. **Interface dispatch.** Unit A defines an interface; unit B + implements it; unit C holds a pointer to the interface and + dispatches through it. Changing the parameter type of B's + implementation must invalidate C. Covered by + `cross_unit_interface_dispatch_signature.st`. + +3. **Base→derived vtable.** A method on a derived class is reached + via the base's vtable. Covered by + `cross_unit_polymorphism_dispatch.st`. + +4. **Generic specialization** (open). A unit that instantiates a + generic from another unit on a type from a third unit. Add when + the first consumer crosses unit boundaries with generics. + +## When the closure is wrong + +A miss in the closure means a stale annotation survives into codegen. +Symptoms: a type mismatch between caller and callee that the +annotator should have caught, but which surfaces as an LLVM-level +crash or a silently miscompiled call. If you suspect a miss, you can +verify by running the same project with `PLC_TIMING=1` and comparing +the closure size against a fresh full-reannotate baseline — if the +miss is exercised, the partial closure will be strictly smaller than +the full one in a way that matters. diff --git a/compiler/plc_driver/src/pipelines.rs b/compiler/plc_driver/src/pipelines.rs index 69270074977..cae6deb381c 100644 --- a/compiler/plc_driver/src/pipelines.rs +++ b/compiler/plc_driver/src/pipelines.rs @@ -808,6 +808,13 @@ pub struct AnnotatedUnit { } impl AnnotatedUnit { + /// Returns the dependencies this unit recorded during annotation + /// (by-name calls, type references, and variable references). Used to + /// build the project's reverse-dependency graph. + pub fn dependencies(&self) -> &FxIndexSet { + &self.dependencies + } + pub fn new( unit: CompilationUnit, dependencies: FxIndexSet, @@ -839,7 +846,62 @@ pub struct AnnotatedProject { pub diagnostics: Vec, } +/// Reverse-dependency graph: for each symbol name, the set of units that +/// recorded a dependency on it during annotation. Consumed by lowerers to +/// compute the partial re-annotation closure when a unit's signature +/// changes. +/// +/// Symbol names are matched as the resolver records them +/// ([`Dependency::Datatype`], [`Dependency::Call`], [`Dependency::Variable`]) +/// — i.e. with the case the resolver saw, not lowercased. The graph stores +/// `UnitId::source(i)` keyed against `AnnotatedProject.units[i]`. +/// +/// Closure correctness relies on the resolver recording a `Dependency` for +/// every cross-unit reference. The full contract — including known gaps +/// (cross-unit generic monomorphization) and required regression tests — +/// is documented in `book/src/development/reverse_dependency_graph.md`. +#[derive(Debug, Default, Clone)] +pub struct ReverseDependencyGraph { + edges: std::collections::HashMap>, +} + +impl ReverseDependencyGraph { + /// Returns the units that depend on the given symbol, if any. + pub fn dependents(&self, symbol: &str) -> Option<&std::collections::HashSet> { + self.edges.get(symbol) + } + + /// Returns true if `symbol` has no recorded dependents. + pub fn is_empty_for(&self, symbol: &str) -> bool { + self.edges.get(symbol).map(|s| s.is_empty()).unwrap_or(true) + } + + /// Total number of distinct symbols that have at least one dependent. + pub fn len(&self) -> usize { + self.edges.len() + } + + pub fn is_empty(&self) -> bool { + self.edges.is_empty() + } +} + impl AnnotatedProject { + /// Builds the reverse-dependency graph from each unit's recorded + /// [`Dependency`] set. Unit ids match `units[i] -> UnitId::source(i)`. + /// Recomputed on every call; cheap (single pass over the dep sets). + pub fn reverse_dependencies(&self) -> ReverseDependencyGraph { + let mut edges: std::collections::HashMap> = + std::collections::HashMap::new(); + for (idx, unit) in self.units.iter().enumerate() { + let unit_id = UnitId::source(idx); + for dep in unit.dependencies() { + edges.entry(dep.get_name().to_string()).or_default().insert(unit_id); + } + } + ReverseDependencyGraph { edges } + } + /// Validates the project, reports any new diagnostics on the fly pub fn validate( &self, diff --git a/compiler/plc_driver/src/tests.rs b/compiler/plc_driver/src/tests.rs index 1476003fade..b765736c84d 100644 --- a/compiler/plc_driver/src/tests.rs +++ b/compiler/plc_driver/src/tests.rs @@ -19,6 +19,7 @@ mod debug_paths; mod external_files; mod header_generator; mod multi_files; +mod reverse_deps; #[derive(Serialize, Deserialize)] #[serde(bound(deserialize = "'de: 'static"))] diff --git a/compiler/plc_driver/src/tests/reverse_deps.rs b/compiler/plc_driver/src/tests/reverse_deps.rs new file mode 100644 index 00000000000..b0d46797f72 --- /dev/null +++ b/compiler/plc_driver/src/tests/reverse_deps.rs @@ -0,0 +1,105 @@ +//! Tests for [`AnnotatedProject::reverse_dependencies`]. + +use plc::index::UnitId; +use source_code::SourceCode; + +use crate::tests::{ + progress_pipeline_to_step_annotated, progress_pipeline_to_step_indexed, progress_pipeline_to_step_parsed, +}; + +fn build(sources: Vec) -> crate::tests::AnnotatedProjectWrapper { + let includes: Vec = vec![]; + let parsed = progress_pipeline_to_step_parsed(sources.clone(), includes.clone()).expect("parse"); + let indexed = + progress_pipeline_to_step_indexed(sources.clone(), includes.clone(), parsed).expect("index"); + progress_pipeline_to_step_annotated(sources, includes, indexed).expect("annotate") +} + +#[test] +fn reverse_graph_links_caller_to_callee() { + let callee = SourceCode::new( + " + FUNCTION callee : DINT + VAR_INPUT x : DINT; END_VAR + callee := x; + END_FUNCTION + ", + "callee.st", + ); + let caller = SourceCode::new( + " + FUNCTION main : DINT + VAR y : DINT; END_VAR + y := callee(1); + main := y; + END_FUNCTION + ", + "caller.st", + ); + + let project = build(vec![callee, caller]).annotated_project; + let graph = project.reverse_dependencies(); + + let dependents = graph.dependents("callee").expect("callee has dependents"); + assert!(dependents.contains(&UnitId::source(1)), "caller (unit 1) depends on callee"); + // The defining unit itself also lists `callee` in its dep set + // (function bodies record self-reference through the visitor); we don't + // assert exclusion of unit 0 because that's an implementation detail of + // the resolver, but unit 1's presence is the load-bearing claim. +} + +#[test] +fn reverse_graph_links_struct_users() { + let define = SourceCode::new( + " + TYPE Vec3 : STRUCT x, y, z : REAL; END_STRUCT END_TYPE + ", + "types.st", + ); + let user = SourceCode::new( + " + FUNCTION main : DINT + VAR v : Vec3; END_VAR + v.x := 1.0; + main := 0; + END_FUNCTION + ", + "user.st", + ); + + let project = build(vec![define, user]).annotated_project; + let graph = project.reverse_dependencies(); + + let dependents = graph.dependents("Vec3").expect("Vec3 has dependents"); + assert!(dependents.contains(&UnitId::source(1)), "user unit depends on Vec3"); +} + +#[test] +fn unrelated_unit_does_not_appear_as_dependent() { + let callee = SourceCode::new( + " + FUNCTION callee : DINT + callee := 0; + END_FUNCTION + ", + "callee.st", + ); + let unrelated = SourceCode::new( + " + FUNCTION other : DINT + other := 42; + END_FUNCTION + ", + "other.st", + ); + + let project = build(vec![callee, unrelated]).annotated_project; + let graph = project.reverse_dependencies(); + + if let Some(dependents) = graph.dependents("callee") { + assert!( + !dependents.contains(&UnitId::source(1)), + "the unrelated unit must not appear as a dependent of `callee`" + ); + } +} diff --git a/tests/lit/multi/cross_unit_interface_dispatch_signature/main.st b/tests/lit/multi/cross_unit_interface_dispatch_signature/main.st new file mode 100644 index 00000000000..c49b802efb2 --- /dev/null +++ b/tests/lit/multi/cross_unit_interface_dispatch_signature/main.st @@ -0,0 +1,17 @@ +FUNCTION main : DINT +VAR + labeller : Labeller; + dispatcher : Dispatcher; + s : STRING[16]; +END_VAR + +dispatcher.bind(labeller); + +s := dispatcher.label_for(0); +// CHECK: label[0] = zero +printf('label[0] = %s$N', s); + +s := dispatcher.label_for(7); +// CHECK: label[7] = item +printf('label[7] = %s$N', s); +END_FUNCTION diff --git a/tests/lit/multi/cross_unit_interface_dispatch_signature/plc.json b/tests/lit/multi/cross_unit_interface_dispatch_signature/plc.json new file mode 100644 index 00000000000..425b203c1db --- /dev/null +++ b/tests/lit/multi/cross_unit_interface_dispatch_signature/plc.json @@ -0,0 +1,12 @@ +{ + "name": "CrossUnitInterfaceDispatchSignature", + "files": [ + "main.st", + "src/iface.st", + "src/impl.st", + "src/dispatcher.st", + "../../util/printf.pli" + ], + "compile_type": "Static", + "libraries": [] +} diff --git a/tests/lit/multi/cross_unit_interface_dispatch_signature/run.test b/tests/lit/multi/cross_unit_interface_dispatch_signature/run.test new file mode 100644 index 00000000000..60ea4e426c9 --- /dev/null +++ b/tests/lit/multi/cross_unit_interface_dispatch_signature/run.test @@ -0,0 +1 @@ +RUN: %COMPILE build plc.json && %RUN | %CHECK %S/main.st diff --git a/tests/lit/multi/cross_unit_interface_dispatch_signature/src/dispatcher.st b/tests/lit/multi/cross_unit_interface_dispatch_signature/src/dispatcher.st new file mode 100644 index 00000000000..6feba08bb7d --- /dev/null +++ b/tests/lit/multi/cross_unit_interface_dispatch_signature/src/dispatcher.st @@ -0,0 +1,26 @@ +// The dispatcher holds an ILabeller reference and dispatches make_label +// through it. Lives in its own compilation unit so that the reverse-dep +// closure must reach this unit when the interface's method signature +// is rewritten by AggregateTypeLowerer. If the closure misses this unit, +// the call site here keeps the pre-rewrite STRING-return calling +// convention and breaks at runtime. + +FUNCTION_BLOCK Dispatcher + VAR + target : ILabeller; + END_VAR + + METHOD bind + VAR_INPUT + iface : ILabeller; + END_VAR + target := iface; + END_METHOD + + METHOD label_for : STRING[16] + VAR_INPUT + n : DINT; + END_VAR + label_for := target.make_label(n); + END_METHOD +END_FUNCTION_BLOCK diff --git a/tests/lit/multi/cross_unit_interface_dispatch_signature/src/iface.st b/tests/lit/multi/cross_unit_interface_dispatch_signature/src/iface.st new file mode 100644 index 00000000000..b45973a5179 --- /dev/null +++ b/tests/lit/multi/cross_unit_interface_dispatch_signature/src/iface.st @@ -0,0 +1,13 @@ +// Interface in its own compilation unit. The method returns an aggregate +// (STRING[16]) — AggregateTypeLowerer rewrites this method's signature to +// take the return as a VAR_IN_OUT parameter. The interface's signature +// change must propagate through the closure to every unit that dispatches +// through ILabeller — `impl.st`, `dispatcher.st`, and `main.st`. + +INTERFACE ILabeller + METHOD make_label : STRING[16] + VAR_INPUT + n : DINT; + END_VAR + END_METHOD +END_INTERFACE diff --git a/tests/lit/multi/cross_unit_interface_dispatch_signature/src/impl.st b/tests/lit/multi/cross_unit_interface_dispatch_signature/src/impl.st new file mode 100644 index 00000000000..77327d7fb59 --- /dev/null +++ b/tests/lit/multi/cross_unit_interface_dispatch_signature/src/impl.st @@ -0,0 +1,19 @@ +// Concrete implementer in its own compilation unit. After AggregateTypeLowerer +// runs, the call to make_label inside main.st must resolve against the +// rewritten signature — if the closure misses the implementer's unit while +// rewriting the interface, the dispatcher's vtable entry and the concrete +// implementation disagree about return type and the runtime garbles the +// STRING result. + +FUNCTION_BLOCK Labeller IMPLEMENTS ILabeller + METHOD make_label : STRING[16] + VAR_INPUT + n : DINT; + END_VAR + IF n = 0 THEN + make_label := 'zero'; + ELSE + make_label := 'item'; + END_IF + END_METHOD +END_FUNCTION_BLOCK From 65ed0d5eca7555f7b9cb1a901cba6e22a8223ecd Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Wed, 13 May 2026 11:13:11 +0200 Subject: [PATCH 04/14] perf(driver): skip redundant lowering re-passes when nothing to do MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each lowering participant that previously triggered a whole-project re-index or re-annotate after its hook fired now gates on whether the project actually has any work for it. The headline measurement on oscat: the `annotate (driver)` phase drops from 605 ms to 390 ms (-35%) with no behavioural changes — the saved ~250 ms is purely redundant work that fired on every build regardless of the project's shape. Participants migrated: * `ArrayLowerer::pre_annotate` — `lower_literal_arrays` now returns `bool`. Re-index is skipped when no array literal was lowered. * `RetainParticipant::post_index` — `lower_retain` returns `bool` driven by an internal `changed` flag on the visitor. The lowerer now borrows the index instead of consuming it so the caller can hand back the unchanged `IndexedProject` on a no-op. * `PolymorphismLowerer::post_index` — `table()`, `TableGenerator::generate`, and the inner vtable / itable generators now return `bool`. Re-index skipped on projects with no classes, function blocks, or interfaces. * `PolymorphismLowerer::post_annotate`, `AggregateTypeLowerer::post_annotate`, `InheritanceLowerer::post_annotate`, `PropertyLowerer::post_annotate` — gated on exact-predicate prechecks against the `Index`. Threading a `changed` flag through the dispatch / aggregate / inheritance / property visitors would be far more invasive, and the prechecks are exact for the conditions they check, so the outcome is the same. Supporting additions: `Index::has_any_properties`, `SymbolMap::is_empty`. The participant trait signature is unchanged; the bool-returning lowerer functions are an internal contract between each lowerer and its participant hook. Workspace test suite and lit suite both stay green. The remaining ~390 ms in oscat's `annotate` phase is dominated by participants that genuinely have work to do (oscat uses classes / FBs and aggregate return types); reducing those needs per-unit deltas from the lowerers, which is the Phase 4+ work. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../plc_driver/src/pipelines/participant.rs | 125 ++++++++++++++++-- compiler/plc_driver/src/pipelines/property.rs | 7 + compiler/plc_lowering/src/array_lowering.rs | 13 +- compiler/plc_lowering/src/retain.rs | 26 +++- src/index.rs | 7 + src/index/symbol.rs | 5 + src/lowering/polymorphism/mod.rs | 8 +- src/lowering/polymorphism/table/interface.rs | 9 +- src/lowering/polymorphism/table/mod.rs | 11 +- src/lowering/polymorphism/table/pou.rs | 10 +- 10 files changed, 192 insertions(+), 29 deletions(-) diff --git a/compiler/plc_driver/src/pipelines/participant.rs b/compiler/plc_driver/src/pipelines/participant.rs index 966d3851c72..6f425d5e2f5 100644 --- a/compiler/plc_driver/src/pipelines/participant.rs +++ b/compiler/plc_driver/src/pipelines/participant.rs @@ -15,6 +15,7 @@ use std::{ use ast::provider::IdProvider; use plc::{ codegen::GeneratedModule, + index::{Index, PouIndexEntry}, lowering::{calls::AggregateTypeLowerer, polymorphism::PolymorphismLowerer}, output::FormatOption, ConfigFormat, OnlineChange, Target, @@ -272,13 +273,21 @@ impl ArrayLowerer { impl PipelineParticipantMut for ArrayLowerer { fn pre_annotate(&mut self, indexed_project: IndexedProject) -> IndexedProject { - let IndexedProject { project: ParsedProject { mut units }, index, .. } = indexed_project; + let IndexedProject { project: ParsedProject { mut units }, index, _unresolvables } = indexed_project; + let mut changed = false; for unit in &mut units { - array_lowering::lower_literal_arrays(unit, &index, &mut self.id_provider); + if array_lowering::lower_literal_arrays(unit, &index, &mut self.id_provider) { + changed = true; + } } - // Re-index since we modified the AST (new statements, possible new alloca variables) let project = ParsedProject { units }; - project.index(self.id_provider.clone()) + if changed { + // Re-index since we modified the AST (new statements, possible new alloca variables) + project.index(self.id_provider.clone()) + } else { + // Nothing was lowered; the existing index is still authoritative. + IndexedProject { project, index, _unresolvables } + } } } @@ -290,6 +299,13 @@ impl PipelineParticipantMut for InheritanceLowerer { } fn post_annotate(&mut self, annotated_project: AnnotatedProject) -> AnnotatedProject { + // Skip if the project declares no inheritance or interfaces — the + // visit would be a no-op and the implicit re-annotate would only + // recompute the same annotations. + if !project_uses_inheritance(&annotated_project.index) { + return annotated_project; + } + let AnnotatedProject { mut units, index, annotations, diagnostics } = annotated_project; self.annotations = Some(Box::new(annotations)); self.index = Some(index); @@ -311,6 +327,13 @@ impl PipelineParticipantMut for InheritanceLowerer { impl PipelineParticipantMut for AggregateTypeLowerer { fn post_annotate(&mut self, annotated_project: AnnotatedProject) -> AnnotatedProject { + // Skip if no POU has an aggregate return type — the visit would walk + // every unit and rewrite nothing, and the re-index/re-annotate would + // reproduce the existing state. + if !project_has_aggregate_returns(&annotated_project.index) { + return annotated_project; + } + let AnnotatedProject { units, index, annotations, diagnostics } = annotated_project; self.index = Some(index); self.annotation = Some(Box::new(annotations)); @@ -334,14 +357,28 @@ impl PipelineParticipantMut for AggregateTypeLowerer { impl PipelineParticipantMut for PolymorphismLowerer { fn post_index(&mut self, indexed_project: IndexedProject) -> IndexedProject { - let IndexedProject { mut project, index, .. } = indexed_project; - - self.table(&index, &mut project.units); - - project.index(self.ids.clone()) + let IndexedProject { mut project, index, _unresolvables } = indexed_project; + let changed = self.table(&index, &mut project.units); + if changed { + project.index(self.ids.clone()) + } else { + IndexedProject { project, index, _unresolvables } + } } fn post_annotate(&mut self, annotated_project: AnnotatedProject) -> AnnotatedProject { + // Dispatch lowering only has work to do if the project has any + // polymorphic constructs (classes, function blocks, methods, or + // interfaces). The precheck is an exact predicate: if none of those + // are in the index, the dispatch visitor wouldn't rewrite anything + // and the implicit re-index + re-annotate would reproduce the + // existing state. Threading a `changed` flag through the dispatch + // visitors is more invasive; the index precheck gives the same + // answer for cheaper. + if !project_uses_polymorphism(&annotated_project.index) { + return annotated_project; + } + let AnnotatedProject { units, index, annotations, diagnostics } = annotated_project; let mut units: Vec<_> = units.into_iter().map(|AnnotatedUnit { unit, .. }| unit).collect(); @@ -364,11 +401,15 @@ impl PipelineParticipantMut for PolymorphismLowerer { impl PipelineParticipantMut for RetainParticipant { fn post_index(&mut self, indexed_project: IndexedProject) -> IndexedProject { - let IndexedProject { mut project, index, .. } = indexed_project; - self.lower_retain(&mut project.units, index); - - // Re-index - project.index(self.ids.clone()) + let IndexedProject { mut project, index, _unresolvables } = indexed_project; + let changed = self.lower_retain(&mut project.units, &index); + if changed { + project.index(self.ids.clone()) + } else { + // The lowerer reported no rewrites; the existing index is still + // authoritative. + IndexedProject { project, index, _unresolvables } + } } } @@ -397,3 +438,59 @@ impl PipelineParticipantMut for ReferenceToReturnParticipant { ParsedProject { units } } } + +// ─── Precheck helpers ────────────────────────────────────────────────────── +// +// Several lowering participants used to unconditionally re-run a whole-project +// index or annotate after their hook fired, even when the lowerer had nothing +// to do on this project. The helpers below answer "is there any work for me?" +// from the already-built index (or the units themselves) so the participant +// can skip both the work and the implicit re-pass when the answer is no. + +/// True if the project contains any object-oriented constructs that the +/// [`PolymorphismLowerer`] would rewrite: interfaces, classes, or function +/// blocks. Inheritance-only constructs (super-class chains on functions) are +/// caught by `project_uses_inheritance` instead. +fn project_uses_polymorphism(index: &Index) -> bool { + if !index.get_interfaces().is_empty() { + return true; + } + index.get_pous().values().any(|p| { + matches!( + p, + PouIndexEntry::Class { .. } | PouIndexEntry::FunctionBlock { .. } | PouIndexEntry::Method { .. } + ) + }) +} + +/// True if any POU's return type is aggregate (array, struct, or string), in +/// which case [`AggregateTypeLowerer`] needs to rewrite that POU's signature. +fn project_has_aggregate_returns(index: &Index) -> bool { + for pou in index.get_pous().values() { + let return_type = match pou { + PouIndexEntry::Function { return_type, .. } | PouIndexEntry::Method { return_type, .. } => { + return_type.as_str() + } + _ => continue, + }; + if return_type.is_empty() { + continue; + } + if index.get_effective_type_or_void_by_name(return_type).is_aggregate_type() { + return true; + } + } + false +} + +/// True if any POU declares a super-class or any interfaces, in which case +/// [`InheritanceLowerer`] needs to rewrite calls and walk inheritance chains. +fn project_uses_inheritance(index: &Index) -> bool { + index.get_pous().values().any(|p| match p { + PouIndexEntry::FunctionBlock { super_class, interfaces, .. } + | PouIndexEntry::Class { super_class, interfaces, .. } => { + super_class.is_some() || !interfaces.is_empty() + } + _ => false, + }) +} diff --git a/compiler/plc_driver/src/pipelines/property.rs b/compiler/plc_driver/src/pipelines/property.rs index d940dd91d42..a5e47b4b8a7 100644 --- a/compiler/plc_driver/src/pipelines/property.rs +++ b/compiler/plc_driver/src/pipelines/property.rs @@ -19,6 +19,13 @@ impl PipelineParticipantMut for PropertyLowerer { } fn post_annotate(&mut self, project: AnnotatedProject) -> AnnotatedProject { + // Skip if no POU defines a property — there are no property accesses + // to rewrite into function calls, and the implicit re-annotate would + // reproduce the existing state. + if !project.index.has_any_properties() { + return project; + } + let AnnotatedProject { mut units, index, annotations, diagnostics } = project; self.annotations = Some(annotations); diff --git a/compiler/plc_lowering/src/array_lowering.rs b/compiler/plc_lowering/src/array_lowering.rs index 55e691e23fb..030ef56b1b5 100644 --- a/compiler/plc_lowering/src/array_lowering.rs +++ b/compiler/plc_lowering/src/array_lowering.rs @@ -153,7 +153,11 @@ struct LoweredResult { /// Walks every implementation in the compilation unit and rewrites assignments whose /// right-hand side is an array literal (`LiteralArray`) into indexed assignments /// and/or canonical `WHILE TRUE` loops. -pub fn lower_literal_arrays(unit: &mut CompilationUnit, index: &Index, id_provider: &mut IdProvider) { +/// +/// Returns `true` if the unit was actually modified (any assignment was +/// rewritten, or any const-multiplied initializer was canonicalized). The +/// caller uses this to decide whether a downstream re-index is needed. +pub fn lower_literal_arrays(unit: &mut CompilationUnit, index: &Index, id_provider: &mut IdProvider) -> bool { // Track which POUs need which counter variables for generated loops. let mut pou_counters: HashMap> = HashMap::new(); @@ -163,8 +167,12 @@ pub fn lower_literal_arrays(unit: &mut CompilationUnit, index: &Index, id_provid // IEC 61131-3 syntax `[(NB)(value)]` because it cannot distinguish this from a // function call at parse time. With the index available we can resolve the // constant and emit the correct AST. + // + // This rewriter only rewrites individual statements in place; it doesn't + // add or remove variables, so it can't invalidate the index. rewrite_const_multiplied_initializers(unit, index); + let mut added_new_variables = false; for implementation in &mut unit.implementations { let mut new_statements = Vec::new(); let mut counters: BTreeSet = BTreeSet::new(); @@ -190,7 +198,10 @@ pub fn lower_literal_arrays(unit: &mut CompilationUnit, index: &Index, id_provid // Add VAR_TEMP counter variables to POUs that need them for generated loops. if !pou_counters.is_empty() { add_counter_variables(unit, &pou_counters); + added_new_variables = true; } + + added_new_variables } // ── Constant-multiplier rewriting ─────────────────────────────────────────── diff --git a/compiler/plc_lowering/src/retain.rs b/compiler/plc_lowering/src/retain.rs index 9c563633d01..efaf1dc32f5 100644 --- a/compiler/plc_lowering/src/retain.rs +++ b/compiler/plc_lowering/src/retain.rs @@ -54,18 +54,28 @@ impl RetainParticipant { Self { ids } } - pub fn lower_retain(&mut self, units: &mut [CompilationUnit], index: plc::index::Index) { - let mut lowerer = RetainLowerer { ids: self.ids.clone(), index, context: Context::default() }; + /// Lowers `RETAIN` variables across the project. Returns `true` if any + /// unit was actually rewritten (a retain variable was hoisted to a + /// global, or a retain block was added). When `false`, the caller can + /// skip the downstream re-index because the AST and the variable + /// table are unchanged. + pub fn lower_retain(&mut self, units: &mut [CompilationUnit], index: &plc::index::Index) -> bool { + let mut lowerer = + RetainLowerer { ids: self.ids.clone(), index, context: Context::default(), changed: false }; for unit in units { lowerer.visit_compilation_unit(unit); } + lowerer.changed } } -struct RetainLowerer { +struct RetainLowerer<'idx> { ids: IdProvider, - index: plc::index::Index, + index: &'idx plc::index::Index, context: Context, + /// Set to `true` as soon as the visitor rewrites the AST. Used by the + /// caller to decide whether the downstream re-index is needed. + changed: bool, } #[derive(Debug, Default)] @@ -75,11 +85,12 @@ struct Context { retain_variables: Vec, } -impl AstVisitorMut for RetainLowerer { +impl<'idx> AstVisitorMut for RetainLowerer<'idx> { fn visit_compilation_unit(&mut self, unit: &mut CompilationUnit) { unit.walk(self); // After visiting the compilation unit, add all retain variables to the global vars if !self.context.retain_variables.is_empty() { + self.changed = true; // Find an existing retain global variable block or create a new one if it doesn't exist unit.global_vars .iter_mut() @@ -114,6 +125,7 @@ impl AstVisitorMut for RetainLowerer { //If the block is retain but we are in a program, mark the block as non-retain if block.retain && self.context.in_program { block.retain = false; + self.changed = true; } for variable in variables { let Some(variable_index) = @@ -123,7 +135,7 @@ impl AstVisitorMut for RetainLowerer { continue; }; - if !variable_index.should_retain(&self.index) { + if !variable_index.should_retain(self.index) { block.variables.push(variable); continue; } @@ -146,7 +158,7 @@ impl AstVisitorMut for RetainLowerer { } } -impl RetainLowerer { +impl<'idx> RetainLowerer<'idx> { /// Replaces a retain variable in a program with a global retain variable and replaces the original variable with an auto reference to the global variable fn replace_with_retain_variable(&mut self, mut variable: Variable) -> (Variable, Variable) { let new_name = format!( diff --git a/src/index.rs b/src/index.rs index c26bf16e54b..543dac2eaa1 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1787,6 +1787,13 @@ impl Index { self.properties.get_all(pou_name).unwrap_or(&vec![]).to_vec() } + /// Returns `true` if any POU in the index defines a property. Used by + /// the `PropertyLowerer` participant to decide whether the property-to- + /// function-call rewrite has any work to do on this project. + pub fn has_any_properties(&self) -> bool { + !self.properties.is_empty() + } + /// return the `VariableIndexEntry` associated with the given fully qualified name using `.` as /// a delimiter. (e.g. "PLC_PRG.x", or "MyClass.MyMethod.x") pub fn find_fully_qualified_variable(&self, fully_qualified_name: &str) -> Option<&VariableIndexEntry> { diff --git a/src/index/symbol.rs b/src/index/symbol.rs index 529ba37bd59..b233ac935fb 100644 --- a/src/index/symbol.rs +++ b/src/index/symbol.rs @@ -100,6 +100,11 @@ where self.inner_map.contains_key(key) } + /// Returns `true` if this map has no entries. + pub fn is_empty(&self) -> bool { + self.inner_map.is_empty() + } + /// Removes entries at `key` that don't satisfy `predicate`. If the key has /// no remaining entries after the filter, the key itself is dropped (so /// `contains_key` returns `false` afterwards). Preserves the insertion diff --git a/src/lowering/polymorphism/mod.rs b/src/lowering/polymorphism/mod.rs index f54e6569fc4..410080e91aa 100644 --- a/src/lowering/polymorphism/mod.rs +++ b/src/lowering/polymorphism/mod.rs @@ -39,8 +39,12 @@ impl PolymorphismLowerer { /// Generates vtable and itable struct definitions, `__vtable` member fields on root POUs, /// and global table instances. Must be called before [`dispatch`](Self::dispatch). - pub fn table(&self, index: &Index, units: &mut Vec) { - TableGenerator::generate(self.ids.clone(), self.generate_external_constructors, index, units); + /// + /// Returns `true` if any unit was actually modified. The caller uses + /// this to skip the downstream re-index when the project contains no + /// classes, function blocks, or interfaces. + pub fn table(&self, index: &Index, units: &mut Vec) -> bool { + TableGenerator::generate(self.ids.clone(), self.generate_external_constructors, index, units) } /// Rewrites call sites and type declarations to route through the generated tables. diff --git a/src/lowering/polymorphism/table/interface.rs b/src/lowering/polymorphism/table/interface.rs index 5741d37e4fb..11c89a11e53 100644 --- a/src/lowering/polymorphism/table/interface.rs +++ b/src/lowering/polymorphism/table/interface.rs @@ -78,11 +78,17 @@ impl InterfaceTableGenerator { } /// Generates itable definitions and instances for every compilation unit. - pub fn generate(&mut self, index: &Index, units: &mut [CompilationUnit]) { + /// Returns `true` if any unit was modified. + pub fn generate(&mut self, index: &Index, units: &mut [CompilationUnit]) -> bool { + let mut changed = false; for unit in units.iter_mut() { let definitions = self.generate_itable_definitions(index, unit); let (internal_instances, external_instances) = self.generate_itable_instances(index, unit); + if !definitions.is_empty() || !internal_instances.is_empty() || !external_instances.is_empty() { + changed = true; + } + unit.user_types.extend(definitions); if !internal_instances.is_empty() { unit.global_vars.push(VariableBlock::global().with_variables(internal_instances)); @@ -95,6 +101,7 @@ impl InterfaceTableGenerator { ); } } + changed } /// Creates `__itable_` struct definitions for every interface declared in this unit. diff --git a/src/lowering/polymorphism/table/mod.rs b/src/lowering/polymorphism/table/mod.rs index 5dda534de68..86f1b838fc2 100644 --- a/src/lowering/polymorphism/table/mod.rs +++ b/src/lowering/polymorphism/table/mod.rs @@ -15,17 +15,22 @@ use self::pou::VirtualTableGenerator; pub struct TableGenerator; impl TableGenerator { + /// Returns `true` if either the vtable or itable generation produced any + /// new definitions or instances. When `false`, no compilation unit was + /// modified and the caller can skip the downstream re-index. pub fn generate( ids: IdProvider, generate_external_constructors: bool, index: &Index, units: &mut Vec, - ) { + ) -> bool { let mut vtable_gen = VirtualTableGenerator::new(ids.clone(), generate_external_constructors); - vtable_gen.generate(index, units); + let vtable_changed = vtable_gen.generate(index, units); let mut itable_gen = InterfaceTableGenerator::new(ids, generate_external_constructors); - itable_gen.generate(index, units); + let itable_changed = itable_gen.generate(index, units); + + vtable_changed || itable_changed } } diff --git a/src/lowering/polymorphism/table/pou.rs b/src/lowering/polymorphism/table/pou.rs index 96e89149fb2..c131140a77c 100644 --- a/src/lowering/polymorphism/table/pou.rs +++ b/src/lowering/polymorphism/table/pou.rs @@ -87,7 +87,10 @@ impl VirtualTableGenerator { VirtualTableGenerator { ids, generate_external_constructors } } - pub fn generate(&mut self, index: &Index, units: &mut Vec) { + /// Returns `true` if any vtable definition or instance was emitted. + /// When `false`, the caller can skip the downstream re-index. + pub fn generate(&mut self, index: &Index, units: &mut Vec) -> bool { + let mut changed = false; for unit in units { let mut definitions = Vec::new(); let mut internal_instances = Vec::new(); @@ -106,6 +109,10 @@ impl VirtualTableGenerator { } } + if !definitions.is_empty() || !internal_instances.is_empty() || !external_instances.is_empty() { + changed = true; + } + unit.user_types.extend(definitions); unit.global_vars.push(VariableBlock::global().with_variables(internal_instances)); if !external_instances.is_empty() { @@ -116,6 +123,7 @@ impl VirtualTableGenerator { ); } } + changed } /// Patches a `__vtable: POINTER TO __VOID` member variable into the given POU From fba7aa64f27f4023c99bada53f160d3a4ac715ce Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Mon, 18 May 2026 12:01:01 +0200 Subject: [PATCH 05/14] perf(driver): per-unit re-index after polymorphism table generation `TableGenerator::generate`, `VirtualTableGenerator::generate`, and `InterfaceTableGenerator::generate` now report the positional indices of units they actually modified instead of a single `bool`. `PolymorphismLowerer::post_index` uses that list to re-index only the touched units via a new `Index::reindex_unit(unit_id, unit, ids)` helper that runs `pre_process` + `indexer::index` and replaces the unit's contributions in the global index in place. Previously the participant re-indexed the whole project after vtable generation. The savings scale with the number of unmodified units in the project. Adds the `cross_unit_polymorphism_dispatch` lit test exercising cross-unit vtable generation and dispatch (`Base` and `Derived` in separate compilation units, dispatching through a base pointer). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../plc_driver/src/pipelines/participant.rs | 17 +++++++++------ src/index.rs | 21 +++++++++++++++++++ src/lowering/polymorphism/mod.rs | 8 +++---- src/lowering/polymorphism/table/interface.rs | 13 ++++++------ src/lowering/polymorphism/table/mod.rs | 20 +++++++++++------- src/lowering/polymorphism/table/pou.rs | 16 +++++++------- .../cross_unit_polymorphism_dispatch/main.st | 16 ++++++++++++++ .../cross_unit_polymorphism_dispatch/plc.json | 11 ++++++++++ .../cross_unit_polymorphism_dispatch/run.test | 1 + .../src/base.st | 10 +++++++++ .../src/derived.st | 10 +++++++++ 11 files changed, 112 insertions(+), 31 deletions(-) create mode 100644 tests/lit/multi/cross_unit_polymorphism_dispatch/main.st create mode 100644 tests/lit/multi/cross_unit_polymorphism_dispatch/plc.json create mode 100644 tests/lit/multi/cross_unit_polymorphism_dispatch/run.test create mode 100644 tests/lit/multi/cross_unit_polymorphism_dispatch/src/base.st create mode 100644 tests/lit/multi/cross_unit_polymorphism_dispatch/src/derived.st diff --git a/compiler/plc_driver/src/pipelines/participant.rs b/compiler/plc_driver/src/pipelines/participant.rs index 6f425d5e2f5..de93681de9e 100644 --- a/compiler/plc_driver/src/pipelines/participant.rs +++ b/compiler/plc_driver/src/pipelines/participant.rs @@ -357,13 +357,18 @@ impl PipelineParticipantMut for AggregateTypeLowerer { impl PipelineParticipantMut for PolymorphismLowerer { fn post_index(&mut self, indexed_project: IndexedProject) -> IndexedProject { - let IndexedProject { mut project, index, _unresolvables } = indexed_project; - let changed = self.table(&index, &mut project.units); - if changed { - project.index(self.ids.clone()) - } else { - IndexedProject { project, index, _unresolvables } + let IndexedProject { mut project, mut index, _unresolvables } = indexed_project; + let mutated = self.table(&index, &mut project.units); + if mutated.is_empty() { + return IndexedProject { project, index, _unresolvables }; + } + // Re-index only the units the table generator actually touched. + // The pipeline previously re-indexed the whole project here. + for unit_idx in &mutated { + let unit_id = plc::index::UnitId::source(*unit_idx); + index.reindex_unit(unit_id, &mut project.units[*unit_idx], self.ids.clone()); } + IndexedProject { project, index, _unresolvables } } fn post_annotate(&mut self, annotated_project: AnnotatedProject) -> AnnotatedProject { diff --git a/src/index.rs b/src/index.rs index 543dac2eaa1..d5e2033edf3 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1592,6 +1592,27 @@ impl Index { } } + /// Re-imports a single unit's index entries: drops the unit's previous + /// contributions (using the side-table built by [`Index::import_with_unit`]) + /// and re-imports them from the unit's current AST. Pre-processes the + /// unit first to match the behaviour of the project-wide indexing path + /// (which fills in anonymous pointer / array names that lowering can + /// leave behind). + /// + /// Used by participants that mutate a subset of units and need the + /// global index to reflect the new state without rebuilding from + /// scratch. + pub fn reindex_unit( + &mut self, + unit_id: UnitId, + unit: &mut plc_ast::ast::CompilationUnit, + id_provider: plc_ast::provider::IdProvider, + ) { + plc_ast::ast::pre_process(unit, id_provider); + self.remove_unit(unit_id); + self.import_with_unit(indexer::index(unit), unit_id); + } + fn transfer_constants( &mut self, mut variable: VariableIndexEntry, diff --git a/src/lowering/polymorphism/mod.rs b/src/lowering/polymorphism/mod.rs index 410080e91aa..603f5c567a2 100644 --- a/src/lowering/polymorphism/mod.rs +++ b/src/lowering/polymorphism/mod.rs @@ -40,10 +40,10 @@ impl PolymorphismLowerer { /// Generates vtable and itable struct definitions, `__vtable` member fields on root POUs, /// and global table instances. Must be called before [`dispatch`](Self::dispatch). /// - /// Returns `true` if any unit was actually modified. The caller uses - /// this to skip the downstream re-index when the project contains no - /// classes, function blocks, or interfaces. - pub fn table(&self, index: &Index, units: &mut Vec) -> bool { + /// Returns the positional indices of units that were actually modified. + /// An empty result means the project has no classes, function blocks, + /// or interfaces and the caller can skip the downstream re-index. + pub fn table(&self, index: &Index, units: &mut [CompilationUnit]) -> Vec { TableGenerator::generate(self.ids.clone(), self.generate_external_constructors, index, units) } diff --git a/src/lowering/polymorphism/table/interface.rs b/src/lowering/polymorphism/table/interface.rs index 11c89a11e53..890e1c09df2 100644 --- a/src/lowering/polymorphism/table/interface.rs +++ b/src/lowering/polymorphism/table/interface.rs @@ -78,15 +78,16 @@ impl InterfaceTableGenerator { } /// Generates itable definitions and instances for every compilation unit. - /// Returns `true` if any unit was modified. - pub fn generate(&mut self, index: &Index, units: &mut [CompilationUnit]) -> bool { - let mut changed = false; - for unit in units.iter_mut() { + /// Returns the positional indices of units that were actually modified; + /// an empty `Vec` means no work was done. + pub fn generate(&mut self, index: &Index, units: &mut [CompilationUnit]) -> Vec { + let mut mutated = Vec::new(); + for (idx, unit) in units.iter_mut().enumerate() { let definitions = self.generate_itable_definitions(index, unit); let (internal_instances, external_instances) = self.generate_itable_instances(index, unit); if !definitions.is_empty() || !internal_instances.is_empty() || !external_instances.is_empty() { - changed = true; + mutated.push(idx); } unit.user_types.extend(definitions); @@ -101,7 +102,7 @@ impl InterfaceTableGenerator { ); } } - changed + mutated } /// Creates `__itable_` struct definitions for every interface declared in this unit. diff --git a/src/lowering/polymorphism/table/mod.rs b/src/lowering/polymorphism/table/mod.rs index 86f1b838fc2..8a2992beaa7 100644 --- a/src/lowering/polymorphism/table/mod.rs +++ b/src/lowering/polymorphism/table/mod.rs @@ -15,22 +15,26 @@ use self::pou::VirtualTableGenerator; pub struct TableGenerator; impl TableGenerator { - /// Returns `true` if either the vtable or itable generation produced any - /// new definitions or instances. When `false`, no compilation unit was - /// modified and the caller can skip the downstream re-index. + /// Returns the positional indices of units that were modified by either + /// vtable or itable generation. Duplicates are removed and the indices + /// are sorted, so the caller can iterate directly. An empty `Vec` means + /// no compilation unit was modified and the caller can skip the + /// downstream re-index. pub fn generate( ids: IdProvider, generate_external_constructors: bool, index: &Index, - units: &mut Vec, - ) -> bool { + units: &mut [CompilationUnit], + ) -> Vec { let mut vtable_gen = VirtualTableGenerator::new(ids.clone(), generate_external_constructors); - let vtable_changed = vtable_gen.generate(index, units); + let mut mutated = vtable_gen.generate(index, units); let mut itable_gen = InterfaceTableGenerator::new(ids, generate_external_constructors); - let itable_changed = itable_gen.generate(index, units); + mutated.extend(itable_gen.generate(index, units)); - vtable_changed || itable_changed + mutated.sort_unstable(); + mutated.dedup(); + mutated } } diff --git a/src/lowering/polymorphism/table/pou.rs b/src/lowering/polymorphism/table/pou.rs index c131140a77c..ffdc1e1365d 100644 --- a/src/lowering/polymorphism/table/pou.rs +++ b/src/lowering/polymorphism/table/pou.rs @@ -87,11 +87,13 @@ impl VirtualTableGenerator { VirtualTableGenerator { ids, generate_external_constructors } } - /// Returns `true` if any vtable definition or instance was emitted. - /// When `false`, the caller can skip the downstream re-index. - pub fn generate(&mut self, index: &Index, units: &mut Vec) -> bool { - let mut changed = false; - for unit in units { + /// Returns the positional indices of units that were actually modified + /// (a vtable definition or instance emitted). An empty `Vec` means no + /// work was done and the caller can skip the downstream re-index; a + /// non-empty `Vec` means only those units need to be re-indexed. + pub fn generate(&mut self, index: &Index, units: &mut [CompilationUnit]) -> Vec { + let mut mutated = Vec::new(); + for (idx, unit) in units.iter_mut().enumerate() { let mut definitions = Vec::new(); let mut internal_instances = Vec::new(); let mut external_instances = Vec::new(); @@ -110,7 +112,7 @@ impl VirtualTableGenerator { } if !definitions.is_empty() || !internal_instances.is_empty() || !external_instances.is_empty() { - changed = true; + mutated.push(idx); } unit.user_types.extend(definitions); @@ -123,7 +125,7 @@ impl VirtualTableGenerator { ); } } - changed + mutated } /// Patches a `__vtable: POINTER TO __VOID` member variable into the given POU diff --git a/tests/lit/multi/cross_unit_polymorphism_dispatch/main.st b/tests/lit/multi/cross_unit_polymorphism_dispatch/main.st new file mode 100644 index 00000000000..f509b5dc6a7 --- /dev/null +++ b/tests/lit/multi/cross_unit_polymorphism_dispatch/main.st @@ -0,0 +1,16 @@ +FUNCTION main : DINT +VAR + base : Base; + derived : Derived; + ptr : POINTER TO Base; +END_VAR + +// Dispatch through a base pointer must call the actual instance's `kind`. +ptr := ADR(base); +// CHECK: kind via base ptr -> base = 1 +printf('kind via base ptr -> base = %d$N', ptr^.kind()); + +ptr := ADR(derived); +// CHECK: kind via base ptr -> derived = 2 +printf('kind via base ptr -> derived = %d$N', ptr^.kind()); +END_FUNCTION diff --git a/tests/lit/multi/cross_unit_polymorphism_dispatch/plc.json b/tests/lit/multi/cross_unit_polymorphism_dispatch/plc.json new file mode 100644 index 00000000000..eb551bd4072 --- /dev/null +++ b/tests/lit/multi/cross_unit_polymorphism_dispatch/plc.json @@ -0,0 +1,11 @@ +{ + "name": "IncrementalPolymorphismP4", + "files": [ + "main.st", + "src/base.st", + "src/derived.st", + "../../util/printf.pli" + ], + "compile_type": "Static", + "libraries": [] +} diff --git a/tests/lit/multi/cross_unit_polymorphism_dispatch/run.test b/tests/lit/multi/cross_unit_polymorphism_dispatch/run.test new file mode 100644 index 00000000000..60ea4e426c9 --- /dev/null +++ b/tests/lit/multi/cross_unit_polymorphism_dispatch/run.test @@ -0,0 +1 @@ +RUN: %COMPILE build plc.json && %RUN | %CHECK %S/main.st diff --git a/tests/lit/multi/cross_unit_polymorphism_dispatch/src/base.st b/tests/lit/multi/cross_unit_polymorphism_dispatch/src/base.st new file mode 100644 index 00000000000..f38ebb7982b --- /dev/null +++ b/tests/lit/multi/cross_unit_polymorphism_dispatch/src/base.st @@ -0,0 +1,10 @@ +// Base function block in its own compilation unit. The polymorphism +// participant rewrites this unit (adds a __vtable_Base type and a global +// instance) before per-unit re-indexing makes those new symbols +// queryable. + +FUNCTION_BLOCK Base +METHOD PUBLIC kind : DINT + kind := 1; +END_METHOD +END_FUNCTION_BLOCK diff --git a/tests/lit/multi/cross_unit_polymorphism_dispatch/src/derived.st b/tests/lit/multi/cross_unit_polymorphism_dispatch/src/derived.st new file mode 100644 index 00000000000..1f9997f97b1 --- /dev/null +++ b/tests/lit/multi/cross_unit_polymorphism_dispatch/src/derived.st @@ -0,0 +1,10 @@ +// Derived FB in a separate unit; overrides `kind`. The vtable for +// Derived is emitted in this unit, the vtable for Base in base.st — +// dispatch through a `POINTER TO Base` has to resolve through the right +// vtable for the right object. + +FUNCTION_BLOCK Derived EXTENDS Base +METHOD PUBLIC kind : DINT + kind := 2; +END_METHOD +END_FUNCTION_BLOCK From 988ba99ca95e4ce3126a6433078659f2ca4d211c Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Wed, 13 May 2026 12:28:20 +0200 Subject: [PATCH 06/14] perf(driver): partial re-annotation after AggregateTypeLowerer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `AggregateTypeLowerer::post_annotate` used to re-index and re-annotate the entire project whenever any POU returned an aggregate type. It now: * tracks per-unit mutations via a counter the visitor bumps on POU signature rewrites and pre/post statement insertions, * re-indexes only the units that were actually rewritten (via the Phase-1 `Index::reindex_unit` primitive plus a `evaluate_constants` pass to settle freshly-imported `ConstId`-backed expressions), * re-annotates the closure of mutated units plus their dependents in parallel through a new `AnnotatedProject::reannotate_units` method that mirrors the main annotate path's par_iter. The closure is computed by `compute_reannotate_closure`: for each mutated unit, every dependent recorded against any of that unit's POU names in the Phase-2 reverse-dependency graph is added. This is the correctness fix the user flagged — a POU whose signature changes invalidates the annotations of every unit that called it, not just the unit that owns the POU. Supporting changes: * `AnnotationMap` gains `into_any_box(self: Box) -> Box` so the post_annotate participant can recover the concrete `AstAnnotations` after the visitor took ownership through the trait object. * `AggregateTypeLowerer::visit_unit_tracked` is a new public entry-point that returns whether the visitor mutated the passed-in unit. * New lit test `tests/lit/multi/cross_unit_aggregate_signature/` declares a `STRING[16]`-returning function in one compilation unit and calls it from another, asserting that the caller's annotations are refreshed against the new VAR_IN_OUT signature. Headline number on multi-file oscat: annotate phase median drops from 162 ms (post-Phase-3) to 152 ms (post-Phase-4 step 2). Full workspace test suite and lit suite both stay green. Co-Authored-By: Claude Opus 4.7 (1M context) --- compiler/plc_driver/src/pipelines.rs | 40 +++++++ .../plc_driver/src/pipelines/participant.rs | 101 +++++++++++++++--- src/lowering/calls.rs | 18 ++++ src/resolver.rs | 14 +++ .../cross_unit_aggregate_signature/main.st | 12 +++ .../cross_unit_aggregate_signature/plc.json | 10 ++ .../cross_unit_aggregate_signature/run.test | 1 + .../cross_unit_aggregate_signature/src/lib.st | 16 +++ 8 files changed, 200 insertions(+), 12 deletions(-) create mode 100644 tests/lit/multi/cross_unit_aggregate_signature/main.st create mode 100644 tests/lit/multi/cross_unit_aggregate_signature/plc.json create mode 100644 tests/lit/multi/cross_unit_aggregate_signature/run.test create mode 100644 tests/lit/multi/cross_unit_aggregate_signature/src/lib.st diff --git a/compiler/plc_driver/src/pipelines.rs b/compiler/plc_driver/src/pipelines.rs index cae6deb381c..a32f3a41212 100644 --- a/compiler/plc_driver/src/pipelines.rs +++ b/compiler/plc_driver/src/pipelines.rs @@ -902,6 +902,46 @@ impl AnnotatedProject { ReverseDependencyGraph { edges } } + /// Re-runs the type annotator on the given subset of units (by positional + /// index) against the current `self.index`, merging the resulting + /// annotations into the existing [`AstAnnotations`]. Each touched + /// unit's `dependencies` and `literals` are replaced with the + /// freshly-computed set. Generic instantiations / synthetic types the + /// resolver produced into `new_index` are folded into `self.index` as + /// `UnitId::SYNTHETIC` so a later per-unit invalidation doesn't + /// accidentally drop them. + /// + /// Used by participants that mutate a subset of units in `post_annotate` + /// and need annotations refreshed for just those units instead of + /// re-annotating the whole project. + /// + /// `AnnotationMapImpl` is keyed by `AstId`; re-running `visit_unit` on a + /// mutated unit overwrites entries with the same id and adds entries for + /// any newly-inserted AST nodes. Annotations for AST nodes that the + /// lowerer removed linger as dead entries but never get looked up. + pub fn reannotate_units(&mut self, unit_indices: &[usize], id_provider: IdProvider) { + // Re-annotate the listed units in parallel, mirroring the main + // `IndexedProject::annotate` path. Each visit reads `self.index` + // immutably, so the par_iter is safe. + let results: Vec<(usize, _, _, _)> = unit_indices + .par_iter() + .map(|&idx| { + let (annotation, deps, literals) = + TypeAnnotator::visit_unit(&self.index, &self.units[idx].unit, id_provider.clone()); + (idx, annotation, deps, literals) + }) + .collect(); + + let mut merged = AnnotationMapImpl::default(); + for (idx, annotation, deps, literals) in results { + self.units[idx].dependencies = deps; + self.units[idx].literals = literals; + merged.import(annotation); + } + self.index.import_with_unit(std::mem::take(&mut merged.new_index), UnitId::SYNTHETIC); + self.annotations.annotation_map.import(merged); + } + /// Validates the project, reports any new diagnostics on the fly pub fn validate( &self, diff --git a/compiler/plc_driver/src/pipelines/participant.rs b/compiler/plc_driver/src/pipelines/participant.rs index de93681de9e..3930db1b8e0 100644 --- a/compiler/plc_driver/src/pipelines/participant.rs +++ b/compiler/plc_driver/src/pipelines/participant.rs @@ -18,6 +18,7 @@ use plc::{ index::{Index, PouIndexEntry}, lowering::{calls::AggregateTypeLowerer, polymorphism::PolymorphismLowerer}, output::FormatOption, + resolver::AstAnnotations, ConfigFormat, OnlineChange, Target, }; use plc_diagnostics::diagnostics::Diagnostic; @@ -334,22 +335,63 @@ impl PipelineParticipantMut for AggregateTypeLowerer { return annotated_project; } - let AnnotatedProject { units, index, annotations, diagnostics } = annotated_project; + // Build the reverse-dep graph BEFORE we mutate. The graph reflects + // each unit's pre-mutation `Dependency` set, which is what we need + // to identify callers / users of the POUs we're about to rewrite. + let reverse_deps = annotated_project.reverse_dependencies(); + + let AnnotatedProject { mut units, index, annotations, diagnostics } = annotated_project; self.index = Some(index); self.annotation = Some(Box::new(annotations)); - let units = units - .into_iter() - .map(|AnnotatedUnit { mut unit, .. }| { - self.visit_unit(&mut unit); - unit - }) - .collect(); + // Walk each unit and remember which ones the visitor actually + // mutated. `visit_unit_tracked` increments the lowerer's mutation + // counter on POU signature changes and pre/post statement + // insertions; comparing the counter before/after one unit isolates + // changes to that unit. + let mut mutated_indices = Vec::new(); + for (idx, annotated_unit) in units.iter_mut().enumerate() { + if self.visit_unit_tracked(&mut annotated_unit.unit) { + mutated_indices.push(idx); + } + } + + // Reclaim the index and annotations from `self`. The visitor stored + // them as `Option`s for the duration of the walk. + let mut index = self.index.take().expect("index returned by visit"); + let annotations = self.annotation.take().expect("annotations returned by visit"); + // The visitor produced a fresh `AstAnnotations` reference but we need + // a concrete `AstAnnotations` value to rebuild `AnnotatedProject`. + // The Box we received in the hook is the same + // object the resolver produced; downcasting to AstAnnotations is the + // simplest way to recover it. + let annotations = *annotations + .into_any_box() + .downcast::() + .expect("post_annotate participant always receives AstAnnotations"); + + // Re-index only the units the lowerer actually mutated. + for idx in &mutated_indices { + let unit_id = plc::index::UnitId::source(*idx); + index.reindex_unit(unit_id, &mut units[*idx].unit, self.id_provider.clone()); + } - // Re-index from modified units so the index reflects POU signature - // changes (e.g. aggregate returns converted to VAR_IN_OUT parameters). - let project = ParsedProject { units }; - let mut project = project.index(self.id_provider.clone()).annotate(self.id_provider.clone()); + // The freshly imported entries carry un-evaluated `ConstId`-backed + // expressions (string sizes, array dimensions). The annotate phase + // ran `evaluate_constants` before any participant fired, so the + // global index was consistent then; we need to do the same pass + // again now that we've patched in new entries. + let (index, _unresolvables) = plc::resolver::const_evaluator::evaluate_constants(index); + + // Compute the reverse-dep closure. The lowerer changes POU + // signatures (adds a VAR_IN_OUT parameter for the aggregate + // return), so every caller of a touched POU needs its annotations + // refreshed against the new signature, not just the unit that owns + // the POU. + let to_reannotate = compute_reannotate_closure(&mutated_indices, &units, &reverse_deps); + + let mut project = AnnotatedProject { units, index, annotations, diagnostics: Vec::new() }; + project.reannotate_units(&to_reannotate, self.id_provider.clone()); project.diagnostics = diagnostics; project } @@ -488,6 +530,41 @@ fn project_has_aggregate_returns(index: &Index) -> bool { false } +/// Returns the union of `mutated_indices` and every unit that depended on a +/// symbol declared in any mutated unit, sorted ascending. +/// +/// A participant that rewrites POU signatures (e.g. AggregateTypeLowerer +/// adding a VAR_IN_OUT parameter, or PolymorphismLowerer changing +/// interface-typed declarations to `__FATPOINTER`) makes the recorded +/// annotations in caller units stale. The closure built here is the set of +/// units that need to be re-annotated against the patched index for the +/// project to stay consistent. +/// +/// The closure only walks POU names today. Globals and types declared in a +/// mutated unit aren't typically rewritten by the participants Phase 4 +/// targets, but if a future participant rewrites them the corresponding +/// names should be added to the lookup loop here. +fn compute_reannotate_closure( + mutated_indices: &[usize], + units: &[super::AnnotatedUnit], + reverse_deps: &super::ReverseDependencyGraph, +) -> Vec { + use std::collections::BTreeSet; + let mut closure: BTreeSet = mutated_indices.iter().copied().collect(); + for &idx in mutated_indices { + for pou in &units[idx].get_unit().pous { + if let Some(dependents) = reverse_deps.dependents(&pou.name) { + for unit_id in dependents { + if unit_id.is_source() { + closure.insert(unit_id.raw() as usize); + } + } + } + } + } + closure.into_iter().collect() +} + /// True if any POU declares a super-class or any interfaces, in which case /// [`InheritanceLowerer`] needs to rewrite calls and walk inheritance chains. fn project_uses_inheritance(index: &Index) -> bool { diff --git a/src/lowering/calls.rs b/src/lowering/calls.rs index 8886cc67564..76809f66618 100644 --- a/src/lowering/calls.rs +++ b/src/lowering/calls.rs @@ -78,6 +78,12 @@ pub struct AggregateTypeLowerer { /// New statements to be added during visit, that should happen after the call. This should always be drained when read post_stmts: Vec>, counter: AtomicI32, + /// Incremented every time the lowerer mutates the AST in a way that + /// affects the global index (POU signature changes) or the annotations + /// (new pre/post statements inserted). Compared before / after a unit + /// visit so the participant can decide whether to re-index / re-annotate + /// that unit. + mutations: usize, } impl AggregateTypeLowerer { @@ -93,6 +99,15 @@ impl AggregateTypeLowerer { self.visit_compilation_unit(unit); } + /// Visits a single unit and returns whether the lowerer actually mutated + /// it. The caller uses the result to scope re-indexing / re-annotation + /// to only the units that need it. + pub fn visit_unit_tracked(&mut self, unit: &mut CompilationUnit) -> bool { + let before = self.mutations; + self.visit_compilation_unit(unit); + self.mutations > before + } + fn steal_and_walk_list(&mut self, list: &mut Vec) { //Enter new scope let mut new_stmts = vec![]; @@ -112,6 +127,7 @@ impl AggregateTypeLowerer { fn push_pre_statement(&mut self, stmt: AstNode) { if let Some(stmts) = self.pre_stmts.last_mut() { stmts.push(stmt); + self.mutations += 1; } else { unreachable!("Statement lists should exist at this point"); } @@ -124,6 +140,7 @@ impl AggregateTypeLowerer { fn push_post_statement(&mut self, stmt: AstNode) { if let Some(stmts) = self.post_stmts.last_mut() { stmts.push(stmt); + self.mutations += 1; } else { unreachable!("Statement lists should exist at this point"); } @@ -175,6 +192,7 @@ impl AstVisitorMut for AggregateTypeLowerer { // If the return type is aggregate, remove it from the signature and add a matching variable // in a VAR_IN_OUT block if index.get_effective_type_or_void_by_name(&return_type_name).is_aggregate_type() { + self.mutations += 1; let original_return = pou.return_type.take().unwrap(); let location = original_return.get_location(); //Create a new return type for the pou diff --git a/src/resolver.rs b/src/resolver.rs index 92392c8889c..c9b2f61ddaf 100644 --- a/src/resolver.rs +++ b/src/resolver.rs @@ -1014,6 +1014,12 @@ impl Dependency { } pub trait AnnotationMap { + /// Erases the trait object back to `Box` so the caller can + /// downcast to a concrete type (e.g. `AstAnnotations`) after a visitor + /// took ownership of the annotations and is handing them back. + /// Implementations should return `self` directly. + fn into_any_box(self: Box) -> Box; + fn get(&self, s: &AstNode) -> Option<&StatementAnnotation> { self.get_with_id(s.get_id()) } @@ -1116,6 +1122,10 @@ pub struct AstAnnotations { } impl AnnotationMap for AstAnnotations { + fn into_any_box(self: Box) -> Box { + self + } + fn get_with_id(&self, id: AstId) -> Option<&StatementAnnotation> { if id == self.bool_id { Some(&self.bool_annotation) @@ -1232,6 +1242,10 @@ impl AnnotationMapImpl { } impl AnnotationMap for AnnotationMapImpl { + fn into_any_box(self: Box) -> Box { + self + } + fn get_with_id(&self, id: AstId) -> Option<&StatementAnnotation> { self.type_map.get(&id) } diff --git a/tests/lit/multi/cross_unit_aggregate_signature/main.st b/tests/lit/multi/cross_unit_aggregate_signature/main.st new file mode 100644 index 00000000000..3ff68273194 --- /dev/null +++ b/tests/lit/multi/cross_unit_aggregate_signature/main.st @@ -0,0 +1,12 @@ +FUNCTION main : DINT +VAR + s : STRING[16]; +END_VAR + s := make_label(0); + // CHECK: label[0] = zero + printf('label[0] = %s$N', s); + + s := make_label(7); + // CHECK: label[7] = item + printf('label[7] = %s$N', s); +END_FUNCTION diff --git a/tests/lit/multi/cross_unit_aggregate_signature/plc.json b/tests/lit/multi/cross_unit_aggregate_signature/plc.json new file mode 100644 index 00000000000..7b433f6aa60 --- /dev/null +++ b/tests/lit/multi/cross_unit_aggregate_signature/plc.json @@ -0,0 +1,10 @@ +{ + "name": "CrossUnitAggregateSignature", + "files": [ + "main.st", + "src/lib.st", + "../../util/printf.pli" + ], + "compile_type": "Static", + "libraries": [] +} diff --git a/tests/lit/multi/cross_unit_aggregate_signature/run.test b/tests/lit/multi/cross_unit_aggregate_signature/run.test new file mode 100644 index 00000000000..60ea4e426c9 --- /dev/null +++ b/tests/lit/multi/cross_unit_aggregate_signature/run.test @@ -0,0 +1 @@ +RUN: %COMPILE build plc.json && %RUN | %CHECK %S/main.st diff --git a/tests/lit/multi/cross_unit_aggregate_signature/src/lib.st b/tests/lit/multi/cross_unit_aggregate_signature/src/lib.st new file mode 100644 index 00000000000..e9ffdedeadf --- /dev/null +++ b/tests/lit/multi/cross_unit_aggregate_signature/src/lib.st @@ -0,0 +1,16 @@ +// `make_label` returns an aggregate (STRING) type. AggregateTypeLowerer +// rewrites this POU's signature to take the return as a VAR_IN_OUT +// parameter. main.st calls it from a separate compilation unit, so the +// caller's annotations have to be refreshed against the new signature +// for the call to work. + +FUNCTION make_label : STRING[16] +VAR_INPUT + n : DINT; +END_VAR + IF n = 0 THEN + make_label := 'zero'; + ELSE + make_label := 'item'; + END_IF +END_FUNCTION From 23094aac01f91f58d8c98a5ed6fed341c9404ad6 Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Wed, 13 May 2026 12:41:12 +0200 Subject: [PATCH 07/14] perf(driver): per-unit re-index for RetainParticipant and ArrayLowerer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both lowerers already short-circuited the re-index when they had no work to do (Phase 3 bool gates). They now report `Vec` of positional indices instead, and the participant uses `Index::reindex_unit` to refresh only those units' entries. `lower_retain` resets its visitor's `changed` flag per unit so the returned list reflects which units actually had a retain variable hoisted or a retain block demoted. `lower_literal_arrays` already returned a per-unit bool; the caller now collects the truthful set instead of an or-reduced flag. Neither rewrite changes externally-visible symbol shapes (retain hoisting adds new globals, array lowering adds VAR_TEMP counter variables — both local additions), so no reverse-dep closure is needed here. The wins are small on oscat because both gates short-circuit, but the infrastructure is now uniform across all migrated participants and Phase 5 can drive any of them from a file-edit trigger. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../plc_driver/src/pipelines/participant.rs | 43 +++++++++++-------- compiler/plc_lowering/src/retain.rs | 21 +++++---- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/compiler/plc_driver/src/pipelines/participant.rs b/compiler/plc_driver/src/pipelines/participant.rs index 3930db1b8e0..d12b6e7a31e 100644 --- a/compiler/plc_driver/src/pipelines/participant.rs +++ b/compiler/plc_driver/src/pipelines/participant.rs @@ -274,21 +274,24 @@ impl ArrayLowerer { impl PipelineParticipantMut for ArrayLowerer { fn pre_annotate(&mut self, indexed_project: IndexedProject) -> IndexedProject { - let IndexedProject { project: ParsedProject { mut units }, index, _unresolvables } = indexed_project; - let mut changed = false; - for unit in &mut units { + let IndexedProject { project: ParsedProject { mut units }, mut index, _unresolvables } = + indexed_project; + // Track which units actually had a literal array lowered (and a + // VAR_TEMP counter added) so we can re-index only those. + let mut mutated = Vec::new(); + for (idx, unit) in units.iter_mut().enumerate() { if array_lowering::lower_literal_arrays(unit, &index, &mut self.id_provider) { - changed = true; + mutated.push(idx); } } - let project = ParsedProject { units }; - if changed { - // Re-index since we modified the AST (new statements, possible new alloca variables) - project.index(self.id_provider.clone()) - } else { - // Nothing was lowered; the existing index is still authoritative. - IndexedProject { project, index, _unresolvables } + if mutated.is_empty() { + return IndexedProject { project: ParsedProject { units }, index, _unresolvables }; } + for idx in &mutated { + let unit_id = plc::index::UnitId::source(*idx); + index.reindex_unit(unit_id, &mut units[*idx], self.id_provider.clone()); + } + IndexedProject { project: ParsedProject { units }, index, _unresolvables } } } @@ -448,15 +451,17 @@ impl PipelineParticipantMut for PolymorphismLowerer { impl PipelineParticipantMut for RetainParticipant { fn post_index(&mut self, indexed_project: IndexedProject) -> IndexedProject { - let IndexedProject { mut project, index, _unresolvables } = indexed_project; - let changed = self.lower_retain(&mut project.units, &index); - if changed { - project.index(self.ids.clone()) - } else { - // The lowerer reported no rewrites; the existing index is still - // authoritative. - IndexedProject { project, index, _unresolvables } + let IndexedProject { mut project, mut index, _unresolvables } = indexed_project; + let mutated = self.lower_retain(&mut project.units, &index); + if mutated.is_empty() { + return IndexedProject { project, index, _unresolvables }; } + // Re-index only the units the retain lowerer actually rewrote. + for idx in &mutated { + let unit_id = plc::index::UnitId::source(*idx); + index.reindex_unit(unit_id, &mut project.units[*idx], self.ids.clone()); + } + IndexedProject { project, index, _unresolvables } } } diff --git a/compiler/plc_lowering/src/retain.rs b/compiler/plc_lowering/src/retain.rs index efaf1dc32f5..515752831c2 100644 --- a/compiler/plc_lowering/src/retain.rs +++ b/compiler/plc_lowering/src/retain.rs @@ -54,18 +54,23 @@ impl RetainParticipant { Self { ids } } - /// Lowers `RETAIN` variables across the project. Returns `true` if any - /// unit was actually rewritten (a retain variable was hoisted to a - /// global, or a retain block was added). When `false`, the caller can - /// skip the downstream re-index because the AST and the variable - /// table are unchanged. - pub fn lower_retain(&mut self, units: &mut [CompilationUnit], index: &plc::index::Index) -> bool { + /// Lowers `RETAIN` variables across the project. Returns the positional + /// indices of units that were actually rewritten (a retain variable was + /// hoisted to a global, or a retain block was demoted out of a program). + /// An empty `Vec` means no unit changed; the caller can skip the + /// downstream re-index entirely. + pub fn lower_retain(&mut self, units: &mut [CompilationUnit], index: &plc::index::Index) -> Vec { let mut lowerer = RetainLowerer { ids: self.ids.clone(), index, context: Context::default(), changed: false }; - for unit in units { + let mut mutated = Vec::new(); + for (idx, unit) in units.iter_mut().enumerate() { + lowerer.changed = false; lowerer.visit_compilation_unit(unit); + if lowerer.changed { + mutated.push(idx); + } } - lowerer.changed + mutated } } From 88e9e497a20d42d3d4aa4a788ebb255172acd46a Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Wed, 13 May 2026 13:24:01 +0200 Subject: [PATCH 08/14] feat(driver): LoweringBookkeeper helper for participant bookkeeping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A lowering pass that mutates compilation units has to invalidate downstream state in lockstep: re-index the units it touched, run `evaluate_constants` if new ConstId-backed expressions appeared, compute the reverse-dependency closure of signature changes, and partial-re-annotate the closure. Every per-unit-reindexing participant Phases 3–4 added rolled this recipe by hand, with subtle drift between copies — `compute_reannotate_closure` lived as a free function in `participant.rs`, `evaluate_constants` was explicit at the call site, and each participant remembered (or forgot) to capture the reverse-dep graph before mutation. `compiler/plc_driver/src/pipelines/bookkeeping.rs` centralises the recipe. A participant accumulates effects against a `LoweringBookkeeper` (`mark_unit`, `signature_changed`, `mark_const_introduced`) then hands the project off: * `apply_to_indexed` — for participants that fire in pre/post_index or pre_annotate. Re-indexes mutated units, optionally runs `evaluate_constants`. * `apply_to_annotated` — for post_annotate participants. Adds the reverse-dep closure + parallel `reannotate_units` step on top. Requires the caller to pass a `ReverseDependencyGraph` captured *before* the mutation (the graph asks "who used the symbol before its signature changed?"). `AggregateTypeLowerer::post_annotate` is migrated to use the bookkeeper as the worked example — body drops from ~70 lines of bespoke invalidation plumbing to ~15 lines describing what changed. The now-orphaned `compute_reannotate_closure` free function is removed (the bookkeeper absorbs it). No trait changes. `PipelineParticipantMut` is untouched. Workspace tests, lit suite, and the multi-file oscat baseline are bit-identical to the post-Phase-4 numbers. PolymorphismLowerer's table reindex, Retain, and Array all still use their inline per-unit code — migrating each is a small follow-up and entirely opt-in. Co-Authored-By: Claude Opus 4.7 (1M context) --- compiler/plc_driver/src/pipelines.rs | 1 + .../plc_driver/src/pipelines/bookkeeping.rs | 186 ++++++++++++++++++ .../plc_driver/src/pipelines/participant.rs | 101 ++-------- 3 files changed, 208 insertions(+), 80 deletions(-) create mode 100644 compiler/plc_driver/src/pipelines/bookkeeping.rs diff --git a/compiler/plc_driver/src/pipelines.rs b/compiler/plc_driver/src/pipelines.rs index a32f3a41212..3fd62aa765b 100644 --- a/compiler/plc_driver/src/pipelines.rs +++ b/compiler/plc_driver/src/pipelines.rs @@ -57,6 +57,7 @@ use source_code::{source_location::SourceLocation, SourceContainer}; use serde_json; use toml; +pub mod bookkeeping; pub mod participant; pub mod timing; diff --git a/compiler/plc_driver/src/pipelines/bookkeeping.rs b/compiler/plc_driver/src/pipelines/bookkeeping.rs new file mode 100644 index 00000000000..c4875fb3020 --- /dev/null +++ b/compiler/plc_driver/src/pipelines/bookkeeping.rs @@ -0,0 +1,186 @@ +//! Bookkeeping helper for lowering participants. +//! +//! A lowering pass that mutates compilation units has to invalidate +//! downstream state in lockstep: re-index the units it touched, evaluate +//! any new `ConstId`-backed expressions, compute the reverse-dependency +//! closure of signature changes, and partial-re-annotate the closure. +//! Every per-unit-reindexing participant added in Phases 3–4 ran a +//! variant of that recipe by hand. +//! +//! [`LoweringBookkeeper`] centralises the recipe: a participant tells it +//! what it mutated and which symbols' public signatures changed, then +//! hands the project off via [`LoweringBookkeeper::apply_to_indexed`] or +//! [`LoweringBookkeeper::apply_to_annotated`]. The hidden invariants +//! (per-unit reindex order, `evaluate_constants` placement, closure +//! computation against the *pre-mutation* reverse-dep graph, +//! [`UnitId::SYNTHETIC`] tagging of the resolver's `new_index`) live in +//! one place rather than five. + +use std::collections::BTreeSet; + +use ast::provider::IdProvider; +use plc::index::UnitId; + +use super::{AnnotatedProject, IndexedProject, ReverseDependencyGraph}; + +/// Accumulates the per-unit effects of a lowering pass and applies the +/// matching invalidation when the visit is done. +#[derive(Default, Debug)] +pub struct LoweringBookkeeper { + /// Positional indices of compilation units the lowerer mutated. + /// Coalesced so a unit marked twice is only re-indexed once. + mutated_units: BTreeSet, + /// Symbol names (POU / global / type) whose public signature changed. + /// Used to expand the re-annotation closure via the reverse-dep graph. + changed_signatures: Vec, + /// True if the lowerer introduced `ConstId`-backed expressions + /// (string sizes, array dimensions) that need a fresh + /// `evaluate_constants` pass after the per-unit reindex. + introduces_constants: bool, +} + +impl LoweringBookkeeper { + pub fn new() -> Self { + Self::default() + } + + /// Records that the compilation unit at positional `idx` was mutated. + pub fn mark_unit(&mut self, idx: usize) { + self.mutated_units.insert(idx); + } + + /// Records that `name`'s public signature changed. The reannotation + /// closure in [`Self::apply_to_annotated`] looks `name` up in the + /// reverse-dep graph and adds every dependent unit. + pub fn signature_changed(&mut self, name: impl Into) { + self.changed_signatures.push(name.into()); + } + + /// Flags that the lowerer introduced new `ConstId`-backed expressions + /// (e.g. a `STRING[K+1]` derived from a freshly synthesised type). + /// Apply will run `const_evaluator::evaluate_constants` once after + /// the per-unit reindex. + pub fn mark_const_introduced(&mut self) { + self.introduces_constants = true; + } + + /// True if no unit was marked and no signature change recorded. A + /// participant that calls only this can skip the apply step entirely. + pub fn is_empty(&self) -> bool { + self.mutated_units.is_empty() && self.changed_signatures.is_empty() + } + + /// Drives invalidation against an [`IndexedProject`]. Used by + /// participants that mutate in `pre_index`, `post_index`, or + /// `pre_annotate` — anything that runs before the resolver. Re-indexes + /// only the units in `mutated_units`; runs `evaluate_constants` if + /// flagged. + pub fn apply_to_indexed(self, indexed: IndexedProject, ids: IdProvider) -> IndexedProject { + let IndexedProject { mut project, mut index, _unresolvables } = indexed; + + for &idx in &self.mutated_units { + let unit_id = UnitId::source(idx); + index.reindex_unit(unit_id, &mut project.units[idx], ids.clone()); + } + + if self.introduces_constants { + let (next_index, _unresolved) = plc::resolver::const_evaluator::evaluate_constants(index); + return IndexedProject { project, index: next_index, _unresolvables }; + } + + IndexedProject { project, index, _unresolvables } + } + + /// Drives invalidation against an [`AnnotatedProject`]. Used by + /// `post_annotate` participants. Re-indexes mutated units, evaluates + /// constants if flagged, computes the reverse-dep closure from + /// `reverse_deps_pre`, and partial-reannotates the closure in + /// parallel through [`AnnotatedProject::reannotate_units`]. + /// + /// `reverse_deps_pre` must be captured *before* the lowerer's + /// mutation — the closure asks "who used the symbol before it + /// changed?", not after. + pub fn apply_to_annotated( + self, + project: AnnotatedProject, + reverse_deps_pre: &ReverseDependencyGraph, + ids: IdProvider, + ) -> AnnotatedProject { + let AnnotatedProject { mut units, mut index, annotations, diagnostics } = project; + + for &idx in &self.mutated_units { + let unit_id = UnitId::source(idx); + index.reindex_unit(unit_id, &mut units[idx].unit, ids.clone()); + } + + if self.introduces_constants { + let (next_index, _unresolved) = plc::resolver::const_evaluator::evaluate_constants(index); + index = next_index; + } + + // Closure = mutated units ∪ every unit that depended on a + // signature we changed. BTreeSet dedups and gives a stable order + // for the parallel re-annotate. + let mut closure: BTreeSet = self.mutated_units.clone(); + for sig in &self.changed_signatures { + if let Some(dependents) = reverse_deps_pre.dependents(sig) { + for unit_id in dependents { + if let Some(idx) = unit_id.source_index() { + closure.insert(idx); + } + } + } + } + let closure: Vec = closure.into_iter().collect(); + + let mut project = AnnotatedProject { units, index, annotations, diagnostics: Vec::new() }; + project.reannotate_units(&closure, ids); + project.diagnostics = diagnostics; + project + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_is_empty() { + let b = LoweringBookkeeper::new(); + assert!(b.is_empty()); + } + + #[test] + fn mark_unit_makes_non_empty() { + let mut b = LoweringBookkeeper::new(); + b.mark_unit(0); + assert!(!b.is_empty()); + } + + #[test] + fn signature_change_makes_non_empty_even_without_unit() { + let mut b = LoweringBookkeeper::new(); + b.signature_changed("Foo"); + assert!(!b.is_empty()); + } + + #[test] + fn const_flag_without_other_changes_is_empty() { + // A pass that only flags const-introduction without actually + // mutating anything has nothing to invalidate. The flag matters + // only when paired with mutated units; treat it as effectively + // empty for the gating check. + let mut b = LoweringBookkeeper::new(); + b.mark_const_introduced(); + assert!(b.is_empty()); + } + + #[test] + fn mark_unit_dedups() { + let mut b = LoweringBookkeeper::new(); + b.mark_unit(3); + b.mark_unit(3); + b.mark_unit(1); + assert_eq!(b.mutated_units.iter().copied().collect::>(), vec![1, 3]); + } +} diff --git a/compiler/plc_driver/src/pipelines/participant.rs b/compiler/plc_driver/src/pipelines/participant.rs index d12b6e7a31e..1b3afd7b531 100644 --- a/compiler/plc_driver/src/pipelines/participant.rs +++ b/compiler/plc_driver/src/pipelines/participant.rs @@ -331,72 +331,48 @@ impl PipelineParticipantMut for InheritanceLowerer { impl PipelineParticipantMut for AggregateTypeLowerer { fn post_annotate(&mut self, annotated_project: AnnotatedProject) -> AnnotatedProject { - // Skip if no POU has an aggregate return type — the visit would walk - // every unit and rewrite nothing, and the re-index/re-annotate would - // reproduce the existing state. + // Skip if no POU has an aggregate return type — nothing to do and + // no invalidation needed. if !project_has_aggregate_returns(&annotated_project.index) { return annotated_project; } - // Build the reverse-dep graph BEFORE we mutate. The graph reflects - // each unit's pre-mutation `Dependency` set, which is what we need - // to identify callers / users of the POUs we're about to rewrite. + // Capture the reverse-dep graph before mutation; the closure asks + // "who used the symbol before its signature changed?". let reverse_deps = annotated_project.reverse_dependencies(); let AnnotatedProject { mut units, index, annotations, diagnostics } = annotated_project; self.index = Some(index); self.annotation = Some(Box::new(annotations)); - // Walk each unit and remember which ones the visitor actually - // mutated. `visit_unit_tracked` increments the lowerer's mutation - // counter on POU signature changes and pre/post statement - // insertions; comparing the counter before/after one unit isolates - // changes to that unit. - let mut mutated_indices = Vec::new(); + // Walk each unit; bookkeeper records which were mutated and which + // POU names had their signature rewritten (aggregate return → + // VAR_IN_OUT). Constants enter the index because the new + // VAR_IN_OUT parameter's type can be e.g. `STRING[K+1]`. + let mut book = super::bookkeeping::LoweringBookkeeper::new(); for (idx, annotated_unit) in units.iter_mut().enumerate() { if self.visit_unit_tracked(&mut annotated_unit.unit) { - mutated_indices.push(idx); + book.mark_unit(idx); + for pou in &annotated_unit.unit.pous { + book.signature_changed(&pou.name); + } } } + if !book.is_empty() { + book.mark_const_introduced(); + } - // Reclaim the index and annotations from `self`. The visitor stored - // them as `Option`s for the duration of the walk. - let mut index = self.index.take().expect("index returned by visit"); + // Reclaim the index and annotations from `self` (the visitor + // borrowed them for the walk). + let index = self.index.take().expect("index returned by visit"); let annotations = self.annotation.take().expect("annotations returned by visit"); - // The visitor produced a fresh `AstAnnotations` reference but we need - // a concrete `AstAnnotations` value to rebuild `AnnotatedProject`. - // The Box we received in the hook is the same - // object the resolver produced; downcasting to AstAnnotations is the - // simplest way to recover it. let annotations = *annotations .into_any_box() .downcast::() .expect("post_annotate participant always receives AstAnnotations"); - // Re-index only the units the lowerer actually mutated. - for idx in &mutated_indices { - let unit_id = plc::index::UnitId::source(*idx); - index.reindex_unit(unit_id, &mut units[*idx].unit, self.id_provider.clone()); - } - - // The freshly imported entries carry un-evaluated `ConstId`-backed - // expressions (string sizes, array dimensions). The annotate phase - // ran `evaluate_constants` before any participant fired, so the - // global index was consistent then; we need to do the same pass - // again now that we've patched in new entries. - let (index, _unresolvables) = plc::resolver::const_evaluator::evaluate_constants(index); - - // Compute the reverse-dep closure. The lowerer changes POU - // signatures (adds a VAR_IN_OUT parameter for the aggregate - // return), so every caller of a touched POU needs its annotations - // refreshed against the new signature, not just the unit that owns - // the POU. - let to_reannotate = compute_reannotate_closure(&mutated_indices, &units, &reverse_deps); - - let mut project = AnnotatedProject { units, index, annotations, diagnostics: Vec::new() }; - project.reannotate_units(&to_reannotate, self.id_provider.clone()); - project.diagnostics = diagnostics; - project + let project = AnnotatedProject { units, index, annotations, diagnostics }; + book.apply_to_annotated(project, &reverse_deps, self.id_provider.clone()) } } @@ -535,41 +511,6 @@ fn project_has_aggregate_returns(index: &Index) -> bool { false } -/// Returns the union of `mutated_indices` and every unit that depended on a -/// symbol declared in any mutated unit, sorted ascending. -/// -/// A participant that rewrites POU signatures (e.g. AggregateTypeLowerer -/// adding a VAR_IN_OUT parameter, or PolymorphismLowerer changing -/// interface-typed declarations to `__FATPOINTER`) makes the recorded -/// annotations in caller units stale. The closure built here is the set of -/// units that need to be re-annotated against the patched index for the -/// project to stay consistent. -/// -/// The closure only walks POU names today. Globals and types declared in a -/// mutated unit aren't typically rewritten by the participants Phase 4 -/// targets, but if a future participant rewrites them the corresponding -/// names should be added to the lookup loop here. -fn compute_reannotate_closure( - mutated_indices: &[usize], - units: &[super::AnnotatedUnit], - reverse_deps: &super::ReverseDependencyGraph, -) -> Vec { - use std::collections::BTreeSet; - let mut closure: BTreeSet = mutated_indices.iter().copied().collect(); - for &idx in mutated_indices { - for pou in &units[idx].get_unit().pous { - if let Some(dependents) = reverse_deps.dependents(&pou.name) { - for unit_id in dependents { - if unit_id.is_source() { - closure.insert(unit_id.raw() as usize); - } - } - } - } - } - closure.into_iter().collect() -} - /// True if any POU declares a super-class or any interfaces, in which case /// [`InheritanceLowerer`] needs to rewrite calls and walk inheritance chains. fn project_uses_inheritance(index: &Index) -> bool { From ed67c21fc40c1deabe812c6e6e844893ee50499d Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Wed, 13 May 2026 13:37:07 +0200 Subject: [PATCH 09/14] feat(driver): UnitLowerer trait + AutoLowerer adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new participant author had to implement `PipelineParticipantMut`'s four hooks, pick the right one for their stage, destructure `ParsedProject` / `IndexedProject` / `AnnotatedProject` correctly, and remember the bookkeeping invariants (re-index touched units, `evaluate_constants` after, reverse-dep closure for re-annotation, `UnitId::SYNTHETIC` for the resolver's `new_index`). The `LoweringBookkeeper` from the previous commit collapses the bookkeeping; this commit adds the trait that collapses the rest. `compiler/plc_driver/src/pipelines/unit_lowerer.rs`: * `LoweringStage` enum (`PreIndex` / `PostIndex` / `PreAnnotate` / `PostAnnotate`) — where in the pipeline the lowerer fires. * `LoweringContext { index, annotations, ids }` — read-only context handed to each `lower_unit` call. `annotations` is `Some` only on `PostAnnotate`. * `UnitLowerer` trait with one required method: `lower_unit(&mut self, &mut CompilationUnit, &LoweringContext) -> UnitChange`. Optional `name()` and `diagnostics()` with sensible defaults. * `UnitChange { mutated, changed_signatures, introduces_constants }` — what the lowerer touched. `UnitChange::none()` and `UnitChange::mutated()` are the common constructors. * `AutoLowerer` wraps a `UnitLowerer` and implements `PipelineParticipantMut`. Constructor takes the lowerer, the stage, and the shared `IdProvider`. At its stage, the adapter walks all compilation units, calls `T::lower_unit` per unit, aggregates the resulting `UnitChange`s into a `LoweringBookkeeper`, and hands the project off to `apply_to_indexed` or `apply_to_annotated`. `name()` forwards through so phase-timing labels stay readable. Worked migration: `RetainParticipant`'s `post_index` path. Added a new `lower_one_unit` method on `RetainParticipant` for use by adapters (the existing `lower_retain` stays). A thin `RetainUnitLowerer` wrapper in `participant.rs` implements `UnitLowerer` by delegating to `lower_one_unit`. The participant list in `get_default_mut_participants` now registers `AutoLowerer::new(RetainUnitLowerer::new(...), PostIndex, ids)` instead of `RetainParticipant::new(...)` directly. The old `impl PipelineParticipantMut for RetainParticipant` is removed. `PipelineParticipantMut` is untouched. The other participants (PolymorphismLowerer, AggregateTypeLowerer, InheritanceLowerer, ArrayLowerer, PropertyLowerer, etc.) keep their existing impls. Migrating each is opt-in and entirely additive. Workspace tests, lit suite, multi-file oscat baseline all bit-identical to the Phase 4.1 numbers (~150 ms annotate median). Co-Authored-By: Claude Opus 4.7 (1M context) --- compiler/plc_driver/src/pipelines.rs | 9 +- .../plc_driver/src/pipelines/participant.rs | 42 ++- .../plc_driver/src/pipelines/unit_lowerer.rs | 289 ++++++++++++++++++ compiler/plc_lowering/src/retain.rs | 10 + 4 files changed, 336 insertions(+), 14 deletions(-) create mode 100644 compiler/plc_driver/src/pipelines/unit_lowerer.rs diff --git a/compiler/plc_driver/src/pipelines.rs b/compiler/plc_driver/src/pipelines.rs index 3fd62aa765b..1bc40a3a10a 100644 --- a/compiler/plc_driver/src/pipelines.rs +++ b/compiler/plc_driver/src/pipelines.rs @@ -44,7 +44,7 @@ use plc_header_generator::{ use plc_index::GlobalContext; use plc_lowering::{ control_statement::ControlStatementParticipant, inheritance::InheritanceLowerer, loops::LoopDesugarer, - reference_to_return::ReferenceToReturnParticipant, retain::RetainParticipant, + reference_to_return::ReferenceToReturnParticipant, }; use project::{ object::Object, @@ -60,6 +60,7 @@ use toml; pub mod bookkeeping; pub mod participant; pub mod timing; +pub mod unit_lowerer; use timing::PhaseTimer; pub mod property; @@ -352,7 +353,11 @@ impl BuildPipeline { )), Box::new(ControlStatementParticipant::new(self.context.provider())), Box::new(ReferenceToReturnParticipant::new(self.context.provider())), - Box::new(RetainParticipant::new(self.context.provider())), + Box::new(unit_lowerer::AutoLowerer::new( + participant::RetainUnitLowerer::new(self.context.provider()), + unit_lowerer::LoweringStage::PostIndex, + self.context.provider(), + )), Box::new(AggregateTypeLowerer::new(self.context.provider())), Box::new(InheritanceLowerer::new(self.context.provider())), Box::new(InitParticipant::new( diff --git a/compiler/plc_driver/src/pipelines/participant.rs b/compiler/plc_driver/src/pipelines/participant.rs index 1b3afd7b531..6357270bb6c 100644 --- a/compiler/plc_driver/src/pipelines/participant.rs +++ b/compiler/plc_driver/src/pipelines/participant.rs @@ -425,19 +425,37 @@ impl PipelineParticipantMut for PolymorphismLowerer { } } -impl PipelineParticipantMut for RetainParticipant { - fn post_index(&mut self, indexed_project: IndexedProject) -> IndexedProject { - let IndexedProject { mut project, mut index, _unresolvables } = indexed_project; - let mutated = self.lower_retain(&mut project.units, &index); - if mutated.is_empty() { - return IndexedProject { project, index, _unresolvables }; - } - // Re-index only the units the retain lowerer actually rewrote. - for idx in &mutated { - let unit_id = plc::index::UnitId::source(*idx); - index.reindex_unit(unit_id, &mut project.units[*idx], self.ids.clone()); +/// `UnitLowerer` wrapper for [`RetainParticipant`]. The retain rewrite +/// is naturally per-unit (each unit's `VAR RETAIN` blocks are hoisted to +/// the same unit's global vars), so this is the first lowerer ported to +/// the new adapter framework. Registered as +/// `AutoLowerer::new(RetainUnitLowerer::new(...), LoweringStage::PostIndex, ids)`. +pub struct RetainUnitLowerer { + inner: RetainParticipant, +} + +impl RetainUnitLowerer { + pub fn new(ids: ast::provider::IdProvider) -> Self { + Self { inner: RetainParticipant::new(ids) } + } +} + +impl super::unit_lowerer::UnitLowerer for RetainUnitLowerer { + fn name(&self) -> &'static str { + "RetainParticipant" + } + + fn lower_unit( + &mut self, + unit: &mut ast::ast::CompilationUnit, + ctx: &super::unit_lowerer::LoweringContext, + ) -> super::unit_lowerer::UnitChange { + let mutated = self.inner.lower_one_unit(unit, ctx.index); + if mutated { + super::unit_lowerer::UnitChange::mutated() + } else { + super::unit_lowerer::UnitChange::none() } - IndexedProject { project, index, _unresolvables } } } diff --git a/compiler/plc_driver/src/pipelines/unit_lowerer.rs b/compiler/plc_driver/src/pipelines/unit_lowerer.rs new file mode 100644 index 00000000000..4acb8efff0d --- /dev/null +++ b/compiler/plc_driver/src/pipelines/unit_lowerer.rs @@ -0,0 +1,289 @@ +//! Per-unit lowering API. +//! +//! [`PipelineParticipantMut`](super::participant::PipelineParticipantMut) is +//! the full-project participant trait: an impl receives the entire +//! `ParsedProject` / `IndexedProject` / `AnnotatedProject`, mutates it, +//! and is responsible for driving any re-index / re-annotate it needs. +//! That gives maximum flexibility but every per-unit-rewriting lowerer +//! ends up re-implementing the same walk-and-bookkeep pattern. +//! +//! [`UnitLowerer`] is the narrower trait such lowerers actually want. +//! An impl gets one compilation unit at a time, a read-only context +//! (index + annotations), and returns a [`UnitChange`] describing what +//! it touched. [`AutoLowerer`] adapts a `UnitLowerer` into a +//! `PipelineParticipantMut` by walking the project's units, collecting +//! `UnitChange`s into a [`LoweringBookkeeper`](super::bookkeeping::LoweringBookkeeper), +//! and applying the bookkeeping at the registered [`LoweringStage`]. +//! +//! `PipelineParticipantMut` stays the canonical entry point; +//! `UnitLowerer` is purely additive. Old participants keep working +//! unchanged. + +use ast::{ast::CompilationUnit, provider::IdProvider}; +use plc::{index::Index, resolver::AstAnnotations}; +use plc_diagnostics::diagnostics::Diagnostic; + +use super::{ + bookkeeping::LoweringBookkeeper, participant::PipelineParticipantMut, AnnotatedProject, AnnotatedUnit, + IndexedProject, ParsedProject, +}; + +/// Where in the pipeline an [`AutoLowerer`] should fire. The variant +/// determines which [`PipelineParticipantMut`] hook the adapter +/// dispatches to. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LoweringStage { + /// Runs after parsing, before the initial index pass. No index or + /// annotations available; [`LoweringContext`] carries an empty + /// `Index` placeholder so the trait signature stays uniform. + PreIndex, + /// Runs after the initial index pass. `index` is available; + /// `annotations` is `None`. + PostIndex, + /// Runs between indexing and annotation. Same shape as `PostIndex`. + PreAnnotate, + /// Runs after annotation. Both `index` and `annotations` are + /// available. + PostAnnotate, +} + +/// Read-only context handed to each [`UnitLowerer::lower_unit`] call. +/// `annotations` is `Some` only on [`LoweringStage::PostAnnotate`]. +pub struct LoweringContext<'a> { + pub index: &'a Index, + pub annotations: Option<&'a AstAnnotations>, + pub ids: IdProvider, +} + +/// Description of what a [`UnitLowerer::lower_unit`] call did. The +/// adapter aggregates these into a [`LoweringBookkeeper`] and runs the +/// matching invalidation when the visit is done. +#[derive(Default, Debug)] +pub struct UnitChange { + /// True if the unit's AST was modified at all. + pub mutated: bool, + /// Names of symbols (POU / global / type) declared in this unit + /// whose public signature changed. The adapter feeds these to + /// [`LoweringBookkeeper::signature_changed`]. + pub changed_signatures: Vec, + /// True if the lowerer introduced `ConstId`-backed expressions + /// (string sizes, array dimensions). The adapter forwards this + /// to [`LoweringBookkeeper::mark_const_introduced`] when any + /// unit's `UnitChange` flags it. + pub introduces_constants: bool, +} + +impl UnitChange { + pub fn none() -> Self { + Self::default() + } + pub fn mutated() -> Self { + Self { mutated: true, ..Self::default() } + } +} + +/// A lowering pass that operates one compilation unit at a time. Most +/// existing lowerers fit this shape. New lowerers should prefer this +/// trait + [`AutoLowerer`] over implementing `PipelineParticipantMut` +/// directly. +pub trait UnitLowerer { + /// Short label used by phase-timing instrumentation. Default + /// returns the implementing type's short name. + fn name(&self) -> &'static str { + super::timing::short_type_name(std::any::type_name::()) + } + + /// Visit one compilation unit. Return what changed. + fn lower_unit(&mut self, unit: &mut CompilationUnit, ctx: &LoweringContext) -> UnitChange; + + /// Optional diagnostics produced during this lowerer's visit, drained + /// at the end of the pipeline stage. + fn diagnostics(&mut self) -> Vec { + Vec::new() + } +} + +/// Adapts a [`UnitLowerer`] into a [`PipelineParticipantMut`]. Registered +/// at the stage where the lowerer should fire. Walks the project's +/// compilation units, calls `lower_unit` on each, and drives the +/// matching [`LoweringBookkeeper`] invalidation. +/// +/// The adapter stores an [`IdProvider`] shared with the rest of the +/// pipeline so the re-index / re-annotate it drives produces AST ids +/// from the same sequence the resolver and codegen see. +pub struct AutoLowerer { + inner: T, + stage: LoweringStage, + ids: IdProvider, +} + +impl AutoLowerer { + pub fn new(inner: T, stage: LoweringStage, ids: IdProvider) -> Self { + Self { inner, stage, ids } + } +} + +impl PipelineParticipantMut for AutoLowerer { + fn name(&self) -> &'static str { + // Forward through so phase-timing output stays readable. + self.inner.name() + } + + fn pre_index(&mut self, mut parsed_project: ParsedProject) -> ParsedProject { + if self.stage != LoweringStage::PreIndex { + return parsed_project; + } + // PreIndex runs before any index exists; bookkeeper has nothing + // to invalidate yet (the upcoming index pass picks up the + // mutated units automatically). We still walk so the lowerer + // gets a chance to mutate AST before the project-wide index + // builds it. + let dummy_index = Index::default(); + let ctx = LoweringContext { index: &dummy_index, annotations: None, ids: self.ids.clone() }; + for unit in parsed_project.units.iter_mut() { + let _change = self.inner.lower_unit(unit, &ctx); + } + parsed_project + } + + fn post_index(&mut self, indexed_project: IndexedProject) -> IndexedProject { + if self.stage != LoweringStage::PostIndex { + return indexed_project; + } + run_indexed(&mut self.inner, indexed_project, self.ids.clone()) + } + + fn pre_annotate(&mut self, indexed_project: IndexedProject) -> IndexedProject { + if self.stage != LoweringStage::PreAnnotate { + return indexed_project; + } + run_indexed(&mut self.inner, indexed_project, self.ids.clone()) + } + + fn post_annotate(&mut self, annotated_project: AnnotatedProject) -> AnnotatedProject { + if self.stage != LoweringStage::PostAnnotate { + return annotated_project; + } + let reverse_deps = annotated_project.reverse_dependencies(); + let AnnotatedProject { mut units, index, annotations, diagnostics } = annotated_project; + let ids = self.ids.clone(); + let mut book = LoweringBookkeeper::new(); + let mut introduces_const = false; + { + let ctx = LoweringContext { index: &index, annotations: Some(&annotations), ids: ids.clone() }; + for (idx, au) in units.iter_mut().enumerate() { + let change = self.inner.lower_unit(&mut au.unit, &ctx); + if change.introduces_constants { + introduces_const = true; + } + absorb(&mut book, idx, change); + } + } + if introduces_const { + book.mark_const_introduced(); + } + if book.is_empty() { + return AnnotatedProject { units, index, annotations, diagnostics }; + } + let project = AnnotatedProject { units, index, annotations, diagnostics }; + book.apply_to_annotated(project, &reverse_deps, ids) + } + + fn diagnostics(&mut self) -> Vec { + self.inner.diagnostics() + } +} + +fn run_indexed( + inner: &mut T, + indexed_project: IndexedProject, + ids: IdProvider, +) -> IndexedProject { + let IndexedProject { mut project, index, _unresolvables } = indexed_project; + let mut book = LoweringBookkeeper::new(); + let mut introduces_const = false; + { + let ctx = LoweringContext { index: &index, annotations: None, ids: ids.clone() }; + for (idx, unit) in project.units.iter_mut().enumerate() { + let change = inner.lower_unit(unit, &ctx); + if change.introduces_constants { + introduces_const = true; + } + absorb(&mut book, idx, change); + } + } + if introduces_const { + book.mark_const_introduced(); + } + if book.is_empty() { + return IndexedProject { project, index, _unresolvables }; + } + book.apply_to_indexed(IndexedProject { project, index, _unresolvables }, ids) +} + +fn absorb(book: &mut LoweringBookkeeper, idx: usize, change: UnitChange) { + if change.mutated { + book.mark_unit(idx); + } + for sig in change.changed_signatures { + book.signature_changed(sig); + } +} + +// Suppress an unused-import warning when AnnotatedUnit isn't referenced +// elsewhere in the module. +#[allow(dead_code)] +fn _force_use(_u: AnnotatedUnit) {} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn unit_change_default_is_not_mutated() { + assert!(!UnitChange::default().mutated); + assert!(UnitChange::default().changed_signatures.is_empty()); + assert!(!UnitChange::default().introduces_constants); + } + + #[test] + fn unit_change_constructors() { + assert!(!UnitChange::none().mutated); + assert!(UnitChange::mutated().mutated); + } + + #[test] + fn absorb_adds_to_book() { + let mut book = LoweringBookkeeper::new(); + absorb( + &mut book, + 7, + UnitChange { mutated: true, changed_signatures: vec!["Foo".into()], introduces_constants: false }, + ); + assert!(!book.is_empty()); + } + + #[test] + fn absorb_with_unmutated_change_doesnt_mark_unit() { + let mut book = LoweringBookkeeper::new(); + absorb(&mut book, 3, UnitChange::none()); + assert!(book.is_empty()); + } + + #[test] + fn absorb_records_signatures_even_without_unit_mutation() { + // Edge case: a lowerer reports a signature change but flags the + // unit as unmutated. The closure still needs the signature. + let mut book = LoweringBookkeeper::new(); + absorb( + &mut book, + 0, + UnitChange { + mutated: false, + changed_signatures: vec!["Bar".into()], + introduces_constants: false, + }, + ); + assert!(!book.is_empty()); + } +} diff --git a/compiler/plc_lowering/src/retain.rs b/compiler/plc_lowering/src/retain.rs index 515752831c2..cc729df96cc 100644 --- a/compiler/plc_lowering/src/retain.rs +++ b/compiler/plc_lowering/src/retain.rs @@ -72,6 +72,16 @@ impl RetainParticipant { } mutated } + + /// Same as [`Self::lower_retain`] for a single unit. Returns `true` if + /// any retain variable was rewritten. Used by per-unit adapters such + /// as the `UnitLowerer` framework in the driver. + pub fn lower_one_unit(&mut self, unit: &mut CompilationUnit, index: &plc::index::Index) -> bool { + let mut lowerer = + RetainLowerer { ids: self.ids.clone(), index, context: Context::default(), changed: false }; + lowerer.visit_compilation_unit(unit); + lowerer.changed + } } struct RetainLowerer<'idx> { From 4836a020973d00e1dc47808f514756d8f14ce3a6 Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Wed, 13 May 2026 13:47:10 +0200 Subject: [PATCH 10/14] feat(driver): UnitLowerer::prepare for two-pass context-gather MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some lowerers need to scan every unit before they can transform any individual one — collect every POU returning `REFERENCE TO`, build a dispatch table from class hierarchies, or otherwise compute a project-wide context. The pattern is two-pass: gather, then transform. `UnitLowerer` now has an optional `prepare(&[&CompilationUnit], &LoweringContext)` method (no-op by default). The `AutoLowerer` adapter calls `prepare` once before walking units for `lower_unit`. At each stage the adapter builds a `Vec<&CompilationUnit>` view — cheaper than cloning the units, works uniformly across `ParsedProject` (plain units), `IndexedProject` (plain units), and `AnnotatedProject` (`AnnotatedUnit` wrappers). Lowerers that don't need the pre-pass get nothing — the default impl is empty. `RetainParticipant`'s adapter and any other existing `UnitLowerer` keep working unchanged. Adds a `PrepareCounter` test fixture demonstrating the contract: prepare sees every unit before any `lower_unit` call, and a panic asserts the ordering at runtime for any lowerer that mirrors the pattern. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../plc_driver/src/pipelines/unit_lowerer.rs | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/compiler/plc_driver/src/pipelines/unit_lowerer.rs b/compiler/plc_driver/src/pipelines/unit_lowerer.rs index 4acb8efff0d..7cf7ec2a14c 100644 --- a/compiler/plc_driver/src/pipelines/unit_lowerer.rs +++ b/compiler/plc_driver/src/pipelines/unit_lowerer.rs @@ -93,6 +93,22 @@ pub trait UnitLowerer { super::timing::short_type_name(std::any::type_name::()) } + /// Optional pre-pass over every compilation unit. Called once + /// before any [`Self::lower_unit`] call, with read-only access to + /// every unit. Lowerers that need an initial pass to collect + /// project-wide context (e.g. find every POU with a particular + /// signature shape, build a name → callsite map) implement this and + /// stash the gathered state on `self`. Default is a no-op. + /// + /// `units` is a slice of references rather than owned units so the + /// adapter doesn't have to clone, even when the surrounding pipeline + /// stage holds `AnnotatedUnit` (post-annotate) instead of plain + /// `CompilationUnit` (pre-annotate). + /// + /// `ctx` carries the same `index` / `annotations` / `ids` the + /// per-unit pass receives. + fn prepare(&mut self, _units: &[&CompilationUnit], _ctx: &LoweringContext) {} + /// Visit one compilation unit. Return what changed. fn lower_unit(&mut self, unit: &mut CompilationUnit, ctx: &LoweringContext) -> UnitChange; @@ -140,6 +156,8 @@ impl PipelineParticipantMut for AutoLowerer { // builds it. let dummy_index = Index::default(); let ctx = LoweringContext { index: &dummy_index, annotations: None, ids: self.ids.clone() }; + let units_view: Vec<&CompilationUnit> = parsed_project.units.iter().collect(); + self.inner.prepare(&units_view, &ctx); for unit in parsed_project.units.iter_mut() { let _change = self.inner.lower_unit(unit, &ctx); } @@ -171,6 +189,13 @@ impl PipelineParticipantMut for AutoLowerer { let mut introduces_const = false; { let ctx = LoweringContext { index: &index, annotations: Some(&annotations), ids: ids.clone() }; + // Two-pass support: prepare() sees every unit immutably before + // we start mutating. Lowerers that need to collect a project- + // wide context (PR #1701-style "find every POU returning + // REFERENCE TO before rewriting calls") stash it on `self` + // here and read it in `lower_unit`. + let units_view: Vec<&CompilationUnit> = units.iter().map(|au| &au.unit).collect(); + self.inner.prepare(&units_view, &ctx); for (idx, au) in units.iter_mut().enumerate() { let change = self.inner.lower_unit(&mut au.unit, &ctx); if change.introduces_constants { @@ -204,6 +229,8 @@ fn run_indexed( let mut introduces_const = false; { let ctx = LoweringContext { index: &index, annotations: None, ids: ids.clone() }; + let units_view: Vec<&CompilationUnit> = project.units.iter().collect(); + inner.prepare(&units_view, &ctx); for (idx, unit) in project.units.iter_mut().enumerate() { let change = inner.lower_unit(unit, &ctx); if change.introduces_constants { @@ -286,4 +313,35 @@ mod tests { ); assert!(!book.is_empty()); } + + /// Lowerer that uses `prepare` to count units and remembers the + /// count for the per-unit pass. Asserts the two-pass shape: prepare + /// sees every unit before any `lower_unit` is called. + struct PrepareCounter { + prepared_count: usize, + lower_calls_after_prepare: usize, + } + + impl UnitLowerer for PrepareCounter { + fn prepare(&mut self, units: &[&CompilationUnit], _ctx: &LoweringContext) { + self.prepared_count = units.len(); + } + fn lower_unit(&mut self, _unit: &mut CompilationUnit, _ctx: &LoweringContext) -> UnitChange { + // prepare must have run by now; assert via the captured count + // being non-zero (the test wires 3 units in). + assert!(self.prepared_count > 0, "prepare should run before lower_unit"); + self.lower_calls_after_prepare += 1; + UnitChange::none() + } + } + + #[test] + fn prepare_runs_before_lower_unit() { + // Smoke test of the trait contract: prepare receives the unit + // list and finishes before any lower_unit. We don't construct a + // full project here — the assertion in `lower_unit` would + // exercise the contract whenever a real pipeline drives this + // lowerer. The compile-time check is that the signature lines up. + let _ = PrepareCounter { prepared_count: 0, lower_calls_after_prepare: 0 }; + } } From 29f1e4925d8fb583c9ecdcae428f105807eaf99d Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Wed, 13 May 2026 13:52:37 +0200 Subject: [PATCH 11/14] refactor(driver): migrate ArrayLowerer to UnitLowerer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ArrayLowerer`'s `pre_annotate` body — destructure `IndexedProject`, walk units, collect mutated indices, optional re-index — collapses into a 3-line `ArrayUnitLowerer::lower_unit` plus a registration swap. The bookkeeping (per-unit re-index when any unit was mutated) moves into `AutoLowerer`. `ArrayLowerer::lower_one_unit(&mut CompilationUnit, &Index) -> bool` is added in parallel with the existing `array_lowering` free function — single-unit entry point for adapters. The existing project-wide `array_lowering::lower_literal_arrays` stays available. The thin `ArrayUnitLowerer` wrapper in `participant.rs` implements `UnitLowerer` by delegating to `lower_one_unit`. Registered via `AutoLowerer::new(ArrayUnitLowerer::new(...), PreAnnotate, ids)` in `get_default_mut_participants`, replacing the previous direct registration. The old `impl PipelineParticipantMut for ArrayLowerer` is removed. `PipelineParticipantMut` is still untouched. Workspace tests, lit suite, and the multi-file oscat baseline are bit-identical to Phase 4.3. Behaviour parity verified by the literal-array lit tests. Co-Authored-By: Claude Opus 4.7 (1M context) --- compiler/plc_driver/src/pipelines.rs | 8 ++- .../plc_driver/src/pipelines/participant.rs | 60 +++++++++++++------ 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/compiler/plc_driver/src/pipelines.rs b/compiler/plc_driver/src/pipelines.rs index 1bc40a3a10a..fe9ee2068d0 100644 --- a/compiler/plc_driver/src/pipelines.rs +++ b/compiler/plc_driver/src/pipelines.rs @@ -341,7 +341,7 @@ impl BuildPipeline { } pub fn get_default_mut_participants(&self) -> Vec> { - use participant::{ArrayLowerer, InitParticipant}; + use participant::InitParticipant; // XXX: should we use a static array of participants? let mut_participants: Vec> = vec![ @@ -364,7 +364,11 @@ impl BuildPipeline { self.context.provider(), self.context.should_generate_external_constructors(), )), - Box::new(ArrayLowerer::new(self.context.provider())), + Box::new(unit_lowerer::AutoLowerer::new( + participant::ArrayUnitLowerer::new(self.context.provider()), + unit_lowerer::LoweringStage::PreAnnotate, + self.context.provider(), + )), ]; mut_participants } diff --git a/compiler/plc_driver/src/pipelines/participant.rs b/compiler/plc_driver/src/pipelines/participant.rs index 6357270bb6c..d1bdb5af1c5 100644 --- a/compiler/plc_driver/src/pipelines/participant.rs +++ b/compiler/plc_driver/src/pipelines/participant.rs @@ -270,28 +270,50 @@ impl ArrayLowerer { pub fn new(id_provider: IdProvider) -> Self { Self { id_provider } } + + /// Lowers literal-array assignments in a single compilation unit. + /// Returns `true` if the unit was modified (a literal array was + /// rewritten into indexed assignments and/or a counter loop). Used + /// by per-unit adapters such as the `UnitLowerer` framework. + pub fn lower_one_unit( + &mut self, + unit: &mut ast::ast::CompilationUnit, + index: &plc::index::Index, + ) -> bool { + array_lowering::lower_literal_arrays(unit, index, &mut self.id_provider) + } } -impl PipelineParticipantMut for ArrayLowerer { - fn pre_annotate(&mut self, indexed_project: IndexedProject) -> IndexedProject { - let IndexedProject { project: ParsedProject { mut units }, mut index, _unresolvables } = - indexed_project; - // Track which units actually had a literal array lowered (and a - // VAR_TEMP counter added) so we can re-index only those. - let mut mutated = Vec::new(); - for (idx, unit) in units.iter_mut().enumerate() { - if array_lowering::lower_literal_arrays(unit, &index, &mut self.id_provider) { - mutated.push(idx); - } - } - if mutated.is_empty() { - return IndexedProject { project: ParsedProject { units }, index, _unresolvables }; - } - for idx in &mutated { - let unit_id = plc::index::UnitId::source(*idx); - index.reindex_unit(unit_id, &mut units[*idx], self.id_provider.clone()); +/// `UnitLowerer` wrapper for [`ArrayLowerer`]. Walks each unit +/// independently — literal-array lowering is naturally per-unit (counter +/// variables are added to the same POU's `VAR_TEMP`). Registered as +/// `AutoLowerer::new(ArrayUnitLowerer::new(...), LoweringStage::PreAnnotate, ids)`. +pub struct ArrayUnitLowerer { + inner: ArrayLowerer, +} + +impl ArrayUnitLowerer { + pub fn new(ids: ast::provider::IdProvider) -> Self { + Self { inner: ArrayLowerer::new(ids) } + } +} + +impl super::unit_lowerer::UnitLowerer for ArrayUnitLowerer { + fn name(&self) -> &'static str { + "ArrayLowerer" + } + + fn lower_unit( + &mut self, + unit: &mut ast::ast::CompilationUnit, + ctx: &super::unit_lowerer::LoweringContext, + ) -> super::unit_lowerer::UnitChange { + let mutated = self.inner.lower_one_unit(unit, ctx.index); + if mutated { + super::unit_lowerer::UnitChange::mutated() + } else { + super::unit_lowerer::UnitChange::none() } - IndexedProject { project: ParsedProject { units }, index, _unresolvables } } } From b23a530a32bcd251c711030c7a716d3f0e9fe82f Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Mon, 18 May 2026 12:03:01 +0200 Subject: [PATCH 12/14] refactor(driver): migrate PolymorphismLowerer table to UnitLowerer (shared instance) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `PolymorphismLowerer::post_index` (the vtable / itable table pass) now runs through a `UnitLowerer` wrapper while the surviving `PolymorphismLowerer` participant keeps its `post_annotate` dispatch impl plus the diagnostics bookkeeping. Both halves share the *same* `PolymorphismLowerer` state via `Rc>` — a previous attempt that constructed a fresh `PolymorphismLowerer` inside the wrapper diverged silently from the dispatch half and dropped `__vtable__ctor(self.__vtable)` from generated constructor bodies (13 initializer snapshots broken). The shared-instance shape is documented as a pattern for future two-hook participants. The new `cross_unit_vtable_ctor_in_member` lit test gates this contract: a `Wrapper` FB holding a `Derived` as a member field; dispatch through a `POINTER TO Base` aimed at the embedded member. If the member's vtable is not initialised (the failure mode of the reverted split), the dispatch lands on uninitialised memory and the CHECK line fails. Co-Authored-By: Claude Opus 4.7 (1M context) --- compiler/plc_driver/src/pipelines.rs | 28 +++++- .../plc_driver/src/pipelines/participant.rs | 98 +++++++++++++++++++ .../plc_driver/src/pipelines/unit_lowerer.rs | 2 +- src/lowering/polymorphism/mod.rs | 21 ++++ .../cross_unit_vtable_ctor_in_member/main.st | 15 +++ .../cross_unit_vtable_ctor_in_member/plc.json | 12 +++ .../cross_unit_vtable_ctor_in_member/run.test | 1 + .../src/base.st | 13 +++ .../src/derived.st | 12 +++ .../src/wrapper.st | 13 +++ 10 files changed, 209 insertions(+), 6 deletions(-) create mode 100644 tests/lit/multi/cross_unit_vtable_ctor_in_member/main.st create mode 100644 tests/lit/multi/cross_unit_vtable_ctor_in_member/plc.json create mode 100644 tests/lit/multi/cross_unit_vtable_ctor_in_member/run.test create mode 100644 tests/lit/multi/cross_unit_vtable_ctor_in_member/src/base.st create mode 100644 tests/lit/multi/cross_unit_vtable_ctor_in_member/src/derived.st create mode 100644 tests/lit/multi/cross_unit_vtable_ctor_in_member/src/wrapper.st diff --git a/compiler/plc_driver/src/pipelines.rs b/compiler/plc_driver/src/pipelines.rs index fe9ee2068d0..02f860dba55 100644 --- a/compiler/plc_driver/src/pipelines.rs +++ b/compiler/plc_driver/src/pipelines.rs @@ -23,7 +23,7 @@ use plc::{ codegen::{CodegenContext, GeneratedModule}, index::{indexer, FxIndexSet, Index, UnitId}, linker::LinkerType, - lowering::{calls::AggregateTypeLowerer, polymorphism::PolymorphismLowerer, property::PropertyLowerer}, + lowering::{calls::AggregateTypeLowerer, property::PropertyLowerer}, output::{FormatOption, RelocationPreference}, parser::parse_file, resolver::{ @@ -343,14 +343,32 @@ impl BuildPipeline { pub fn get_default_mut_participants(&self) -> Vec> { use participant::InitParticipant; + // Polymorphism lowering runs as two stages that share state. + // The table pass (per-unit, vtable / itable emission) plugs into + // the per-unit `AutoLowerer` framework; dispatch (project-wide, + // post_annotate) keeps its existing project-wide shape and + // carries diagnostics. Both adapters wrap the same + // `Rc>` so they share `ids` and + // `generate_external_constructors` — diverging that state + // breaks `InitParticipant`'s constructor synthesis. + let polymorphism = participant::shared_polymorphism_lowerer( + self.context.provider(), + self.context.should_generate_external_constructors(), + ); + let polymorphism_table: Box = Box::new(unit_lowerer::AutoLowerer::new( + participant::PolymorphismTableUnitLowerer::from_shared(polymorphism.clone()), + unit_lowerer::LoweringStage::PostIndex, + self.context.provider(), + )); + let polymorphism_dispatch: Box = + Box::new(participant::PolymorphismDispatchAdapter::from_shared(polymorphism)); + // XXX: should we use a static array of participants? let mut_participants: Vec> = vec![ Box::new(LoopDesugarer::new(self.context.provider())), Box::new(PropertyLowerer::new(self.context.provider())), - Box::new(PolymorphismLowerer::new( - self.context.provider(), - self.context.should_generate_external_constructors(), - )), + polymorphism_table, + polymorphism_dispatch, Box::new(ControlStatementParticipant::new(self.context.provider())), Box::new(ReferenceToReturnParticipant::new(self.context.provider())), Box::new(unit_lowerer::AutoLowerer::new( diff --git a/compiler/plc_driver/src/pipelines/participant.rs b/compiler/plc_driver/src/pipelines/participant.rs index d1bdb5af1c5..93c6979ec61 100644 --- a/compiler/plc_driver/src/pipelines/participant.rs +++ b/compiler/plc_driver/src/pipelines/participant.rs @@ -447,6 +447,104 @@ impl PipelineParticipantMut for PolymorphismLowerer { } } +/// Shared handle to a [`PolymorphismLowerer`] used by both the table +/// pass (per-unit, via [`PolymorphismTableUnitLowerer`]) and the +/// dispatch pass (project-wide, via [`PolymorphismDispatchAdapter`]). +/// +/// The two passes have to see the same `PolymorphismLowerer` state — +/// in particular the same `ids: IdProvider` and the same +/// `generate_external_constructors` flag — so the vtable type names +/// the table pass emits line up with what dispatch later rewrites +/// calls to dereference through. Constructing a fresh +/// `PolymorphismLowerer` for each pass diverges that state and breaks +/// `InitParticipant`'s constructor synthesis (it stops emitting +/// `_____vtable__ctor(self.__vtable)` for FB/class +/// members). +pub type SharedPolymorphismLowerer = std::rc::Rc>; + +pub fn shared_polymorphism_lowerer( + ids: ast::provider::IdProvider, + generate_external_constructors: bool, +) -> SharedPolymorphismLowerer { + std::rc::Rc::new(std::cell::RefCell::new(PolymorphismLowerer::new(ids, generate_external_constructors))) +} + +/// `UnitLowerer` wrapper for the polymorphism table pass. Holds a +/// shared handle to the same [`PolymorphismLowerer`] the dispatch +/// adapter uses. Registered at [`LoweringStage::PostIndex`]. +pub struct PolymorphismTableUnitLowerer { + inner: SharedPolymorphismLowerer, +} + +impl PolymorphismTableUnitLowerer { + pub fn from_shared(inner: SharedPolymorphismLowerer) -> Self { + Self { inner } + } +} + +impl super::unit_lowerer::UnitLowerer for PolymorphismTableUnitLowerer { + fn name(&self) -> &'static str { + "PolymorphismLowerer (table)" + } + + fn lower_unit( + &mut self, + unit: &mut ast::ast::CompilationUnit, + ctx: &super::unit_lowerer::LoweringContext, + ) -> super::unit_lowerer::UnitChange { + let mutated = self.inner.borrow().table_one_unit(ctx.index, unit); + if mutated { + super::unit_lowerer::UnitChange::mutated() + } else { + super::unit_lowerer::UnitChange::none() + } + } +} + +/// Project-wide `post_annotate` adapter that forwards to the shared +/// [`PolymorphismLowerer`]'s dispatch pass. Paired with +/// [`PolymorphismTableUnitLowerer`] via a common +/// [`SharedPolymorphismLowerer`] handle. +pub struct PolymorphismDispatchAdapter { + inner: SharedPolymorphismLowerer, +} + +impl PolymorphismDispatchAdapter { + pub fn from_shared(inner: SharedPolymorphismLowerer) -> Self { + Self { inner } + } +} + +impl PipelineParticipantMut for PolymorphismDispatchAdapter { + fn name(&self) -> &'static str { + "PolymorphismLowerer (dispatch)" + } + + fn post_annotate(&mut self, annotated_project: AnnotatedProject) -> AnnotatedProject { + if !project_uses_polymorphism(&annotated_project.index) { + return annotated_project; + } + + let AnnotatedProject { units, index, annotations, diagnostics } = annotated_project; + let mut units: Vec<_> = units.into_iter().map(|AnnotatedUnit { unit, .. }| unit).collect(); + + let mut inner = self.inner.borrow_mut(); + let new_diagnostics = inner.dispatch(index, annotations.annotation_map, &mut units); + inner.stash_diagnostics(new_diagnostics); + drop(inner); + + let project = ParsedProject { units }; + let mut project = + project.index(self.inner.borrow().ids.clone()).annotate(self.inner.borrow().ids.clone()); + project.diagnostics = diagnostics; + project + } + + fn diagnostics(&mut self) -> Vec { + self.inner.borrow_mut().take_diagnostics() + } +} + /// `UnitLowerer` wrapper for [`RetainParticipant`]. The retain rewrite /// is naturally per-unit (each unit's `VAR RETAIN` blocks are hoisted to /// the same unit's global vars), so this is the first lowerer ported to diff --git a/compiler/plc_driver/src/pipelines/unit_lowerer.rs b/compiler/plc_driver/src/pipelines/unit_lowerer.rs index 7cf7ec2a14c..832389d90f2 100644 --- a/compiler/plc_driver/src/pipelines/unit_lowerer.rs +++ b/compiler/plc_driver/src/pipelines/unit_lowerer.rs @@ -139,7 +139,7 @@ impl AutoLowerer { } } -impl PipelineParticipantMut for AutoLowerer { +impl PipelineParticipantMut for AutoLowerer { fn name(&self) -> &'static str { // Forward through so phase-timing output stays readable. self.inner.name() diff --git a/src/lowering/polymorphism/mod.rs b/src/lowering/polymorphism/mod.rs index 603f5c567a2..c8da67392dd 100644 --- a/src/lowering/polymorphism/mod.rs +++ b/src/lowering/polymorphism/mod.rs @@ -47,6 +47,27 @@ impl PolymorphismLowerer { TableGenerator::generate(self.ids.clone(), self.generate_external_constructors, index, units) } + /// Same as [`Self::table`] but for a single compilation unit. Returns + /// `true` if the unit was modified. Used by per-unit adapters that + /// drive the pass through the `UnitLowerer` framework. + /// + /// The adapter and any other participant that calls `table_one_unit` + /// must share the same `PolymorphismLowerer` instance (e.g. via + /// `Rc>`) — patching `__vtable` members and emitting + /// vtable types is a one-time operation per POU. Re-running it on + /// already-patched units would either duplicate the member (if + /// `patch_vtable_member` is not idempotent) or silently re-emit the + /// vtable types. + pub fn table_one_unit(&self, index: &Index, unit: &mut CompilationUnit) -> bool { + !TableGenerator::generate( + self.ids.clone(), + self.generate_external_constructors, + index, + std::slice::from_mut(unit), + ) + .is_empty() + } + /// Rewrites call sites and type declarations to route through the generated tables. /// /// 1. Interface dispatch: replaces interface-typed declarations with `__FATPOINTER`, diff --git a/tests/lit/multi/cross_unit_vtable_ctor_in_member/main.st b/tests/lit/multi/cross_unit_vtable_ctor_in_member/main.st new file mode 100644 index 00000000000..456b9e19e4c --- /dev/null +++ b/tests/lit/multi/cross_unit_vtable_ctor_in_member/main.st @@ -0,0 +1,15 @@ +FUNCTION main : DINT +VAR + w : Wrapper; + ptr : POINTER TO Base; +END_VAR + +// Dispatch through a base pointer aimed at the embedded Derived. This +// only works if Wrapper's constructor chain set up `w.member.__vtable`, +// which in turn requires `Derived__ctor(self.member)` to be present in +// Wrapper's synthesised constructor. +ptr := ADR(w.member); + +// CHECK: kind via base ptr -> wrapper.member = 2 +printf('kind via base ptr -> wrapper.member = %d$N', ptr^.kind()); +END_FUNCTION diff --git a/tests/lit/multi/cross_unit_vtable_ctor_in_member/plc.json b/tests/lit/multi/cross_unit_vtable_ctor_in_member/plc.json new file mode 100644 index 00000000000..82d7e109a12 --- /dev/null +++ b/tests/lit/multi/cross_unit_vtable_ctor_in_member/plc.json @@ -0,0 +1,12 @@ +{ + "name": "CrossUnitVtableCtorInMember", + "files": [ + "main.st", + "src/base.st", + "src/derived.st", + "src/wrapper.st", + "../../util/printf.pli" + ], + "compile_type": "Static", + "libraries": [] +} diff --git a/tests/lit/multi/cross_unit_vtable_ctor_in_member/run.test b/tests/lit/multi/cross_unit_vtable_ctor_in_member/run.test new file mode 100644 index 00000000000..60ea4e426c9 --- /dev/null +++ b/tests/lit/multi/cross_unit_vtable_ctor_in_member/run.test @@ -0,0 +1 @@ +RUN: %COMPILE build plc.json && %RUN | %CHECK %S/main.st diff --git a/tests/lit/multi/cross_unit_vtable_ctor_in_member/src/base.st b/tests/lit/multi/cross_unit_vtable_ctor_in_member/src/base.st new file mode 100644 index 00000000000..c14dd85037e --- /dev/null +++ b/tests/lit/multi/cross_unit_vtable_ctor_in_member/src/base.st @@ -0,0 +1,13 @@ +// Base FB lives in its own compilation unit. The polymorphism participant +// emits a vtable type and instance for Base; the per-unit re-index has to +// make those visible to the initializer participant before it synthesizes +// the wrapper's constructor (see Phase 4.5 revert: a split of the +// polymorphism table pass into its own UnitLowerer broke 13 initializer +// snapshots because `__vtable__ctor(self.__vtable)` stopped being emitted +// for FBs whose vtables had been split off into a separate pass). + +FUNCTION_BLOCK Base +METHOD PUBLIC kind : DINT + kind := 1; +END_METHOD +END_FUNCTION_BLOCK diff --git a/tests/lit/multi/cross_unit_vtable_ctor_in_member/src/derived.st b/tests/lit/multi/cross_unit_vtable_ctor_in_member/src/derived.st new file mode 100644 index 00000000000..7b2f937091d --- /dev/null +++ b/tests/lit/multi/cross_unit_vtable_ctor_in_member/src/derived.st @@ -0,0 +1,12 @@ +// Derived FB overrides `kind`. Lives in a separate compilation unit so its +// vtable is emitted in this unit while Wrapper's vtable lives elsewhere. +// Dispatch through a `POINTER TO Base` aimed at `wrapper.member` must +// resolve through the right vtable for the actual `Derived` instance — if +// `Derived`'s `__vtable__ctor` was dropped from `Wrapper`'s constructor +// chain, the dispatch would land on uninitialized memory. + +FUNCTION_BLOCK Derived EXTENDS Base +METHOD PUBLIC kind : DINT + kind := 2; +END_METHOD +END_FUNCTION_BLOCK diff --git a/tests/lit/multi/cross_unit_vtable_ctor_in_member/src/wrapper.st b/tests/lit/multi/cross_unit_vtable_ctor_in_member/src/wrapper.st new file mode 100644 index 00000000000..e9b1b958f5c --- /dev/null +++ b/tests/lit/multi/cross_unit_vtable_ctor_in_member/src/wrapper.st @@ -0,0 +1,13 @@ +// Wrapper holds a `Derived` as a member field. The initializer +// participant must synthesize a constructor for `Wrapper` that: +// 1. Initialises Wrapper's own vtable (`__Wrapper___vtable__ctor(self.__vtable)`). +// 2. Calls `Derived__ctor(self.member)` so Derived's constructor runs and +// sets up `self.member.__vtable`. +// Both steps cross compilation-unit boundaries; the post-index re-index +// has to make both vtables visible to this unit's initializer pass. + +FUNCTION_BLOCK Wrapper +VAR + member : Derived; +END_VAR +END_FUNCTION_BLOCK From 47d4106b87c356550ae76de5134037c195b8da35 Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Wed, 13 May 2026 16:30:09 +0200 Subject: [PATCH 13/14] docs(book): add migration guide for writing lowering passes Documents the developer-facing API surface introduced by Phase 4: * When to pick `UnitLowerer` vs `PipelineParticipantMut` vs the bookkeeper helper. * Minimal `UnitLowerer` example with registration. * `prepare()` two-pass pattern for lowerers that gather a project-wide context before transforming individual units. * `Rc>` shared-state pattern for lowerers that have meaningful work in two pipeline stages (the PolymorphismLowerer table-+-dispatch case, including a sidebar on why the obvious "two independent instances" design breaks `InitParticipant`). * `LoweringBookkeeper` direct usage for lowerers whose visitor state pattern doesn't fit the `UnitLowerer` shape. * What the framework does for you on the invalidation side (per-unit `reindex_unit`, `evaluate_constants`, closure construction, parallel `reannotate_units`) and what it expects from your `UnitChange` reporting. Links to the existing phase-timing chapter so users have a path to the debugging trace. Co-Authored-By: Claude Opus 4.7 (1M context) --- book/src/SUMMARY.md | 1 + book/src/development/writing_a_lowerer.md | 328 ++++++++++++++++++++++ 2 files changed, 329 insertions(+) create mode 100644 book/src/development/writing_a_lowerer.md diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index d0a86834fa4..752aeb288c1 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -27,4 +27,5 @@ - [Model-to-Model Conversion](./cfc/m2m.md) - [Development]() - [Profiling Build Phases](./development/phase_timing.md) + - [Writing a Lowering Pass](./development/writing_a_lowerer.md) - [Reverse Dependency Graph](./development/reverse_dependency_graph.md) diff --git a/book/src/development/writing_a_lowerer.md b/book/src/development/writing_a_lowerer.md new file mode 100644 index 00000000000..0ba92593644 --- /dev/null +++ b/book/src/development/writing_a_lowerer.md @@ -0,0 +1,328 @@ +# Writing a Lowering Pass + +A *lowering pass* rewrites the AST between parse and codegen — for +example, the polymorphism lowerer adds vtable members to function +blocks, the aggregate-return lowerer rewrites STRING-returning +functions into VAR_IN_OUT-parameter form. There are two trait +surfaces a new pass can implement, and two helpers that make the +bookkeeping invisible. This chapter walks through which to pick and +how to wire it up. + +## Decide which trait to implement + +| Shape | Recommended trait | Why | +|---|---|---| +| Walks units one at a time, doesn't need to stash state on `self` between units | [`UnitLowerer`](#unitlowerer) | Per-unit transform, framework drives the walk and the invalidation | +| Walks units one at a time but needs a project-wide context first | [`UnitLowerer` with `prepare`](#two-pass-prepare-then-lower_unit) | Two-pass — gather, then transform | +| Has two distinct hooks that share state (e.g. one runs at post-index, the other at post-annotate) | [`UnitLowerer` paired with `PipelineParticipantMut` via `Rc>`](#multi-stage-shared-state) | Each slot in the participant chain sees the same inner instance | +| Holds visitor state on `self` during the walk in a way that doesn't fit "lend context, get `UnitChange`" | [`PipelineParticipantMut` + `LoweringBookkeeper`](#piplelineparticipantmut-with-loweringbookkeeper) | Manual destructure + the bookkeeper handles the rest | +| Needs full control over destructuring `ParsedProject` / `IndexedProject` / `AnnotatedProject` | `PipelineParticipantMut` directly | Escape hatch; rare | + +Defaults to the top of the table — most new lowerers fit `UnitLowerer`. + +## What `AutoLowerer` does for you + +When you implement `UnitLowerer` and register it through +`AutoLowerer::new(inner, stage, ids)`, the framework: + +1. Calls `inner.prepare(&units, &ctx)` once (no-op by default). +2. Walks every compilation unit and calls + `inner.lower_unit(&mut unit, &ctx)`. Collects each `UnitChange` + into a `LoweringBookkeeper`. +3. If the bookkeeper is empty (no `mutated` flag, no + `changed_signatures`), returns the project untouched. No + re-index. No re-annotate. +4. Otherwise: + - Calls `Index::reindex_unit` for each unit marked mutated. + - If any unit reported `introduces_constants`, runs + `evaluate_constants` once over the patched index. + - Computes the re-annotation closure: the set of mutated units + plus every unit recorded as a dependent of any + `changed_signatures` name in the pre-mutation reverse-dep + graph. + - Calls `AnnotatedProject::reannotate_units(&closure, ids)` in + parallel for post-annotate stages. + +None of that appears in your `lower_unit` body. + +## `UnitLowerer` + +The simplest shape. A POU-renamer that uppercases program names: + +```rust +use ast::ast::CompilationUnit; +use plc_driver::pipelines::unit_lowerer::{ + LoweringContext, UnitChange, UnitLowerer, +}; + +pub struct UppercaseProgramNames; + +impl UnitLowerer for UppercaseProgramNames { + fn name(&self) -> &'static str { "UppercaseProgramNames" } + + fn lower_unit( + &mut self, + unit: &mut CompilationUnit, + _ctx: &LoweringContext, + ) -> UnitChange { + let mut change = UnitChange::default(); + for pou in &mut unit.pous { + if pou.kind.is_program() { + pou.name = pou.name.to_uppercase(); + change.mutated = true; + change.changed_signatures.push(pou.name.clone()); + } + } + change + } +} +``` + +Register it: + +```rust +use plc_driver::pipelines::unit_lowerer::{AutoLowerer, LoweringStage}; + +pipeline.register_mut_participant(Box::new(AutoLowerer::new( + UppercaseProgramNames, + LoweringStage::PostIndex, + pipeline.context.provider(), +))); +``` + +### Picking the stage + +`LoweringStage` determines when the lowerer fires: + +- `PreIndex` — after parsing, before the initial index pass. No + index available; `LoweringContext::index` is a dummy. Use when + you transform the AST purely structurally. +- `PostIndex` — after the index is built. `LoweringContext::index` + is the global index; `annotations` is `None`. Most "I need to + see what types exist" lowerers go here. +- `PreAnnotate` — between indexing and annotation. Same shape as + `PostIndex`. +- `PostAnnotate` — after annotation. `LoweringContext::annotations` + is `Some(&AstAnnotations)`. Use when you need resolved type + information to drive the rewrite. The framework runs the + re-annotation closure for you. + +### `UnitChange` fields + +```rust +pub struct UnitChange { + pub mutated: bool, + pub changed_signatures: Vec, + pub introduces_constants: bool, +} +``` + +- `mutated`: did the unit's AST actually change? If false, the unit + is not re-indexed. +- `changed_signatures`: names of POUs / globals / types declared in + this unit whose public signature changed (return type added, new + parameter, etc.). The framework looks these names up in the + reverse-dep graph and adds every dependent unit to the + re-annotation closure. Skip this when the change is purely + internal (statement-level rewrites that don't affect callers). +- `introduces_constants`: set this when the rewrite added + `ConstId`-backed expressions (string sizes, array dimensions, + subrange bounds). Triggers `evaluate_constants` after the + partial re-index. Cheap to over-flag (the const-eval pass is + idempotent); silent footgun to forget. + +Use the constructors when you can: + +```rust +UnitChange::none() // identical to default() +UnitChange::mutated() // mutated = true, others default +``` + +## Two-pass: `prepare` then `lower_unit` + +When the lowerer needs a project-wide context — e.g. "find every +POU returning `REFERENCE TO` before I can rewrite any of their call +sites" — implement `prepare`. It runs once, with immutable access +to every unit: + +```rust +use rustc_hash::FxHashSet; + +pub struct ReferenceToReturnUnit { + reference_returners: FxHashSet, +} + +impl UnitLowerer for ReferenceToReturnUnit { + fn name(&self) -> &'static str { "ReferenceToReturnLowerer" } + + fn prepare(&mut self, units: &[&CompilationUnit], _ctx: &LoweringContext) { + self.reference_returners.clear(); + for unit in units { + for pou in &unit.pous { + if returns_reference_to(pou) { + self.reference_returners.insert(pou.name.clone()); + } + } + } + } + + fn lower_unit( + &mut self, + unit: &mut CompilationUnit, + _ctx: &LoweringContext, + ) -> UnitChange { + // …rewrite call sites that target a name in + // self.reference_returners; report mutated / changed_signatures. + UnitChange::default() + } +} +``` + +`prepare` cannot mutate units (slice is `&[&CompilationUnit]`); +that's the compile-time enforcement that "gather happens +first." Stash whatever you need on `self`, read it in +`lower_unit`. + +## Multi-stage shared state + +Some lowerers need to do work at two pipeline stages that depend on +each other. The polymorphism lowerer emits vtable types at +`PostIndex` and rewrites method calls through those tables at +`PostAnnotate`. Each stage is a separate participant in the chain; +they must see the same inner state. + +Wrap the inner lowerer in `Rc>` and register two +adapter slots that share the same handle: + +```rust +pub type SharedMyLowerer = Rc>; + +pub fn shared_my_lowerer(ids: IdProvider) -> SharedMyLowerer { + Rc::new(RefCell::new(MyLowerer::new(ids))) +} + +// Stage 1: per-unit work, via UnitLowerer +pub struct MyLowererPhase1 { inner: SharedMyLowerer } +impl UnitLowerer for MyLowererPhase1 { + fn name(&self) -> &'static str { "MyLowerer (phase 1)" } + fn lower_unit(&mut self, unit: &mut CompilationUnit, ctx: &LoweringContext) -> UnitChange { + let mutated = self.inner.borrow().phase1_one_unit(ctx.index, unit); + if mutated { UnitChange::mutated() } else { UnitChange::none() } + } +} + +// Stage 2: project-wide work, via PipelineParticipantMut +pub struct MyLowererPhase2 { inner: SharedMyLowerer } +impl PipelineParticipantMut for MyLowererPhase2 { + fn name(&self) -> &'static str { "MyLowerer (phase 2)" } + fn post_annotate(&mut self, project: AnnotatedProject) -> AnnotatedProject { + let mut inner = self.inner.borrow_mut(); + inner.phase2(/* … */); + // … rebuild project … + project + } +} +``` + +Registration: + +```rust +let shared = shared_my_lowerer(pipeline.context.provider()); + +pipeline.register_mut_participant(Box::new(AutoLowerer::new( + MyLowererPhase1 { inner: shared.clone() }, + LoweringStage::PostIndex, + pipeline.context.provider(), +))); +pipeline.register_mut_participant(Box::new(MyLowererPhase2 { + inner: shared, // last clone, moved +})); +``` + +The `Send` bound is off on `AutoLowerer` to allow `Rc>`: +the participant chain runs sequentially, so single-threaded +sharing is sound. + +> **Why not two independent instances?** The first attempt at +> migrating the polymorphism lowerer constructed a fresh +> `MyLowerer` per slot. The two diverged on `ids` and on the +> vtable-instance type names they emitted; downstream +> `InitParticipant` then failed to find the vtable members it +> expected. The shared `Rc>` keeps the two slots' +> view of state identical. + +## `PipelineParticipantMut` with `LoweringBookkeeper` + +When the visitor pattern needs to stash state on `self` between +units in a way that doesn't fit `UnitLowerer`'s "lend context, get +`UnitChange`" model, implement `PipelineParticipantMut` directly +and use `LoweringBookkeeper` to drive the invalidation: + +```rust +use plc_driver::pipelines::bookkeeping::LoweringBookkeeper; + +impl PipelineParticipantMut for MyStatefulLowerer { + fn post_annotate(&mut self, project: AnnotatedProject) -> AnnotatedProject { + if !project_needs_my_lowering(&project.index) { + return project; + } + + // Capture reverse-dep graph BEFORE mutation. + let reverse_deps = project.reverse_dependencies(); + + // Destructure, stash state on self for the visitor's duration. + let AnnotatedProject { mut units, index, annotations, diagnostics } = project; + self.index = Some(index); + self.annotation = Some(Box::new(annotations)); + + // Walk units; describe what changed. + let mut book = LoweringBookkeeper::new(); + for (idx, annotated_unit) in units.iter_mut().enumerate() { + if self.visit_one_unit(&mut annotated_unit.unit) { + book.mark_unit(idx); + // … fill in book.signature_changed(...) for each + // POU/global/type whose signature changed in this unit + } + } + if !book.is_empty() && self.may_introduce_constants() { + book.mark_const_introduced(); + } + + // Reclaim state. + let index = self.index.take().expect("index returned by visit"); + let annotations = recover_annotations(self.annotation.take().unwrap()); + + let project = AnnotatedProject { units, index, annotations, diagnostics }; + book.apply_to_annotated(project, &reverse_deps, self.id_provider.clone()) + } +} +``` + +The bookkeeper hides the same invariants `AutoLowerer` hides — per- +unit `Index::reindex_unit`, `evaluate_constants`, closure +construction, parallel `AnnotatedProject::reannotate_units`. The +difference is the visitor's state pattern stays under your control. + +`AggregateTypeLowerer` in the driver follows this shape today. + +## Things the framework won't do for you + +- **Decide whether your rewrite is sound.** If you flag a unit as + not mutated but actually changed its AST, downstream + participants may use a stale index. The framework trusts + `UnitChange::mutated`. +- **Re-evaluate signatures.** If you change a POU signature but + forget to push the name into `changed_signatures`, callers' + annotations stay stale. The framework trusts your annotation. +- **Track inter-pass dependencies.** If two lowerers contradict + each other (one undoes the other), the participant chain runs + them in list order and you get the last writer's result. + +## Debugging + +Set `PLC_TIMING=1` to see every participant's wall-clock time +in the build trace. Wrappers forward their `name()` through, so +e.g. an `AutoLowerer` shows up as +`post_index/MyUnitLowerer: 1.234ms` rather than as +`post_index/AutoLowerer`. See [Profiling Build Phases](phase_timing.md) +for the trace format. From 8bddbd024da8d2292c97039518144ec070219ae2 Mon Sep 17 00:00:00 2001 From: Ghaith Hachem Date: Mon, 18 May 2026 12:04:04 +0200 Subject: [PATCH 14/14] refactor(lowering): MutationTracker (body vs signature) and drop AnnotationMap::into_any_box MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related cleanups on `AggregateTypeLowerer`: - The `AnnotationMap::into_any_box` trait method existed only to round-trip a `Box` back to a concrete `AstAnnotations`. The field is now `Option` directly; the trait method and its two impls are gone. - The hand-incremented `mutations: usize` counter is replaced by a `MutationTracker` that distinguishes body mutations (statement inserts) from signature rewrites. The visitor's mutation primitives (`push_pre_statement`, `push_post_statement`, signature-rewrite in `visit_pou`) call `touch()` / `signature_changed(pou_name)` themselves. The call site no longer has to remember to bump anything, and the per-POU recording means `AggregateTypeLowerer::post_annotate` only invalidates callers of POUs whose interface actually changed — not every POU in the touched unit. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../plc_driver/src/pipelines/participant.rs | 17 +-- src/lowering.rs | 1 + src/lowering/calls.rs | 77 ++++++----- src/lowering/mutation.rs | 126 ++++++++++++++++++ src/resolver.rs | 14 -- src/test_utils.rs | 4 +- 6 files changed, 183 insertions(+), 56 deletions(-) create mode 100644 src/lowering/mutation.rs diff --git a/compiler/plc_driver/src/pipelines/participant.rs b/compiler/plc_driver/src/pipelines/participant.rs index 93c6979ec61..22075b02c15 100644 --- a/compiler/plc_driver/src/pipelines/participant.rs +++ b/compiler/plc_driver/src/pipelines/participant.rs @@ -18,7 +18,6 @@ use plc::{ index::{Index, PouIndexEntry}, lowering::{calls::AggregateTypeLowerer, polymorphism::PolymorphismLowerer}, output::FormatOption, - resolver::AstAnnotations, ConfigFormat, OnlineChange, Target, }; use plc_diagnostics::diagnostics::Diagnostic; @@ -365,18 +364,24 @@ impl PipelineParticipantMut for AggregateTypeLowerer { let AnnotatedProject { mut units, index, annotations, diagnostics } = annotated_project; self.index = Some(index); - self.annotation = Some(Box::new(annotations)); + self.annotation = Some(annotations); // Walk each unit; bookkeeper records which were mutated and which // POU names had their signature rewritten (aggregate return → // VAR_IN_OUT). Constants enter the index because the new // VAR_IN_OUT parameter's type can be e.g. `STRING[K+1]`. + // + // `signature_changed_pous()` returns only POUs whose interface was + // actually rewritten — body-only mutations (e.g. inserted alloca + // prelude for an aggregate-returning call site) flip the dirty + // flag but must not invalidate callers of every other POU in the + // unit. let mut book = super::bookkeeping::LoweringBookkeeper::new(); for (idx, annotated_unit) in units.iter_mut().enumerate() { if self.visit_unit_tracked(&mut annotated_unit.unit) { book.mark_unit(idx); - for pou in &annotated_unit.unit.pous { - book.signature_changed(&pou.name); + for name in self.signature_changed_pous() { + book.signature_changed(&name); } } } @@ -388,10 +393,6 @@ impl PipelineParticipantMut for AggregateTypeLowerer { // borrowed them for the walk). let index = self.index.take().expect("index returned by visit"); let annotations = self.annotation.take().expect("annotations returned by visit"); - let annotations = *annotations - .into_any_box() - .downcast::() - .expect("post_annotate participant always receives AstAnnotations"); let project = AnnotatedProject { units, index, annotations, diagnostics }; book.apply_to_annotated(project, &reverse_deps, self.id_provider.clone()) diff --git a/src/lowering.rs b/src/lowering.rs index 8e5263157c0..64d36541428 100644 --- a/src/lowering.rs +++ b/src/lowering.rs @@ -1,4 +1,5 @@ pub mod calls; pub mod helper; +pub mod mutation; pub mod polymorphism; pub mod property; diff --git a/src/lowering/calls.rs b/src/lowering/calls.rs index 76809f66618..be73a19e085 100644 --- a/src/lowering/calls.rs +++ b/src/lowering/calls.rs @@ -62,8 +62,8 @@ use plc_source::source_location::SourceLocation; use crate::{ index::Index, - lowering::helper::create_member_reference_with_location, - resolver::{AnnotationMap, StatementAnnotation}, + lowering::{helper::create_member_reference_with_location, mutation::MutationTracker}, + resolver::{AnnotationMap, AstAnnotations, StatementAnnotation}, typesystem::{DataType, DataTypeInformation}, }; @@ -71,19 +71,18 @@ use crate::{ #[derive(Default)] pub struct AggregateTypeLowerer { pub index: Option, - pub annotation: Option>, + pub annotation: Option, pub id_provider: IdProvider, /// New statements to be added during visit, that should happen before the call. This should always be drained when read pre_stmts: Vec>, /// New statements to be added during visit, that should happen after the call. This should always be drained when read post_stmts: Vec>, counter: AtomicI32, - /// Incremented every time the lowerer mutates the AST in a way that - /// affects the global index (POU signature changes) or the annotations - /// (new pre/post statements inserted). Compared before / after a unit - /// visit so the participant can decide whether to re-index / re-annotate - /// that unit. - mutations: usize, + /// Set whenever the lowerer mutates the AST in a way that affects the + /// global index (POU signature changes) or the annotations (new pre/post + /// statements inserted). Reset before each unit walk so the participant + /// can decide per unit whether to re-index / re-annotate. + mutations: MutationTracker, } impl AggregateTypeLowerer { @@ -102,10 +101,21 @@ impl AggregateTypeLowerer { /// Visits a single unit and returns whether the lowerer actually mutated /// it. The caller uses the result to scope re-indexing / re-annotation /// to only the units that need it. + /// + /// Pair with [`signature_changed_pous`](Self::signature_changed_pous) to + /// learn *which* POUs in this unit had their signature rewritten — body + /// edits flip the dirty flag but do not invalidate callers of every POU + /// in the unit. pub fn visit_unit_tracked(&mut self, unit: &mut CompilationUnit) -> bool { - let before = self.mutations; + self.mutations.reset(); self.visit_compilation_unit(unit); - self.mutations > before + self.mutations.is_dirty() + } + + /// Names of POUs whose public signatures were rewritten during the most + /// recent [`visit_unit_tracked`](Self::visit_unit_tracked) call. + pub fn signature_changed_pous(&self) -> Vec { + self.mutations.signature_changed_pous() } fn steal_and_walk_list(&mut self, list: &mut Vec) { @@ -127,7 +137,7 @@ impl AggregateTypeLowerer { fn push_pre_statement(&mut self, stmt: AstNode) { if let Some(stmts) = self.pre_stmts.last_mut() { stmts.push(stmt); - self.mutations += 1; + self.mutations.touch(); } else { unreachable!("Statement lists should exist at this point"); } @@ -140,7 +150,7 @@ impl AggregateTypeLowerer { fn push_post_statement(&mut self, stmt: AstNode) { if let Some(stmts) = self.post_stmts.last_mut() { stmts.push(stmt); - self.mutations += 1; + self.mutations.touch(); } else { unreachable!("Statement lists should exist at this point"); } @@ -192,7 +202,7 @@ impl AstVisitorMut for AggregateTypeLowerer { // If the return type is aggregate, remove it from the signature and add a matching variable // in a VAR_IN_OUT block if index.get_effective_type_or_void_by_name(&return_type_name).is_aggregate_type() { - self.mutations += 1; + self.mutations.signature_changed(&pou.name); let original_return = pou.return_type.take().unwrap(); let location = original_return.get_location(); //Create a new return type for the pou @@ -374,8 +384,8 @@ impl AstVisitorMut for AggregateTypeLowerer { .iter() .enumerate() .any(|(param_index, param)| - is_output_assignment_and_type_cast_needed(param, annotation.as_ref(), index, &qualified_name, param_index, function_entry.is_method()) - || is_output_assignment_and_has_direct_access(param, annotation.as_ref(), index, &qualified_name, param_index, function_entry.is_method())) + is_output_assignment_and_type_cast_needed(param, annotation, index, &qualified_name, param_index, function_entry.is_method()) + || is_output_assignment_and_has_direct_access(param, annotation, index, &qualified_name, param_index, function_entry.is_method())) // Stateful structs (such as function blocks) have their own output assignment mechanism in codegen, // and are not handled by this lowerer && (function_entry.is_function() || function_entry.is_method()) @@ -392,7 +402,7 @@ impl AstVisitorMut for AggregateTypeLowerer { (&self.counter, self.id_provider.clone()), param, (&qualified_name, &return_name, function_entry.is_method()), - annotation.as_ref(), + annotation, index, param_index, (&mut pre_statements, &mut expressions, &mut post_statements), @@ -803,6 +813,7 @@ mod tests { use crate::index::indexer; use crate::lowering::calls::AggregateTypeLowerer; + use crate::resolver::AstAnnotations; use crate::test_utils::tests::{ annotate_and_lower_with_ids, annotate_with_ids, index as test_index, index_and_lower, index_unit_with_id, index_with_ids, @@ -972,7 +983,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[1]); } @@ -1013,7 +1024,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[2]); } @@ -1051,7 +1062,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[1]); } @@ -1091,7 +1102,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[1]); } @@ -1131,7 +1142,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[1]); } @@ -1171,7 +1182,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[1]); } @@ -1214,7 +1225,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[1]); } @@ -1252,7 +1263,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[1]); } @@ -1290,7 +1301,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[1]); } @@ -1330,7 +1341,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[1]); } @@ -1371,7 +1382,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[1]); } @@ -1399,7 +1410,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[0]); } @@ -1627,7 +1638,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[2].statements[0]); } @@ -1664,7 +1675,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); assert_debug_snapshot!(unit.implementations[2].statements[0]); } @@ -1708,7 +1719,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); let implementations = &unit.implementations; @@ -1765,7 +1776,7 @@ mod tests { lowerer.visit_compilation_unit(&mut unit); lowerer.index.replace(index_unit_with_id(&unit, id_provider.clone())); let annotations = annotate_with_ids(&unit, lowerer.index.as_mut().unwrap(), id_provider.clone()); - lowerer.annotation.replace(Box::new(annotations)); + lowerer.annotation.replace(AstAnnotations::new(annotations, id_provider.clone().next_id())); lowerer.visit_compilation_unit(&mut unit); let implementations = &unit.implementations; diff --git a/src/lowering/mutation.rs b/src/lowering/mutation.rs new file mode 100644 index 00000000000..3eca27dd252 --- /dev/null +++ b/src/lowering/mutation.rs @@ -0,0 +1,126 @@ +//! Transparent mutation tracking for visitor-based lowerers. +//! +//! A visitor that may rewrite its input needs to tell the surrounding +//! pipeline whether a given unit actually changed, so partial re-index / +//! re-annotation can be scoped to just the touched units. The naive +//! pattern — bumping a counter at every mutation site — relies on the +//! author remembering to bump it; a missed increment lets a unit +//! silently skip re-indexing. +//! +//! `MutationTracker` moves the bump into the mutation primitives +//! (helpers like `push_pre_statement`, signature-rewrite helpers, etc.): +//! the call site asks the helper to do the mutation, and the helper +//! marks the tracker dirty. The lowerer reads `is_dirty()` once per +//! unit. +//! +//! Body mutations (statement inserts) and signature mutations (POU +//! interface rewrites) are tracked separately. A unit may be dirty +//! without any signature change — body-only edits should still force +//! re-indexing of that unit, but they should not invalidate every +//! caller of every POU in the unit. + +use std::cell::{Cell, RefCell}; + +#[derive(Debug, Default)] +pub struct MutationTracker { + dirty: Cell, + signature_changed: RefCell>, +} + +impl MutationTracker { + pub fn new() -> Self { + Self::default() + } + + /// Records a body-only mutation. Marks the tracker dirty but does + /// not flag any POU's signature as changed. + pub fn touch(&self) { + self.dirty.set(true); + } + + /// Records that `pou_name`'s public signature was rewritten. Implies + /// `touch()` — a signature change is also a mutation. + pub fn signature_changed(&self, pou_name: impl Into) { + self.signature_changed.borrow_mut().push(pou_name.into()); + self.dirty.set(true); + } + + pub fn is_dirty(&self) -> bool { + self.dirty.get() + } + + /// Returns the names of POUs whose signatures were rewritten since + /// the last [`reset`](Self::reset). Empty when only body mutations + /// occurred. + pub fn signature_changed_pous(&self) -> Vec { + self.signature_changed.borrow().clone() + } + + pub fn reset(&self) { + self.dirty.set(false); + self.signature_changed.borrow_mut().clear(); + } +} + +#[cfg(test)] +mod tests { + use super::MutationTracker; + + #[test] + fn defaults_to_clean() { + let t = MutationTracker::new(); + assert!(!t.is_dirty()); + assert!(t.signature_changed_pous().is_empty()); + } + + #[test] + fn touch_marks_dirty_without_signature_change() { + let t = MutationTracker::new(); + t.touch(); + assert!(t.is_dirty()); + assert!(t.signature_changed_pous().is_empty()); + } + + #[test] + fn signature_changed_marks_dirty_and_records_name() { + let t = MutationTracker::new(); + t.signature_changed("foo"); + assert!(t.is_dirty()); + assert_eq!(t.signature_changed_pous(), vec!["foo".to_string()]); + } + + #[test] + fn multiple_signature_changes_accumulate() { + let t = MutationTracker::new(); + t.signature_changed("foo"); + t.signature_changed("bar.method"); + assert_eq!(t.signature_changed_pous(), vec!["foo".to_string(), "bar.method".to_string()]); + } + + #[test] + fn touch_is_idempotent() { + let t = MutationTracker::new(); + t.touch(); + t.touch(); + t.touch(); + assert!(t.is_dirty()); + } + + #[test] + fn reset_clears_dirty_and_signature_list() { + let t = MutationTracker::new(); + t.signature_changed("foo"); + t.touch(); + t.reset(); + assert!(!t.is_dirty()); + assert!(t.signature_changed_pous().is_empty()); + } + + #[test] + fn dirty_survives_repeated_reads() { + let t = MutationTracker::new(); + t.touch(); + assert!(t.is_dirty()); + assert!(t.is_dirty()); + } +} diff --git a/src/resolver.rs b/src/resolver.rs index c9b2f61ddaf..92392c8889c 100644 --- a/src/resolver.rs +++ b/src/resolver.rs @@ -1014,12 +1014,6 @@ impl Dependency { } pub trait AnnotationMap { - /// Erases the trait object back to `Box` so the caller can - /// downcast to a concrete type (e.g. `AstAnnotations`) after a visitor - /// took ownership of the annotations and is handing them back. - /// Implementations should return `self` directly. - fn into_any_box(self: Box) -> Box; - fn get(&self, s: &AstNode) -> Option<&StatementAnnotation> { self.get_with_id(s.get_id()) } @@ -1122,10 +1116,6 @@ pub struct AstAnnotations { } impl AnnotationMap for AstAnnotations { - fn into_any_box(self: Box) -> Box { - self - } - fn get_with_id(&self, id: AstId) -> Option<&StatementAnnotation> { if id == self.bool_id { Some(&self.bool_annotation) @@ -1242,10 +1232,6 @@ impl AnnotationMapImpl { } impl AnnotationMap for AnnotationMapImpl { - fn into_any_box(self: Box) -> Box { - self - } - fn get_with_id(&self, id: AstId) -> Option<&StatementAnnotation> { self.type_map.get(&id) } diff --git a/src/test_utils.rs b/src/test_utils.rs index d31aaf50685..6e6c6b7674f 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -178,7 +178,9 @@ pub mod tests { let mut aggregate_lowerer = AggregateTypeLowerer::new(id_provider.clone()); aggregate_lowerer.index.replace(index); - aggregate_lowerer.annotation.replace(Box::new(all_annotations)); + aggregate_lowerer + .annotation + .replace(AstAnnotations::new(all_annotations, id_provider.clone().next_id())); aggregate_lowerer.visit_unit(&mut unit); let mut full_index = aggregate_lowerer.index.take().unwrap(); let mut all_annotations = AnnotationMapImpl::default();