diff --git a/Cargo.lock b/Cargo.lock index 46090621..801cd0cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -148,7 +148,7 @@ dependencies = [ [[package]] name = "bender-slang" -version = "0.1.0" +version = "0.1.1" dependencies = [ "cmake", "cxx", diff --git a/Cargo.toml b/Cargo.toml index 0b163bf1..25669ce4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ inherits = "release" lto = "thin" [dependencies] -bender-slang = { version = "0.1.0", path = "crates/bender-slang", optional = true } +bender-slang = { version = "0.1.1", path = "crates/bender-slang", optional = true } serde = { version = "1", features = ["derive"] } serde_yaml_ng = "0.10" diff --git a/crates/bender-slang/Cargo.toml b/crates/bender-slang/Cargo.toml index 005d3fc3..7e59f576 100644 --- a/crates/bender-slang/Cargo.toml +++ b/crates/bender-slang/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bender-slang" -version = "0.1.0" +version = "0.1.1" edition = "2024" description = "Internal bender crate: Rust bindings for the Slang SystemVerilog parser" license = "Apache-2.0" diff --git a/crates/bender-slang/cpp/analysis.cpp b/crates/bender-slang/cpp/analysis.cpp index c1989b5d..1233d261 100644 --- a/crates/bender-slang/cpp/analysis.cpp +++ b/crates/bender-slang/cpp/analysis.cpp @@ -1,26 +1,94 @@ // Copyright (c) 2025 ETH Zurich // Tim Fischer +#include "slang/syntax/AllSyntax.h" #include "slang_bridge.h" #include +#include +#include #include +#include +#ifdef _WIN32 +#include +#else +#include +#endif #include #include using namespace slang; +static bool stderr_is_tty() { +#ifdef _WIN32 + return _isatty(_fileno(stderr)) != 0; +#else + return isatty(STDERR_FILENO) != 0; +#endif +} + +// Diagnostic for "a later top-level declaration shadows an earlier one with the same name". +// Lives in the General subsystem; the code is arbitrary but stable. +static const slang::DiagCode kDuplicateTopLevelDecl(slang::DiagSubsystem::General, 9999); +static constexpr std::string_view kDuplicateTopLevelDeclFormat = "module '{}' overwrites previous definition in '{}'"; + rust::Vec reachable_tree_indices(const SlangSession& session, const rust::Vec& tops) { const auto& treeVec = session.trees(); - // Build a mapping from declared symbol names to the index of the tree that - // declares them. + // One engine+client per distinct SourceManager. Each parse group creates its own + // SourceManager (see SlangContext), so trees from different groups need different + // engines; trees within a group share one. + struct DiagState { + std::unique_ptr engine; + std::shared_ptr client; + }; + std::unordered_map diagStates; + const bool tty = stderr_is_tty(); + auto diagFor = [&](const slang::SourceManager& sm) -> DiagState& { + auto [it, inserted] = diagStates.try_emplace(&sm); + if (inserted) { + it->second.engine = std::make_unique(sm); + it->second.client = std::make_shared(); + it->second.client->showColors(tty); + it->second.engine->addClient(it->second.client); + it->second.engine->setMessage(kDuplicateTopLevelDecl, std::string(kDuplicateTopLevelDeclFormat)); + it->second.engine->setSeverity(kDuplicateTopLevelDecl, slang::DiagnosticSeverity::Warning); + } + return it->second; + }; + + // Build the name-to-tree-index map with last-wins semantics, emitting a warning + // whenever a later definition overwrites an earlier one. std::unordered_map nameToTreeIndex; for (size_t i = 0; i < treeVec.size(); ++i) { const auto& metadata = treeVec[i]->getMetadata(); - for (auto name : metadata.getDeclaredSymbols()) { - nameToTreeIndex.emplace(name, i); - } + + auto checkAndInsert = [&](std::string_view name, slang::SourceLocation loc) { + if (name.empty()) + return; + auto [it, inserted] = nameToTreeIndex.emplace(name, i); + if (inserted) + return; + + const auto& prevBufferIds = treeVec[it->second]->getSourceBufferIds(); + std::string_view prevFile = prevBufferIds.empty() + ? std::string_view("") + : treeVec[it->second]->sourceManager().getRawFileName(prevBufferIds[0]); + + auto& state = diagFor(treeVec[i]->sourceManager()); + slang::Diagnostic diag(kDuplicateTopLevelDecl, loc); + diag << name; + diag << prevFile; + state.client->clear(); + state.engine->issue(diag); + std::cerr << state.client->getString(); + it->second = i; + }; + + for (const auto& [decl, _] : metadata.nodeMeta) + checkAndInsert(decl->header->name.valueText(), decl->header->name.location()); + for (const auto classDecl : metadata.classDecls) + checkAndInsert(classDecl->name.valueText(), classDecl->name.location()); } // Build a dependency graph where each tree points to the trees that declare @@ -46,7 +114,8 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con std::string_view name(top.data(), top.size()); auto it = nameToTreeIndex.find(name); if (it == nameToTreeIndex.end()) { - throw std::runtime_error("Top module not found in any parsed source file: " + std::string(name)); + throw std::runtime_error("Top module '" + std::string(name) + "' not found among " + + std::to_string(nameToTreeIndex.size()) + " known top-level declarations."); } startIndices.push_back(it->second); } @@ -75,3 +144,32 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con } return result; } + +// Returns the deduped, canonical filesystem paths of every header file that was actually loaded +// via `include directives while parsing the requested trees. Trees from different parse groups +// may live in different SourceManagers, so the lookup is per-tree. +rust::Vec resolved_include_paths_for(const SlangSession& session, + const rust::Vec& tree_indices) { + const auto& treeVec = session.trees(); + std::unordered_set uniquePaths; + for (auto idx : tree_indices) { + if (idx >= treeVec.size()) + continue; + const auto& tree = treeVec[idx]; + const auto& sm = tree->sourceManager(); + for (const auto& inc : tree->getIncludeDirectives()) { + if (!inc.buffer.id.valid()) + continue; + const auto& fullPath = sm.getFullPath(inc.buffer.id); + if (!fullPath.empty()) { + uniquePaths.insert(fullPath.string()); + } + } + } + rust::Vec out; + out.reserve(uniquePaths.size()); + for (const auto& p : uniquePaths) { + out.push_back(rust::String(p)); + } + return out; +} diff --git a/crates/bender-slang/cpp/slang_bridge.h b/crates/bender-slang/cpp/slang_bridge.h index ac423363..79840a66 100644 --- a/crates/bender-slang/cpp/slang_bridge.h +++ b/crates/bender-slang/cpp/slang_bridge.h @@ -78,6 +78,8 @@ rust::String print_tree(std::shared_ptr tree, SlangPr rust::String dump_tree_json(std::shared_ptr tree); rust::Vec reachable_tree_indices(const SlangSession& session, const rust::Vec& tops); +rust::Vec resolved_include_paths_for(const SlangSession& session, + const rust::Vec& tree_indices); std::size_t tree_count(const SlangSession& session); std::shared_ptr tree_at(const SlangSession& session, std::size_t index); std::uint64_t renamed_declarations(const SyntaxTreeRewriter& rewriter); diff --git a/crates/bender-slang/src/lib.rs b/crates/bender-slang/src/lib.rs index 6930abb5..9a789662 100644 --- a/crates/bender-slang/src/lib.rs +++ b/crates/bender-slang/src/lib.rs @@ -62,6 +62,11 @@ mod ffi { fn reachable_tree_indices(session: &SlangSession, tops: &Vec) -> Result>; + fn resolved_include_paths_for( + session: &SlangSession, + tree_indices: &Vec, + ) -> Result>; + fn tree_count(session: &SlangSession) -> usize; fn tree_at(session: &SlangSession, index: usize) -> Result>; @@ -198,6 +203,18 @@ impl SlangSession { Ok(indices.into_iter().map(|i| i as usize).collect()) } + /// Returns the canonical filesystem paths of every header that was actually loaded via an + /// `include directive while parsing the given trees. Useful for figuring out which include + /// directories were actually consulted. + pub fn resolved_include_paths(&self, tree_indices: &[usize]) -> Result> { + let indices: Vec = tree_indices.iter().map(|&i| i as u32).collect(); + let paths = ffi::resolved_include_paths_for(self.inner.as_ref().unwrap(), &indices) + .map_err(|cause| SlangError::TrimByTop { + message: cause.to_string(), + })?; + Ok(paths.into_iter().collect()) + } + /// Returns syntax trees reachable from the given top modules. pub fn reachable_trees(&self, tops: &[String]) -> Result>> { let indices = self.reachable_indices(tops)?; diff --git a/src/cmd/script.rs b/src/cmd/script.rs index 10a72c4a..a7c41919 100644 --- a/src/cmd/script.rs +++ b/src/cmd/script.rs @@ -6,6 +6,12 @@ use std::io::Write; use std::path::{Path, PathBuf}; +#[cfg(unix)] +use std::fs::canonicalize; + +#[cfg(windows)] +use dunce::canonicalize; + use clap::{ArgAction, Args, Subcommand, ValueEnum}; use indexmap::{IndexMap, IndexSet}; use miette::{Context as _, IntoDiagnostic as _}; @@ -21,6 +27,9 @@ use crate::sess::{Session, SessionIo}; use crate::src::{SourceFile, SourceGroup, SourceType}; use crate::target::TargetSet; +#[cfg(feature = "slang")] +use bender_slang::SlangSession; + /// Emit tool scripts for the package #[derive(Args, Debug)] pub struct ScriptArgs { @@ -78,6 +87,22 @@ pub struct ScriptArgs { #[arg(long, global = true, help_heading = "General Script Options")] pub no_abort_on_error: bool, + /// Trim unreachable Verilog files via the given top-level module(s) + #[cfg(feature = "slang")] + #[arg(long, global = true, help_heading = "General Script Options")] + pub top: Vec, + + /// Drop unused include directories from the generated script + #[cfg(feature = "slang")] + #[arg( + long, + value_enum, + default_value_t, + global = true, + help_heading = "General Script Options" + )] + pub trim_incdirs: TrimIncdirs, + /// Format of the generated script #[command(subcommand)] pub format: ScriptFormat, @@ -94,6 +119,20 @@ pub enum CompilationMode { Common, } +/// Controls whether unused include directories are dropped. +#[cfg(feature = "slang")] +#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum, Serialize, Default)] +#[serde(rename_all = "snake_case")] +pub enum TrimIncdirs { + /// Drop iff `--top` is set + #[default] + Auto, + /// Always drop unused directories + Always, + /// Keep every declared directory + Never, +} + /// Common arguments for Vivado scripts #[derive(Args, Debug, Clone)] pub struct OnlyArgs { @@ -312,6 +351,22 @@ pub fn run(sess: &Session, args: &ScriptArgs) -> Result<()> { .map(|f| f.validate(&ValidationContext::default())) .collect::>>()?; + // Slang-based filtering: trim unreachable Verilog files (when `--top` is given) and/or + // unused include directories (per `--trim-incdirs`). + #[cfg(feature = "slang")] + let srcs = { + let trim_incdirs = match args.trim_incdirs { + TrimIncdirs::Always => true, + TrimIncdirs::Never => false, + TrimIncdirs::Auto => !args.top.is_empty(), + }; + if args.top.is_empty() && !trim_incdirs { + srcs + } else { + apply_slang_filters(srcs, &args.top, trim_incdirs)? + } + }; + let mut tera_context = Context::new(); let mut only_args = OnlyArgs { defines: false, @@ -426,6 +481,128 @@ where } } +/// Filter source groups using slang's view of the design. +/// +/// When `top` is non-empty, Verilog files not reachable from any of those top modules are +/// dropped (VHDL and untyped files are always retained, and any group that ends up with no +/// files is dropped). When `trim_incdirs` is true, include directories slang did not resolve +/// an `include through are dropped from `include_dirs` and `export_incdirs`. +#[cfg(feature = "slang")] +fn apply_slang_filters<'a>( + srcs: Vec>, + top: &[String], + trim_incdirs: bool, +) -> Result>> { + use std::collections::{HashMap, HashSet}; + + let mut session = SlangSession::new(); + let mut index_to_path: HashMap = HashMap::new(); + + for src_group in &srcs { + // Collect include dirs + let include_dirs: Vec = src_group + .include_dirs + .iter() + .chain(src_group.export_incdirs.values().flatten()) + .map(|(_, path)| path.to_string_lossy().into_owned()) + .collect::>() + .into_iter() + .collect(); + + // Collect defines + let defines: Vec = src_group + .defines + .iter() + .map(|(def, (_, value))| match value { + Some(v) => format!("{def}={v}"), + None => def.to_string(), + }) + .collect::>() + .into_iter() + .collect(); + + // Collect only Verilog file paths. + let paths: Vec<&Path> = src_group + .files + .iter() + .filter_map(|f| match f { + SourceFile::File(p, Some(SourceType::Verilog)) => Some(*p), + _ => None, + }) + .collect(); + + if !paths.is_empty() { + let file_paths: Vec = paths + .iter() + .map(|p| p.to_string_lossy().into_owned()) + .collect(); + let indices = session + .parse_group(&file_paths, &include_dirs, &defines) + .into_diagnostic()?; + for (idx, path) in indices.into_iter().zip(&paths) { + index_to_path.insert(idx, path); + } + } + } + + // Determine which trees feed into the include / file-retention questions. With `--top` we + // only look at trees reachable from those top modules; without `--top` we use every tree + // (relevant when the caller asked for include-dir trimming but no file filtering). + let filter_files = !top.is_empty(); + let kept_indices: Vec = if filter_files { + session.reachable_indices(top).into_diagnostic()? + } else { + (0..session.tree_count()).collect() + }; + let kept_paths: HashSet<&Path> = if filter_files { + kept_indices + .iter() + .filter_map(|i| index_to_path.get(i).copied()) + .collect() + } else { + HashSet::new() + }; + + // Strict include-dir trimming: a directory survives only if slang actually resolved at least + // one `include directive through it. Canonicalize both sides so symlinks / `.` / `..` don't + // cause spurious mismatches. + let resolved_includes: Vec = if trim_incdirs { + session + .resolved_include_paths(&kept_indices) + .into_diagnostic()? + .into_iter() + .map(PathBuf::from) + .collect() + } else { + Vec::new() + }; + let dir_is_used = |dir: &Path| -> bool { + let canon = canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf()); + resolved_includes.iter().any(|f| f.starts_with(&canon)) + }; + + Ok(srcs + .into_iter() + .map(|mut group| { + if filter_files { + group.files.retain(|f| match f { + SourceFile::File(p, Some(SourceType::Verilog)) => kept_paths.contains(p), + _ => true, + }); + } + if trim_incdirs { + group.include_dirs.retain(|(_, p)| dir_is_used(p)); + for paths in group.export_incdirs.values_mut() { + paths.retain(|(_, p)| dir_is_used(p)); + } + } + group + }) + // Remove empty groups that may have resulted from filtering out all Verilog files. + .filter(|group| !group.files.is_empty()) + .collect()) +} + static HEADER_AUTOGEN: &str = "This script was generated automatically by bender."; fn add_defines(defines: &mut IndexMap>, define_args: &[String]) { diff --git a/tests/pickle/Bender.yml b/tests/pickle/Bender.yml index 7373ef37..861040b1 100644 --- a/tests/pickle/Bender.yml +++ b/tests/pickle/Bender.yml @@ -16,5 +16,12 @@ sources: - target: top include_dirs: - include + - include_unused files: - src/top.sv + + - target: dup + files: + - src/dup_a.sv + - src/dup_b.sv + - src/dup_top.sv diff --git a/tests/pickle/include_unused/unused_header.svh b/tests/pickle/include_unused/unused_header.svh new file mode 100644 index 00000000..91b90e8d --- /dev/null +++ b/tests/pickle/include_unused/unused_header.svh @@ -0,0 +1,3 @@ +// Header that exists in include_unused/ but is never `included by any source file. +// Used by tests/script.rs to verify --top trims unused include directories. +`define UNUSED_MACRO 1 diff --git a/tests/pickle/src/dup_a.sv b/tests/pickle/src/dup_a.sv new file mode 100644 index 00000000..08715784 --- /dev/null +++ b/tests/pickle/src/dup_a.sv @@ -0,0 +1,3 @@ +// Version A of dup_mod (e.g. from a dependency package) +module dup_mod #(parameter int VERSION = 1); +endmodule diff --git a/tests/pickle/src/dup_b.sv b/tests/pickle/src/dup_b.sv new file mode 100644 index 00000000..530053a6 --- /dev/null +++ b/tests/pickle/src/dup_b.sv @@ -0,0 +1,3 @@ +// Version B of dup_mod (e.g. a project-level override of the dependency) +module dup_mod #(parameter int VERSION = 2); +endmodule diff --git a/tests/pickle/src/dup_top.sv b/tests/pickle/src/dup_top.sv new file mode 100644 index 00000000..f37ac658 --- /dev/null +++ b/tests/pickle/src/dup_top.sv @@ -0,0 +1,3 @@ +module dup_top; + dup_mod u(); +endmodule diff --git a/tests/script.rs b/tests/script.rs new file mode 100644 index 00000000..ca0b39a7 --- /dev/null +++ b/tests/script.rs @@ -0,0 +1,198 @@ +// Copyright (c) 2025 ETH Zurich +// Tim Fischer + +#[cfg(feature = "slang")] +mod tests { + use std::collections::HashSet; + + use assert_cmd::cargo; + + fn run_script(args: &[&str]) -> String { + let mut full_args = vec!["-d", "tests/pickle", "script"]; + full_args.extend(args); + + let out = cargo::cargo_bin_cmd!() + .args(&full_args) + .output() + .expect("Failed to execute bender binary"); + + assert!( + out.status.success(), + "script command failed.\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + String::from_utf8(out.stdout).expect("stdout must be utf-8") + } + + /// Return the path component after the last `/` or `\`. On Windows, bender's source paths + /// can come out mixed (e.g. `D:\workspace\tests\pickle/src/top.sv`) while incdir paths are + /// all-backslash because the Bender.yml entry has no embedded separator — so splitting on + /// `/` alone misses the latter. + fn basename(path: &str) -> &str { + match path.rfind(|c: char| c == '/' || c == '\\') { + Some(i) => &path[i + 1..], + None => path, + } + } + + /// Extract the set of source-file basenames from a flist-plus output. + /// Filters out `+incdir+` / `+define+` lines so only path lines remain. + fn source_basenames(output: &str) -> HashSet<&str> { + output + .lines() + .map(str::trim) + .filter(|l| !l.is_empty() && !l.starts_with("+incdir+") && !l.starts_with("+define+")) + .map(basename) + .collect() + } + + /// Extract the basenames of `+incdir+` directories from a flist-plus output. + fn incdir_basenames(output: &str) -> HashSet<&str> { + output + .lines() + .map(str::trim) + .filter_map(|l| l.strip_prefix("+incdir+")) + .map(basename) + .collect() + } + + #[test] + fn script_top_filters_unreachable_files() { + // Without --top: all files present + let full_out = run_script(&["--target", "top", "flist-plus"]); + let full = source_basenames(&full_out); + assert!(full.contains("unused_top.sv")); + assert!(full.contains("unused_leaf.sv")); + + // With --top top: unreachable files removed + let trimmed_out = run_script(&["--target", "top", "--top", "top", "flist-plus"]); + let trimmed = source_basenames(&trimmed_out); + assert!(trimmed.contains("top.sv")); + assert!(trimmed.contains("core.sv")); + assert!(trimmed.contains("leaf.sv")); + assert!(!trimmed.contains("unused_top.sv")); + assert!(!trimmed.contains("unused_leaf.sv")); + } + + #[test] + fn script_top_multiple_tops() { + let out = run_script(&[ + "--target", + "top", + "--top", + "top", + "--top", + "unused_top", + "flist-plus", + ]); + let trimmed = source_basenames(&out); + assert!(trimmed.contains("top.sv")); + assert!(trimmed.contains("unused_top.sv")); + } + + #[test] + fn script_top_empty_keeps_all_files() { + // Without --top: all files appear + let out = run_script(&["--target", "top", "flist-plus"]); + let full = source_basenames(&out); + assert!(full.contains("top.sv")); + assert!(full.contains("core.sv")); + assert!(full.contains("leaf.sv")); + assert!(full.contains("unused_top.sv")); + assert!(full.contains("unused_leaf.sv")); + } + + /// Default (`--trim-incdirs auto`) trims include dirs iff `--top` is set. + /// `include/` is used by top.sv (`include "macros.svh"`); `include_unused/` is declared in + /// the Bender.yml but never resolved through. + #[test] + fn script_trim_incdirs_auto() { + // No --top: both dirs survive. + let full_out = run_script(&["--target", "top", "flist-plus"]); + let full_incs = incdir_basenames(&full_out); + assert!(full_incs.contains("include")); + assert!(full_incs.contains("include_unused")); + + // With --top: include_unused is dropped, include survives. + let trimmed_out = run_script(&["--target", "top", "--top", "top", "flist-plus"]); + let trimmed_incs = incdir_basenames(&trimmed_out); + assert!( + trimmed_incs.contains("include"), + "include/ should survive: {trimmed_incs:?}" + ); + assert!( + !trimmed_incs.contains("include_unused"), + "include_unused/ should be dropped: {trimmed_incs:?}" + ); + } + + /// `--trim-incdirs always` prunes unused incdirs even without `--top`. + #[test] + fn script_trim_incdirs_always_without_top() { + let out = run_script(&["--target", "top", "--trim-incdirs", "always", "flist-plus"]); + let incs = incdir_basenames(&out); + assert!(incs.contains("include")); + assert!( + !incs.contains("include_unused"), + "include_unused/ should be dropped: {incs:?}" + ); + + // File list is untouched — no --top means no reachability filter. + let files = source_basenames(&out); + assert!(files.contains("top.sv")); + assert!(files.contains("unused_top.sv")); + assert!(files.contains("unused_leaf.sv")); + } + + /// `--trim-incdirs never` keeps all incdirs even with `--top`. + #[test] + fn script_trim_incdirs_never_with_top() { + let out = run_script(&[ + "--target", + "top", + "--top", + "top", + "--trim-incdirs", + "never", + "flist-plus", + ]); + let incs = incdir_basenames(&out); + assert!(incs.contains("include")); + assert!( + incs.contains("include_unused"), + "include_unused/ should be retained with --trim-incdirs never: {incs:?}" + ); + + // File filtering still happens — unreachable files are still dropped. + let files = source_basenames(&out); + assert!(files.contains("top.sv")); + assert!(!files.contains("unused_top.sv")); + } + + /// Regression test: when two files define the same module name, last-wins semantics apply. + /// The file parsed last (dup_b.sv) wins; the earlier definition (dup_a.sv) is dropped. + #[test] + fn script_top_duplicate_module_name_last_wins() { + // Without --top: both dup files appear (no filtering applied) + let full_out = run_script(&["--target", "dup", "flist-plus"]); + let full = source_basenames(&full_out); + assert!(full.contains("dup_a.sv")); + assert!(full.contains("dup_b.sv")); + assert!(full.contains("dup_top.sv")); + + // With --top dup_top: only dup_b.sv (last-wins) and dup_top.sv appear + let trimmed_out = run_script(&["--target", "dup", "--top", "dup_top", "flist-plus"]); + let trimmed = source_basenames(&trimmed_out); + assert!(trimmed.contains("dup_top.sv")); + assert!( + trimmed.contains("dup_b.sv"), + "dup_b.sv (last-wins) missing: {trimmed:?}" + ); + assert!( + !trimmed.contains("dup_a.sv"), + "dup_a.sv (overwritten) should be absent: {trimmed:?}" + ); + } +}