diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000000..6c0201db04 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,9 @@ +## Documentation policy + +Document **why**, not what. Comments must add information not visible locally. + +**Add** docs for: non-obvious contracts (preconditions, invariants, ownership, thread-safety, error/ordering semantics); rationale for non-obvious design choices; missing module/file headers when purpose isn't self-evident; **anything you had to dig to figure out** — your friction is the signal, leave a breadcrumb where the knowledge belongs. + +**Skip** docs that restate code, duplicate what a good name or signature already conveys (improve those first), or assert things you aren't confident will stay true. Stale docs are worse than none. + +Use idiomatic doc-comment syntax. Be terse; link out (issue, commit, design doc) rather than inline long explanations. Fix or remove drifted docs you encounter. \ No newline at end of file diff --git a/crates/cargo-paralegal-flow/src/lib.rs b/crates/cargo-paralegal-flow/src/lib.rs index cd96c07c87..20a148c5f0 100644 --- a/crates/cargo-paralegal-flow/src/lib.rs +++ b/crates/cargo-paralegal-flow/src/lib.rs @@ -151,6 +151,8 @@ pub struct MarkerControl { /// Implies `--include-std`. #[clap(long, env)] side_effect_markers: bool, + #[clap(long)] + elide_on_whitelist_markers: bool, } /// Arguments that control the flow analysis @@ -187,6 +189,9 @@ pub struct ClapAnalysisCtrl { /// Recompile the standard library and make the code available for analysis. #[clap(long, env)] pub include_std: bool, + /// Emit an error if the call chain depth exceeds this value. + #[clap(long)] + pub fail_on_deep_call_chain: Option, } impl MarkerControl { @@ -197,4 +202,8 @@ impl MarkerControl { pub fn mark_side_effects(&self) -> bool { self.side_effect_markers } + + pub fn elide_on_whitelist_markers(&self) -> bool { + self.elide_on_whitelist_markers + } } diff --git a/crates/flowistry_pdg/src/pdg.rs b/crates/flowistry_pdg/src/pdg.rs index fa7f07596c..9fc4a3d630 100644 --- a/crates/flowistry_pdg/src/pdg.rs +++ b/crates/flowistry_pdg/src/pdg.rs @@ -331,13 +331,40 @@ pub enum Constant { Uint(u64), Char(char), // Placeholder. Floats in the rust compiler are a bit weird so I'll skip them for now. - //Float(f64), + Float(FloatWrapper), Bool(bool), String(Intern), //Unknown(Intern), Zst(Intern), } +/// This is an unsafe wrapper around f64 in that it defines hash and equality +/// based on bit representation. This is not in line with float semantics. +/// +/// But I really need this to be hash and eq, hence the hack. +#[derive(Clone, Copy, Serialize, Deserialize, Debug)] +#[serde(transparent)] +pub struct FloatWrapper(pub f64); + +impl std::cmp::PartialEq for FloatWrapper { + fn eq(&self, other: &Self) -> bool { + unsafe { + std::mem::transmute::<&f64, &u64>(&self.0) + == std::mem::transmute::<&f64, &u64>(&other.0) + } + } +} + +impl std::cmp::Eq for FloatWrapper {} + +impl std::hash::Hash for FloatWrapper { + fn hash(&self, state: &mut H) { + unsafe { + std::mem::transmute::<&f64, &u64>(&self.0).hash(state); + } + } +} + impl Constant { pub fn int(i: impl Into) -> Self { Self::Int(i.into()) @@ -368,6 +395,7 @@ impl std::fmt::Display for Constant { Self::Uint(u) => Display::fmt(u, f), Self::Char(c) => Display::fmt(c, f), Self::String(s) => Debug::fmt(s, f), + Self::Float(fl) => Display::fmt(&fl.0, f), Self::Zst(s) => f.write_str(s), //Self::Unknown(u) => write!(f, "Unsupported constant: {u}"), } diff --git a/crates/flowistry_pdg_construction/src/analysis/async_support.rs b/crates/flowistry_pdg_construction/src/analysis/async_support.rs index a028d00f3c..64aa8681c3 100644 --- a/crates/flowistry_pdg_construction/src/analysis/async_support.rs +++ b/crates/flowistry_pdg_construction/src/analysis/async_support.rs @@ -6,10 +6,10 @@ use rustc_abi::{FieldIdx, VariantIdx}; use rustc_hir::def_id::DefId; use rustc_middle::{ mir::{ - AggregateKind, BasicBlock, Body, Location, Operand, Place, Rvalue, Statement, + visit, AggregateKind, BasicBlock, Body, Location, Operand, Place, Rvalue, Statement, StatementKind, Terminator, TerminatorKind, }, - ty::{GenericArgsRef, Instance, TyCtxt}, + ty::{self, GenericArgsRef, Instance, TyCtxt}, }; use rustc_span::{Span, Spanned}; @@ -26,6 +26,17 @@ use crate::utils; pub enum AsyncType { Fn, Trait, + Tool, +} + +impl AsyncType { + pub fn describe(&self) -> &'static str { + match self { + AsyncType::Fn => "async function", + AsyncType::Trait => "async trait", + AsyncType::Tool => "async tool", + } + } } /// Context for a call to [`Future::poll`](std::future::Future::poll), when @@ -70,37 +81,22 @@ pub fn try_as_async_trait_function<'tcx>( if !has_async_trait_signature(tcx, def_id) { return None; } - let mut matching_statements = - body.basic_blocks - .iter_enumerated() - .flat_map(|(block, bbdat)| { - bbdat.statements.iter().enumerate().filter_map( - move |(statement_index, statement)| { - let (def_id, generics) = match_async_trait_assign(statement)?; - Some(( - def_id, - generics, - Location { - block, - statement_index, - }, - )) - }, - ) - }) - .collect::>(); - assert_eq!(matching_statements.len(), 1); - matching_statements.pop() + let (def_id, generics, loc, _) = find_coroutine_assign(body); + Some((def_id, generics, loc)) } -pub fn match_async_trait_assign<'tcx>( - statement: &Statement<'tcx>, -) -> Option<(DefId, GenericArgsRef<'tcx>)> { +pub fn match_coroutine_assign<'tcx, 'a>( + statement: &'a Statement<'tcx>, +) -> Option<( + DefId, + GenericArgsRef<'tcx>, + &'a rustc_index::IndexVec>, +)> { match &statement.kind { StatementKind::Assign(box ( _, - Rvalue::Aggregate(box AggregateKind::Coroutine(def_id, generic_args), _args), - )) => Some((*def_id, *generic_args)), + Rvalue::Aggregate(box AggregateKind::Coroutine(def_id, generic_args), args), + )) => Some((*def_id, *generic_args, args)), _ => None, } } @@ -121,7 +117,6 @@ fn has_async_trait_signature(tcx: TyCtxt, def_id: DefId) -> bool { } } -use rustc_middle::ty; fn match_pin_box_dyn_ty(lang_items: &rustc_hir::LanguageItems, t: ty::Ty) -> bool { let ty::TyKind::Adt(pin_ty, args) = t.kind() else { return false; @@ -166,6 +161,79 @@ fn get_async_generator<'tcx>(body: &Body<'tcx>) -> (DefId, GenericArgsRef<'tcx>, (*def_id, generic_args, location) } +// matches std::pin::Pin> +fn has_async_tool_signature(tcx: TyCtxt<'_>, def_id: DefId) -> bool { + use rustc_hir::def::DefKind; + if !matches!(tcx.def_kind(def_id), DefKind::Fn | DefKind::AssocFn) { + return false; + } + let sig = tcx.fn_sig(def_id); + let lang_items = tcx.lang_items(); + let ty::TyKind::Adt(adt_def, inner) = sig.skip_binder().output().skip_binder().kind() else { + return false; + }; + if !lang_items.pin_type().is_some_and(|p| p == adt_def.did()) { + return false; + } + let [b_ty] = inner.as_slice() else { + return false; + }; + let Some(f_ty) = b_ty.as_type().and_then(ty::Ty::boxed_ty) else { + return false; + }; + let ty::TyKind::Dynamic(preds, _) = f_ty.kind() else { + return false; + }; + let Some(ty::ExistentialPredicate::Trait(t)) = preds.first().map(|b| b.skip_binder()) else { + return false; + }; + lang_items.future_trait().is_some_and(|f| f == t.def_id) +} + +pub fn find_coroutine_assign<'tcx, 'a>( + body: &'a Body<'tcx>, +) -> ( + DefId, + GenericArgsRef<'tcx>, + Location, + &'a rustc_index::IndexVec>, +) { + let mut matching_statements = + body.basic_blocks + .iter_enumerated() + .flat_map(|(block, bbdat)| { + bbdat.statements.iter().enumerate().filter_map( + move |(statement_index, statement)| { + let (def_id, generics, args) = match_coroutine_assign(statement)?; + Some(( + def_id, + generics, + Location { + block, + statement_index, + }, + args, + )) + }, + ) + }) + .collect::>(); + assert_eq!(matching_statements.len(), 1); + matching_statements.pop().unwrap() +} + +fn try_as_async_tool<'tcx>( + tcx: TyCtxt<'tcx>, + def_id: DefId, + body: &Body<'tcx>, +) -> Option<(DefId, GenericArgsRef<'tcx>, Location)> { + if !has_async_tool_signature(tcx, def_id) { + return None; + } + let (def_id, gargs, loc, _) = find_coroutine_assign(body); + Some((def_id, gargs, loc)) +} + /// Try to interpret this function as an async function. /// /// If this is an async function it returns the [`Instance`] of the generator, @@ -178,11 +246,10 @@ pub fn determine_async<'tcx>( ) -> Option<(Instance<'tcx>, Location, AsyncType)> { let ((generator_def_id, args, loc), asyncness) = if is_async(tcx, def_id) { (get_async_generator(body), AsyncType::Fn) + } else if let Some(g) = try_as_async_trait_function(tcx, def_id, body) { + (g, AsyncType::Trait) } else { - ( - try_as_async_trait_function(tcx, def_id, body)?, - AsyncType::Trait, - ) + (try_as_async_tool(tcx, def_id, body)?, AsyncType::Tool) }; let typing_env = body.typing_env(tcx).with_post_analysis_normalized(tcx); let generator_fn = utils::try_resolve_function(tcx, generator_def_id, typing_env, args)?; diff --git a/crates/flowistry_pdg_construction/src/analysis/global/call_tree_visit.rs b/crates/flowistry_pdg_construction/src/analysis/global/call_tree_visit.rs index bb7eecfd9d..8c636ab93c 100644 --- a/crates/flowistry_pdg_construction/src/analysis/global/call_tree_visit.rs +++ b/crates/flowistry_pdg_construction/src/analysis/global/call_tree_visit.rs @@ -12,8 +12,12 @@ use std::{borrow::Cow, hash::Hash, rc::Rc}; +use paralegal_flowistry::mir::FlowistryInput; use flowistry_pdg::{CallString, GlobalLocation, RichLocation}; -use rustc_middle::{mir::Location, ty::Instance}; +use rustc_middle::{ + mir::{self, Location}, + ty::Instance, +}; use crate::{analysis::global::partial_graph::NodeKey, DepNodeKind, MemoPdgConstructor}; @@ -80,6 +84,13 @@ impl<'tcx, 'c, K: Clone> VisitDriver<'tcx, 'c, K> { }, ) } + + pub fn current_body(&self) -> &'tcx mir::Body<'tcx> { + self.memo + .body_cache() + .get(self.current_function().def_id()) + .body() + } } pub trait Visitor<'tcx, K: Hash + Eq + Clone> { diff --git a/crates/flowistry_pdg_construction/src/analysis/global/mod.rs b/crates/flowistry_pdg_construction/src/analysis/global/mod.rs index ced4ae7ac2..a09f63fa1f 100644 --- a/crates/flowistry_pdg_construction/src/analysis/global/mod.rs +++ b/crates/flowistry_pdg_construction/src/analysis/global/mod.rs @@ -25,7 +25,7 @@ use crate::{ callback::DefaultCallback, constants::PlaceOrConst, source_access::{self, BodyCache, CachedBody}, - utils::{PlaceConflictContext, TwoLevelCache}, + utils::{manufacture_substs_for, try_resolve_function, PlaceConflictContext, TwoLevelCache}, CallChangeCallback, }; @@ -649,7 +649,7 @@ enum Input<'tcx> { }, } -struct GraphSizeEstimator { +pub struct GraphSizeEstimator { nodes: usize, edges: usize, functions: usize, @@ -658,7 +658,7 @@ struct GraphSizeEstimator { #[allow(dead_code)] impl GraphSizeEstimator { - fn new() -> Self { + pub fn new() -> Self { Self { nodes: 0, edges: 0, @@ -667,7 +667,7 @@ impl GraphSizeEstimator { } } - fn format_size(&self) -> String { + pub fn format_size(&self) -> String { format!( "nodes: {}, edges: {}, functions: {}, call_string_length: {}", HumanInt(self.nodes), diff --git a/crates/flowistry_pdg_construction/src/analysis/local/mod.rs b/crates/flowistry_pdg_construction/src/analysis/local/mod.rs index 0d47fe6dd5..e81f6c3183 100644 --- a/crates/flowistry_pdg_construction/src/analysis/local/mod.rs +++ b/crates/flowistry_pdg_construction/src/analysis/local/mod.rs @@ -381,6 +381,10 @@ impl<'tcx, 'a, K> LocalAnalysis<'tcx, 'a, K> { let ty = func.ty(&self.mono_body, self.tcx()); utils::type_as_fn(self.tcx(), ty) } + + fn fmt_fn(&self, def_id: DefId) -> String { + format!("{} ({}:{})", self.tcx().def_path_str(def_id), def_id.krate.as_u32(), def_id.index.as_u32()) + } } impl<'tcx, 'a, K: Hash + Eq + Clone> LocalAnalysis<'tcx, 'a, K> { @@ -777,7 +781,17 @@ impl<'tcx, 'a, K: Hash + Eq + Clone> LocalAnalysis<'tcx, 'a, K> { } else { continue; }; - let src = final_state.get_place_node(*place, location.into()).unwrap(); + let Some(src) = final_state.get_place_node(*place, location.into()) else { + let span = match location { + RichLocation::Location(l) => self.mono_body.source_info(*l).span, + _ => self.mono_body.span, + }; + // CORNER CUTTING: we should investigate why this happens. + self.tcx() + .dcx() + .span_warn(span, format!("could not find reference to {place:?} here")); + continue; + }; let eloc = RichLocation::End; let key = NodeKey::for_place(*place, eloc.into()); let prior = final_state.get_node(&key); diff --git a/crates/flowistry_pdg_construction/src/constants.rs b/crates/flowistry_pdg_construction/src/constants.rs index 702c451306..9a738ee65a 100644 --- a/crates/flowistry_pdg_construction/src/constants.rs +++ b/crates/flowistry_pdg_construction/src/constants.rs @@ -1,6 +1,6 @@ -use std::hash::Hash; +use std::{hash::Hash, num::ParseFloatError}; -use flowistry_pdg::Constant; +use flowistry_pdg::{Constant, FloatWrapper}; use internment::Intern; use rustc_middle::{ @@ -107,6 +107,7 @@ pub enum ConstConversionError<'tcx> { signed: bool, }, EvalFailed(mir::Const<'tcx>), + CannotParseFloat(ParseFloatError), /// A `RuntimeChecks` operand was encountered; these are compiler-inserted /// runtime assertions (e.g. overflow checks) and carry no data value. RuntimeChecksOperand, @@ -155,6 +156,8 @@ impl std::fmt::Display for ConstConversionError<'_> { ConstConversionError::EvalFailed(c) => { write!(f, "Evaluation failed for constant: {:?}", c) } + ConstConversionError::CannotParseFloat(e) => + write!(f, "Cannot parse float: {}", e), ConstConversionError::RuntimeChecksOperand => { write!(f, "RuntimeChecks operand is not a data value") } @@ -183,56 +186,58 @@ fn constant_from_const_value<'tcx>( ty: ty::Ty<'tcx>, ct: &mir::ConstValue, ) -> Result> { + let default_err = Err(ConstConversionError::UnsupportedConstType(mir::Const::Val( + *ct, ty, + ))); // largely from rustc_middle/mir/pretty.rs:1952-1962 match (ct, ty.kind()) { (_, ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Str) => { if let Some(data) = ct.try_get_slice_bytes_for_diagnostics(tcx) { - return Ok(Constant::String(Intern::from_ref( + Ok(Constant::String(Intern::from_ref( String::from_utf8_lossy(data).as_ref(), - ))); + ))) + } else { + default_err } } (mir::ConstValue::Scalar(mir::interpret::Scalar::Int(int)), tyk) => match tyk { - ty::Bool => return Ok(Constant::Bool(int.try_to_bool().unwrap())), + ty::Bool => Ok(Constant::Bool(int.try_to_bool().unwrap())), // Skipping floats for now. - // ty::Float(fty) => Self::Float(match fty { - // ty::FloatTy::F16 => int.to_f16() as f64, - // ty::FloatTy::F32 => int.to_f32() as f64, - // ty::FloatTy::F64 => int.to_f64() as f64, - // }), - ty::Int(ity) => { - return Ok(Constant::Int(match ity { - ty::IntTy::I8 => int.to_u8() as i64, - ty::IntTy::I16 => int.to_u16() as i64, - ty::IntTy::I32 => int.to_u32() as i64, - ty::IntTy::I64 => int.to_u64() as i64, - ty::IntTy::Isize => int.to_target_isize(tcx), - ty::IntTy::I128 => { - return Err(ConstConversionError::Integer128NotSupported { signed: true }) - } - })) - } - ty::Uint(uty) => { - return Ok(Constant::Uint(match uty { - ty::UintTy::U8 => int.to_u8() as u64, - ty::UintTy::U16 => int.to_u16() as u64, - ty::UintTy::U32 => int.to_u32() as u64, - ty::UintTy::U64 => int.to_u64(), - ty::UintTy::Usize => int.to_target_usize(tcx), - ty::UintTy::U128 => { - return Err(ConstConversionError::Integer128NotSupported { signed: false }) - } - })) - } - ty::Char => { - return Ok(Constant::Char(int.to_u32() as u8 as char)); - } - _ => (), + ty::Float(fty) => Ok(Constant::Float( + match fty { + ty::FloatTy::F16 => int.to_f16().to_string(), + ty::FloatTy::F32 => int.to_f32().to_string(), + ty::FloatTy::F64 => int.to_f64().to_string(), + _ => return default_err, + } + .parse() + .map(FloatWrapper) + .map_err(ConstConversionError::CannotParseFloat)?, + )), + ty::Int(ity) => Ok(Constant::Int(match ity { + ty::IntTy::I8 => int.to_u8() as i64, + ty::IntTy::I16 => int.to_u16() as i64, + ty::IntTy::I32 => int.to_u32() as i64, + ty::IntTy::I64 => int.to_u64() as i64, + ty::IntTy::Isize => int.to_target_isize(tcx), + ty::IntTy::I128 => { + return Err(ConstConversionError::Integer128NotSupported { signed: true }) + } + })), + ty::Uint(uty) => Ok(Constant::Uint(match uty { + ty::UintTy::U8 => int.to_u8() as u64, + ty::UintTy::U16 => int.to_u16() as u64, + ty::UintTy::U32 => int.to_u32() as u64, + ty::UintTy::U64 => int.to_u64(), + ty::UintTy::Usize => int.to_target_usize(tcx), + ty::UintTy::U128 => { + return Err(ConstConversionError::Integer128NotSupported { signed: false }) + } + })), + ty::Char => Ok(Constant::Char(int.to_u32() as u8 as char)), + _ => default_err, }, - (mir::ConstValue::ZeroSized, t) => return Ok(Constant::Zst(Intern::new(format!("{t:?}")))), - _ => (), + (mir::ConstValue::ZeroSized, t) => Ok(Constant::Zst(Intern::new(format!("{t:?}")))), + _ => default_err, } - Err(ConstConversionError::UnsupportedConstType(mir::Const::Val( - *ct, ty, - ))) } diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index 23881d7ac7..be041947f1 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -26,11 +26,11 @@ pub mod constants; pub mod source_access; pub mod utils; -pub use analysis::async_support::{determine_async, is_async_trait_fn, match_async_trait_assign}; +pub use analysis::async_support::{self, determine_async, is_async_trait_fn}; pub use analysis::global::call_tree_visit::{VisitDriver, Visitor}; pub use analysis::global::{ - DepEdge, DepEdgeKind, DepNode, DepNodeKind, MemoPdgConstructor, Node, OneHopLocation, - PartialGraph, Use, + DepEdge, DepEdgeKind, DepNode, DepNodeKind, GraphSizeEstimator, MemoPdgConstructor, Node, + OneHopLocation, PartialGraph, Use, }; pub use analysis::CallingConvention; pub use callback::{ diff --git a/crates/flowistry_pdg_construction/src/utils/mod.rs b/crates/flowistry_pdg_construction/src/utils/mod.rs index afcf362151..75f797a892 100644 --- a/crates/flowistry_pdg_construction/src/utils/mod.rs +++ b/crates/flowistry_pdg_construction/src/utils/mod.rs @@ -538,443 +538,6 @@ pub fn assert_resolved<'tcx>(rvalue: &impl TypeVisitable>, msg: imp // slight alterations that do not throw an error for nested references // ################################################################################################# -// /// Helper function for checking if places conflict with a mutable borrow and deep access depth. -// /// This is used to check for places conflicting outside of the borrow checking code (such as in -// /// dataflow). -// pub fn places_conflict<'tcx>( -// tcx: TyCtxt<'tcx>, -// body: &Body<'tcx>, -// borrow_place: Place<'tcx>, -// access_place: Place<'tcx>, -// ) -> bool { -// let borrow_local = borrow_place.local; -// let access_local = access_place.local; -// let access_place = access_place.as_ref(); - -// if borrow_local != access_local { -// // We have proven the borrow disjoint - further projections will remain disjoint. -// return false; -// } - -// // This Local/Local case is handled by the more general code below, but -// // it's so common that it's a speed win to check for it first. -// if borrow_place.projection.is_empty() && access_place.projection.is_empty() { -// return true; -// } - -// // loop invariant: borrow_c is always either equal to access_c or disjoint from it. -// for ((borrow_place, borrow_c), &access_c) in -// std::iter::zip(borrow_place.iter_projections(), access_place.projection) -// { -// // Borrow and access path both have more components. -// // -// // Examples: -// // -// // - borrow of `a.(...)`, access to `a.(...)` -// // - borrow of `a.(...)`, access to `b.(...)` -// // -// // Here we only see the components we have checked so -// // far (in our examples, just the first component). We -// // check whether the components being borrowed vs -// // accessed are disjoint (as in the second example, -// // but not the first). -// match place_projection_conflict( -// tcx, -// body, -// borrow_place, -// borrow_c, -// access_c, -// PlaceConflictBias::Overlap, -// ) { -// Overlap::Arbitrary => { -// // We have encountered different fields of potentially -// // the same union - the borrow now partially overlaps. -// // -// // There is no *easy* way of comparing the fields -// // further on, because they might have different types -// // (e.g., borrows of `u.a.0` and `u.b.y` where `.0` and -// // `.y` come from different structs). -// // -// // We could try to do some things here - e.g., count -// // dereferences - but that's probably not a good -// // idea, at least for now, so just give up and -// // report a conflict. This is unsafe code anyway so -// // the user could always use raw pointers. -// return true; -// } -// Overlap::EqualOrDisjoint => { -// // This is the recursive case - proceed to the next element. -// } -// Overlap::Disjoint => { -// // We have proven the borrow disjoint - further -// // projections will remain disjoint. -// return false; -// } -// } -// } - -// if borrow_place.projection.len() > access_place.projection.len() { -// for (base, elem) in borrow_place -// .iter_projections() -// .skip(access_place.projection.len()) -// { -// // Borrow path is longer than the access path. Examples: -// // -// // - borrow of `a.b.c`, access to `a.b` -// // -// // Here, we know that the borrow can access a part of -// // our place. This is a conflict if that is a part our -// // access cares about. - -// let base_ty = base.ty(body, tcx).ty; - -// match (elem, &base_ty.kind()) { -// (ProjectionElem::Deref, ty::Ref(_, _, hir::Mutability::Not)) => { -// // This occurs only in two cases. Either we have a reference -// // as an argument, which causes queries such as -// // conflicting("(*_1)", "_2") or if we have a raw pointer in -// // the mix. In the reference case the alias analysis will -// // already keep track of the conflict. Raw pointers by -// // themselves are not soundly supported. However this can -// // also occur via a manual "deref" (or somesuch), on which -// // case we rely on the lifetimes declared on those functions -// // to be correct and then our alias analysis will pick it up -// // correctly. -// return false; -// } -// (ProjectionElem::Deref, _) -// | (ProjectionElem::Field { .. }, _) -// | (ProjectionElem::Index { .. }, _) -// | (ProjectionElem::ConstantIndex { .. }, _) -// | (ProjectionElem::Subslice { .. }, _) -// | (ProjectionElem::OpaqueCast { .. }, _) -// | (ProjectionElem::UnwrapUnsafeBinder(_), _) -// | (ProjectionElem::Downcast { .. }, _) => { -// // Recursive case. This can still be disjoint on a -// // further iteration if this a shallow access and -// // there's a deref later on, e.g., a borrow -// // of `*x.y` while accessing `x`. -// } -// } -// } -// } - -// true -// } - -// #[derive(Clone, Copy)] -// pub(crate) enum Overlap { -// Arbitrary, -// EqualOrDisjoint, -// Disjoint, -// } - -// // Given that the bases of `elem1` and `elem2` are always either equal -// // or disjoint (and have the same type!), return the overlap situation -// // between `elem1` and `elem2`. -// fn place_projection_conflict<'tcx>( -// tcx: TyCtxt<'tcx>, -// body: &Body<'tcx>, -// pi1: PlaceRef<'tcx>, -// pi1_elem: PlaceElem<'tcx>, -// pi2_elem: PlaceElem<'tcx>, -// bias: PlaceConflictBias, -// ) -> Overlap { -// match (pi1_elem, pi2_elem) { -// (ProjectionElem::Deref, ProjectionElem::Deref) => { -// // derefs (e.g., `*x` vs. `*x`) - recur. -// Overlap::EqualOrDisjoint -// } -// (ProjectionElem::OpaqueCast(_), ProjectionElem::OpaqueCast(_)) => { -// // casts to other types may always conflict irrespective of the type being cast to. -// Overlap::EqualOrDisjoint -// } -// (ProjectionElem::UnwrapUnsafeBinder(_), ProjectionElem::UnwrapUnsafeBinder(_)) => { -// Overlap::EqualOrDisjoint -// } -// (ProjectionElem::Field(f1, _), ProjectionElem::Field(f2, _)) => { -// if f1 == f2 { -// // same field (e.g., `a.y` vs. `a.y`) - recur. -// Overlap::EqualOrDisjoint -// } else { -// let ty = pi1.ty(body, tcx).ty; -// if ty.is_union() { -// // Different fields of a union, we are basically stuck. -// Overlap::Arbitrary -// } else { -// // Different fields of a struct (`a.x` vs. `a.y`). Disjoint! -// Overlap::Disjoint -// } -// } -// } -// (ProjectionElem::Downcast(_, v1), ProjectionElem::Downcast(_, v2)) => { -// // different variants are treated as having disjoint fields, -// // even if they occupy the same "space", because it's -// // impossible for 2 variants of the same enum to exist -// // (and therefore, to be borrowed) at the same time. -// // -// // Note that this is different from unions - we *do* allow -// // this code to compile: -// // -// // ``` -// // fn foo(x: &mut Result) { -// // let mut v = None; -// // if let Ok(ref mut a) = *x { -// // v = Some(a); -// // } -// // // here, you would *think* that the -// // // *entirety* of `x` would be borrowed, -// // // but in fact only the `Ok` variant is, -// // // so the `Err` variant is *entirely free*: -// // if let Err(ref mut a) = *x { -// // v = Some(a); -// // } -// // drop(v); -// // } -// // ``` -// if v1 == v2 { -// Overlap::EqualOrDisjoint -// } else { -// Overlap::Disjoint -// } -// } -// ( -// ProjectionElem::Index(..), -// ProjectionElem::Index(..) -// | ProjectionElem::ConstantIndex { .. } -// | ProjectionElem::Subslice { .. }, -// ) -// | ( -// ProjectionElem::ConstantIndex { .. } | ProjectionElem::Subslice { .. }, -// ProjectionElem::Index(..), -// ) => { -// // Array indexes (`a[0]` vs. `a[i]`). These can either be disjoint -// // (if the indexes differ) or equal (if they are the same). -// match bias { -// PlaceConflictBias::Overlap => { -// // If we are biased towards overlapping, then this is the recursive -// // case that gives "equal *or* disjoint" its meaning. -// Overlap::EqualOrDisjoint -// } -// PlaceConflictBias::NoOverlap => { -// // If we are biased towards no overlapping, then this is disjoint. -// Overlap::Disjoint -// } -// } -// } -// ( -// ProjectionElem::ConstantIndex { -// offset: o1, -// min_length: _, -// from_end: false, -// }, -// ProjectionElem::ConstantIndex { -// offset: o2, -// min_length: _, -// from_end: false, -// }, -// ) -// | ( -// ProjectionElem::ConstantIndex { -// offset: o1, -// min_length: _, -// from_end: true, -// }, -// ProjectionElem::ConstantIndex { -// offset: o2, -// min_length: _, -// from_end: true, -// }, -// ) => { -// if o1 == o2 { -// Overlap::EqualOrDisjoint -// } else { -// Overlap::Disjoint -// } -// } -// ( -// ProjectionElem::ConstantIndex { -// offset: offset_from_begin, -// min_length: min_length1, -// from_end: false, -// }, -// ProjectionElem::ConstantIndex { -// offset: offset_from_end, -// min_length: min_length2, -// from_end: true, -// }, -// ) -// | ( -// ProjectionElem::ConstantIndex { -// offset: offset_from_end, -// min_length: min_length1, -// from_end: true, -// }, -// ProjectionElem::ConstantIndex { -// offset: offset_from_begin, -// min_length: min_length2, -// from_end: false, -// }, -// ) => { -// // both patterns matched so it must be at least the greater of the two -// let min_length = std::cmp::max(min_length1, min_length2); -// // `offset_from_end` can be in range `[1..min_length]`, 1 indicates the last -// // element (like -1 in Python) and `min_length` the first. -// // Therefore, `min_length - offset_from_end` gives the minimal possible -// // offset from the beginning -// if offset_from_begin >= min_length - offset_from_end { -// Overlap::EqualOrDisjoint -// } else { -// Overlap::Disjoint -// } -// } -// ( -// ProjectionElem::ConstantIndex { -// offset, -// min_length: _, -// from_end: false, -// }, -// ProjectionElem::Subslice { -// from, -// to, -// from_end: false, -// }, -// ) -// | ( -// ProjectionElem::Subslice { -// from, -// to, -// from_end: false, -// }, -// ProjectionElem::ConstantIndex { -// offset, -// min_length: _, -// from_end: false, -// }, -// ) => { -// if (from..to).contains(&offset) { -// Overlap::EqualOrDisjoint -// } else { -// Overlap::Disjoint -// } -// } -// ( -// ProjectionElem::ConstantIndex { -// offset, -// min_length: _, -// from_end: false, -// }, -// ProjectionElem::Subslice { from, .. }, -// ) -// | ( -// ProjectionElem::Subslice { from, .. }, -// ProjectionElem::ConstantIndex { -// offset, -// min_length: _, -// from_end: false, -// }, -// ) => { -// if offset >= from { -// Overlap::EqualOrDisjoint -// } else { -// Overlap::Disjoint -// } -// } -// ( -// ProjectionElem::ConstantIndex { -// offset, -// min_length: _, -// from_end: true, -// }, -// ProjectionElem::Subslice { -// to, from_end: true, .. -// }, -// ) -// | ( -// ProjectionElem::Subslice { -// to, from_end: true, .. -// }, -// ProjectionElem::ConstantIndex { -// offset, -// min_length: _, -// from_end: true, -// }, -// ) => { -// if offset > to { -// Overlap::EqualOrDisjoint -// } else { -// Overlap::Disjoint -// } -// } -// ( -// ProjectionElem::Subslice { -// from: f1, -// to: t1, -// from_end: false, -// }, -// ProjectionElem::Subslice { -// from: f2, -// to: t2, -// from_end: false, -// }, -// ) => { -// if f2 >= t1 || f1 >= t2 { -// Overlap::Disjoint -// } else { -// Overlap::EqualOrDisjoint -// } -// } -// (ProjectionElem::Subslice { .. }, ProjectionElem::Subslice { .. }) => { -// Overlap::EqualOrDisjoint -// } -// (ProjectionElem::Deref, ProjectionElem::Field(idx, _)) -// if matches!(idx.as_u32(), 0 | 1) && pi1.ty(body, tcx).ty.is_box() => -// { -// // Special case for deref of box. -// // -// // Basically rustc is sloppy when it comes to boxes. It is perfectly permissible to have the following assignments: -// // -// // ``` -// // fn Box::new(_1: T) { -// // ... -// // _5 = ShallowInitBox(move _4, std::sys::pal::unix::sync::mutex::Mutex); -// // (*_5) = move _1; -// // _0 = move _5; -// // } -// // ``` -// // -// // This is technically invalid, because `Box` is not a reference (or -// // pointer) type, but defined as `struct Box(Unique, A)`. So technically it should be -// // `*(_5.0.0)` to get to the contained value. -// // -// // Further more we must allow both the `.0` and `.1` field, since it -// // also tries to match the allocator for conflicts. -// // -// // XXX(Justus): This does not try and adjust the the projection -// // completely as explained above, instead it just matches the deref -// // to the first projection on right of the assignment. This does not -// // appear to be a problem at this point, but could cause more -// // incorrect projections downstream. -// Overlap::EqualOrDisjoint -// } -// ( -// ProjectionElem::Deref -// | ProjectionElem::Field(..) -// | ProjectionElem::Index(..) -// | ProjectionElem::ConstantIndex { .. } -// | ProjectionElem::OpaqueCast { .. } -// | ProjectionElem::UnwrapUnsafeBinder(_) -// | ProjectionElem::Subslice { .. } -// | ProjectionElem::Downcast(..), -// _, -// ) => panic!( -// "mismatched projections in place_element_conflict: {:?} and {:?}", -// pi1_elem, pi2_elem -// ), -// } -// } - pub struct PlaceConflictContext<'tcx> { tcx: TyCtxt<'tcx>, unique_ptr_def_id: DefId, diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 6e043a6aae..b06beb1d44 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -1,3 +1,6 @@ +use std::hash::Hash; +use std::time::Instant; + use super::{path_for_item, src_loc_for_span}; use crate::{ HashMap, HashSet, MarkerCtx, Pctx, @@ -20,7 +23,7 @@ use rustc_data_structures::fx::FxHashSet; use rustc_hir::def_id::DefId; use rustc_middle::{ mir, - ty::{Instance, TyCtxt, TypingEnv}, + ty::{self, Instance, TyCtxt, TypingEnv}, }; use either::Either; @@ -51,6 +54,8 @@ struct GraphAssembler<'tcx, 'a> { /// conversion. types: HashMap>, known_def_ids: &'a mut FxHashSet, + counter: u64, + started: Instant, } /// Create a global PDG (spanning the entire call tree) starting from the given @@ -95,6 +100,22 @@ pub fn assemble_pdg<'tcx>( let mut driver = VisitDriver::new(pdg_constructor, possible_generator_instance, k); let mut assembler = GraphAssembler::new(pctx.clone(), known_def_ids, base_body_def_id); + let print_estimated_size = |driver: &mut VisitDriver<'tcx, '_, u32>| { + if let Some(depth) = pctx.opts().anactrl().fail_on_deep_call_chain() { + let mut deepchain_printer = StacktracePrinter::new(tcx, depth); + driver.start(&mut deepchain_printer); + if deepchain_printer.locations_printed > 0 { + panic!("Printed a long chain"); + } + } + let mut estimator = flowistry_pdg_construction::GraphSizeEstimator::new(); + driver.start(&mut estimator); + println!( + "Graph for {}: {}", + tcx.def_path_str(target), + estimator.format_size() + ); + }; // If the top level function is async we start analysis from the generator // instead (because the async function itself is basically empty, it just @@ -109,6 +130,7 @@ pub fn assemble_pdg<'tcx>( location: RichLocation::Location(loc), }, |driver| { + print_estimated_size(driver); driver.start(&mut assembler); }, ); @@ -116,6 +138,7 @@ pub fn assemble_pdg<'tcx>( // have to manually sync up the actual arguments to the async function. assembler.fix_async_args(base_body_instance, loc, &mut driver); } else { + print_estimated_size(&mut driver); driver.start(&mut assembler); } let return_ = assembler.determine_return(); @@ -153,6 +176,8 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { known_def_ids, types: Default::default(), is_async, + counter: 0, + started: Instant::now(), } } @@ -176,6 +201,14 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { vis: &mut VisitDriver<'tcx, '_, K>, weight: &DepNode<'tcx, OneHopLocation>, ) -> GNode { + self.counter += 1; + if self.counter % 1_000_000 == 0 { + println!( + "[{}] Seen {} mil nodes", + self.started.elapsed().as_secs(), + self.counter / 1_000_000 + ); + } let weight = globalize_node(vis, weight, self.tcx()); let table = self.nodes.last_mut().unwrap(); let prior = table[node.index()]; @@ -196,6 +229,7 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { at: CallString, span: rustc_span::Span, is_arg: Option, + same: bool, ) -> GNode { GNode(self.graph.add_node(NodeInfo { at, @@ -205,6 +239,7 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { }, span: src_loc_for_span(span, self.tcx()), is_arg, + same, })) } @@ -285,21 +320,23 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { } else { (place.local.into(), place.projection.as_slice()) }; - trace!("Using base place {base_place:?} with projections {projections:?}"); + trace!( + "Using base place {base_place:?} in {} with projections {projections:?}", + tcx.def_path_str(function.def_id()) + ); let resolution = vis.current_function(); - // Thread through each caller to recover generic arguments let body = self.pctx.body_cache().get(function.def_id()).body(); - let raw_ty = base_place.ty(body, tcx); - let base_ty = try_monomorphize( + let ty = try_monomorphize( resolution, tcx, TypingEnv::fully_monomorphized(), - raw_ty, + body.local_decls[base_place.local].ty, span, ) .unwrap(); + let base_ty = place_ty_from_ty_and_projections(tcx, ty, base_place.projection); self.handle_node_types_helper(node, base_ty, projections); } @@ -570,6 +607,7 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { driver.globalize_location(&RichLocation::Start.into()), base_body.local_decls[arg].source_info.span, Some((arg.as_u32() - 1) as u16), + true, ) }) .collect::>(); @@ -578,6 +616,7 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { driver.globalize_location(&RichLocation::End.into()), base_body.local_decls[mir::RETURN_PLACE].source_info.span, None, + true, ); let mono_ty = |local: mir::Local| { @@ -605,6 +644,12 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { let local = mir::RETURN_PLACE; self.handle_node_types_helper(return_node, mono_ty(local), &[]); + let interpreter = + super::simple_interpreter::SimpleInterpreter::interpret_body(tcx, base_body).unwrap(); + + let coroutine_fields = + flowistry_pdg_construction::async_support::find_coroutine_assign(base_body).3; + // Establish connections to existing nodes let generator_loc = RichLocation::Location(loc); let transition_at = CallString::new(&[GlobalLocation { @@ -621,9 +666,10 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { else { continue; }; + if n.place.local.as_u32() == 1 && at.location == RichLocation::Start { let ridx = self.translate_node(nidx); - let Some(mir::ProjectionElem::Field(id, _)) = n.place.projection.first() else { + let Some(mir::ProjectionElem::Field(id, fty)) = n.place.projection.first() else { tcx.dcx().span_err( *span, format!("Expected field projection on async generator in {def_id:?}, found {:?}", n.place), @@ -631,7 +677,40 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { continue; }; - let arg = args_as_nodes[id.as_usize()]; + use rustc_middle::mir::Operand; + + let (Operand::Copy(op_in_parent) | Operand::Move(op_in_parent)) = + coroutine_fields[*id] + else { + tcx.dcx() + .span_err(*span, format!("Expected to find operator {id:?} in parent")); + continue; + }; + let target_arg = interpreter.resolve(op_in_parent).unwrap(); + + // Subtract for 0, which is always the return value + let target_local = target_arg.local.as_usize() - 1; + + let Some(arg) = args_as_nodes.get(target_local) else { + let inst = driver.current_function(); + for fun in [inst.def_id(), def_id] { + let mut f = std::io::BufWriter::new( + std::fs::File::create(format!("{}.mir", tcx.def_path_str(fun))) + .unwrap(), + ); + use std::io::Write; + write!(f, "{:#?}", self.ctx().body_cache().get(fun).body()).unwrap(); + } + tcx.dcx().span_err( + *span, + format!( + "Expected argument node for field {} ({fty:?}) in {}, found none", + id.as_usize(), + tcx.def_path_str(def_id) + ), + ); + continue; + }; self.graph.add_edge( arg.to_index(), ridx.to_index(), @@ -678,6 +757,18 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { } } +fn place_ty_from_ty_and_projections<'tcx>( + tcx: TyCtxt<'tcx>, + ty: ty::Ty<'tcx>, + projection: &[mir::PlaceElem<'tcx>], +) -> mir::PlaceTy<'tcx> { + projection + .iter() + .fold(mir::PlaceTy::from_ty(ty), |place_ty, &elem| { + place_ty.projection_ty(tcx, elem) + }) +} + fn globalize_node<'tcx, K: Clone>( vis: &mut VisitDriver<'tcx, '_, K>, node: &DepNode<'tcx, OneHopLocation>, @@ -695,6 +786,70 @@ fn globalize_node<'tcx, K: Clone>( }, }, is_arg: node.use_.as_arg(), + same: is_pure_node(vis, node), + } +} + +fn place_is_idempotent_projection(place: mir::Place<'_>) -> bool { + use mir::PlaceElem; + place.projection.iter().all(|p| { + matches!( + p, + PlaceElem::Deref | PlaceElem::OpaqueCast(_) + ) + }) +} + +fn is_pure_node<'tcx, K: Clone>( + vis: &VisitDriver<'tcx, '_, K>, + node: &DepNode<'tcx, OneHopLocation>, +) -> bool { + use mir::{Operand, Rvalue::*, StatementKind as Sk}; + let RichLocation::Location(loc) = node.at.location else { + // Only argument and return nodes appear at these locations and they + // are never operationally changed. + return true; + }; + match vis.current_body().stmt_at(loc) { + Either::Left(mir::Statement { kind, .. }) => match kind { + Sk::FakeRead(..) + | Sk::StorageLive(..) + | Sk::StorageDead(..) + | Sk::Retag(..) + | Sk::PlaceMention(..) + | Sk::AscribeUserType(..) + | Sk::Coverage(..) + | Sk::ConstEvalCounter + | Sk::Nop + | Sk::BackwardIncompatibleDropHint { .. } => true, + Sk::Intrinsic(..) | Sk::SetDiscriminant { .. } => false, + Sk::Assign(a) => { + // This is a conservative match. If the place is projected out + // at all, we say it's not the same. In theory we could, for + // example, check whether it gets assigned to another projection + // on the same type and would admit more patterns. + + let place = match &a.1 { + Use(op) | Cast(_, op, _) => match op { + Operand::Copy(place) | Operand::Move(place) => place, + Operand::Constant(_) | Operand::RuntimeChecks(_) => return true, + }, + + Ref(_, _, place) | RawPtr(_, place) => place, + Aggregate(_, aggs) => { + return aggs + .iter() + .filter_map(|o| o.place()) + .all(place_is_idempotent_projection) + } + ThreadLocalRef(..) | WrapUnsafeBinder(..) => return true, + Repeat(..) | BinaryOp(..) | UnaryOp(..) + | Discriminant(..) | CopyForDeref(..) => return false, + }; + place_is_idempotent_projection(*place) + } + }, + _ => false, } } @@ -730,7 +885,14 @@ impl<'tcx, K: std::hash::Hash + Eq + Clone> Visitor<'tcx, K> for GraphAssembler< _is_at_start: bool, ) { let [parent_table, this_table] = self.nodes.last_chunk_mut().unwrap(); - this_table[in_this.index()] = parent_table[in_caller.index()] + let parent_idx = parent_table[in_caller.index()]; + this_table[in_this.index()] = parent_idx; + + // We now know that this function is being inlined so we clear the + // purity flag in the parent which would have been set there previously + // as the statements were being traversed. + let weight = self.graph.node_weight_mut(parent_idx.0).unwrap(); + weight.same = true } fn visit_node( @@ -797,3 +959,51 @@ impl<'tcx, K: std::hash::Hash + Eq + Clone> Visitor<'tcx, K> for GraphAssembler< ); } } + +struct StacktracePrinter<'tcx> { + tcx: TyCtxt<'tcx>, + locations_printed: u32, + target_depth: usize, +} + +impl<'tcx> StacktracePrinter<'tcx> { + fn new(tcx: TyCtxt<'tcx>, target_depth: u32) -> Self { + Self { + tcx, + locations_printed: 0, + target_depth: target_depth as usize, + } + } +} + +impl<'tcx> Visitor<'tcx, u32> for StacktracePrinter<'tcx> { + fn visit_partial_graph( + &mut self, + vis: &mut VisitDriver<'tcx, '_, u32>, + partial_graph: &PartialGraph<'tcx, u32>, + ) { + let call_string = vis.call_stack(); + if self.target_depth <= call_string.len() && self.locations_printed < 5 { + self.locations_printed += 1; + println!( + "Found deep graph for {}", + self.tcx.def_path_str(partial_graph.def_id()) + ); + for loc in call_string.iter().rev() { + let fun = loc.function; + + let span = self.tcx.def_span(fun); + let smap = &self.tcx.sess.source_map(); + let (_, start, ..) = smap.span_to_location_info(span); + println!( + " Called from {:100} {}:{start}", + self.tcx.def_path_str(fun), + smap.span_to_filename(span) + .display(rustc_span::RemapPathScopeComponents::empty()), + ) + } + println!(); + } + vis.visit_partial_graph(self, partial_graph); + } +} diff --git a/crates/paralegal-flow/src/ana/inline_judge.rs b/crates/paralegal-flow/src/ana/inline_judge.rs index 06bef2749f..e5824e1bc7 100644 --- a/crates/paralegal-flow/src/ana/inline_judge.rs +++ b/crates/paralegal-flow/src/ana/inline_judge.rs @@ -5,7 +5,7 @@ use paralegal_spdg::{Identifier, utils::write_sep}; use rustc_hir::def_id::{CrateNum, DefId}; use rustc_middle::ty::{ self, AssocKind, BoundVariableKind, Clause, ClauseKind, Instance, ProjectionPredicate, - TraitPredicate, TypingEnv, + TraitPredicate, TyCtxt, TypingEnv, }; use rustc_span::Span; use rustc_type_ir::{PredicatePolarity, TyKind}; @@ -87,7 +87,7 @@ impl<'tcx> InlineJudge<'tcx> { .marker_if_unloadable(marker_target_def_id) .is_some() => { - InlineJudgement::AbstractViaType("cannot unloadable item") + InlineJudgement::AbstractViaType("unloadable item") } _ if self.ctx.tcx().is_constructor(marker_target_def_id) => { // This is an enum constructor. It would be better to handle @@ -135,14 +135,20 @@ impl<'tcx> InlineJudge<'tcx> { InliningDepth::Unconstrained => InlineJudgement::Inline(false), }; if let InlineJudgement::AbstractViaType(reason) = judgement { - let emit_err = !(is_marked || self.ctx.opts().relaxed()); - self.ensure_is_safe_to_approximate( - info.param_env, - info.callee, - info.span, - emit_err, - reason, - ) + if !self + .marker_ctx() + .all_markers_associated_with(marker_target_def_id) + .any(|m| m.as_str().starts_with("std:")) + { + let emit_err = !(is_marked || self.ctx.opts().relaxed()); + self.ensure_is_safe_to_approximate( + info.param_env, + info.callee, + info.span, + emit_err, + reason, + ) + } } judgement } @@ -192,13 +198,34 @@ struct SafetyChecker<'tcx> { reason: &'static str, } +struct PpInst<'tcx> { + tcx: TyCtxt<'tcx>, + i: Instance<'tcx>, +} + +impl std::fmt::Display for PpInst<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.tcx.def_path_str(self.i.def_id()))?; + if !self.i.args.is_empty() { + write!(f, "<")?; + write_sep(f, ",", self.i.args, |a, fmt| write!(fmt, "{:?}", a))?; + write!(f, ">")?; + } + Ok(()) + } +} + impl<'tcx> SafetyChecker<'tcx> { /// Emit an error or a warning with some preformatted messaging. fn err(&self, s: &str, span: Span) { let sess = self.ctx.tcx().dcx(); let msg = format!( - "the call to {:?} is not safe to abstract as demanded by '{}', because of: {s}", - self.resolved, self.reason + "the call to {} is not safe to abstract as demanded by '{}', because of: {s}", + PpInst { + i: self.resolved, + tcx: self.ctx.tcx() + }, + self.reason ); if self.emit_err { let mut diagnostic = sess.struct_span_err(span, msg); @@ -213,11 +240,20 @@ impl<'tcx> SafetyChecker<'tcx> { /// Emit an error that mentions the `markers` found fn err_markers(&self, s: &str, markers: &[Identifier], span: Span) { - if !markers.is_empty() { + let mut markers = markers + .iter() + .filter(|m| !m.as_str().starts_with("std:")) + .peekable(); + if markers.peek().is_some() { self.err( &format!( "{s}: found marker(s) {}", - Print(|fmt| write_sep(fmt, ", ", markers, |elem, fmt| write!(fmt, "'{elem}'"))) + Print( + |fmt| write_sep(fmt, ", ", markers.clone(), |elem, fmt| write!( + fmt, + "'{elem}'" + )) + ) ), span, ); diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 58898f7538..bc9acaa001 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -41,6 +41,7 @@ use tracing::{debug, info}; mod graph_converter; mod inline_judge; +mod simple_interpreter; use std::time::Duration; diff --git a/crates/paralegal-flow/src/ana/simple_interpreter.rs b/crates/paralegal-flow/src/ana/simple_interpreter.rs new file mode 100644 index 0000000000..ae5c90fe2f --- /dev/null +++ b/crates/paralegal-flow/src/ana/simple_interpreter.rs @@ -0,0 +1,186 @@ +use rustc_middle::{ + mir::{self, visit, Operand, Place, Rvalue}, + ty::TyCtxt, +}; + +#[derive(Clone)] +enum InterpretationState<'tcx> { + Uninitialized, + Argument, + Set(Place<'tcx>), + Err(String), +} + +pub struct SimpleInterpreter<'tcx> { + tcx: TyCtxt<'tcx>, + locals: Vec>, +} + +impl<'tcx> SimpleInterpreter<'tcx> { + fn new(tcx: TyCtxt<'tcx>, num_args: usize, num_locals: usize) -> Self { + assert!(num_args < num_locals); + let mut locals = vec![InterpretationState::Uninitialized; num_locals]; + for i in 0..(num_args + 1) { + locals[i] = InterpretationState::Argument; + } + Self { tcx, locals } + } + + pub fn resolve(&self, place: Place<'tcx>) -> Result, String> { + match self.locals.get(place.local.as_usize()) { + Some(InterpretationState::Argument) => Ok(place), + Some(InterpretationState::Set(p)) => self + .resolve(*p) + .map(|p| p.project_deeper(place.projection, self.tcx)), + Some(InterpretationState::Err(e)) => Err(e.clone()), + _ => Err(format!("Uninitialized local {place:?}")), + } + } + + pub fn interpret_body(tcx: TyCtxt<'tcx>, body: &mir::Body<'tcx>) -> Result { + let mut interpreter = SimpleInterpreter::new(tcx, body.arg_count, body.local_decls.len()); + use visit::Visitor; + interpreter.visit_body(body); + Ok(interpreter) + } +} + +impl<'tcx> visit::Visitor<'tcx> for SimpleInterpreter<'tcx> { + fn visit_assign( + &mut self, + place: &Place<'tcx>, + rvalue: &Rvalue<'tcx>, + location: mir::Location, + ) { + let state = &mut self.locals[place.local.as_usize()]; + match rvalue { + Rvalue::Use(Operand::Move(place)) | Rvalue::Use(Operand::Copy(place)) => match state { + InterpretationState::Uninitialized => { + *state = InterpretationState::Set(*place); + } + InterpretationState::Set(prior) => { + *state = InterpretationState::Err(format!("Duplicate assignment to local {place:?} at {location:?}. Previous assignment {prior:?}")); + } + _ => (), + }, + _ => { + *state = InterpretationState::Err(format!("Unsupported rvalue: {rvalue:?}")); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rustc_abi::FieldIdx; + use rustc_borrowck::consumers::BodyWithBorrowckFacts; + use rustc_middle::mir::{self, ProjectionElem}; + use paralegal_rustc_utils::test_utils::CompileBuilder; + use std::collections::HashMap; + + fn clear_projection_elem(e: ProjectionElem) -> ProjectionElem<(), ()> { + match e { + ProjectionElem::Deref => ProjectionElem::Deref, + ProjectionElem::Field(f, _) => ProjectionElem::Field(f, ()), + ProjectionElem::Index(_) => ProjectionElem::Index(()), + ProjectionElem::ConstantIndex { + offset, + min_length, + from_end, + } => ProjectionElem::ConstantIndex { + offset, + min_length, + from_end, + }, + ProjectionElem::Subslice { from, to, from_end } => { + ProjectionElem::Subslice { from, to, from_end } + } + ProjectionElem::Downcast(ty, variant_index) => { + ProjectionElem::Downcast(ty, variant_index) + } + ProjectionElem::OpaqueCast(..) => ProjectionElem::OpaqueCast(()), + ProjectionElem::UnwrapUnsafeBinder(_) => ProjectionElem::UnwrapUnsafeBinder(()), + } + } + + fn projections_from_string(s: &str) -> Vec> { + s.split_whitespace() + .map(|s| match s { + _ if s.starts_with('.') => { + ProjectionElem::Field(FieldIdx::from_u32(s[1..].parse().unwrap()), ()) + } + "*" => ProjectionElem::Deref, + _ => panic!("Invalid projection element: {s}"), + }) + .collect() + } + + struct TestHelper<'tcx> { + name_map: HashMap<&'tcx str, Place<'tcx>>, + tcx: TyCtxt<'tcx>, + body: &'tcx BodyWithBorrowckFacts<'tcx>, + interpreter: SimpleInterpreter<'tcx>, + } + + impl<'tcx> TestHelper<'tcx> { + fn new(tcx: TyCtxt<'tcx>, body: &'tcx BodyWithBorrowckFacts<'tcx>) -> Self { + let name_map = body + .body + .var_debug_info + .iter() + .filter_map(|info| { + if let mir::VarDebugInfoContents::Place(p) = info.value { + Some((info.name.as_str(), p)) + } else { + None + } + }) + .collect::>(); + let interpreter = SimpleInterpreter::interpret_body(tcx, &body.body).unwrap(); + Self { + tcx, + body, + interpreter, + name_map, + } + } + + fn assert_resolves_to(&self, name: &str, base: &str, projections: &str) { + let start = self.name_map[name]; + let base = self.name_map[base]; + let resolved = self.interpreter.resolve(start).unwrap(); + assert_eq!(resolved.local, base.local); + assert_eq!( + resolved + .projection + .iter() + .map(clear_projection_elem) + .collect::>(), + base.projection + .iter() + .map(clear_projection_elem) + .chain(projections_from_string(projections)) + .collect::>() + ); + } + } + + #[test] + #[ignore = "Somehow this can't load MIR bodies???"] + fn test_simple_interpreter() { + CompileBuilder::new(stringify! { + fn foo(x: ((i32, i32), i32)) { + let y = x.0; + let z = y.0; + } + }) + .compile(|result| { + let (_, body) = result.as_body(); + let helper = TestHelper::new(result.tcx, body); + helper.assert_resolves_to("y", "x", ".0"); + helper.assert_resolves_to("z", "x", ".0 .0"); + }) + .unwrap(); + } +} diff --git a/crates/paralegal-flow/src/ann/db/mod.rs b/crates/paralegal-flow/src/ann/db/mod.rs index 21be999b75..741758c15f 100644 --- a/crates/paralegal-flow/src/ann/db/mod.rs +++ b/crates/paralegal-flow/src/ann/db/mod.rs @@ -16,9 +16,7 @@ use crate::{ args::{Args, Stub}, utils::{ self, IntoDefId, is_function_like, - resolve::{ - expect_resolve_string_to_def_id, report_resolution_err, resolve_string_to_def_id, - }, + resolve::{self, expect_resolve_string_to_def_id, resolve_string_to_def_id}, }, }; use flowistry_pdg_construction::source_access::{ @@ -28,6 +26,8 @@ use itertools::Itertools; use paralegal_flowistry::mir::FlowistryInput; use paralegal_spdg::{Identifier, TypeId}; +pub use paralegal_spdg::AutoMarkers; + use paralegal_rustc_utils::cache::Cache; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashSet; @@ -474,44 +474,6 @@ impl ItemMarkers { } } -pub struct AutoMarkers { - pub side_effect_unknown_virtual: Identifier, - pub side_effect_foreign: Identifier, - pub side_effect_unknown_fn_ptr: Identifier, - pub side_effect_raw_ptr: Identifier, - pub side_effect_transmute: Identifier, - pub side_effect_unknown: Identifier, - pub side_effect_intrinsic: Identifier, -} - -impl Default for AutoMarkers { - fn default() -> Self { - AutoMarkers { - side_effect_unknown_virtual: Identifier::new_intern("auto:side-effect:unknown:virtual"), - side_effect_foreign: Identifier::new_intern("auto:side-effect:foreign"), - side_effect_unknown_fn_ptr: Identifier::new_intern("auto:side-effect:unknown:fn-ptr"), - side_effect_raw_ptr: Identifier::new_intern("auto:side-effect:raw-ptr"), - side_effect_transmute: Identifier::new_intern("auto:side-effect:transmute"), - side_effect_unknown: Identifier::new_intern("auto:side-effect:unknown"), - side_effect_intrinsic: Identifier::new_intern("auto:side-effect:intrinsic"), - } - } -} - -impl AutoMarkers { - pub fn all(&self) -> [Identifier; 7] { - [ - self.side_effect_unknown_virtual, - self.side_effect_foreign, - self.side_effect_unknown_fn_ptr, - self.side_effect_raw_ptr, - self.side_effect_transmute, - self.side_effect_unknown, - self.side_effect_intrinsic, - ] - } -} - pub struct FunctionMarkerStat<'tcx> { pub function: MaybeMonomorphized<'tcx>, pub is_constructor: bool, @@ -574,10 +536,10 @@ impl<'tcx> MarkerDatabase<'tcx> { .build_config() .stubs .iter() - .filter_map(|(k, v)| { - let res = expect_resolve_string_to_def_id(tcx, k, args.relaxed()); - let res = if args.relaxed() { res? } else { res.unwrap() }; - Some((res, v)) + .flat_map(|(k, v)| { + expect_resolve_string_to_def_id(tcx, k, args.relaxed()) + .into_iter() + .zip(std::iter::repeat(v)) }) .collect(); let included_crates = Rc::new(args.anactrl().inclusion_predicate(tcx)); @@ -595,6 +557,11 @@ impl<'tcx> MarkerDatabase<'tcx> { for ann in anns { match ann { Annotation::Marker(r) => { + trace!( + "Assigning marker {} to {}", + r.marker, + tcx.def_path_str(item) + ); for s in refinement_to_selectors(r.refinement) { markers .entry(item) @@ -783,8 +750,7 @@ fn parse_external_marker_file(s: &str) -> anyhow::Result { /// Given the TOML of external annotations we have parsed, resolve the paths /// (keys of the map) to [`DefId`]s. fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { - //let relaxed = opts.relaxed(); - let relaxed = false; + let relaxed = opts.relaxed(); if let Some(annotation_file) = opts.marker_control().external_annotations() { let data = std::fs::read_to_string(annotation_file).unwrap_or_else(|_| { panic!( @@ -806,21 +772,23 @@ fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { let new_map: ExternalMarkers = from_toml .iter() - .filter_map(|(path, entries)| { + .flat_map(|(path, entries)| { let res = resolve_string_to_def_id(tcx, path); let must_succeed = entries .iter() .any(|entry| !entry.refinement._internal_can_fail_resolve_silently); - let def_id = match res { - Err(e) if !must_succeed => { + let def_ids = res.unwrap_or_else(|e| { + if !must_succeed { trace!("Failed to resolve path {}: {:?}", path, e); - return None; + } else { + tcx.dcx() + .err(format!("Failed to resolve path {}: {:?}", path, e)); } - _ => report_resolution_err(tcx, path, relaxed, res)?, - }; - Some((def_id, entries)) + vec![] + }); + def_ids.into_iter().zip(std::iter::repeat(entries)) }) - .flat_map(|(def_id, entries)| { + .flat_map(|(res, entries)| { let on_module_children = entries .iter() .fold(None, |acc, entry| { @@ -829,7 +797,7 @@ fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { }) { tcx.dcx().err(format!( "Conflicting use of `on_all_module_children` on {}", - tcx.def_path_str(def_id) + res.as_string(tcx) )); } Some( @@ -838,20 +806,34 @@ fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { ) }) .unwrap_or(false); - let def_kind = tcx.def_kind(def_id); - let only_self = [def_id]; + let only_self = match res { + resolve::Res::Def(_, id) => Box::new([id]) as Box<[_]>, + _ => Box::new([]), + }; let def_ids = if on_module_children { - let defs = match def_kind { - DefKind::Struct | DefKind::Enum => tcx.inherent_impls(def_id), - DefKind::Mod | DefKind::Impl { .. } => &only_self, - _ => panic!( - "Expected module-like def kind for {}, got {def_kind:?}", - tcx.def_path_str(def_id) - ), + let defs = match res { + resolve::Res::PrimTy(t) => tcx.incoherent_impls(t), + resolve::Res::Def(_, id) => { + let def_kind =tcx.def_kind(id); + if !matches!(def_kind, DefKind::Impl {..} | DefKind::Struct | DefKind::Enum | DefKind::Mod | DefKind::Trait) { + tcx.dcx().err(format!("on_all_module_children used on a non-module-like item {}, has kind {def_kind:?}", tcx.def_path_str(id))); + }; + &only_self + }, }; + utils::flatten_child_items(tcx, defs.iter().copied()) } else { - [def_id].into_iter().collect() + match res { + resolve::Res::Def(_, id) => Some(id), + resolve::Res::PrimTy(t) => { + tcx.dcx() + .err(format!("Cannot assign markers to primitive type {t:?}")); + None + } + } + .into_iter() + .collect() }; def_ids.into_iter().flat_map(|def_id| { entries.iter().map(move |entry| { @@ -860,7 +842,22 @@ fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { }) }) }) - .collect(); + .into_grouping_map() + .reduce(|mut one: Vec<_>, _, mut other| { + one.extend(other.drain(..)); + one + }); + // let mut f = std::io::BufWriter::new( + // std::fs::File::create("computed-external-marker-assignments.txt").unwrap(), + // ); + // use std::io::Write; + // for (item, markers) in new_map.iter() { + // write!(f, "{:20} : ", tcx.def_path_str(item)).unwrap(); + // for ann in markers.iter() { + // write!(f, "{}, ", ann.marker).unwrap(); + // } + // writeln!(f).unwrap(); + // } new_map } else { HashMap::new() diff --git a/crates/paralegal-flow/src/ann/db/reachable.rs b/crates/paralegal-flow/src/ann/db/reachable.rs index 86c4816e5a..58956d94cf 100644 --- a/crates/paralegal-flow/src/ann/db/reachable.rs +++ b/crates/paralegal-flow/src/ann/db/reachable.rs @@ -36,6 +36,10 @@ impl<'tcx> MarkerCtx<'tcx> { pub fn get_reachable_markers(&self, res: impl Into>) -> &[Identifier] { let res = res.into(); let def_id = res.def_id(); + trace!( + "Checking reachable markers for {}", + self.tcx().def_path_str(def_id) + ); let mark_side_effects = self.db().config.marker_control().mark_side_effects(); if self.is_marked(def_id) { trace!(" Is marked"); @@ -75,7 +79,10 @@ impl<'tcx> MarkerCtx<'tcx> { } let non_direct = (!is_self_marked).then(|| self.get_reachable_markers(res)); + let filter_std_markers = self.db().config.marker_control().elide_on_whitelist_markers(); + direct_markers.chain(non_direct.into_iter().flatten().copied()) + .filter(move |m| !filter_std_markers || !(m.as_str().starts_with("std:") || m.as_str().starts_with("safe-libs:"))) } /// If the transitive marker cache did not contain the answer, this is what @@ -120,6 +127,7 @@ impl<'tcx> MarkerCtx<'tcx> { use mir::visit::Visitor; vis.visit_body(&mono_body); let found = vis.found_markers; + trace!("Found markers {found:?}"); found.into_iter().collect() } @@ -211,7 +219,7 @@ impl<'tcx> MarkerCtx<'tcx> { // because we are not given the closure function, instead // we are provided the id of the function that creates the // future. As such we can't just monomorphize and traverse, - // we have to find the generator first. + // we have to find the generator firstparalegal_. if let ty::TyKind::Alias(alias) = local_decls[mir::RETURN_PLACE].ty.kind() && matches!(alias.kind, ty::AliasTyKind::Opaque { .. }) && let ty::AliasTyKind::Opaque { def_id, .. } = alias.kind diff --git a/crates/paralegal-flow/src/ann/parse.rs b/crates/paralegal-flow/src/ann/parse.rs index 7452c7d29e..5b7af48f30 100644 --- a/crates/paralegal-flow/src/ann/parse.rs +++ b/crates/paralegal-flow/src/ann/parse.rs @@ -505,7 +505,7 @@ pub(crate) fn otype_ann_match(ann: &hir::AttrArgs, tcx: TyCtxt) -> Result FxHashSet { diff --git a/crates/paralegal-flow/src/args.rs b/crates/paralegal-flow/src/args.rs index 8a54cf2233..ca00a08428 100644 --- a/crates/paralegal-flow/src/args.rs +++ b/crates/paralegal-flow/src/args.rs @@ -251,6 +251,27 @@ pub struct AnalysisCtrl { included_crate_cache: OnceLock>, no_pdg_cache: bool, include_std: bool, + fail_on_deep_call_chain: Option, +} + +impl std::hash::Hash for AnalysisCtrl { + fn hash(&self, state: &mut H) { + let Self { + analyze, + inlining_depth, + include, + included_crate_cache: _, + no_pdg_cache, + include_std, + fail_on_deep_call_chain + } = self; + analyze.hash(state); + inlining_depth.hash(state); + include.hash(state); + no_pdg_cache.hash(state); + include_std.hash(state); + fail_on_deep_call_chain.hash(state); + } } impl Default for AnalysisCtrl { @@ -262,6 +283,7 @@ impl Default for AnalysisCtrl { no_pdg_cache: false, included_crate_cache: OnceLock::new(), include_std: false, + fail_on_deep_call_chain: None, } } } @@ -277,6 +299,7 @@ impl TryFrom for AnalysisCtrl { no_adaptive_approximation, k_depth, include_std, + fail_on_deep_call_chain } = value; let inlining_depth = if no_interprocedural_analysis { @@ -294,11 +317,12 @@ impl TryFrom for AnalysisCtrl { no_pdg_cache, included_crate_cache: OnceLock::new(), include_std, + fail_on_deep_call_chain }) } } -#[derive(strum::EnumIs, strum::AsRefStr, Clone)] +#[derive(strum::EnumIs, strum::AsRefStr, Clone, Hash)] pub enum InliningDepth { /// Inline to arbitrary depth Unconstrained, @@ -390,6 +414,10 @@ impl AnalysisCtrl { pub fn include_std(&self) -> bool { self.include_std } + + pub fn fail_on_deep_call_chain(&self) -> Option { + self.fail_on_deep_call_chain + } } impl DumpArgs { diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index 82de7cbb23..55e37a99e8 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -66,8 +66,8 @@ impl<'tcx> CollectingVisitor<'tcx> { .anactrl() .selected_targets() .iter() - .filter_map(|path| { - let def_id = expect_resolve_string_to_def_id(tcx, path, opts.relaxed())?; + .flat_map(|path| expect_resolve_string_to_def_id(tcx, path, opts.relaxed())) + .filter_map(|def_id| { if !def_id.is_local() { tcx.dcx().span_err(tcx.def_span(def_id), format!("found an external function {def_id:?} as analysis target. Analysis targets are required to be local.")); return None; diff --git a/crates/paralegal-flow/src/main.rs b/crates/paralegal-flow/src/main.rs index a69dc4a862..bc293917ca 100644 --- a/crates/paralegal-flow/src/main.rs +++ b/crates/paralegal-flow/src/main.rs @@ -35,10 +35,11 @@ impl VersionArgs { } let second = chars.next(); if second == Some('-') { - self.handle_long_arg(&a[2..]) - } - for c in second.into_iter().chain(chars) { - self.handle_short_arg(c); + self.handle_long_arg(&a[2..]); + } else { + for c in second.into_iter().chain(chars) { + self.handle_short_arg(c); + } } } diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index b86a943482..4ec603f375 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -29,7 +29,7 @@ use std::{ use paralegal_spdg::{ DefInfo, DisplayPath, EdgeInfo, Endpoint, FileSystemStorable, InstructionInfo, InstructionKind, Node, NodeInfo, NodeKind, ParalegalArtifact, SPDG, TypeId, - traverse::{EdgeSelection, generic_flows_to, generic_influencers}, + traverse::{EdgeSelection, generic_flows_to, generic_influencers, edge_generic_flows_to}, utils::{display_list, write_sep}, }; @@ -945,6 +945,22 @@ impl<'g> CtrlRef<'g> { } } + pub fn arguments_by_index(&'g self) -> Vec> { + let mut args = vec![]; + for n in self.ctrl.arguments.iter() { + let w = self.ctrl.graph.node_weight(*n).unwrap(); + let Some(a) = w.is_arg else { continue }; + if (a as usize) >= args.len() { + args.resize_with((a as usize) + 1, || NodeRefs { + graph: self, + nodes: vec![], + }); + } + args[a as usize].nodes.push(*n); + } + args + } + pub fn constants(&'g self) -> impl Iterator, Constant)> { let spdg = self.spdg(); spdg.all_sources().filter_map(|node| { @@ -1008,6 +1024,40 @@ impl<'g> CtrlRef<'g> { let auto = auto_markers.all(); auto.into_iter().flat_map(|m| self.marked(m)) } + + pub fn show_side_effects(&self, show_trace: bool) { + let auto_markers = AutoMarkers::default(); + let auto = auto_markers.all(); + for m in auto { + let marked = self.marked(m); + if !marked.is_empty() { + println!("Side effect {m}"); + } + for n in marked { + let d = DisplayPath::from( + &self.graph().desc.def_info[&n.info().at.leaf().function].path, + ); + println!( + "{} in {} in {}", + n.info().kind, + n.instruction_info().description, + d + ); + if !show_trace { + continue; + } + for loc in n.info().at.iter() { + let i_info = &self.graph.desc.instruction_info[&loc]; + let d_info = &self.graph().desc.def_info[&loc.function]; + println!( + " called from {} {}", + DisplayPath::from(&d_info.path), + i_info.span + ); + } + } + } + } } impl<'g> HasGraph<'g> for &FnRef<'g> { @@ -1365,6 +1415,27 @@ pub trait FlowsTo { .copied() .any(|n| self.spdg().graph.neighbors(n).next().is_some()) } + + fn flows_to_unchanged(&self, other: &impl FlowsTo) -> bool { + if self.spdg_ident() != other.spdg_ident() { + return false; + } + + let graph = &self.spdg().graph; + + let fgraph = petgraph::visit::NodeFiltered::from_fn(graph, |n| { + graph.node_weight(n).unwrap().same + || self.nodes().contains(&n) + || other.nodes().contains(&n) + }); + + generic_flows_to( + self.nodes().iter().copied(), + &fgraph, + other.nodes().iter().copied(), + ) + .is_some() + } } fn influences_ctrl_impl( @@ -1382,7 +1453,7 @@ fn influences_ctrl_impl( EdgeSelection::Control, ); - generic_flows_to( + edge_generic_flows_to( slf.nodes().iter().copied(), edge_selection, slf.spdg(), @@ -1426,7 +1497,7 @@ fn flows_to_impl( if slf.spdg_ident() != other.spdg_ident() { return false; } - generic_flows_to( + edge_generic_flows_to( slf.nodes().iter().copied(), edge_selection, slf.spdg(), diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index 074c89f055..a2eb6de31c 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -5,7 +5,7 @@ use flowistry_pdg::RichLocation; use flowistry_pdg_construction::{is_async_trait_fn, source_access::BodyCache, utils::type_as_fn}; use paralegal_flowistry::mir::FlowistryInput; use thiserror::Error; -use tracing::info; +use tracing::{info, trace}; use crate::{Either, Symbol, TyCtxt, desc::Identifier, rustc_span::ErrorGuaranteed}; pub use flowistry_pdg_construction::utils::is_virtual; @@ -794,21 +794,49 @@ pub fn flatten_child_items( ) -> FxHashSet { use rustc_hir::def::DefKind; let mut queue: Vec<_> = modules.into_iter().collect(); + trace!("flatten_child_items starting with: {:?}", queue); let mut seen = FxHashSet::default(); seen.extend(queue.iter().cloned()); let mut result = FxHashSet::default(); while let Some(module) = queue.pop() { - let children = match tcx.def_kind(module) { - DefKind::Mod => Box::new( - if let Some(local) = module.as_local() { + let def_kind = tcx.def_kind(module); + trace!( + "Processing item: {} with def kind {:?}", + tcx.def_path_str(module), + def_kind + ); + let children = match def_kind { + DefKind::Mod => { + let it = if let Some(local) = module.as_local() { tcx.module_children_local(local) } else { tcx.module_children(module) } .iter() - .filter_map(|c| c.res.opt_def_id()), - ) as Box>, + .filter_map(|c| c.res.opt_def_id()) + .filter(|c| { + let parent = tcx.opt_parent(*c); + assert!( + parent.is_some() || c.is_crate_root(), + "{c:?} has no parent but isn't a crate (is {:?})", + tcx.def_kind(*c) + ); + parent.is_some_and(|parent| parent == module) + }) + .chain( + // Trait impls are not contained in `module_children` where + // they are defined, but instead associated with the crate + // itself. + module + .as_crate_root() + .map(|c| tcx.trait_impls_in_crate(c)) + .into_iter() + .flatten() + .copied(), + ); + Box::new(it) as Box> + } DefKind::Impl { .. } => Box::new( tcx.associated_items(module) .in_definition_order() @@ -817,10 +845,18 @@ pub fn flatten_child_items( DefKind::Struct | DefKind::Enum => { Box::new(tcx.inherent_impls(module).iter().copied()) as Box<_> } - _ => continue, + _ => { + continue; + } }; for id in children { - match tcx.def_kind(id) { + let def_kind = tcx.def_kind(id); + trace!( + "Processing child item: {} with def kind {:?}", + tcx.def_path_str(id), + def_kind + ); + match def_kind { DefKind::Struct | DefKind::Enum | DefKind::Mod | DefKind::Impl { .. } if !seen.contains(&id) => { diff --git a/crates/paralegal-flow/src/utils/resolve.rs b/crates/paralegal-flow/src/utils/resolve.rs index 1376cb663e..9bca5b3f62 100644 --- a/crates/paralegal-flow/src/utils/resolve.rs +++ b/crates/paralegal-flow/src/utils/resolve.rs @@ -1,4 +1,4 @@ -use std::hash::Hash; +use std::{hash::Hash, panic}; use ast::Mutability; use hir::{ @@ -8,17 +8,18 @@ use hir::{ def_id::LOCAL_CRATE, def_id::LocalDefId, }; -use rustc_ast::{self as ast, ExprKind, PathSegment, QSelf, Ty, TyKind, token::TokenKind}; +use rustc_ast::{self as ast, ExprKind, GenericBound, PathSegment, QSelf, TraitObjectSyntax, Ty, TyKind, token::TokenKind}; use rustc_data_structures::stable_hasher::StableHasher; use rustc_hir::{self as hir, def_id::DefId}; use rustc_middle::ty::{self, FloatTy, IntTy, TyCtxt, UintTy, fast_reject::SimplifiedType}; use rustc_parse::{lexer::StripTokens, new_parser_from_source_str}; use rustc_span::Symbol; +use tracing::{trace, warn}; #[derive(Debug, Clone, Copy)] pub enum Res { Def(DefKind, DefId), - PrimTy(PrimTy), + PrimTy(SimplifiedType), } #[derive(Clone, Debug)] @@ -50,7 +51,7 @@ impl Res { fn from_def_res(res: def::Res) -> Result { match res { def::Res::Def(k, i) => Ok(Res::Def(k, i)), - def::Res::PrimTy(t) => Ok(Res::PrimTy(t)), + def::Res::PrimTy(t) => Ok(Res::PrimTy(prim_ty_to_simp_ty(t))), other => Err(ResolutionError::UnconvertibleRes(other)), } } @@ -62,6 +63,24 @@ impl Res { panic!("Not a def") } } + + pub fn as_string(&self, tcx: TyCtxt<'_>) -> String { + match self { + Res::Def(_, id) => tcx.def_path_str(*id), + Res::PrimTy(ty) => format!("{ty:?}"), + } + } +} + +fn prim_ty_to_simp_ty(pt: PrimTy) -> SimplifiedType { + match pt { + PrimTy::Bool => SimplifiedType::Bool, + PrimTy::Char => SimplifiedType::Char, + PrimTy::Str => SimplifiedType::Str, + PrimTy::Int(i) => SimplifiedType::Int(i.into()), + PrimTy::Uint(u) => SimplifiedType::Uint(u.into()), + PrimTy::Float(f) => SimplifiedType::Float(f.into()), + } } fn as_primitive_ty(name: Symbol) -> Option { @@ -103,7 +122,7 @@ fn find_primitive_impls(tcx: TyCtxt<'_>, name: Symbol) -> impl Iterator Option { +pub fn expect_resolve_string_to_def_id(tcx: TyCtxt, path: &str, relaxed: bool) -> Vec { report_resolution_err(tcx, path, relaxed, resolve_string_to_def_id(tcx, path)) } @@ -111,8 +130,8 @@ pub fn report_resolution_err( tcx: TyCtxt<'_>, path: &str, relaxed: bool, - obj: Result, -) -> Option { + obj: Result>, +) -> Vec { let report_err = if relaxed { |tcx: TyCtxt<'_>, err: String| { tcx.dcx().warn(err); @@ -122,20 +141,25 @@ pub fn report_resolution_err( tcx.dcx().err(err); } }; - let res = obj + let Some(res) = obj .map_err(|e| report_err(tcx, format!("Could not resolve {path}: {e:?}"))) - .ok()?; - match res { - Res::Def(_, did) => Some(did), - other => { - let msg = format!("expected {path} to resolve to an item, got {other:?}"); - report_err(tcx, msg); - None - } - } + .ok() + else { + return vec![]; + }; + res.into_iter() + .filter_map(|res| match res { + Res::Def(_, did) => Some(did), + other => { + let msg = format!("expected {path} to resolve to an item, got {other:?}"); + report_err(tcx, msg); + None + } + }) + .collect() } -pub fn resolve_string_to_def_id(tcx: TyCtxt, path: &str) -> Result { +pub fn resolve_string_to_def_id(tcx: TyCtxt, path: &str) -> Result> { let mut hasher = StableHasher::new(); path.hash(&mut hasher); let mut parser = new_parser_from_source_str( @@ -242,16 +266,20 @@ fn find_crates(tcx: TyCtxt<'_>, name: Symbol) -> impl Iterator + ' fn resolve_ty<'tcx>(tcx: TyCtxt<'tcx>, t: &Ty) -> Result> { match &t.kind { TyKind::Path(qslf, pth) => { - let adt = def_path_res(tcx, qslf.as_deref(), pth.segments.as_slice())?; + let results = def_path_res(tcx, qslf.as_deref(), pth.segments.as_slice())?; + let [adt] = results.as_slice() else { + return Err(ResolutionError::UnsupportedType(t.clone())); + }; Ok(match adt { - Res::Def(_, did) => ty::Ty::new_adt(tcx, tcx.adt_def(did), ty::List::empty()), - Res::PrimTy(t) => match t { - PrimTy::Bool => tcx.types.bool, - PrimTy::Char => tcx.types.char, - PrimTy::Str => tcx.types.str_, - PrimTy::Int(i) => ty::Ty::new_int(tcx, i), - PrimTy::Uint(u) => ty::Ty::new_uint(tcx, u), - PrimTy::Float(f) => ty::Ty::new_float(tcx, f), + Res::Def(_, did) => ty::Ty::new_adt(tcx, tcx.adt_def(*did), ty::List::empty()), + Res::PrimTy(simp) => match simp { + SimplifiedType::Bool => tcx.types.bool, + SimplifiedType::Char => tcx.types.char, + SimplifiedType::Str => tcx.types.str_, + SimplifiedType::Int(i) => ty::Ty::new_int(tcx, *i), + SimplifiedType::Uint(u) => ty::Ty::new_uint(tcx, *u), + SimplifiedType::Float(f) => ty::Ty::new_float(tcx, *f), + _ => return Err(ResolutionError::UnsupportedType(t.clone())), }, }) } @@ -262,6 +290,44 @@ fn resolve_ty<'tcx>(tcx: TyCtxt<'tcx>, t: &Ty) -> Result> { .collect::>>()? .as_ref(), )), + TyKind::Array(inner, const_) => { + if !matches!(inner.kind, TyKind::Infer) { + tcx.dcx() + .warn(format!("Ignoring non-wildcard slice type {inner:?}")); + } + if !matches!(const_.value.kind, rustc_ast::ExprKind::Underscore) { + tcx.dcx() + .warn(format!("Ignoring const argument {const_:?} in array type")); + } + let our_len = 42; + warn!("Array types may not behave properly with instance markers because we always use the instance of arrays of length {our_len}"); + Ok(ty::Ty::new_array( + tcx, + ty::Ty::new_param(tcx, 0, Symbol::intern("T")), + our_len, + )) + } + TyKind::Slice(inner) => { + if !matches!(inner.kind, TyKind::Infer) { + tcx.dcx() + .warn(format!("Ignoring non-wildcard slice type {inner:?}")); + } + Ok(ty::Ty::new_slice( + tcx, + ty::Ty::new_param(tcx, 0, Symbol::intern("T")), + )) + } + TyKind::Ref(lt, mt) => { + if lt.is_some() { + tcx.dcx().warn("Ignoring lifetimes in reference types"); + } + let inner = resolve_ty(tcx, &mt.ty)?; + let lt = ty::Region::new_var(tcx, ty::RegionVid::ZERO); + Ok(match mt.mutbl { + hir::Mutability::Mut => ty::Ty::new_mut_ref(tcx, lt, inner), + hir::Mutability::Not => ty::Ty::new_imm_ref(tcx, lt, inner), + }) + } _ => Err(ResolutionError::UnsupportedType(t.clone())), } } @@ -290,7 +356,8 @@ fn resolve_ty<'tcx>(tcx: TyCtxt<'tcx>, t: &Ty) -> Result> { /// non-deterministically. /// 2. It probably cannot resolve a qualified path if the base is a primitive /// type. E.g. `usize::abs_diff` resolves but `::abs_diff` does not. -pub fn def_path_res(tcx: TyCtxt, qself: Option<&QSelf>, path: &[PathSegment]) -> Result { +pub fn def_path_res(tcx: TyCtxt, qself: Option<&QSelf>, path: &[PathSegment]) -> Result> { + trace!("Resolving {qself:?} {path:?}"); let no_generics_supported = |segment: &PathSegment| { if segment.args.is_some() { tcx.dcx().err(format!( @@ -303,9 +370,14 @@ pub fn def_path_res(tcx: TyCtxt, qself: Option<&QSelf>, path: &[PathSegment]) -> [primitive] => { /* Start here for issue 1 */ let sym = primitive.ident.name; - return PrimTy::from_name(sym) - .map(Res::PrimTy) - .ok_or(ResolutionError::CannotResolvePrimitiveType(sym)); + if let Some(sim_ty) = as_primitive_ty(sym) { + return Ok(vec![Res::PrimTy(sim_ty)]); + } else { + ( + Box::new(find_crates(tcx, sym)) as Box>, + &[], + ) + } } [base, path @ ..] => { /* This is relevant for issue 2 */ @@ -331,31 +403,52 @@ pub fn def_path_res(tcx: TyCtxt, qself: Option<&QSelf>, path: &[PathSegment]) -> Some(slf) => ( if slf.position == 0 { /* this is relevant for 3 */ - let TyKind::Path(qslf, pth) = &slf.ty.kind else { - return Err(ResolutionError::UnsupportedType((*slf.ty).clone())); - }; - let ty = def_path_res(tcx, qslf.as_deref(), &pth.segments)?; - let impls = tcx.inherent_impls(ty.def_id()); - Box::new(impls.iter().copied()) as Box<_> + let t = &*slf.ty; + match &slf.ty.kind { + TyKind::Path(qslf, pth) => Box::new( + def_path_res(tcx, qslf.as_deref(), &pth.segments)? + .into_iter() + .flat_map(|def_id| tcx.inherent_impls(def_id.def_id()).iter().copied()), + ) + as Box>, + TyKind::TraitObject(bounds, TraitObjectSyntax::Dyn) => { + let [GenericBound::Trait(t_ref)] = bounds.as_slice() else { + return Err(ResolutionError::UnsupportedType(t.clone())); + }; + let resolved_trait = + def_path_res(tcx, None, &t_ref.trait_ref.path.segments)?; + let any_sym = Symbol::intern("Any"); + let Some(t) = resolved_trait + .iter() + .find(|t| tcx.is_diagnostic_item(any_sym, t.def_id())) + else { + return Err(ResolutionError::UnsupportedType(t.clone())); + }; + let impls = tcx.inherent_impls(t.def_id()); + Box::new(impls.iter().cloned()) as Box<_> + } + _ => return Err(ResolutionError::UnsupportedType((*slf.ty).clone())), + } } else { - let r#trait = def_path_res(tcx, None, &path[..slf.position])?; - let r#type = resolve_ty(tcx, &slf.ty)?; let mut impls = vec![]; - /* This is relevant for issue 2 */ - tcx.for_each_relevant_impl(r#trait.def_id(), r#type, |i| impls.push(i)); - if impls.is_empty() { - if tcx - .lang_items() - .clone_fn() - .is_some_and(|c| c == r#trait.def_id()) - && r#type.is_unit() - { - tcx.dcx().err("Cannot assign markers to the Clone instance of (), is not a real function"); + for r#trait in def_path_res(tcx, None, &path[..slf.position])? { + let r#type = resolve_ty(tcx, &slf.ty)?; + /* This is relevant for issue 2 */ + tcx.for_each_relevant_impl(r#trait.def_id(), r#type, |i| impls.push(i)); + if impls.is_empty() { + if tcx + .lang_items() + .clone_fn() + .is_some_and(|c| c == r#trait.def_id()) + && r#type.is_unit() + { + tcx.dcx().err("Cannot assign markers to the Clone instance of (), is not a real function"); + } + return Err(ResolutionError::NoImplsFound( + r#trait.def_id(), + (*slf.ty).clone(), + )); } - return Err(ResolutionError::NoImplsFound( - r#trait.def_id(), - (*slf.ty).clone(), - )); } Box::new(impls.into_iter()) as Box<_> }, @@ -364,45 +457,59 @@ pub fn def_path_res(tcx: TyCtxt, qself: Option<&QSelf>, path: &[PathSegment]) -> }; let starts = starts.collect::>(); + trace!("starts: {:?}", starts); let starts = starts.into_iter(); - let mut last = Err(if let Some(f) = path.first() { + let empty_err = Err(if let Some(f) = path.first() { ResolutionError::CouldNotResolveCrate(f.ident.name) } else { ResolutionError::EmptyStarts }); - for first in starts { - last = path - .iter() - // for each segment, find the child item - .try_fold(Res::Def(tcx.def_kind(first), first), |res, segment| { - no_generics_supported(segment); - let segment = segment.ident.name; - let def_id = res.def_id(); - if let Some(item) = item_child_by_name(tcx, def_id, segment) { - item - } else if matches!(res, Res::Def(DefKind::Enum | DefKind::Struct, _)) { - // it is not a child item so check inherent impl items - tcx.inherent_impls(def_id) - .iter() - .find_map(|&impl_def_id| item_child_by_name(tcx, impl_def_id, segment)) - .unwrap_or(Err(ResolutionError::CouldNotFindChild { + let mut found = starts + .map(|first| { + path.iter() + // for each segment, find the child item + .try_fold(Res::Def(tcx.def_kind(first), first), |res, segment| { + no_generics_supported(segment); + let segment = segment.ident.name; + let def_id = res.def_id(); + if let Some(item) = item_child_by_name(tcx, def_id, segment) { + item + } else if matches!(res, Res::Def(DefKind::Enum | DefKind::Struct, _)) { + // it is not a child item so check inherent impl items + tcx.inherent_impls(def_id) + .iter() + .find_map(|&impl_def_id| item_child_by_name(tcx, impl_def_id, segment)) + .unwrap_or(Err(ResolutionError::CouldNotFindChild { + item: def_id, + segment, + search_space: SearchSpace::InherentImpl, + })) + } else { + Err(ResolutionError::CouldNotFindChild { item: def_id, segment, - search_space: SearchSpace::InherentImpl, - })) - } else { - Err(ResolutionError::CouldNotFindChild { - item: def_id, - segment, - search_space: SearchSpace::Mod, - }) - } - }); + search_space: SearchSpace::Mod, + }) + } + }) + }) + .collect::>(); - if last.is_ok() { - return last; - } + if found.is_empty() { + empty_err + } else if found.iter().all(|f| f.is_err()) { + found.pop().unwrap().map(|_| vec![]) + } else { + Ok(found + .into_iter() + .filter_map(|f| f.ok()) + .inspect(|res| { + trace!(" Resolved to {res:?}"); + if let Res::Def(_, did) = res { + trace!(" {} at {:?}", tcx.def_path_str(*did), tcx.def_span(*did)); + } + }) + .collect()) } - last } diff --git a/crates/paralegal-flow/tests/async_tests.rs b/crates/paralegal-flow/tests/async_tests.rs index 3565e90cc8..9e22839306 100644 --- a/crates/paralegal-flow/tests/async_tests.rs +++ b/crates/paralegal-flow/tests/async_tests.rs @@ -1035,3 +1035,66 @@ fn async_args_and_local_variable() { assert!(!no_source.flows_to_any(&sinks)); }) } + +#[test] +fn pattern_arguments() { + inline_test! { + struct Params { + one: usize, + two: usize, + three: usize, + four: usize, + } + + #[paralegal_flow::marker(target, arguments = [0])] + fn marked(result: usize) { + assert_eq!(result, 42); + } + + #[paralegal_flow::marker(source, arguments = [0])] + async fn main(Params { one, two, three, four } : Params) { + let result = one + three + four; + marked(result); + } + } + .check_ctrl(|ctrl| { + let target = ctrl.marked(Identifier::new_intern("target")); + let source = ctrl.marked(Identifier::new_intern("source")); + assert!(!target.is_empty()); + assert!(!source.is_empty()); + assert!(source.flows_to_data(&target)); + }); +} + +#[test] +fn pattern_arguments_poor_mans_tool_def() { + inline_test! { + struct Params { + one: usize, + two: usize, + three: usize, + four: usize, + } + + #[paralegal_flow::marker(target, arguments = [0])] + fn marked(result: usize) { + assert_eq!(result, 42); + } + + #[paralegal_flow::marker(source, arguments = [0])] + fn main(Params { one, two, three, four } : Params) + -> std::pin::Pin>> { + std::boxed::Box::pin(async move { + let result = one + three + four; + marked(result); + }) as std::pin::Pin>> + } + } + .check_ctrl(|ctrl| { + let target = ctrl.marked(Identifier::new_intern("target")); + let source = ctrl.marked(Identifier::new_intern("source")); + assert!(!target.is_empty()); + assert!(!source.is_empty()); + assert!(source.flows_to_data(&target)); + }); +} diff --git a/crates/paralegal-flow/tests/fixtures/dont-inline-on-std-marker.toml b/crates/paralegal-flow/tests/fixtures/dont-inline-on-std-marker.toml new file mode 100644 index 0000000000..dc8947beff --- /dev/null +++ b/crates/paralegal-flow/tests/fixtures/dont-inline-on-std-marker.toml @@ -0,0 +1,7 @@ +[["crate::hidden1"]] +marker = "std:test" +on_return = true + +[["crate::hidden2"]] +marker = "safe-libs:test" +on_argument = [0] diff --git a/crates/paralegal-flow/tests/fixtures/lifetime-resolving-markers.toml b/crates/paralegal-flow/tests/fixtures/lifetime-resolving-markers.toml new file mode 100644 index 0000000000..44d42dd59d --- /dev/null +++ b/crates/paralegal-flow/tests/fixtures/lifetime-resolving-markers.toml @@ -0,0 +1,3 @@ +[["::test_method"]] +marker = "present" +on_argument = [0] diff --git a/crates/paralegal-flow/tests/marker_tests.rs b/crates/paralegal-flow/tests/marker_tests.rs index f7705b0015..33c3d2df0e 100644 --- a/crates/paralegal-flow/tests/marker_tests.rs +++ b/crates/paralegal-flow/tests/marker_tests.rs @@ -5,13 +5,20 @@ extern crate lazy_static; use paralegal_flow::{define_flow_test_template, inline_test, test_utils::*}; use paralegal_spdg::{Identifier, InstructionKind}; +use std::path::Path; const TEST_CRATE_NAME: &str = "tests/marker-tests"; const EXTRA_ARGS: &[&str] = &["--no-interprocedural-analysis"]; +const CRATE_MARKER_CRATE_PATH: &str = "tests/test-crates-for-crate-marker/consumer"; + lazy_static! { static ref TEST_CRATE_ANALYZED: bool = run_paralegal_flow_with_flow_graph_dump_and(TEST_CRATE_NAME, EXTRA_ARGS); + static ref CRATE_MARKER_CRATE_ANALYZED: bool = run_paralegal_flow_with_flow_graph_dump_and( + CRATE_MARKER_CRATE_PATH, + &["--external-annotations", "external-annotations.toml"] + ); } macro_rules! define_test { @@ -20,6 +27,32 @@ macro_rules! define_test { }; } +macro_rules! crate_marker_test { + ($($t:tt)*) => { + define_flow_test_template!(CRATE_MARKER_CRATE_ANALYZED, CRATE_MARKER_CRATE_PATH, $($t)*); + }; +} + +crate_marker_test!(crate_marker : ctrl -> { + assert!(!ctrl.marked("found").is_empty()); +}); + +crate_marker_test!(serde_json: ctrl -> { + assert!(!ctrl.marked("serde").is_empty()); + ctrl.assert_purity(true); +}); + +crate_marker_test!(memchr: ctrl -> { + assert!(!ctrl.marked("memchr").is_empty()); + ctrl.assert_purity(true); +}); + +crate_marker_test!(marker_overlap: ctrl -> { + assert!(!ctrl.marked("found").is_empty()); + assert!(!ctrl.marked("submod-conflict").is_empty()); + assert!(!ctrl.marked("direct-conflict").is_empty()); +}); + #[test] fn use_wrapper() { InlineTestBuilder::new(stringify! { @@ -365,3 +398,69 @@ fn async_fn_marker() { assert!(source.flows_to_data(&sink)); }); } + +#[test] +fn lifetime_resolving() { + inline_test! { + use std::marker::PhantomData; + + trait Test { + fn test_method(&self); + } + + struct Ref<'a> { + _marker: PhantomData<&'a ()>, + } + + impl Test for Ref<'_> { + fn test_method(&self) { + todo!() + } + } + + fn main() { + Ref { _marker: PhantomData }.test_method(); + } + } + .with_marker_file(Path::new("tests/fixtures/lifetime-resolving-markers.toml")) + .check_ctrl(|ctrl| assert!(dbg!(ctrl.markers()).contains(&Identifier::new_intern("present")))); +} + +#[test] +fn dont_inline_on_std_marker() { + inline_test! { + #[paralegal_flow::marker(target1)] + fn canary1 () {} + + #[paralegal_flow::marker(target2)] + fn canary2 (arg: usize) {} + + fn hidden1() { + canary1() + } + + fn hidden2(arg: usize) { + canary2(arg) + } + + fn intermediate1() { + hidden1() + } + + fn intermediate2(arg: usize) { + hidden2(arg) + } + + fn main() { + intermediate1(); + intermediate2(0); + } + }.with_marker_file(Path::new("tests/fixtures/dont-inline-on-std-marker.toml")) + .with_extra_args(["--elide-on-whitelist-markers"]) + .check_ctrl(|ctrl| { + assert!(ctrl.marked("std:test").is_empty()); + assert!(ctrl.marked("safe-libs:test").is_empty()); + assert!(ctrl.marked("target1").is_empty()); + assert!(ctrl.marked("target2").is_empty()); + }); +} \ No newline at end of file diff --git a/crates/paralegal-flow/tests/purity/misc.rs b/crates/paralegal-flow/tests/purity/misc.rs index 774e899508..21cea986ce 100644 --- a/crates/paralegal-flow/tests/purity/misc.rs +++ b/crates/paralegal-flow/tests/purity/misc.rs @@ -515,3 +515,17 @@ fn structs() { ctrl.assert_purity(true); }); } + +#[test] +fn array_iter_marker() { + inline_test! { + fn main() { + [1, 2, 3].into_iter(); + } + } + .with_dependency_environment(stdlib_environment()) + .check_ctrl(|ctrl| { + ctrl.assert_purity(true); + assert!(ctrl.has_function("into_iter")); + }); +} diff --git a/crates/paralegal-flow/tests/purity/mod.rs b/crates/paralegal-flow/tests/purity/mod.rs index 913f254511..64b3173e2d 100644 --- a/crates/paralegal-flow/tests/purity/mod.rs +++ b/crates/paralegal-flow/tests/purity/mod.rs @@ -9,4 +9,5 @@ mod leaky; mod misc; mod raw_ptr; mod recursive; +mod side_effects; mod r#static; diff --git a/crates/paralegal-flow/tests/purity/side_effects.rs b/crates/paralegal-flow/tests/purity/side_effects.rs new file mode 100644 index 0000000000..83b2e078ca --- /dev/null +++ b/crates/paralegal-flow/tests/purity/side_effects.rs @@ -0,0 +1,35 @@ +use paralegal_flow::define_flow_test_template; +use paralegal_flow::test_utils::*; + +const TEST_CRATE_NAME: &str = "tests/purity/test-crate-side-effect"; +const EXTRA_ARGS: &[&str] = &[ + "--side-effect-markers", + "--external-annotations", + "../stdlib-markers.toml", +]; + +lazy_static! { + static ref TEST_CRATE_ANALYZED: bool = + run_paralegal_flow_with_flow_graph_dump_and(TEST_CRATE_NAME, EXTRA_ARGS); +} + +macro_rules! define_test { + ($($t:tt)*) => { + define_flow_test_template!(TEST_CRATE_ANALYZED, TEST_CRATE_NAME, $($t)*); + }; +} + +define_test!(fs: ctrl -> { + ctrl.show_side_effects(true); + + assert!(!ctrl.marked("side-effect:fs:write").is_empty()); +}); + +define_test!(path_eq: ctrl -> { + ctrl.show_side_effects(true); + ctrl.assert_purity(true); +}); + +define_test!(os_str_from_bytes: ctrl -> { + ctrl.assert_purity(true); +}); diff --git a/crates/paralegal-flow/tests/purity/stdlib-markers.toml b/crates/paralegal-flow/tests/purity/stdlib-markers.toml index e2503d1796..fca4d4abd6 100644 --- a/crates/paralegal-flow/tests/purity/stdlib-markers.toml +++ b/crates/paralegal-flow/tests/purity/stdlib-markers.toml @@ -1,29 +1,60 @@ # Trusted modules +[["alloc::boxed"]] +marker = "std:boxed" +_internal_can_fail_resolve_silently = true +_internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["core::ptr::non_null"]] +marker = "std:ptr" +_internal_can_fail_resolve_silently = true +_internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["core::ptr::metadata"]] +marker = "std:ptr" +_internal_can_fail_resolve_silently = true +_internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + [["alloc::vec"]] marker = "std:vec" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true [["core::slice"]] marker = "std:slice" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true [["alloc::string"]] marker = "std:string" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true -[["std::collection::hash_map"]] +[["std::collections::hash_map"]] marker = "std:hashmap" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true -[["std::collection::btree"]] +[["std::collections::btree"]] marker = "std:btree" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true # Impls of global allocator. # "alloc::alloc::\{impl#0\}", @@ -32,6 +63,14 @@ _internal_on_all_module_children = true marker = "std:allocation" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["alloc::alloc::exchange_malloc"]] +marker = "std:allocation" +_internal_can_fail_resolve_silently = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true [["std::boxed::Box::into_raw_with_allocator"]] marker = "std:box" @@ -225,6 +264,32 @@ on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] on_return = true _internal_can_fail_resolve_silently = true +[["core::hint"]] +marker = "std:hint" +_internal_can_fail_resolve_silently = true +_internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["core::cmp"]] +marker = "std:cmp" +_internal_can_fail_resolve_silently = true +_internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["core::iter"]] +marker = "std:iter" +_internal_can_fail_resolve_silently = true +_internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["std::ffi::OsStr::from_encoded_bytes_unchecked"]] +marker = "std:ffi" +_internal_can_fail_resolve_silently = true +on_return = true + # markers for microtesting @@ -240,3 +305,131 @@ marker = "ignore" on_return = true _internal_on_all_module_children = true _internal_can_fail_resolve_silently = true + + +[["::fold"]] +marker = "std:slice" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true +_internal_can_fail_resolve_silently = true + +[["str::as_bytes"]] +marker = "std:string" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["::next"]] +marker = "std:slice" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true +_internal_can_fail_resolve_silently = true + +[["::deref_mut"]] +marker = "side-effect:external-state" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["std::io::error"]] +marker = "std:error" +_internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["core::future::get_context"]] +marker = "std:future" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +# Safe, because (safe) slice API observes lifetimes. Might have to exclude +# unsafe functions at some point though. +[["core::slice"]] +marker = "std:slice" +_internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["::get"]] +marker = "std:slice" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["::get_mut"]] +marker = "std:slice" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["::index"]] +marker = "std:slice" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["::index_mut"]] +marker = "std:slice" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["std::fs::OpenOptions::open"]] +marker = "std:fs" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["<[_] as std::ops::Index>::index"]] +marker = "std:slice" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["std::path"]] +marker = "std:path" +_internal_can_fail_resolve_silently = true +_internal_on_all_module_children = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["std::path::Path::new"]] +marker = "std:fs" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["core::fmt::rt::Argument::new"]] +marker = "std:fmt" +_internal_can_fail_resolve_silently = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["core::fmt::rt::Argument::new_display"]] +marker = "std:fmt" +_internal_can_fail_resolve_silently = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["core::fmt::rt::Argument::new_debug"]] +marker = "std:fmt" +_internal_can_fail_resolve_silently = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["<&std::fs::File as std::io::Write>::write"]] +marker = "side-effect:fs:write" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["::from_bytes"]] +marker = "std:str:OsStrExt-OsStr" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["::as_bytes"]] +marker = "std:str:OsStrExt-OsStr" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["::eq"]] +marker = "std:path:eq" +_internal_can_fail_resolve_silently = true +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true + +[["<[_;_] as core::iter::IntoIterator>::into_iter"]] +marker = "std:array_into_iter" +on_argument = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +on_return = true diff --git a/crates/paralegal-flow/tests/purity/test-crate-misc/src/lib.rs b/crates/paralegal-flow/tests/purity/test-crate-misc/src/lib.rs index 40cf8786bc..d0f9015a1b 100644 --- a/crates/paralegal-flow/tests/purity/test-crate-misc/src/lib.rs +++ b/crates/paralegal-flow/tests/purity/test-crate-misc/src/lib.rs @@ -304,3 +304,8 @@ fn structs(a: usize) { foo.a = 30; foo.b = "hello2"; } + +#[paralegal::analyze] +fn array_iter_marker() { + [1,2,3].into_iter(); +} \ No newline at end of file diff --git a/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.lock b/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.lock new file mode 100644 index 0000000000..bd87f2950f --- /dev/null +++ b/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.lock @@ -0,0 +1,49 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "paralegal" +version = "0.1.0" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "purity-tests-side-effect" +version = "0.1.0" +dependencies = [ + "paralegal", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "unicode-ident" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" diff --git a/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.toml b/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.toml new file mode 100644 index 0000000000..be6b4ab46a --- /dev/null +++ b/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "purity-tests-side-effect" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +paralegal = { path = "../../../../paralegal" } diff --git a/crates/paralegal-flow/tests/purity/test-crate-side-effect/src/lib.rs b/crates/paralegal-flow/tests/purity/test-crate-side-effect/src/lib.rs new file mode 100644 index 0000000000..0da8907e5e --- /dev/null +++ b/crates/paralegal-flow/tests/purity/test-crate-side-effect/src/lib.rs @@ -0,0 +1,15 @@ +#[paralegal::analyze] +fn fs() { + std::fs::write("test", "Content").unwrap(); +} + +#[paralegal::analyze] +fn path_eq() { + std::path::Path::new("test") == std::path::Path::new("test"); +} + +#[paralegal::analyze] +fn os_str_from_bytes() { + use std::os::unix::prelude::OsStrExt; + std::ffi::OsStr::from_bytes(b"test"); +} diff --git a/crates/paralegal-flow/tests/sameness.rs b/crates/paralegal-flow/tests/sameness.rs new file mode 100644 index 0000000000..44e42ea8dd --- /dev/null +++ b/crates/paralegal-flow/tests/sameness.rs @@ -0,0 +1,309 @@ +#![feature(rustc_private)] + +use paralegal_flow::{inline_test, test_utils::*}; + +#[test] +fn simple() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + fn main() { + target(source()); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn from_async_arg() { + inline_test! { + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + #[paralegal_flow::marker(start, arguments = [0])] + async fn main(i: usize) { + target(i); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn no() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + fn main() { + target(source()+1); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(!sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn through_argument() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + fn wrapper(i: usize) { + target(i) + } + + fn main() { + wrapper(source()); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn through_return() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + fn wrapper(i: usize) { + target(i) + } + + fn main() { + wrapper(source()); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn through_opaque_call() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + + fn main() { + target(source().wrapping_add(3)); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(!sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn through_opaque_call_2() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + #[paralegal_flow::marker(noinline, return)] + fn in_between(i: usize) -> usize { i } + + fn main() { + target(in_between(source())); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(!sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn in_aggregate() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: T) {} + + fn main() { + let tup = (source(), 1, 2); + target(tup); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn not_from_projection() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> (usize, usize) { (0,0) } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: T) {} + + fn main() { + target(source().0); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(!sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn test_mutable_references() { + // This doesn't actually test any functionality but is an experiment to see + // whether the PDG accurately shows an absence of a flow if a coerced + // reference is used as though it has local lifetime. + // + // When a simple stack variable like this is used the PDG correctly shows + // the absence of a flow. However using a `Vec` instead (see next test case) + // already introduces the flow. + // + // Note that we aren't analyzing the stdlib here, but even if we did, we add + // exception markers to vectors, so the outcome is likely the same. + + inline_test! { + use std::cell::RefCell; + + #[paralegal_flow::marker(source, return)] + fn mark(t: T) -> T{ + t + } + + struct S; + + fn with(mut f: impl for<'a> FnMut(&'a mut S)) { + let r = unsafe { std::mem::transmute(&mut SC) }; + f(r) + } + + #[paralegal_flow::marker(target, arguments = [0])] + fn on_s(s: &mut S) {} + + const SC: S = S; + + fn main() { + let mutex : RefCell<&'static mut S> = RefCell::new(unsafe { std::mem::transmute(&mut SC) }); + with(|s| { + let s = mark(s); + let mut guard = mutex.borrow_mut(); + let mut my_s = s; + my_s = *guard; + on_s(my_s); + }) + } + }.check_ctrl(|ctrl| { + let sources = ctrl.marked("source"); + let targets = ctrl.marked("target"); + assert!(!sources.is_empty()); + assert!(!targets.is_empty()); + assert!(!sources.flows_to_data(&targets)); + }); +} + +#[test] +fn test_mutable_references_2() { + // See comments on `test_mutable_references` + + inline_test! { + use std::cell::RefCell; + + #[paralegal_flow::marker(source, return)] + fn mark(t: T) -> T{ + t + } + + struct S; + + fn with(mut f: impl for<'a> FnMut(&'a mut S)) { + let r = unsafe { std::mem::transmute(&mut SC) }; + f(r) + } + + #[paralegal_flow::marker(target, arguments = [0])] + fn on_s(s: &mut S) {} + + const SC: S = S; + + fn main() { + let mutex : RefCell<&'static mut S> = RefCell::new(unsafe { std::mem::transmute(&mut SC) }); + with(|s| { + let s = mark(s); + let mut v = vec![s]; + let mut guard = mutex.borrow_mut(); + v.pop(); + v.push(*guard); + on_s(v.pop().unwrap()); + }) + } + }.check_ctrl(|ctrl| { + let sources = ctrl.marked("source"); + let targets = ctrl.marked("target"); + assert!(!sources.is_empty()); + assert!(!targets.is_empty()); + assert!(sources.flows_to_data(&targets)); + }); +} diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.lock b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.lock new file mode 100644 index 0000000000..92ece0a444 --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.lock @@ -0,0 +1,130 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "consumer" +version = "0.1.0" +dependencies = [ + "memchr", + "paralegal", + "producer", + "serde_json", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "paralegal" +version = "0.1.0" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "producer" +version = "0.1.0" +dependencies = [ + "paralegal", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + +[[package]] +name = "syn" +version = "2.0.110" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.toml b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.toml new file mode 100644 index 0000000000..799a50a66e --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "consumer" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +paralegal = { path = "../../../../paralegal" } +producer = { path = "../producer" } +serde_json = "1.0" +memchr = "2" diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/external-annotations.toml b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/external-annotations.toml new file mode 100644 index 0000000000..05ef2c7920 --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/external-annotations.toml @@ -0,0 +1,34 @@ +[["producer"]] +marker = "found" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["serde_core"]] +marker = "serde" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["memchr"]] +marker = "memchr" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +# [["memchr::memrchr"]] +# marker = "memchr" +# on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +# on_return = true + +[["producer::a_mod"]] +marker = "submod-conflict" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["producer::a_mod"]] +marker = "direct-conflict" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/src/lib.rs b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/src/lib.rs new file mode 100644 index 0000000000..31f588859d --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/src/lib.rs @@ -0,0 +1,19 @@ +#[paralegal::analyze] +fn crate_marker() { + producer::target(); +} + +#[paralegal::analyze] +fn serde_json() { + serde_json::to_string(&34_i32).unwrap(); +} + +#[paralegal::analyze] +fn memchr() { + memchr::memrchr(b'a', b"hello"); +} + +#[paralegal::analyze] +fn marker_overlap() { + producer::a_mod::target() +} diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/Cargo.toml b/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/Cargo.toml new file mode 100644 index 0000000000..4b5c1458df --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "producer" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +paralegal = { path = "../../../../paralegal" } diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/src/lib.rs b/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/src/lib.rs new file mode 100644 index 0000000000..8b15dab228 --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/src/lib.rs @@ -0,0 +1,5 @@ +pub fn target() {} + +pub mod a_mod { + pub fn target() {} +} diff --git a/crates/paralegal-policy/src/algo/flows_to.rs b/crates/paralegal-policy/src/algo/flows_to.rs index bd102de65e..2153cd424b 100644 --- a/crates/paralegal-policy/src/algo/flows_to.rs +++ b/crates/paralegal-policy/src/algo/flows_to.rs @@ -138,8 +138,8 @@ fn test_ctrl_flows_to() { let src_c = ctx.controller_argument(controller, 2).unwrap(); let cs1 = crate::test_utils::get_callsite_node(&ctx, controller, "sink1"); let cs2 = crate::test_utils::get_callsite_node(&ctx, controller, "sink2"); - let switch_int_after_src_a = ctx.nth_successors(2, src_a); - let switch_int_after_src_c = ctx.nth_successors(2, src_c); + let switch_int_after_src_a = ctx.nth_successors(2, &src_a); + let switch_int_after_src_c = ctx.nth_successors(2, &src_c); assert!(switch_int_after_src_a.flows_to(&cs1, &ctx, EdgeSelection::Control)); assert!(switch_int_after_src_c.flows_to(&cs2, &ctx, EdgeSelection::Control)); assert!(switch_int_after_src_a.flows_to(&cs2, &ctx, EdgeSelection::Control)); diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index 734b522d59..a19e33013c 100644 --- a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -8,7 +8,8 @@ use std::{io::Write, process::exit, sync::Arc}; use fixedbitset::FixedBitSet; pub use paralegal_spdg::rustc_portable::{DefId, LocalDefId}; use paralegal_spdg::traverse::{ - generic_flows_to, generic_influencees, generic_influencers, EdgeSelection, + edge_generic_flows_to, generic_flows_to, generic_influencees, generic_influencers, + EdgeSelection, }; use paralegal_spdg::{ CallString, DefKind, DisplayNode, Endpoint, FunctionHandling, GlobalNode, HashMap, HashSet, @@ -281,7 +282,7 @@ impl RootContext { pub fn report_marker_if_absent(&self, marker: Marker) { assert_warning!( *self, - self.marker_to_ids.contains_key(&marker), + self.marker_is_defined(marker), format!("Marker {marker} is mentioned in the policy but not defined in source") ) } @@ -345,11 +346,20 @@ impl RootContext { /// /// If the controller with this id does not exist *or* the controller has /// fewer than `index` arguments. - pub fn controller_argument(&self, ctrl_id: Endpoint, index: u32) -> Option { + pub fn controller_argument(&self, ctrl_id: Endpoint, index: u32) -> Option { let ctrl = self.desc.controllers.get(&ctrl_id)?; - let inner = *ctrl.arguments.get(index as usize)?; + let inner = NodeCluster::new( + ctrl_id, + ctrl.arguments.iter().copied().filter(|n| { + ctrl.graph + .node_weight(*n) + .unwrap() + .is_arg + .is_some_and(|a| a == index as u16) + }), + ); - Some(GlobalNode::from_local_node(ctrl_id, inner)) + (!inner.nodes().is_empty()).then_some(inner) } /// Returns whether there is direct control flow influence from influencer to sink, or there is some node which is data-flow influenced by `influencer` and has direct control flow influence on `target`. Or as expressed in code: @@ -638,12 +648,17 @@ pub trait Context { fn root(&self) -> &RootContext; /// Returns an iterator over all objects marked with `marker`. - fn marked_nodes(&self, marker: Marker) -> impl Iterator + '_; + fn marked_nodes(&self, marker: impl Into) -> impl Iterator + '_; /// All nodes with this marker, be that via type or directly fn nodes_marked_any_way(&self, marker: Marker) -> impl Iterator + '_; /// All nodes that have this marker through a type fn nodes_marked_via_type(&self, marker: Marker) -> impl Iterator + '_; + + /// Is this marker assigned to anything in the PDG? + fn marker_is_defined(&self, m: impl Into) -> bool { + self.root().marker_to_ids.contains_key(&m.into()) + } } impl Context for RootContext { @@ -667,7 +682,8 @@ impl Context for RootContext { }) } - fn marked_nodes(&self, marker: Marker) -> impl Iterator + '_ { + fn marked_nodes(&self, marker: impl Into) -> impl Iterator + '_ { + let marker = marker.into(); self.report_marker_if_absent(marker); self.marker_to_ids .get(&marker) @@ -729,7 +745,7 @@ where }); } } - generic_flows_to( + edge_generic_flows_to( self.iter_nodes(), edge_type, &ctx.desc.controllers[&cf_id], @@ -737,6 +753,7 @@ where ) .is_some() } + /// An optimized version of the following pattern that performs only one /// graph traversal instead of `sink.len()` traversals: /// @@ -808,7 +825,7 @@ where // }); // } // } - generic_flows_to( + edge_generic_flows_to( self.iter_nodes(), edge_type, &ctx.desc.controllers[&cf_id], @@ -1022,6 +1039,9 @@ pub trait NodeExt: private::Sealed { ctx: &RootContext, edge_selection: EdgeSelection, ) -> Option>; + + #[doc(hidden)] + fn flows_to_unchanged(self, other: GlobalNode, ctx: &RootContext) -> bool; } impl NodeExt for GlobalNode { @@ -1128,6 +1148,22 @@ impl NodeExt for GlobalNode { .collect() }) } + + fn flows_to_unchanged(self, other: GlobalNode, ctx: &RootContext) -> bool { + let cf_id = self.controller_id(); + if other.controller_id() != cf_id { + return false; + } + + let spdg = &ctx.desc.controllers[&cf_id]; + let graph = petgraph::visit::NodeFiltered::from_fn(&spdg.graph, |n| { + n == self.local_node() + || n == other.local_node() + || spdg.graph.node_weight(n).unwrap().same + }); + + generic_flows_to(self.iter_nodes(), &graph, other.iter_nodes()).is_some() + } } impl Sealed for &'_ T {} @@ -1181,6 +1217,10 @@ impl NodeExt for &'_ T { fn associated_call_site(self, ctx: &RootContext) -> CallString { (*self).associated_call_site(ctx) } + + fn flows_to_unchanged(self, other: GlobalNode, ctx: &RootContext) -> bool { + (*self).flows_to_unchanged(other, ctx) + } } /// Provide display trait for DefId in a Context. @@ -1270,11 +1310,11 @@ fn test_influencees() -> Result<()> { let sink_callsite = crate::test_utils::get_callsite_node(&ctx, ctrl_name, "sink1"); let influencees_data_control = ctx - .influencees(dbg!(src_a), EdgeSelection::Both) + .influencees(dbg!(&src_a), EdgeSelection::Both) .unique() .collect::>(); let influencees_data = ctx - .influencees(src_a, EdgeSelection::Data) + .influencees(&src_a, EdgeSelection::Data) .unique() .collect::>(); @@ -1330,11 +1370,11 @@ fn test_influencers() -> Result<()> { .collect::>(); assert!( - influencers_data_control.contains(&src_a), + influencers_data_control.contains(&src_a.try_as_single().unwrap()), "input argument a influences sink via data" ); assert!( - influencers_data_control.contains(&dbg!(src_b)), + influencers_data_control.contains(&dbg!(src_b.try_as_single().unwrap())), "input argument b influences sink via control" ); assert!( @@ -1346,11 +1386,11 @@ fn test_influencers() -> Result<()> { ); assert!( - !influencers_data.contains(&src_a), + !influencers_data.contains(&src_a.try_as_single().unwrap()), "input argument a doesnt influences sink via data" ); assert!( - influencers_data.contains(&src_b), + influencers_data.contains(&src_b.try_as_single().unwrap()), "input argument b influences sink via data" ); assert!( @@ -1379,7 +1419,7 @@ fn test_has_ctrl_influence() -> Result<()> { let sink_cs = crate::test_utils::get_callsite_or_datasink_node(&ctx, ctrl_name, "sink1"); assert!( - ctx.has_ctrl_influence(src_a, &sink_cs), + ctx.has_ctrl_influence(&src_a, &sink_cs), "src_a influences sink" ); assert!( @@ -1391,7 +1431,7 @@ fn test_has_ctrl_influence() -> Result<()> { "validate_foo influences sink" ); assert!( - !ctx.has_ctrl_influence(src_b, &sink_cs), + !ctx.has_ctrl_influence(&src_b, &sink_cs), "src_b doesnt influence sink" ); assert!( diff --git a/crates/paralegal-policy/src/diagnostics.rs b/crates/paralegal-policy/src/diagnostics.rs index 79269e87f1..1d136ef88f 100644 --- a/crates/paralegal-policy/src/diagnostics.rs +++ b/crates/paralegal-policy/src/diagnostics.rs @@ -96,7 +96,9 @@ use indexmap::IndexMap; use std::rc::Rc; use std::{io::Write, sync::Arc}; -use paralegal_spdg::{DisplayPath, Endpoint, GlobalNode, Identifier, Span, SpanCoord, SPDG}; +use paralegal_spdg::{ + DisplayPath, Endpoint, GlobalNode, Identifier, NodeCluster, Span, SpanCoord, SPDG, +}; use crate::{Context, NodeExt, RootContext}; @@ -106,16 +108,11 @@ macro_rules! assert_error { ($ctx:expr, $cond: expr $(,)?) => { assert_error!($ctx, $cond, "Error: {}", stringify!($cond)) }; - ($ctx:expr, $cond: expr, $msg:expr $(,)?) => { - if !$cond { - use $crate::diagnostics::Diagnostics; - Diagnostics::error(&$ctx, $msg); - } - }; - ($ctx:expr, $cond: expr, $msg:expr, $($frag:expr),+ $(,)?) => { + + ($ctx:expr, $cond: expr, $msg:expr $(, $($frag:expr),+ )? $(,)?) => { if !$cond { use $crate::diagnostics::Diagnostics; - Diagnostics::error(&$ctx, format!($msg, $($frag),+)); + Diagnostics::error(&$ctx, format!($msg, $( $($frag),+ )? )); } }; } @@ -892,6 +889,27 @@ impl ControllerContext { }) .map(Arc::new) } + + /// Get the nodes representing the `idx`'th argument to this controller (if + /// the argument exists). + pub fn argument(&self, idx: u32) -> Option { + self.controller_argument(self.id, idx) + } + + /// Get all arguments for this controller grouped by argument index + pub fn arguments_by_index(&self) -> Vec { + let mut args = vec![]; + let g = &self.current().graph; + for n in self.current().arguments.iter() { + let w = g.node_weight(*n).unwrap(); + let Some(a) = w.is_arg else { continue }; + if (a as usize) >= args.len() { + args.resize_with((a as usize) + 1, || NodeCluster::new(self.id(), [])); + } + args[a as usize].push_node(*n); + } + args + } } impl HasDiagnosticsBase for ControllerContext { @@ -914,7 +932,10 @@ impl Context for ControllerContext { self.as_ctx() } - fn marked_nodes(&self, marker: crate::Marker) -> impl Iterator + '_ { + fn marked_nodes( + &self, + marker: impl Into, + ) -> impl Iterator + '_ { self.root() .marked_nodes(marker) .filter(|n| n.controller_id() == self.id) diff --git a/crates/paralegal-policy/src/lib.rs b/crates/paralegal-policy/src/lib.rs index c4ad77d6a2..e46fe53005 100644 --- a/crates/paralegal-policy/src/lib.rs +++ b/crates/paralegal-policy/src/lib.rs @@ -164,6 +164,13 @@ impl SPDGGenCommand { self } + /// Register a Rust function as entrypoint for the analysis. Equivalent to + /// the `#[analyze]` annotation. + pub fn analysis_target(&mut self, target: impl AsRef) -> &mut Self { + self.0.args(["--analyze", target.as_ref()]); + self + } + /// Abort compilation once the analysis artifacts have been created. Also /// sets the expectation for the compilation to succeed to `false`. pub fn abort_after_analysis(&mut self) -> &mut Self { diff --git a/crates/paralegal-spdg/build.rs b/crates/paralegal-spdg/build.rs index bfc6457b32..0bea236f57 100644 --- a/crates/paralegal-spdg/build.rs +++ b/crates/paralegal-spdg/build.rs @@ -7,7 +7,17 @@ use std::hash::{Hash, Hasher}; use std::path::Path; use std::time::SystemTime; -/// Helper for calculating a hash of all (modification dates of) Rust files in this crate. + +fn hash_via_content(path: &Path, hasher: &mut DefaultHasher) -> std::io::Result<()> { + use std::io::Read; + let mut file = std::fs::File::open(path)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer)?; + buffer.hash(hasher); + Ok(()) +} + +/// Helper for calculating a hash of all Rust files in this crate. fn visit_dirs(dir: &Path, hasher: &mut DefaultHasher) -> Result<()> { if !dir.is_dir() { return Ok(()); @@ -18,11 +28,7 @@ fn visit_dirs(dir: &Path, hasher: &mut DefaultHasher) -> Result<()> { if path.is_dir() { visit_dirs(&path, hasher)?; } else if path.extension().is_some_and(|ext| ext == "rs") { - let metadata = entry.metadata()?; - let modified = metadata.modified()?; - - let duration = modified.duration_since(SystemTime::UNIX_EPOCH)?; - duration.as_secs().hash(hasher); + hash_via_content(&path, hasher)?; // Tell Cargo to rerun if this source file changes println!("cargo:rerun-if-changed={}", path.display()); } diff --git a/crates/paralegal-spdg/src/dot.rs b/crates/paralegal-spdg/src/dot.rs index 124f61be8b..fdbcca8812 100644 --- a/crates/paralegal-spdg/src/dot.rs +++ b/crates/paralegal-spdg/src/dot.rs @@ -153,11 +153,7 @@ impl<'a> dot::Labeller<'a, CallString, GlobalEdge> for DotPrintableProgramDescri let mut all_markers = markers.chain(type_markers).copied().peekable(); let write_id_and_desc = |s: &mut String| { let idx = n.index(); - let desc = weight - .kind - .to_string() - .replace('<', "<") - .replace('>', ">"); + let desc = escape_label_str(&weight.kind.to_string()); write!(s, " ({idx}) {desc}") }; s.push('|'); @@ -205,6 +201,21 @@ impl<'a> dot::Labeller<'a, CallString, GlobalEdge> for DotPrintableProgramDescri } } +fn escape_label_str(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + for c in s.chars() { + match c { + '<' => out.push_str("<"), + '>' => out.push_str(">"), + '{' => out.push_str("{"), + '}' => out.push_str("}"), + '&' => out.push_str("&"), + _ => out.push(c), + } + } + out +} + /// Dump all SPDGs in a single dot expression pub fn dump(spdg: &ProgramDescription, out: W) -> std::io::Result<()> { dump_for_selection(spdg, out, |_| true) diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index 7aaf38d47f..ff36002dc2 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -300,6 +300,18 @@ impl Span { } } +impl std::fmt::Display for Span { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "{}:{}:{}", + self.source_file.abs_file_path.display(), + self.start.line, + self.start.col, + ) + } +} + /// Metadata on a function call. #[derive(Debug, Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Allocative)] pub struct FunctionCallInfo { @@ -759,7 +771,7 @@ pub mod node_cluster { #[derive(Debug, Hash, Clone)] pub struct NodeCluster { controller_id: Endpoint, - nodes: Box<[Node]>, + nodes: Vec, } /// Owned iterator of a [`NodeCluster`] @@ -851,9 +863,23 @@ pub mod node_cluster { controller_id: ctrl_id, nodes: std::iter::once(Some(first.local_node())) .chain(it.map(|n| (n.controller_id() == ctrl_id).then_some(n.local_node()))) - .collect::>>()?, + .collect::>()?, }) } + + /// Returns some if this cluster contains only one node. + pub fn try_as_single(&self) -> Option { + if let [node] = &*self.nodes { + Some(GlobalNode::from_local_node(self.controller_id, *node)) + } else { + None + } + } + + #[doc(hidden)] + pub fn push_node(&mut self, node: Node) { + self.nodes.push(node); + } } } @@ -887,6 +913,13 @@ pub struct NodeInfo { /// Whether this node is an argument to a function call, and if so which pub is_arg: Option, + + /// Is this node (the value behind it) the same before and after the + /// operation applied at this location? + /// + /// Specifically this considers taking references (and pointers) and + /// dereferencing them to mean the value is the same. + pub same: bool, } impl Display for NodeInfo { @@ -1185,3 +1218,41 @@ impl Display for DisplayNode<'_> { } } } + +pub struct AutoMarkers { + pub side_effect_unknown_virtual: Identifier, + pub side_effect_foreign: Identifier, + pub side_effect_unknown_fn_ptr: Identifier, + pub side_effect_raw_ptr: Identifier, + pub side_effect_transmute: Identifier, + pub side_effect_unknown: Identifier, + pub side_effect_intrinsic: Identifier, +} + +impl Default for AutoMarkers { + fn default() -> Self { + AutoMarkers { + side_effect_unknown_virtual: Identifier::new_intern("auto:side-effect:unknown:virtual"), + side_effect_foreign: Identifier::new_intern("auto:side-effect:foreign"), + side_effect_unknown_fn_ptr: Identifier::new_intern("auto:side-effect:unknown:fn-ptr"), + side_effect_raw_ptr: Identifier::new_intern("auto:side-effect:raw-ptr"), + side_effect_transmute: Identifier::new_intern("auto:side-effect:transmute"), + side_effect_unknown: Identifier::new_intern("auto:side-effect:unknown"), + side_effect_intrinsic: Identifier::new_intern("auto:side-effect:intrinsic"), + } + } +} + +impl AutoMarkers { + pub fn all(&self) -> [Identifier; 7] { + [ + self.side_effect_unknown_virtual, + self.side_effect_foreign, + self.side_effect_unknown_fn_ptr, + self.side_effect_raw_ptr, + self.side_effect_transmute, + self.side_effect_unknown, + self.side_effect_intrinsic, + ] + } +} diff --git a/crates/paralegal-spdg/src/traverse.rs b/crates/paralegal-spdg/src/traverse.rs index 0077b54e49..324b527ab5 100644 --- a/crates/paralegal-spdg/src/traverse.rs +++ b/crates/paralegal-spdg/src/traverse.rs @@ -67,19 +67,34 @@ impl EdgeSelection { /// A primitive that queries whether we can reach from one set of nodes to /// another -pub fn generic_flows_to( +pub fn edge_generic_flows_to( from: impl IntoIterator, edge_selection: EdgeSelection, spdg: &SPDG, other: impl IntoIterator, ) -> Option<(Node, Node)> { + let graph = edge_selection.filter_graph(&spdg.graph); + generic_flows_to(from, &graph, other) +} + +use petgraph::visit as pgv; + +/// A primitive that queries whether we can reach from one set of nodes to +/// another +pub fn generic_flows_to( + from: impl IntoIterator, + graph: G, + other: impl IntoIterator, +) -> Option<(Node, Node)> +where + G: pgv::Visitable + pgv::IntoNeighbors + pgv::GraphBase, +{ let targets = other.into_iter().collect::>(); let mut from = from.into_iter().peekable(); if from.peek().is_none() || targets.is_empty() { return None; } - let graph = edge_selection.filter_graph(&spdg.graph); let mut search = petgraph::visit::Dfs::from_parts(vec![], graph.visit_map()); for n in from { search.move_to(n); @@ -90,15 +105,6 @@ pub fn generic_flows_to( } } None - - // let result = petgraph::visit::depth_first_search(&graph, from, |event| match event { - // DfsEvent::Discover(d, _) if targets.contains(&d) => Control::Break(d), - // _ => Control::Continue, - // }); - // match result { - // Control::Break(r) => Some(r), - // _ => None, - // } } /// The current policy for this iterator is that it does not return the start