From 36b605d52577d34eb1a6c4c9f0af104fddf82c7b Mon Sep 17 00:00:00 2001 From: Fabian Sandkuhl Date: Mon, 9 Mar 2026 14:46:45 +0100 Subject: [PATCH 1/6] Added Inductive Miner including as described in 'Robust Process Mining with Guarantees' from S. J. J. Leemans --- process_mining/.idea/.gitignore | 10 + process_mining/.idea/vcs.xml | 6 + .../process_tree/process_tree_struct.rs | 6 +- .../base_cases/base_cases.rs | 49 +++ .../inductive_miner_app/base_cases/mod.rs | 1 + .../cut_finder/concurrent.rs | 388 ++++++++++++++++++ .../inductive_miner_app/cut_finder/cut.rs | 66 +++ .../cut_finder/exclusive_choice.rs | 232 +++++++++++ .../cut_finder/loop_cut.rs | 292 +++++++++++++ .../inductive_miner_app/cut_finder/mod.rs | 78 ++++ .../cut_finder/sequence_cut.rs | 318 ++++++++++++++ .../fallthrough/activity_concurrent.rs | 203 +++++++++ .../fallthrough/activity_once_per_trace.rs | 329 +++++++++++++++ .../fallthrough/empty_traces.rs | 122 ++++++ .../fallthrough/fallthrough.rs | 37 ++ .../fallthrough/flower_model.rs | 80 ++++ .../inductive_miner_app/fallthrough/mod.rs | 73 ++++ .../fallthrough/strict_tau_loop.rs | 239 +++++++++++ .../fallthrough/tau_loop.rs | 131 ++++++ .../case_centric/inductive_miner_app/mod.rs | 266 ++++++++++++ .../inductive_miner_app/splits/concurrency.rs | 126 ++++++ .../splits/exclusice_choice.rs | 218 ++++++++++ .../inductive_miner_app/splits/mod.rs | 80 ++++ .../inductive_miner_app/splits/redo_loop.rs | 208 ++++++++++ .../inductive_miner_app/splits/sequence.rs | 233 +++++++++++ .../inductive_miner_app/splits/split.rs | 35 ++ .../structures/components.rs | 200 +++++++++ .../structures/minimum_self_distance.rs | 272 ++++++++++++ .../inductive_miner_app/structures/mod.rs | 3 + .../structures/parameter.rs | 32 ++ .../src/discovery/case_centric/mod.rs | 2 + 31 files changed, 4333 insertions(+), 2 deletions(-) create mode 100644 process_mining/.idea/.gitignore create mode 100644 process_mining/.idea/vcs.xml create mode 100644 
process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/base_cases.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/mod.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/empty_traces.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/fallthrough.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/flower_model.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusice_choice.rs create 
mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs create mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs diff --git a/process_mining/.idea/.gitignore b/process_mining/.idea/.gitignore new file mode 100644 index 0000000..ab1f416 --- /dev/null +++ b/process_mining/.idea/.gitignore @@ -0,0 +1,10 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Ignored default folder with query files +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/process_mining/.idea/vcs.xml b/process_mining/.idea/vcs.xml new file mode 100644 index 0000000..6c0b863 --- /dev/null +++ b/process_mining/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs b/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs index 543f39e..934a2ff 100644 --- a/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs +++ b/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs @@ -14,7 +14,7 @@ pub enum LeafLabel { /// /// Node in a process tree /// -#[derive(Debug, Serialize, Deserialize)] 
+#[derive(Debug, Serialize, Deserialize, PartialEq)] pub enum Node { /// Operator node of a process tree Operator(Operator), @@ -69,7 +69,7 @@ impl Node { /// /// Operator type enum for [`Operator`] /// -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)] pub enum OperatorType { /// Sequence operator Sequence, @@ -176,6 +176,7 @@ impl ProcessTree { /// An operator node in a process tree /// #[derive(Debug, Serialize, Deserialize)] +#[derive(PartialEq)] pub struct Operator { /// The [`OperatorType`] of the tree itself pub operator_type: OperatorType, @@ -221,6 +222,7 @@ impl Operator { /// /// A leaf in a process tree /// +#[derive(PartialEq)] pub struct Leaf { /// The silent or non-silent activity label [`LeafLabel`] pub activity_label: LeafLabel, diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/base_cases.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/base_cases.rs new file mode 100644 index 0000000..67e8d02 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/base_cases.rs @@ -0,0 +1,49 @@ +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::EventLog; + +#[derive(Debug)] +pub enum BaseCases { + None, // No base case is found + Empty, // the event log is completely empty + SingleActivity(String) // just one activity in every single trace in the event log +} + + +/// Checks whether the base case single activity applies to the given event log. +/// The BaseCase applies if the event log only contains traces with precisely one event, +/// which must have the same activity attribute. 
+fn check_single_activity_case(log: &EventLog, classifier: &EventLogClassifier) -> Option { + let mut activity: Option = None; + for t in &log.traces{ + if t.events.len() != 1{ // catch empty traces + return None; + } + let act = classifier.get_class_identity(&t.events[0]); + if let Some(activity) = &activity{ + if act != *activity{ + return None; + } + } else { + activity = Some(act); + } + } + activity +} + +/// Checks whether a BaseCase applies to a given event log. +/// +/// There are two possible base cases: +/// - 'empty log' where the event log contains no traces at all (a log holding only empty traces does not count), +/// - 'single activity' where the entire event log consists of traces containing only one single event with the same activity attribute. +pub fn find_base_case(log: &EventLog, event_log_classifier: &EventLogClassifier) -> BaseCases { + + if log.traces.len() == 0{ + // this just checks for an empty event log, this means, even if there are only empty traces, this case does not apply + BaseCases::Empty + } else if let Some(activity) = check_single_activity_case(log, event_log_classifier){ + BaseCases::SingleActivity(activity) + } else { + // no base case applied to this one + BaseCases::None + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs new file mode 100644 index 0000000..b8acedb --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs @@ -0,0 +1 @@ +pub mod base_cases; \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs new file mode 100644 index 0000000..97ba690 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs @@ -0,0 +1,388 @@ +/// This 
implementation (of the function parallel cut) follows the parallel cut algorithm as implemented in +/// the ProM framework (`InductiveMiner`), originally written in Java. +/// +/// Reference: +/// - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: +/// "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." +/// Application of Concurrency to System Design (ACSD), 2013. +/// - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven +/// University of Technology, 09.05.2017 +/// - ProM source code: +/// https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/cutfinders/CutFinderIMConcurrent.java +use std::borrow::Cow; +use std::collections::HashSet; +use crate::core::process_models::dfg::DirectlyFollowsGraph; +use crate::core::process_models::process_tree::OperatorType; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::structures::components::Components; +use crate::discovery::case_centric::inductive_miner_app::structures::minimum_self_distance::MinimumSelfDistance; +// Following the definition of a parallel cut, every element has to be either a starting activity or an end activity. +// Also, every element has to be connected to each other element - like a mesh + +// Example +// / -> A -> B -\ +// START -->| |-> END +// \ -> B -> A -/ +// . + + + +/// Ensures that every resulting component has both start and end activities, +/// a concurrent cut only makes sense if every isolated component can be entered or left independently. +/// +/// To do this, this function categorizes each connected component into one of four categories: +/// (start & end, start only, end only, neither start nor end). +/// Start-only and end-only components are paired with each other; every component left over is merged into +/// the first component containing both start & end activities. 
+#[allow(dead_code)] +fn ensure_start_end_in_each<'a>( + dfg: &'a DirectlyFollowsGraph<'_>, + con_components: Vec>>, +) -> Option>>> { + // create for different classes of components + + let mut start_end = Vec::new(); + let mut start = Vec::new(); + let mut end = Vec::new(); + let mut neither = Vec::new(); + + for component in con_components { + let has_start = component + .iter() + .any(|act| dfg.start_activities.contains(act.as_ref())); + let has_end = component + .iter() + .any(|act| dfg.end_activities.contains(act.as_ref())); + + match (has_start, has_end) { + (true, true) => { + // components which have both start and end activities + start_end.push(component); + } + (true, false) => { + // components which contain start and no end activity + start.push(component); + } + (false, true) => { + // components which contains no start but end activities + end.push(component); + } + (false, false) => { + // neither start nor end activities in this components + neither.push(component); + } + } + } + + // no component with start and end -> no parallel cut + if start_end.len() == 0 { + return None; + } + + // Start building final components + let mut result = start_end; + + loop { + match (start.pop(), end.pop()) { + // combine start-only and end-only components + (Some(mut start), Some(end)) => { + start.extend(end); + result.push(start); + } + + (Some(start), None) => { + // add remaining start only components to any component + (&mut result[0]).extend(start); + } + (None, Some(end)) => { + // add remaining end only components to any component + (&mut result[0]).extend(end); + } + (None, None) => { + // add components that have neither start nor end + for component in neither { + (&mut result[0]).extend(component) + } + // no components left -> break the loop + break; + } + } + } + Some(result) +} + +///Partitions activities into components, such that activities in different components can occur +/// concurrently. 
Two activities are in the same component if they are not bidirectionally reachable. +/// +/// Optionally, a minimum self distance constraint can further restrict concurrency, by +/// forcing activities, which are in a minimum self distance relation with other activities, +/// into the same component. +/// +/// # Parameters +fn concurrent_cut<'a>(dfg: &'a DirectlyFollowsGraph<'_>, mindist: &Option) -> Option>>> { + let activities: Vec> = dfg.activities.keys().map(|act| Cow::from(act)).collect(); + if activities.is_empty() { + return None; + } + + // merge activities into components (based on which other activities are reachable) + let mut components = Components::new(&activities); + + for (i, act1) in activities.iter().enumerate() { + for (j, act2) in activities.iter().enumerate() { + // do not do that for the same activity + if i < j && !components.same_component(act1, act2) { + // merge only bidirectional activities + let has_a1_a2 = dfg.contains_df_relation((act1.clone(), act2.clone())); + let has_a2_a1 = dfg.contains_df_relation((act2.clone(), act1.clone())); + + if !has_a1_a2 || !has_a2_a1 { + components.merge_components_of(act1, act2); + } + } + } + } + + // optional minimum self distance + if let Some(mindist) = mindist { + for activity1 in activities.iter(){ + if let Some(mindist) = mindist.get_minimum_distance(activity1){ + for activity2 in &mindist.1{ + components.merge_components_of(activity1, activity2.as_str()); + } + } + } + } + + let components = components.get_components(); + if components.len() > 1 { + ensure_start_end_in_each(dfg, components) + } else { + None + } +} + + + +/// Examines whether in a given Directly Follows Graph a concurrent cut can be applied. +/// +/// Public wrapper for [`concurrent_cut`] +/// +/// # Parameters +/// - 'dfg': the directly follows Graph which shall be examined +/// - 'mindist': Optional a minimum self distance constraint can be applied, by providing a Minimum self distance struct. 
+/// # Returns +/// - a cut struct containing at least 2 components of concurrent activities +/// - None, otherwise (this means a concurrent cut can not be applied) +pub fn concurrent_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>, mindist: Option) -> Option> { + // if there are not start or end activities, there is no cut + if dfg.start_activities.is_empty() || dfg.end_activities.is_empty() { + return None; + } + + let result = concurrent_cut(dfg, &mindist); + if let Some(result) = result { + if result.len() <= 1 { + None + } else { + Some(Cut::new(OperatorType::Concurrency, result)) + } + } else { + None + } +} + +mod test_parallel_cut { + use std::borrow::Cow; + use std::collections::{HashMap, HashSet}; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::concurrent::{concurrent_cut, concurrent_cut_wrapper}; + use crate::event_log; + + #[test] + fn test_leeman_example() { + let log = event_log!( + ["a", "b", "c"], + ["a", "c", "b"], + ["c", "a", "b"] + ); + let dfg = &DirectlyFollowsGraph::discover(&log); + let cut = concurrent_cut_wrapper(&dfg, None); + assert!(cut.is_some()); + let mut partitions = cut.unwrap().get_own(); + // sort to ensure order + partitions.sort_by(|x,y| x.len().cmp(&y.len())); + assert_eq!( + partitions, + Vec::from([ + HashSet::from(["c".into()]), + HashSet::from(["a".into(), "b".into()]) + ]) + ); + } + + #[test] + fn test_parallel_cut_with_trailing_activity() { + let dfg = DirectlyFollowsGraph::discover( + &event_log!(["a", "b", "c"], ["b", "a", "c"]) + ); + let cut = concurrent_cut(&dfg, &None); + println!("CUT {:?}", cut); + } + + #[test] + fn test_easy_parallel_cut_wrapper() { + let dfg = DirectlyFollowsGraph::discover( + &event_log!(["a", "b"], ["b", "a"]) + ); + let cut = concurrent_cut_wrapper(&dfg, None); + println!("CUT {:?}", cut); + assert!(cut.is_some()); + assert_eq!(cut.unwrap().len(), 2); + } + + #[test] + fn test_three_branch_parallel() { + 
let dfg = DirectlyFollowsGraph::discover( + &event_log!( + ["a", "b"], + ["b", "c"], + ["c", "a"], + ["a", "c"], + ["b", "a"], + ["c", "b"] + ) + ); + + let cut = concurrent_cut_wrapper(&dfg, None); + assert!(cut.is_some()); + + let parts = cut.unwrap(); + assert_eq!(parts.len(), 3); + + let flattened: HashSet> = parts + .get_iter() + .flat_map(|p| p.iter().map(|s| s.clone())) + .collect(); + + assert!(flattened.contains("a")); + assert!(flattened.contains("b")); + assert!(flattened.contains("c")); + } + + #[test] + fn test_sequence_cut_in_parallel() { + let dfg = DirectlyFollowsGraph::discover( + &event_log!(["a", "b", "c"], ["a", "d", "c"]) + ); + assert!(concurrent_cut_wrapper(&dfg, None).is_none()); + } + + #[test] + fn test_hard_parallel_cut_multiple_starts_and_endings() { + let mut dfg = DirectlyFollowsGraph::new(); + dfg.activities = HashMap::from([("a".into(), 1), ("b".into(), 2), ("c".into(), 3)]); + + dfg.start_activities = HashSet::from(["a".into()]); + dfg.end_activities = HashSet::from(["c".into(), "b".into()]); + dfg.directly_follows_relations = HashMap::from([ + (("a".into(), "b".into()), 1), + (("b".into(), "a".into()), 1), + // a <-> c + (("a".into(), "c".into()), 1), + (("c".into(), "a".into()), 1), + // c <-> b + (("b".into(), "c".into()), 1), + (("c".into(), "b".into()), 1), + ]); + + assert!(concurrent_cut_wrapper(&dfg, None).is_none()); + + // // set multiple starts + dfg.start_activities = HashSet::from(["a".to_string(), "b".to_string()]); + dfg.end_activities = HashSet::from(["c".to_string()]); + assert!(concurrent_cut_wrapper(&dfg, None).is_none()); + + // overlap + dfg.end_activities = HashSet::from(["c".to_string(), "b".to_string()]); + assert!(concurrent_cut_wrapper(&dfg, None).is_some()); + + // everything is end and start activity + dfg.start_activities = HashSet::from(["a".to_string(), "b".to_string(), "c".to_string()]); + dfg.end_activities = HashSet::from(["a".to_string(), "b".to_string(), "c".to_string()]); + 
assert!(concurrent_cut_wrapper(&dfg, None).is_some()); + + // no ending or start at b -> AND cut + dfg.start_activities = HashSet::from(["a".to_string(), "c".to_string()]); + dfg.end_activities = HashSet::from(["a".to_string(), "c".to_string()]); + let cut = concurrent_cut_wrapper(&dfg, None); + assert!(cut.is_some()); + assert_eq!(cut.unwrap().len(), 2); + } + + #[test] + fn test_perfect_parallel_three_branches() { + // all permutations of a, b, c to allow full bidirectional behavior + let log = event_log!( + ["a", "b", "c"], + ["a", "c", "b"], + ["b", "a", "c"], + ["b", "c", "a"], + ["c", "a", "b"], + ["c", "b", "a"] + ); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = concurrent_cut_wrapper(&dfg, None); + assert!(cut.is_some()); + assert_eq!(cut.unwrap().get_own().len(), 3); + } + + #[test] + fn test_sequence_cut() { + let log = event_log!( + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"] + ); + let dfg = DirectlyFollowsGraph::discover(&log); + + assert!(concurrent_cut_wrapper(&dfg, None).is_none()); + } + + #[test] + fn test_xor_cut() { + let log = event_log!(["a", "b"], ["c", "d"], ["a", "b"]); + let dfg = DirectlyFollowsGraph::discover(&log); + + // XOR-Components would be {a, b} and {c, d} + // Parallel Cut has to be None as there are no edges between {a,b} and {c,d} + assert!(concurrent_cut_wrapper(&dfg, None).is_none()); + } + + #[test] + fn test_noisy_parallel_fails_without_filter() { + let log = event_log!( + // ("a", "b", "c"), // b-> c missing + ["b", "a", "c"], + ["a", "c", "b"], + //("b", "c", "a"), // c-> a missing + ["c", "b", "a"] //o_trace!("c", "a", "b") is missing -> no edge c -> a + ); + // there are two edges missing c->a and b-> c, therefore there is no bidirectional relation in any case + let dfg = DirectlyFollowsGraph::discover(&log); + + assert!(concurrent_cut_wrapper(&dfg, None).is_none()); + } + + #[test] + fn test_loop_cut() { + let log = event_log!( + ["a"], // Start + ["a", "b", "a"], // Loop + ["a", "b", "a", 
"b", "a"] // Loop + ); + let dfg = DirectlyFollowsGraph::discover(&log); + + assert!(concurrent_cut_wrapper(&dfg, None).is_none()); + } +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs new file mode 100644 index 0000000..d0e338c --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs @@ -0,0 +1,66 @@ +use std::borrow::Cow; +use std::collections::HashSet; +use crate::core::process_models::process_tree::OperatorType; + +/// Represents a cut typically found by the inductive miner in a directly follows graph. +/// A 'Cut' partitions activities of a graph or log into disjoints sets, according to a +/// specific cut operator (e.g. sequence, xor etc.) +#[derive(Debug, PartialEq)] +pub struct Cut<'a>{ + operator: OperatorType, // define what operator this cut is about + partitions: Vec>>, +} + +impl<'a> Cut<'a>{ + + /// Creates a new cut with the given Operator and partitions. + /// + /// The caller must ensure that partitions form a valid cut according to the chosen operator. + pub fn new(operator: OperatorType, partitions: Vec>>) -> Cut<'a>{ + Self{operator, partitions} + } + + + /// Returns the number of partitions in this cut. + pub fn len(&self) -> usize { + self.partitions.len() + } + + /// Returns an iterator over the partitions of this cut. + pub fn get_iter(&self) -> std::slice::Iter<'_, HashSet>> { + self.partitions.iter() + } + + /// Consumes the cut and returns the partitions of this cut. + pub fn get_own(self) -> Vec>> { + self.partitions + } + + /// Returns the operator associated with this cut + pub fn get_operator(&self) -> OperatorType { + self.operator // possible due to copy trait + } + + + /// Returns true if this cut contains no partitions. + pub fn is_empty(&self) -> bool{ + self.partitions.is_empty() + } + + + + /// Converts this cut into an owned version with `'static` lifetime. 
+ /// + /// All activity labels are cloned into owned `String`s. + /// This is useful when the cut must outlive the original event log data. + pub fn to_owned_cut(&self) ->Cut<'static>{ + let owned_partitions = self.partitions.iter().map(|partition|{ + partition.iter().map(|cow| Cow::Owned(cow.to_string())).collect() + }).collect::>>>(); + + Cut{ + operator: self.operator, + partitions: owned_partitions, + } + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs new file mode 100644 index 0000000..761e8b0 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs @@ -0,0 +1,232 @@ +//** +// This Code is based on the paper: +// +// Discovering Block-Structured Process Models From Event Logs - A Constructive Approach +// by S.J.J. Leemans, D. Fahland, and W.M.P. van der Aalst +// +// +// The algorithm works by recursively identifying splits in the process behavior, +// constructing a hierarchical representation (in case of a process tree). +// +// There are typically four split conditions: +// +// 1. Exclusive choice (xor) +// 2. Sequence +// 3. Concurrent (parallel) +// 4. Loop +// +// If a split condition is matched, an accordingly named cut function is used to cut the log, +// the algorithm continues recursively. + +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use std::borrow::Cow; +use std::collections::{HashMap, HashSet, VecDeque}; +use crate::core::process_models::dfg::DirectlyFollowsGraph; +use crate::core::process_models::process_tree::OperatorType; + +/// Calculates all connected components of a Graph. +/// For this it starts from every unvisited activity a Breadth First Search over the Graph. 
+/// +/// # Returns +/// A vector containing all connected components +fn calc_connected_components<'a>( + activities: &'a HashMap, + adjacent: HashMap, HashSet>>, +) -> Vec>> { + // visited nodes + let mut visited: HashSet> = HashSet::new(); + // components (if cut) + let mut components: Vec>> = Vec::new(); + + // iterate over every activity + for node in activities.keys() { + let node = Cow::from(node); + if !visited.contains(&node) { + // search in components + + // components of the components xd + let mut comp = HashSet::new(); + let mut queue = VecDeque::new(); + + // mark node as already visited + visited.insert(node.clone()); + // Push starting node + queue.push_back(node); + // Explore connected component by looking at every edge of this activity + while let Some(current) = queue.pop_front() { + // the starting node is ofc the first node of this nodes component + comp.insert(current.clone()); + + // insert every other node which is reachable and has not already been visited + if let Some(neighbors) = adjacent.get(&current) { + for neighbor in neighbors { + if !visited.contains(neighbor) { + visited.insert(neighbor.clone()); + queue.push_back(neighbor.clone()); + } + } + } + } + components.push(comp); + } + } + + components +} + +/// Calculates an undirected adjacency matrix of a given Directly Follows Graph. +/// The matrix is calculated based on direct reachability and does not include +/// transitive reachability. +/// +/// # Returns +/// A hash map mapping each activity to its neighboring activities, i.e. to activities occurring in an edge with this one +/// +/// Note: Only activities occurring at least once inside an edge are taken into account. 
+pub fn calculate_undirected_adjacency_matrix<'a>( + dfg: &DirectlyFollowsGraph<'a>, +) -> HashMap, HashSet>> { + let mut adjacent = HashMap::new(); + + for ((a1, a2), _) in &dfg.directly_follows_relations { + // insert both directions + adjacent + .entry(a1.clone()) + .or_insert(HashSet::new()) + .insert(a2.clone()); + adjacent + .entry(a2.clone()) + .or_insert(HashSet::new()) + .insert(a1.clone()); + } + adjacent +} + + +/// Attempts to find an exclusive choice cut in the given Directly Follows Graph, by calculating the connected components of the Graph. +/// +/// Public wrapper for [`calc_connected_components`] +/// +/// # Returns +/// Some(cut) containing the partitions/ connected components found, otherwise None. +#[allow(dead_code)] +pub fn exclusive_choice_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Option> { + // no start or end activity results in no cut + if dfg.start_activities.is_empty() || dfg.end_activities.is_empty() { + return None; + } + + let components = + calc_connected_components(&dfg.activities, calculate_undirected_adjacency_matrix(dfg)); + + // XOR cut only if > 1 disjoint component + if components.len() > 1 { + Some(Cut::new(OperatorType::ExclusiveChoice, components)) + } else { + None + } +} + + +#[allow(unused_imports)] +mod tests { + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::exclusive_choice::exclusive_choice_cut_wrapper; + use crate::{event, event_log, trace}; + + + #[test] + fn test_exclusive_choice_cut_2() { + let log = event_log!(["a", "b"], ["e"]); + let dfg: DirectlyFollowsGraph<'_> = DirectlyFollowsGraph::discover(&log); + let result = exclusive_choice_cut_wrapper(&dfg); + assert!(result.is_some()); + assert_eq!(result.unwrap().len(), 2); + } + + // Case 1: Clear XOR between b and c + // Traces: start -> b -> d OR start -> c -> d + #[test] + fn 
xor_cut_simple_two_branches() { + let log = event_log!(["b", "d"], ["c", "e"]); + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = exclusive_choice_cut_wrapper(&dfg).unwrap(); + + // Expect two components: {"b","d"} and {"e","c"} + // + assert_eq!(cut.len(), 2); + assert!(cut.get_iter().any(|comp| comp.contains("b"))); + assert!(cut.get_iter().any(|comp| comp.contains("c"))); + } + + // Case 2: XOR with 3 different branches + // Traces: start -> b -> e, start -> c -> f, start -> d -> g + #[test] + fn xor_cut_three_way_branch() { + let log = event_log!(["b", "e"], ["c", "f"], ["d", "g"]); + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = exclusive_choice_cut_wrapper(&dfg).unwrap(); + + // Expect three components: one with b, one with c, one with d + assert_eq!(cut.len(), 3); + assert!(cut.get_iter().any(|comp| comp.contains("b"))); + assert!(cut.get_iter().any(|comp| comp.contains("c"))); + assert!(cut.get_iter().any(|comp| comp.contains("d"))); + } + + // Case 3: No XOR (sequence only) + // Traces: a -> b -> c (repeated) + #[test] + fn no_xor_cut_sequence() { + let log = event_log!( + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"] + ); + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = exclusive_choice_cut_wrapper(&dfg); + + // Should be None because it’s just a sequence + assert!(cut.is_none()); + } + + // Case 4: Single-event traces -> XOR between start activities + // Traces: ["a"], ["e"], ["f"] + #[test] + fn xor_cut_multiple_single_events() { + let log = event_log!(["a"], ["e"], ["f"]); + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = exclusive_choice_cut_wrapper(&dfg).unwrap(); + + // Expect 3 disjoint components + assert_eq!(cut.len(), 3); + assert!(cut.get_iter().any(|comp| comp.contains("a"))); + assert!(cut.get_iter().any(|comp| comp.contains("e"))); + assert!(cut.get_iter().any(|comp| comp.contains("f"))); + } + + #[test] + fn greater_test() { + let log = 
event_log!(["a", "b", "c"], ["e", "f"]); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = exclusive_choice_cut_wrapper(&dfg); + + assert!(cut.is_some()); + } + + #[test] + fn test_parallel_log_no_cut() { + let log = event_log!(["a", "b"], ["b", "a"]); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = exclusive_choice_cut_wrapper(&dfg); + + // This is a parallel cut, not an exclusive choice cut + assert!(cut.is_none()); + } +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs new file mode 100644 index 0000000..c58f978 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs @@ -0,0 +1,292 @@ +/// This implementation follows the Loop cut finder algorithm as implemented in +/// the ProM framework (`InductiveMiner`), originally written in Java. +/// +/// Reference: +/// - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: +/// "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." +/// Application of Concurrency to System Design (ACSD), 2013. +/// - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven +/// University of Technology, 09.05.2017 +/// - ProM source code: +/// https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/cutfinders/CutFinderIMLoop.java +use std::borrow::Cow; +use std::collections::HashSet; +use crate::core::process_models::dfg::DirectlyFollowsGraph; +use crate::core::process_models::process_tree::OperatorType; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::structures::components::Components; + + +/// Attempts to find a loop cut in a given Directly Follows Graph (DFG). 
+/// +/// The algorithm groups activities into connected components by using a union-find like structure. +/// +/// 1. Selects a pivot activity from the sets of start activities. +/// 2. Merges all start and end activities with component of pivot. +/// 3. Merges internal activities (no start nor end activity) based on the edges in the DFG, excluding +/// edges that would violate redo-loop semantic. +/// 4. Merges components based on certain rules about their connectivity +/// +/// +/// The resulting vector represents the activity partitions of the +/// candidate redo-loop cut. The first partition corresponds to the +/// component containing the pivot (the "do" part), +/// and the remaining partitions correspond to the "redo" part(s). +/// +/// # Panic +/// Panics if the dfg contains no start activity +fn redo_loop_cut<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Vec>> { + // activities + let nodes: Vec> = dfg.activities.iter().map(|(act, _)| Cow::from(act)).collect(); + let mut components = Components::new(&nodes); + + // start element as pivot element -> safe unwrap as there has to be at least one start element + let pivot = dfg.start_activities.iter().next().unwrap(); + for start in &dfg.start_activities { + components.merge_components_of(pivot, start); + } + for end in &dfg.end_activities { + components.merge_components_of(pivot, end); + } + + // merge inner components + for ((v0, v1), _) in &dfg.directly_follows_relations { + let v0_is_start = dfg.start_activities.contains(v0.as_ref()); + let v0_is_end = dfg.end_activities.contains(v0.as_ref()); + let v1_is_start = dfg.start_activities.contains(v1.as_ref()); + + if !v0_is_start { + if !v0_is_end { + if !v1_is_start { + components.merge_components_of(v0, v1); + } + } + } else if v0_is_end { + components.merge_components_of(v0, v1); + } + } + + // create sub end and sub start activities + let mut sub_end_activities = HashSet::new(); + let mut sub_start_activities = HashSet::new(); + + // sort edges into components + 
for ((v0, v1), _) in &dfg.directly_follows_relations { + if components.same_component(&v0, &v1) { + sub_start_activities.insert(v0); + sub_end_activities.insert(v1); + } + } + + // check if sub-end-activities are connected to all start activities + for sub_end in sub_end_activities { + for start in &dfg.start_activities { + if components.same_component(sub_end, start) { + break; + } + if !dfg.contains_df_relation((sub_end.clone(), start.into())) { + components.merge_components_of(sub_end, start); + break; + } + } + } + + for sub_start in sub_start_activities { + for end_activity in dfg.end_activities.iter() { + if components.same_component(&sub_start, &end_activity) { + break; + } + if dfg.contains_df_relation((sub_start.clone(), end_activity.into())) { + components.merge_components_of(sub_start, end_activity); + break; + } + } + } + + // reorder so that pivot comes first + let mut partition = components.get_components(); + let pivot = Cow::Owned(pivot.to_string()); + if let Some(pos) = partition.iter().position(|set| set.contains(&pivot)) { + partition.swap(0, pos); + } + + partition + + // check whether those sub component belongs to the do or the redo +} + +/// Attempts to find a Loop cut in a given DFG. 
+/// +/// Public wrapper for [`redo_loop_cut`] +/// +/// #Returns +/// Some(cut) if a loop cut has successfully been discovered, None otherwise +pub fn redo_loop_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Option>{ + + // only possible if there are start and end activities + if dfg.start_activities.is_empty() || dfg.end_activities.is_empty() { + return None; + } + + // calculate do-redo loop components + let components = redo_loop_cut(dfg); + + // a cut is found if there is more than one component + if components.len() > 1{ + Some(Cut::new(OperatorType::Loop, components)) + } else { + None + } + + +} + +#[allow(unused_imports)] +mod test_redo_loop_cut{ + use std::collections::HashMap; + use crate::{event_log, trace, event}; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::core::process_models::process_tree::OperatorType; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; + use super::*; + + #[test] + fn test_redo_on_single_activity(){ + let log = event_log!( + ["a", "c"], + ["a", "c", "b", "a", "c"] + ); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = redo_loop_cut(&dfg); + + assert_eq!(cut.len(), 2); + assert!(cut[0].contains("a") && cut[0].contains("c")); + assert!(cut[1].contains("b")); + } + + #[test] + fn test_no_loop() { + let log = event_log!( + ["a", "b", "c"], + ["a", "b", "c"], + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = redo_loop_cut(&dfg); + + assert_eq!(cut.len(), 1); + assert!(cut[0].contains("a") && cut[0].contains("b") && cut[0].contains("c")); + } + + #[test] + fn test_multi_activity_redo() { + let log = event_log!( + ["a", "c"], + ["a", "c", "b", "d", "a", "c"], + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = redo_loop_cut(&dfg); + + assert_eq!(cut.len(), 2); + + let do_group = &cut[0]; + let redo_group = &cut[1]; + + assert!(do_group.contains("a") && do_group.contains("c")); + assert!(redo_group.contains("b") && 
redo_group.contains("d")); + } + + #[test] + fn test_nested_loops_only_outer_cut() { + let log = event_log!( + ["s", "a", "c", "e"], + ["s", "a", "c", "b", "a", "c", "e"], // inner loop + ["s", "a", "c", "e", "g", "s", "a", "c", "e"], + ["s", "a", "c", "b", "a", "c", "b", "a", "c", "e"], + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + + + let cut = redo_loop_cut(&dfg); + + assert_eq!(cut.len(), 2); + + assert!(cut[1].contains("g")); + assert!(cut[0].contains("a") && cut[0].contains("c")); + } + + + #[test] + fn test_complex_test(){ + let mut dfg = DirectlyFollowsGraph::new(); + dfg.activities = HashMap::from([("a".to_string(), 1), ("b".to_string(), 1),("c".to_string(), 1)]); + dfg.directly_follows_relations = + HashMap::from([ + (("a".into(),"b".into()),1), + (("b".into(),"a".into()),1), + (("b".into(),"c".into()),1), + (("c".into(),"b".into()),1), + (("c".into(),"a".into()),1), + (("a".into(),"c".into()),1), + ] + ); + dfg.start_activities = HashSet::from(["a".to_string(), "b".to_string()]); + dfg.end_activities = HashSet::from(["c".to_string()]); + + println!("Found component: {:?}", redo_loop_cut(&dfg)); + } + + + + #[test] + fn test_double_loop(){ + let log = event_log!( + ["a", "b"], + ["a", "b", "c", "a", "b"], + ["a", "b", "d", "a", "b"], + ["a", "b", "d", "a", "b", "a", "b", "c", "a", "b"], + ["a", "b", "c", "a", "b", "a", "b", "d", "a", "b"] + ); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = redo_loop_cut_wrapper(&dfg); + assert!(cut.is_some()); + + //expect a cut of three partitions + let expectations = Cut::new(OperatorType::Loop, vec![ + HashSet::from(["a".into(), "b".into()]), + HashSet::from(["c".into()]), + HashSet::from(["d".into()]) + ]); + assert_eq!(cut.unwrap(), expectations); + } + + #[test] + fn test_loop_over_parallel(){ + let log = event_log!( + ["a", "b"], + ["a", "b", "c", "a", "b"], + ["a", "d", "b"], + ["a", "d", "b", "c", "a", "d", "b" ], + ["a", "d", "b", "c", "a", "b" ] + ); + let dfg = 
DirectlyFollowsGraph::discover(&log); + + let cut = redo_loop_cut_wrapper(&dfg); + + assert!(cut.is_some()); + let expectations = Cut::new(OperatorType::Loop, + vec![ + HashSet::from( + ["a".into(), "b".into(), "d".into()]), + HashSet::from(["c".into()])] + ); + + assert_eq!(cut.unwrap(), expectations); + } + + +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/mod.rs new file mode 100644 index 0000000..d15245e --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/mod.rs @@ -0,0 +1,78 @@ +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::dfg::DirectlyFollowsGraph; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::concurrent::concurrent_cut_wrapper; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::exclusive_choice::exclusive_choice_cut_wrapper; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::loop_cut::redo_loop_cut_wrapper; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::sequence_cut_wrapper; +use crate::discovery::case_centric::inductive_miner_app::structures::minimum_self_distance::MinimumSelfDistance; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::{Parameter, Parameters}; +use crate::EventLog; + +pub mod exclusive_choice; +pub mod cut; +pub mod sequence_cut; +pub mod concurrent; +pub mod loop_cut; + + + + +/// Attempts to find a valid cut in the given DirectlyFollowsGraph, by evaluating possible cut types +/// in the following strict order: +/// 1. exclusive choice cut [`exclusive_choice_cut_wrapper`] +/// 2. Sequence cut [`sequence_cut_wrapper`] +/// 3. Concurrent / AND cut [`concurrent_cut_wrapper`] +/// 4. 
Loop cut [`redo_loop_cut_wrapper`] +/// +/// # Returns +/// - Some([`Cut`]) containing the first detected cut according to the strict order. +/// - None otherwise +pub fn find_cut<'a>(dfg: &'a DirectlyFollowsGraph<'_>, log: &EventLog, event_log_classifier: &EventLogClassifier, parameters: &Parameters) -> Option>{ + // if any cut is found in the presented order, return the first one + if let Some(cut) = exclusive_choice_cut_wrapper(dfg){ + Some(cut) + } else if let Some(cut) = sequence_cut_wrapper(dfg, parameters){ + Some(cut) + } else { + // check whether minimum self distance shall be used + let mindist = if parameters.contains(&Parameter::MinimumSelfDistance) { + Some(MinimumSelfDistance::new(log, event_log_classifier)) + } else { None }; + + if let Some(cut) = concurrent_cut_wrapper(dfg, mindist) { + Some(cut) + } else if let Some(cut) = redo_loop_cut_wrapper(dfg) { + Some(cut) + } else { + None // if no cut is found return none + } + } +} + + +mod test_cut_finder{ + use std::collections::HashSet; + use crate::{ + discovery::case_centric::dfg::discover_dfg, + core::event_data::case_centric::EventLogClassifier, + event_log, + discovery::case_centric::inductive_miner_app::cut_finder::find_cut + }; + + #[test] + fn test_log_with_no_cut(){ + let log = event_log!( + ["a", "b", "c", "d"], + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + let dfg = discover_dfg(&log); + let cut = find_cut(&dfg, &log, &EventLogClassifier::default(), &HashSet::new()); + assert!(cut.is_none()); + } + + +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs new file mode 100644 index 0000000..8b17dde --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs @@ -0,0 +1,318 @@ +use std::borrow::Cow; +use std::collections::{HashMap, HashSet}; +use 
crate::core::process_models::dfg::{Activity, DirectlyFollowsGraph}; +use crate::core::process_models::process_tree::OperatorType; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; + +/// Calculate transitiv reachability using Floyd Warshall +fn compute_reachability_matrix(dfg: &DirectlyFollowsGraph<'_>) -> (HashMap, Vec>) { + let activities = dfg.activities.iter().map(|(a,_)| a.clone()).collect::>(); + let n = activities.len(); + let mut map = HashMap::new(); + + // Activity_string -> index + for (i, act) in activities.iter().enumerate() { + map.insert(act.clone(), i); + } + + // initialize matrix + let mut matrix = vec![vec![false; n]; n]; + + // mark direct edges + for ((a, b), _) in &dfg.directly_follows_relations{ + if let (Some(idx_a), Some(idx_b)) = (map.get(a.as_ref()), map.get(b.as_ref())) { + matrix[*idx_a][*idx_b] = true; + } + } + + // Floyd Warshall + for k in 0..n { + for i in 0..n{ + for j in 0..n{ + // only update if cell isn't already true + matrix[i][j] = matrix[i][j] || (matrix[i][k] && matrix[k][j]); + } + } + } + + (map, matrix) +} + +/// Helper function which calculates whether a set of activities a can reach another set of activities b. +/// +/// # Returns +/// - 'true' if at least one activity in a can transitively reach any activity in b +fn reaches_any_transitive(a: &HashSet>, b: &HashSet>, + idx_map: &HashMap, + matrix: &Vec> +) -> bool { + for act_a in a { + for act_b in b { + if let (Some(&idx_a), Some(&idx_b)) = (idx_map.get(act_a.as_ref()), idx_map.get(act_b.as_ref())) { + if matrix[idx_a][idx_b] { + return true; + } + } + } + } + false +} + + +/// Helper function which calculates whether every activity in a set a can reach every activity in another set b. 
+fn reaches_all_transitive(a: &HashSet>, b: &HashSet>, + idx_map: &HashMap, + matrix: &Vec>) -> bool { + for act_a in a { + for act_b in b { + if let (Some(&idx_a), Some(&idx_b)) = (idx_map.get(act_a.as_ref()), idx_map.get(act_b.as_ref())) { + if !matrix[idx_a][idx_b] { + return false; + } + } + } + } + true +} + + +/// Calculates Activity Sequences in a given Directly Follows Graph. +/// Two activities are in sequence if they are neither mutually reachable nor mutually unreachable. +/// +/// # Returns +/// A vector of activity partitions representing a candidate sequence cut. +/// Each hashset contains the activity labels belonging to the same sequence block. +/// The partitions are ordered s.t. for any 'i < j', activities in partitions\[i] can (transitively) +/// reach activities in partitions\[j]. +fn calc_sequences<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Vec>>{ + let (idx_map, matrix) = compute_reachability_matrix(dfg); + + // Initialize each activity with its own partition + let mut partitions : Vec>> = dfg.activities.keys().map( + |a| { + let mut s = HashSet::new(); + s.insert(a.into()); + s + } + ).collect(); + + // break flag + let mut changed = true; + while changed { + changed = false; + // iterative over all activities and find bidirectional reachacble components or mutually non reachable components + let mut i = 0; + while i < partitions.len() { + // safe some iterations as the edges are non directional + let mut j = i + 1; + while j < partitions.len() { + // get the current working partitions + let p_a = &partitions[i]; + let p_b = &partitions[j]; + + // Check connectivity between groups - true if at least one activity in p_a reaches at least one other activity in p_b + let a_reaches_b = reaches_any_transitive(p_a, p_b, &idx_map, &matrix); + let b_reaches_a = reaches_any_transitive(p_b, p_a, &idx_map, &matrix); + + // Merge if: + // 1. Mutually reachable (Loop) + // 2. 
Mutually unreachable (Exclusive Choice / Parallelism) + if (a_reaches_b && b_reaches_a) || (!a_reaches_b && !b_reaches_a) { + // Merge the whole partition j into partition i + let part_j = partitions.remove(j); + partitions[i].extend(part_j); + // as we changed this partition, we need to iterate over all partitions again, bc maybe the merged partitions are reachable + changed = true; + // Don't increment j, as the vector shrunk + } else { + // process with next partition + j += 1; + } + } + i += 1; + } + } + + // 2. Sort partitions to form the candidate sequence + partitions.sort_by(|p1, p2| { + let p1_to_p2 = reaches_any_transitive(p1, p2, &idx_map, &matrix); + let p2_to_p1 = reaches_any_transitive(p2, p1, &idx_map, &matrix); + // p1 reaches more than p2 + if p1_to_p2 && !p2_to_p1 { // p1 -> p2 but not p2 -> p1 + std::cmp::Ordering::Less + } else if !p1_to_p2 && p2_to_p1 { // p2 -> p1 but not p1 -> p2 + std::cmp::Ordering::Greater + } else { // mutually reachable or not reachable - should not happen at all + panic!("Partitions are in sequence cut are nevertheless mutually reachable or not reachable"); + } + }); + + partitions +} + +/// Public wrapper for [`calc_sequences`]. +/// +/// This function simply forwards its arguments to +/// `calc_sequences` and returns Some(cut) if a cut is found, otherwise None. 
+/// +/// If a [`strict_sequence_cut`] should be applied, this has to be set in a [`Parameter`] +pub fn sequence_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>, _parameters: &Parameters) -> Option>{ + // calculate sequence blocks + let sequences = calc_sequences(dfg); + + // early return + if sequences.len() <= 1{ + return None; + } + + // at this point we could check whether the sequence satisfies the conditions for a strict sequence cut + + // if there is more than one sequence block, a cut is found successfully + if sequences.len() > 1 { + Some(Cut::new(OperatorType::Sequence, sequences)) + } else { + None + } +} + +#[allow(unused_imports)] +mod test_sequence_cut{ + use std::borrow::Cow; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::calc_sequences; + use std::collections::HashSet; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::dfg::discover_dfg; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::{ sequence_cut_wrapper}; + use crate::{event_log, trace, event}; + #[test] + fn test_single_activity(){ + let dfg = DirectlyFollowsGraph::discover(&event_log!(["a"])); + let cut = calc_sequences(&dfg); + let expected = vec![HashSet::from([Cow::from("a".to_string())])]; + assert_eq!(cut, expected); + } + + #[test] + fn test_exclusive_choice_cut(){ + let input = event_log!(["a", "b", "c"], ["d"]); + let dfg = DirectlyFollowsGraph::discover(&input); + let result = sequence_cut_wrapper(&dfg, &HashSet::new()); + println!("{:?}", result); + assert!(result.is_some()); + assert_eq!(result.unwrap().get_own().len(), 3); + + } + #[test] + fn test_simple_sequence(){ + let input = event_log!(["a", "b", "c"]); + let dfg = DirectlyFollowsGraph::discover(&input); + let result = calc_sequences(&dfg); + let expected = vec![HashSet::from(["a".into()]) , HashSet::from(["b".into()]), 
HashSet::from(["c".into()])]; + assert_eq!(expected, result) + } + + + #[test] + fn test_leemans_example(){ + let input = event_log!(["a", "c", "d"], ["b", "c", "e "]); + let dfg = DirectlyFollowsGraph::discover(&input); + println!("{:?}", calc_sequences(&dfg)); + let result = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(result.is_some()); + let result = result.unwrap(); + println!("{:?}", result); + assert_eq!(result.get_own().len(), 3); + } + + + #[test] + fn test_sequence_with_internal_parallelism() { + // Log: A -> (B || C) -> D + // Traces: A->B->C->D, A->C->B->D + let dfg = DirectlyFollowsGraph::discover(&event_log!( + ["A", "B", "C", "D"], + ["A", "C", "B", "D"] + )); + + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()).unwrap(); + let expected: Vec>> = vec![ + HashSet::from(["A".into()]), + HashSet::from(["B".into(), "C".into()]), + HashSet::from(["D".into()]), + ]; + + assert_eq!(cut.get_own(), expected); + } + + #[test] + fn test_parallel_branches_no_sequence_cut() { + // Log: A -> B and A -> C in parallel + let dfg = DirectlyFollowsGraph::discover(&event_log!( + ["B", "C"], + ["C", "B"] + )); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(cut.is_none()); + } + + #[test] + fn test_xor_branch_sequence_cut() { + // Log: A -> B -> D OR A -> C -> D + let dfg = DirectlyFollowsGraph::discover(&event_log!( + ["A", "B", "D"], + ["A", "C", "D"], + )); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(cut.is_some()); + let cut = cut.unwrap(); + let expected: Vec>> = vec![HashSet::from(["A".into()]) , HashSet::from(["B".into(), "C".into()]), HashSet::from(["D".into()])]; + assert_eq!(cut.get_own(), expected); + + } + + + #[test] + fn test_with_loop(){ + let dfg = DirectlyFollowsGraph::discover(&event_log!( + ["B", "C"], + ["C", "B"], + ["B", "C", "E", "F", "B", "C"], + ["C", "B", "E", "F", "B", "C"], + ["B", "C", "E", "F", "C", "B"], + ["C", "B", "E", "F", "B", "C", "E", "F", "C", "B"], + )); + 
assert!(sequence_cut_wrapper(&dfg, &HashSet::new()).is_none()); + } + + + #[test] + fn test_triangle_cut() { + let dfg = DirectlyFollowsGraph::discover(&event_log!( + ["A", "C"], + ["B", "C", "D"], + ["B", "D"] + )); + + + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + + if let Some(c) = cut { + assert_eq!(c.get_own() , Vec::from([HashSet::from(["A".into(), "B".into()]) , HashSet::from(["C".into()]), HashSet::from(["D".into()])])); + } + } + + + #[test] + fn test_strict_sequence_cut_wrapper(){ + let log = event_log!( + ["a", "b", "c"], + ["a", "c"], + ); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()).unwrap(); + println!("{:?}", cut); + } + +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs new file mode 100644 index 0000000..dc7f073 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs @@ -0,0 +1,203 @@ +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::{Node, OperatorType}; +use crate::discovery::case_centric::dfg::discover_dfg_with_classifier; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::find_cut; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{ActivityConcurrent, Return}; +use crate::discovery::case_centric::inductive_miner_app::splits::perform_split; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; +use crate::EventLog; + +/// Filters an event log by removing all events whose activity matches a pivot. 
+/// +/// The function splits the input log into two logs: +/// - one log containing the original traces **without** the pivot activity +/// - one log containing traces consisting only of the filtered-out pivot events +/// +/// The number of traces is preserved in both logs. +/// +/// # Returns +/// A tuple `(filtered_out_log, filtered_log)` where: +/// +/// - `filtered_out_log` contains only the removed pivot events (possibly empty traces). +/// - `filtered_log` contains the original behavior without the pivot events. +fn filter_out_activity( + log: EventLog, + event_log_classifier: &EventLogClassifier, + pivot: String, +) -> (EventLog, EventLog) { + let mut filtered_log = log.clone_without_traces(); // the logs containing the filtered activities + let mut filtered_out_log = log.clone_without_traces(); // the log containing left behavior + + for trace in log.traces { + // get the trace length + let len_t = trace.events.len(); + + // do the same for the traces again + let mut new_trace = trace.clone_without_events(); + let mut other_new_trace = trace.clone_without_events(); + + // need the option for initialization purpose, this option marks whether the element was actually contained in the trace + let mut pivot_event = None; // if set the activity was actually contained in this trace + + // check on every event in this trace + for event in trace.events { + let other = event_log_classifier.get_class_identity(&event); + if pivot != other { + new_trace.events.push(event); + } else if pivot_event.is_none() { + // set the pivot event + pivot_event = Some(event) + } + } + + // check whether the event was actually part of the trace + if pivot_event.is_some() { + // if so push the trace, as it is (excluding the left out events) + let event = pivot_event.unwrap(); + // push the pivot event as often as it has been filtered out (maybe use a counter here) + for _ in 0..(len_t - new_trace.events.len()) { + other_new_trace.events.push(event.clone()); + } + // push the filtered 
logs + filtered_log.traces.push(new_trace); + filtered_out_log.traces.push(other_new_trace); + } else { + // new trace equals the trace from before, therefore we should not push the empty lg (right?) + filtered_log.traces.push(new_trace); + + + //mind that empty traces are being pushed too + filtered_out_log.traces.push(other_new_trace); + + } + + } + + (filtered_out_log, filtered_log) +} + +/// Attempts to detect an *activity concurrent* fall-through pattern. +/// +/// This fall through iteratively removes one activity at a time +/// (starting with the most frequent one) and checks whether the remaining logs yield any valid cut. +/// If removing the activity yields a valid cut, the activity is considered concurrent to the rest of the process. +/// +/// The split operations is performed on a valid cut as well, for efficiency reasons. +/// +/// # Returns +/// - 'ActivityConcurrent(...)' enum if a concurrent activity is detected, containing the constructed concurrency node, the log of removed activity instances and the already performed split. 
+/// - 'Return(log)' the original log without changes +fn activity_concurrent( + log: EventLog, + event_log_classifier: &EventLogClassifier, + parameters: &Parameters) -> Fallthrough { + let dfg = discover_dfg_with_classifier(&log, event_log_classifier); + + // get the activities and transform into a vector + let mut activities: Vec<(String,u32)> = dfg.activities.clone().into_iter().collect(); + // sort by cardinality (descending) + (&mut activities).sort_by(|a,b| a.1.partial_cmp(&b.1).unwrap()); // safe unwrap as working with u32 here + + // now leave out one activity after another and try to find a cut + for (activity, _) in activities.into_iter().rev() { + // remove activity from this log + let (filtered_out_log, filtered_log) = + filter_out_activity(log.clone(), event_log_classifier, activity); + + // build a dfg in order to use already established find_cut method + let dfg = discover_dfg_with_classifier(&filtered_log, event_log_classifier); + match find_cut(&dfg, &filtered_log, event_log_classifier, parameters) { + None => continue, // leave out another activity (if another is left) + Some(cut) => { + // do the split here + let split = perform_split(&filtered_log, event_log_classifier, cut); + + // create a node without children, as this has to be processed in the more high level functions + let node = Node::new_operator(OperatorType::Concurrency); + + // return if a cut is found + return ActivityConcurrent(node, filtered_out_log, split); + } + } + } + + // default return + Return(log) +} + +/// Public wrapper for [`activity_concurrent`]. +/// +/// This function simply forwards its arguments to +/// `activity_concurrent` and exists for consistency +/// with other fall-through detection wrappers. 
+pub fn activity_concurrent_wrapper(log: EventLog, + event_log_classifier: &EventLogClassifier, + parameters: &Parameters) -> Fallthrough { + activity_concurrent(log, event_log_classifier, parameters) +} + +mod test_activity_concurrent { + use std::collections::HashSet; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::{event_log, EventLog}; + use crate::core::process_models::process_tree::{Node, OperatorType}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_concurrent::{activity_concurrent, filter_out_activity}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::ActivityConcurrent; + + fn events_equal(log: &EventLog, o_log: &EventLog, event_log_classifier: &EventLogClassifier) -> bool { + if log.traces.len() == o_log.traces.len() { + for (t0, t1) in log.traces.iter().zip(o_log.traces.iter()) { + if t0.events.len() == t1.events.len() { + for (e0,e1) in t0.events.iter().zip(t1.events.iter()) { + let a0 = event_log_classifier.get_class_identity(e0); + let a1 = event_log_classifier.get_class_identity(e1); + if a0 != a1 { + println!("Two activities did not match{:?}", (a0, a1)); + + return false; + } + } + } + } + return true; + } + false + } + + #[test] + fn test_filter_out_activity_and_activity_concurrent_yield_same_result() { + let log = event_log!( + ["a", "b", "c", "d"], + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + // mind the empty trace + let ex1 = event_log!(["b"], ["b"],[], ["b"]); + + let ex2 = event_log!( + ["a", "c", "d"], + ["d","a"], + ["a", "d", "c"], + ["c", "d"], + ); + + let classifier = EventLogClassifier::default(); + + let (log1, log2) = + filter_out_activity(log.clone(), &EventLogClassifier::default(), "b".to_string()); + + assert!(events_equal(&log1, &ex1, &classifier)); + assert!(events_equal(&log2, &ex2, &classifier)); + let ActivityConcurrent(node, log1, split)= activity_concurrent(log, &classifier, &HashSet::new()) 
else { return assert!(false); }; + assert!(!log1.traces.is_empty() && !split.is_empty()); + let ex_node = Node::new_operator(OperatorType::Concurrency); + assert_eq!(node, ex_node); + + } + + + +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs new file mode 100644 index 0000000..b7da602 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs @@ -0,0 +1,329 @@ + +use std::collections::HashMap; +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::{event_log, EventLog}; +use crate::core::process_models::process_tree::{Node, OperatorType}; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{ActivityOncePerTrace, Return}; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; + +/// This function iterates over every event of every trace and removes the specified event +fn remove_activity_from_log( + mut log: EventLog, + event_log_classifier: &EventLogClassifier, + activity: String, +) -> EventLog { + log.traces = log + .traces + .into_iter() + .map(|mut trace| { + trace.events = trace + .events + .into_iter() + .filter(|event| { + let other = event_log_classifier.get_class_identity(event); + activity != other + }) + .collect(); + trace + }) + .collect(); + + // experimental, what if we only retain traces not empty? 
+ // log.traces.retain(|trace| {trace.events.len() > 0}); + log +} + +#[test] +fn test_remove_activity_with_empty_trace() { + let log = event_log!([], ["a"], ["a", "b"]); + let r = remove_activity_from_log(log, &EventLogClassifier::default(), "a".to_string()); + + let expected = event_log!([], [], ["b"],); + assert_eq!(r, expected); +} + +/// Helper struct to count the occurrences of each activity in the whole log and in every trace. +/// In 'trace_activities' each index corresponds to a trace at the same index in the event log. +/// The 'activities' member contains information about how often every activity occurs in the whole event log. +struct ActivityTraceCounter { + activities: HashMap, + trace_activities: Vec>, +} + +impl ActivityTraceCounter { + /// Counts how often every activity of the event log occurs in every trace and in the whole + /// event log. + fn new(log: &EventLog, event_log_classifier: &EventLogClassifier) -> ActivityTraceCounter { + let mut activities = HashMap::new(); + let mut trace_activities = Vec::with_capacity(log.traces.len()); + + for (i, trace) in log.traces.iter().enumerate() { + trace_activities.push(HashMap::new()); + for event in &trace.events { + let activity = event_log_classifier.get_class_identity(event); + // update activities + if let Some(count) = activities.get_mut(&activity) { + *count += 1; + } else { + activities.insert(activity.clone(), 1); + } + + if let Some(count) = trace_activities[i].get_mut(&activity) { + *count += 1; + } else { + trace_activities[i].insert(activity, 1); + } + } + } + + ActivityTraceCounter { + activities, + trace_activities, + } + } + + /// Consume the object and returns the activity count as well as the vector containing the activity count for every trace. 
+ fn get(self) -> (HashMap, Vec>) { + (self.activities, self.trace_activities) + } +} + +fn cleanup_log( + log: EventLog, + event_log_classifier: &EventLogClassifier, + activity: String, +) -> Fallthrough { + let log = remove_activity_from_log(log, event_log_classifier, activity.clone()); + + let mut node = Node::new_operator(OperatorType::Concurrency); + let activity_leaf = Node::new_leaf(Some(activity)); + node.add_child(activity_leaf); + + ActivityOncePerTrace(node,log) +} + +///This fall through applies if an activity occurs once in every trace of the log. +/// In case this applies to multiple ones an arbitrary is chosen (with the lowest cardinality) +pub fn activity_once_per_trace( + log: EventLog, + event_log_classifier: &EventLogClassifier, +) -> Fallthrough { + let k = log.traces.len(); + // count how often every activity occurs in the event log and in every trace + let (activities, trace_activities) = + ActivityTraceCounter::new(&log, event_log_classifier).get(); + let mut activities: Vec<(String, usize)> = activities.into_iter().collect(); // transform to vector in order to sort the activities according to cardinality + + // Sort the activities by cardinality + (&mut activities).sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); // safe unwrap as we compare u32 to other u32 + + // set result to none (for now) + let mut result: Option = None; + + // reverse iterate over the activities, as the activities with more occurrences are more likely to appear precisely once every trace + 'activity_loop: for (activity, cardinality) in activities.into_iter().rev() { + // activity has to appear precisely once in every trace, therefore skip if it does not appear as often as we have traces + if cardinality != k { + continue 'activity_loop; + } + for trace in &trace_activities { + // has to appear precisely one time + if let Some(count) = trace.get(&activity) { + if *count != 1 { + continue 'activity_loop; + } + } else { + // activity did not appear in the trace or in the 
event log at all + } + } + // at this point the activity has appeared precisely one time in every trace + result = Some(activity); + break 'activity_loop; + } + + // check result of activity loop + if result.is_some() { + cleanup_log(log, event_log_classifier, result.unwrap()) + } else { + // does not apply - return the event log to be used in other fallthrough cases + Return(log) + } +} + +/// Public wrapper for [`activity_once_per_trace`]. +/// +/// This function simply forwards its arguments to +/// `activity_once_per_trace` and exists for consistency +/// with other fall-through detection wrappers. +pub fn activity_once_per_trace_wrapper( + log: EventLog, + event_log_classifier: &EventLogClassifier, + _: &Parameters, +) -> Fallthrough { + activity_once_per_trace(log, event_log_classifier) +} + +mod test_activity_once_per_trace { + use crate::{event_log, EventLog}; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::process_tree::{Node, OperatorType}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_once_per_trace::activity_once_per_trace; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{ActivityOncePerTrace, Return}; + + fn events_equal(log: &EventLog, o_log: &EventLog, event_log_classifier: &EventLogClassifier) -> bool { + if log.traces.len() == o_log.traces.len() { + for (t0, t1) in log.traces.iter().zip(o_log.traces.iter()) { + if t0.events.len() == t1.events.len() { + for (e0,e1) in t0.events.iter().zip(t1.events.iter()) { + let a0 = event_log_classifier.get_class_identity(e0); + let a1 = event_log_classifier.get_class_identity(e1); + if a0 != a1 { + println!("Two activities did not match{:?}", (a0, a1)); + + return false; + } + } + } + } + return true; + } + false + } + + #[test] + /// The example as defined in Robust Process Mining 
    #[test]
    /// Both 'c' and 'd' occur exactly once in every trace, so either of them may be picked
    /// by the fall-through; the test accepts both outcomes.
    fn test_with_multiple_activities_appearing_once() {
        let log = event_log!(
            ["a", "b", "c", "d"], // 'c' and 'd' each occur exactly once in every trace
            ["d", "a", "b", "c"],
            ["a", "d", "c"],
            ["b", "c", "d"],
        );
        let ActivityOncePerTrace(process_node, log) =
            activity_once_per_trace(log, &EventLogClassifier::default())
        else {
            return assert!(false);
        };

        // remaining log if 'c' was chosen and removed
        let expected_log = event_log!(
            ["a", "b", "d"],
            ["d", "a", "b"],
            ["a", "d"],
            ["b", "d"],
        );
        // remaining log if 'd' was chosen and removed
        let expected_log2 = event_log!(
            ["a", "b", "c"],
            ["a", "b", "c"],
            ["a", "c"],
            ["b", "c"],
        );

        // it really is arbitrary whether c or d is chosen
        assert!(events_equal(&log, &expected_log, &EventLogClassifier::default()) ||
            events_equal(&log, &expected_log2, &EventLogClassifier::default()));


        let mut expected_node = Node::new_operator(OperatorType::Concurrency);
        expected_node.add_child(Node::new_leaf(Some(String::from("c"))));

        let mut expected_node2 = Node::new_operator(OperatorType::Concurrency);
        expected_node2.add_child(Node::new_leaf(Some(String::from("d"))));

        // NOTE(review): node and remaining log are checked independently, so a mismatch
        // between the chosen leaf and the removed activity would not be detected here.
        assert!(process_node == expected_node || process_node == expected_node2)
    }
crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::{Node, OperatorType}; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{EmptyTraces, Return}; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; +use crate::EventLog; + +/// Checks whether the empty traces fallthrough applies to a given log, +/// it applies when the log contains empty traces. +/// If so a Process node with operator instance xor is returned, having an empty leaf as child if +/// there is at least one empty trace in the log, +/// the other non-empty logs have to processed in another recursion of the IM Algorithm. + fn empty_traces(mut log: EventLog, _event_log_classifier: &EventLogClassifier) -> Fallthrough { + let len_before = log.traces.len(); + log.traces = log.traces.into_iter().filter(|trace| !trace.events.is_empty()).collect(); + + if len_before != log.traces.len(){ + // if the len of the trace has changed in the meantime, this means there are some traces lost, + // due to that they have been empty + + // return a Process node together with the resulting unprocessed traces of the event log + + let mut node = Node::new_operator(OperatorType::ExclusiveChoice); + node.add_child(Node::new_leaf(None)); + EmptyTraces(node, log) + } else { + // otherwise this fallthrough does not apply + Return(log) + } +} + +/// Public wrapper for [`empty_traces`]. +/// +/// This function simply forwards its arguments to +/// `empty_traces` and exists for consistency +/// with other fall-through detection wrappers. 
+pub fn empty_traces_wrapper(log: EventLog, _event_log_classifier: &EventLogClassifier, _: &Parameters) -> Fallthrough { + empty_traces(log, _event_log_classifier) +} + + +mod test_empty_traces_ft{ + use crate::{event_log, event}; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::process_tree::{Node, OperatorType}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::empty_traces::empty_traces; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{EmptyTraces, Return}; + + #[test] + /// test the simplest case, the log should retrun a xor-node with one child from type empty and + /// a log containing the only not empty trace + fn test_empty_traces(){ + let log = event_log!( + [], + [], + [], + ["a"], + [], + ); + + let EmptyTraces(node, log) = empty_traces(log, &EventLogClassifier::default()) else { return assert!(false); }; + assert_eq!(log.traces.len(), 1); + assert_eq!(log.traces[0].events.len(), 1); + assert_eq!(log.traces[0].events[0], event!("a")); + + let mut expected_node = Node::new_operator(OperatorType::ExclusiveChoice); + expected_node.add_child(Node::new_leaf(None)); + + assert_eq!(node, expected_node); + } + + #[test] + /// Assert that an event log + fn test_not_empty_traces(){ + let log = event_log!( + ["a"], + ["b"], + ["f"], + ["a"], + ["g"], + ); + + let Return(log1) = empty_traces(log.clone(), &EventLogClassifier::default()) else { return assert!(false); }; + + assert_eq!(log, log1); + } + + #[test] + /// assert that an empty event log results in no result ('None'), + /// as this is the basecase + fn test_empty_log(){ + let log = event_log!(); + let res = empty_traces(log.clone(), &EventLogClassifier::default()); + match res { + Return(log1) => assert_eq!(log, log1), + _ => assert!(false), + } + } + + + #[test] + fn test_log_only_empty_traces(){ + let log = event_log!( + [], [], [] + ); + + let res = empty_traces(log, 
/// Represents the result of attempting to apply a fall-through rule.
///
/// Each variant corresponds to a specific fall-through strategy and
/// contains the resulting [`Node`], i.e. Operator-type and children if any, together with the
/// event log(s) derived during its application.
///
/// If no fall-through rule is applicable, the `Return` variant is used.
/// In this case, the original event log is returned unchanged.
///
/// Not to be confused with [`FallThroughLabel`]
pub enum Fallthrough {
    /// The log contained empty traces: an exclusive-choice node with a silent leaf,
    /// plus the log without its empty traces.
    EmptyTraces(Node, EventLog),
    /// An activity occurred exactly once in every trace: a concurrency node holding the
    /// leaf of that activity, plus the log with that activity removed.
    ActivityOncePerTrace(Node, EventLog),
    /// Result of the "activity concurrent" rule.
    /// NOTE(review): the producer is not part of this file; presumably the node is a
    /// concurrency operator and `Split` describes the partition that was found - confirm.
    ActivityConcurrent(Node, EventLog, Split),
    /// Traces were split wherever an end activity was followed by a start activity:
    /// a loop node with two silent leaves (the first one is a placeholder), plus the split log.
    StrictTauLoop(Node, EventLog),
    /// Traces were split at start activities: a loop node with two silent leaves
    /// (the first one is a placeholder), plus the split log.
    TauLoop(Node, EventLog),
    /// Last resort: a loop node whose do-part is a concurrency over all activities and
    /// whose redo-part is a silent leaf.
    FlowerModel(Node),
    /// The rule did not apply; carries the log to hand to the next rule.
    Return(EventLog),
}

impl Fallthrough {

    /// Returns `true` if `self` and `other` are the same variant, ignoring their payloads.
    pub fn same_enum_variant(&self, other: &Self) -> bool {
        discriminant(self) == discriminant(other)
    }
}
+/// This FT should only be applied if the event log does not contain any empty trace +pub fn flower_model(log: EventLog, event_log_classifier: &EventLogClassifier) -> Fallthrough { + let dfg = discover_dfg_with_classifier(&log, event_log_classifier); + + // get all activities in the directly follows graph + let mut activities: Vec = dfg.activities.iter().map(|(a,_)| a.clone()).collect(); + + // sort activities to allow for a defined behavior or so + (&mut activities).sort(); + + // create a concurrency relation over all non-empty activities + let mut sub_tree = Node::new_operator(OperatorType::Concurrency); + + // add a leaf for each activity + for activity in activities { + sub_tree.add_child(Node::new_leaf(Some(activity))); + } + + // flower root + let mut flower_node_root = Node::new_operator(OperatorType::Loop); + // first child of flower model is a concurrency relation over all non-empty activities - do part + flower_node_root.add_child(sub_tree); + + // add silent transition as second child - redo part + flower_node_root.add_child(Node::new_leaf(None)); + + FlowerModel(flower_node_root) +} + + +mod test_flower_model { + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::process_tree::{Node, OperatorType}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::flower_model::flower_model; + use crate::event_log; + + #[test] + fn test_basic_flower_model_leemans(){ + let log = event_log!( + ["a", "b", "c", "d"], + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + let flower = flower_model(log, &EventLogClassifier::default()); + + // do part consist of all activities in a concurrency relation + let mut expected_sub_flower = Node::new_operator(OperatorType::Concurrency); + expected_sub_flower.add_child(Node::new_leaf(Some(String::from("a")))); + 
expected_sub_flower.add_child(Node::new_leaf(Some(String::from("b")))); + expected_sub_flower.add_child(Node::new_leaf(Some(String::from("c")))); + expected_sub_flower.add_child(Node::new_leaf(Some(String::from("d")))); + + // build expected flower model + let mut expected_flower = Node::new_operator(OperatorType::Loop); + expected_flower.add_child(expected_sub_flower); + + // the redo part is just a silent transition + expected_flower.add_child(Node::new_leaf(None)); + + + + if let Fallthrough::FlowerModel(flower) = flower { + assert_eq!(expected_flower, flower); + } else { + assert!(false); + } + + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs new file mode 100644 index 0000000..35cc815 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs @@ -0,0 +1,73 @@ +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_concurrent::activity_concurrent_wrapper; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_once_per_trace::activity_once_per_trace_wrapper; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::empty_traces::empty_traces_wrapper; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::flower_model::flower_model; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::strict_tau_loop::strict_tau_loop_wrapper; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::tau_loop::tau_loop_wrapper; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::{Parameter, Parameters}; +use crate::EventLog; + +mod activity_concurrent; +pub mod fallthrough; +mod 
activity_once_per_trace; +mod empty_traces; +mod flower_model; +mod strict_tau_loop; +mod tau_loop; + + + +/// Applies the sequence of *fallthrough rules* used by the Inductive Miner to an event log. +/// +/// This function iteratively evaluates predefined fallthroughs in the following order: +/// - Empty Traces +/// - Activity Once Per Trace +/// - Activity Concurrent +/// - Strict Tau Loop +/// - Tau Loop +/// - Flower Model +/// +/// Whether a Fallthrough is applied at all, is controlled by the provided parameters. +/// Note, that the Flower Model is applied nevertheless. +/// +/// # Parameters +/// - log: The event log to which a Fallthrough rules are applied. +/// - event_log_classifier: classifier to identify activities in event log events +/// - parameters: the provided parameters +/// +/// # Returns +/// A `Fallthrough` value representing either: +/// - a discovered process model produced by a fallthrough, or +/// - the flower model if no fallthrough applies or fallthroughs are disabled. 
+pub fn apply_fallthrough( + mut log: EventLog, + event_log_classifier: &EventLogClassifier, + parameters: &Parameters, +) -> Fallthrough { + let funcs: Vec Fallthrough> = vec![ + empty_traces_wrapper, + activity_once_per_trace_wrapper, + activity_concurrent_wrapper, + strict_tau_loop_wrapper, + tau_loop_wrapper, + ]; + + // check if Fallthrough shall be applied by provided parameters + if parameters.contains(&Parameter::ApplyFallthrough){ + // iterate over all fall throughs + for apply_fallthrough in funcs { + let ft = apply_fallthrough(log, event_log_classifier, parameters); + if let Fallthrough::Return(returned_log) = ft { + log = returned_log; + continue; + } else { + return ft; + } + } + } // else the flower model is applied + + // last possible Option: Flower Model + flower_model(log, event_log_classifier) +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs new file mode 100644 index 0000000..7797d35 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs @@ -0,0 +1,239 @@ +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::Node; +use crate::core::process_models::process_tree::OperatorType::Loop; +use crate::discovery::case_centric::dfg::discover_dfg_with_classifier; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{Return, StrictTauLoop}; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; +use crate::EventLog; + +fn split_log_according_to_strict_tau(log: EventLog, classifier: &EventLogClassifier) -> EventLog{ + let dfg = discover_dfg_with_classifier(&log, classifier); + let mut 
result_log = log.clone_without_traces(); + + + for trace in log.traces{ + let mut last_event_was_end = false; + let mut new_trace = trace.clone_without_events(); + + + for event in trace.events{ + let activity = classifier.get_class_identity(&event); + + // check condition + if last_event_was_end && dfg.start_activities.contains(&activity){ + // condition satisfied, the last activity was an end activity, this one is a start, + // we need to split the current trace at this point right now + let help_trace = new_trace.clone_without_events(); + result_log.traces.push(new_trace); + new_trace = help_trace; + } + + // push event to new_trace + new_trace.events.push(event); + + // if this activity is an end activity set the according flag + last_event_was_end = dfg.end_activities.contains(&activity); + + } + + // if the trace hasn't been pushed, we need to push it now -- this includes empty traces + result_log.traces.push(new_trace); + } + // we need to iterate through the entire log and split a trace if after an end activity an start activity appears + result_log +} + +/// +fn strict_tau_loop(log: EventLog, classifier: &EventLogClassifier) -> Fallthrough { + let k = log.traces.len(); + let log = split_log_according_to_strict_tau(log, classifier); + + if k < log.traces.len(){ + let mut node = Node::new_operator(Loop); + node.add_child(Node::new_leaf(None)); // temporary at index 0 + node.add_child(Node::new_leaf(None)); // redo part is silent + + + StrictTauLoop( + // first return a process node with the required structure + node, + // secondly return the new event log + log + ) + } else if k > log.traces.len(){ + panic!("Original log contains more traces, than the log split according to strict tau.") + }else { + // default return + Return(log) + } + +} + +/// Public wrapper for [`strict_tau_loop`]. +/// +/// This function simply forwards its arguments to +/// `strict_tau_loop` and exists for consistency +/// with other fall-through detection wrappers. 
+pub fn strict_tau_loop_wrapper(log: EventLog, classifier: &EventLogClassifier, _:&Parameters) -> Fallthrough { + strict_tau_loop(log, classifier) +} + + + +mod test_strict_tau_loop{ + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{Return, StrictTauLoop}; + use crate::{event_log, EventLog}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::strict_tau_loop::strict_tau_loop; + + + fn cmp_logs(log: Fallthrough, expected: EventLog){ + let classifier = EventLogClassifier::default(); + assert!(if let StrictTauLoop(_, log) = log { + log.traces.len() == expected.traces.len() && !log.traces.iter().zip(expected.traces.iter()).any(|(t0,t1)| + t0.events.len() != t1.events.len() || t0.events.iter().zip(t1.events.iter()).any(|(e0,e1)| { + classifier.get_class_identity(e0) != classifier.get_class_identity(e1) + }) + ) + } else { + false + }) + } + #[test] + fn test_split(){ + let log = event_log!( + ["a", "b", "c", "d"], + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + let expected_log = event_log!( + ["a", "b", "c"], + ["d"], + ["d"], + ["a", "b"], + ["a", "d", "c"], + ["b", "c"], + ["d"] + ); + + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + + + } + + #[test] + fn strict_tau_loop_simple_split() { + let log = event_log!( + ["a", "b", "c", "a", "c"], // contains c (end) followed by a (start) -> split + ); + + // Splitting at c|a -> two traces: "a b c" and "a d" + // L.len() = 1, L1.len() = 2 => strict tau-loop discovered + let expected_log = event_log!( + ["a", "b", "c"], + ["a", "c"], + ); + + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + + } + + + #[test] + fn strict_tau_multiple_splits_in_trace() { + let log = event_log!( + // start set will contain "a" (first 
event of every trace if all traces start with a), + // end set will contain "c" (last events), + // here we have "... c a ... c a ..." -> two splits -> three traces after split + ["a", "b", "c", "a", "b", "c", "a", "b", "c"], + ); + + // Splits at each c|a produce three identical traces "a b c" + // L.len() = 1, L1.len() = 3 => tau-loop discovered + let expected_log = event_log!( + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"], + ); + + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + + + } + + #[test] + fn strict_tau_no_split() { + let log = event_log!( + ["a", "b", "c"], // starts with a, ends with c + ["d", "e"], // starts with d, ends with e + ["f", "g", "h"] // starts with f, ends with h + ); + + // start set = {a, d, f}, end set = {c, e, h} + // There is no occurrence inside any trace of (c|e|h) followed immediately by (a|d|f) + // => L1.len() == L.len() -> no tau-loop found + let expected_log = event_log!( + ["a", "b", "c"], + ["d", "e"], + ["f", "g", "h"] + ); + + if let Return(log) = strict_tau_loop(log, &EventLogClassifier::default()){ + assert_eq!(log, expected_log); + } + + } + + #[test] + fn strict_tau_start_end_overlap() { + let log = event_log!( + ["a", "b", "a", "c", "a"], // start set contains "a", end set contains "a" + ["c", "d"] // trivial trace starting and ending with a + ); + let expected_log = event_log!( + ["a", "b", "a"], // prefix up to first split + ["c", "a"], // remainder after that split + ["c", "d"], // original second trace unchanged + ); + + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + + } + + #[test] + fn strict_tau_single_trace_to_many() { + let log = event_log!( + ["x", "a", "b", "a", "x", "y", "a"], // suppose start set includes x and end set includes a + ); + let expected_log = event_log!( + ["x", "a", "b", "a"], + ["x", "y", "a"], + ); + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + } + + + // 7) Edge case: traces of 
length 1 where start==end; adjacent repetition inside a longer trace causes multiple tiny splits + #[test] + fn strict_tau_length_one_traces_and_adjacent_repeats() { + let log = event_log!( + ["a"], // start/end = a + ["a", "a", "b", "a", "a"], // many a|a adjacencies + ); + + // start set = {a}, end set = {a, a} => {a} + // split at every a|a adjacency inside second trace -> many fragments + // One reasonable expected L1 (fragmenting around adjacent a's) could be: + let expected_log = event_log!( + ["a"], // first trace unchanged + ["a"], // fragment from leading 'a' in second trace + ["a", "b", "a"], // middle fragment + ["a"], // trailing fragment + ); + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs new file mode 100644 index 0000000..b98d033 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs @@ -0,0 +1,131 @@ +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::Node; +use crate::core::process_models::process_tree::OperatorType::Loop; +use crate::discovery::case_centric::dfg::discover_dfg_with_classifier; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::Return; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; +use crate::EventLog; + +/// Splits the event log according to the semantics of the `tau_loop` fall-through. +/// +/// Each trace is split at every occurrence of a *start activity* +/// Whenever a start activity appears and the current subtrace is +/// non-empty, a new trace is created. 
+/// +/// Empty traces are not inserted into the resulting log +/// +/// # Returns +/// A new 'Eventlog' in which traces are split at occurrences of start activities. +/// The total number of traces may increase +/// if loop behavior is detected. + +fn split_log_according_to_tau_loop(log: EventLog, classifier: &EventLogClassifier) -> EventLog{ + // simply split a trace at the occurrence of any starting activity + let dfg = discover_dfg_with_classifier(&log, classifier); + let mut result_log = log.clone_without_traces(); + + + for trace in log.traces{ + let mut new_trace = trace.clone_without_events(); + + + for event in trace.events{ + let activity = classifier.get_class_identity(&event); + + + // check condition + if dfg.start_activities.contains(&activity) && !new_trace.events.is_empty(){ + // condition satisfied, this activity is a start activity + let help_trace = new_trace.clone_without_events(); + result_log.traces.push(new_trace); + new_trace = help_trace; + } + + new_trace.events.push(event); + } + + // if the trace hasn't been pushed, we need to push it now, but exclude empty traces + if !new_trace.events.is_empty(){ + result_log.traces.push(new_trace); + } + } + // we need to iterate through the entire log and split a trace if after an end activity an start activity appears + result_log +} + +/// Attempts to apply the 'tau_loop' Fallthrough by +fn tau_loop(log: EventLog, classifier: &EventLogClassifier) -> Fallthrough { + let k = log.traces.len(); + let log = split_log_according_to_tau_loop(log, classifier); + + if k < log.traces.len(){ + + let mut node = Node::new_operator(Loop); + node.add_child(Node::new_leaf(None)); // placeholder transition, will be replaced + node.add_child(Node::new_leaf(None)); // silent transition as redo part + Fallthrough::TauLoop( + // first return a process node with the required structure + node, + log + ) + } else if k > log.traces.len(){ + panic!("Original log contains more traces, than the log split according to strict 
tau.") + }else { + // default return + Return(log) + } + +} + +/// Public wrapper for [`tau_loop`]. +/// +/// This function simply forwards its arguments to +/// `tau_loop` and exists for consistency +/// with other fall-through detection wrappers. +pub fn tau_loop_wrapper(log: EventLog, classifier: &EventLogClassifier, _:&Parameters) -> Fallthrough { + tau_loop(log, classifier) +} + + + +mod test_tau_loop{ + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::TauLoop; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::tau_loop::tau_loop; + use crate::{event_log, EventLog}; + + fn equal_events(log: &EventLog, o_log: &EventLog, classifier: &EventLogClassifier) -> bool { + log.traces.len() == o_log.traces.len() && !log.traces.iter().zip(o_log.traces.iter()).any(|(t, o)| { + t.events.len() != o.events.len() || t.events.iter().zip(o.events.iter()).any(|(e0,e1)| { + classifier.get_class_identity(e0) != classifier.get_class_identity(e1) + }) + }) + } + #[test] + fn test_split(){ + let log = event_log!( + ["a", "b", "c", "d"], // here i removed the 'd' + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + let expected_log = event_log!( + ["a"], + ["b", "c"], + ["d"], + ["d"], + ["a"], + ["b"], + ["a"], + ["d", "c"], + ["b", "c"], + ["d"] + ); + + let TauLoop(_node, log)= tau_loop(log, &EventLogClassifier::default()) else { return assert!(false);}; + + assert!(equal_events(&log, &expected_log, &EventLogClassifier::default())); + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs new file mode 100644 index 0000000..05a3e4f --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs @@ -0,0 +1,266 @@ +//! 
inductive miner discovery algorithm + +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::{Node, ProcessTree}; +use crate::discovery::case_centric::dfg::discover_dfg_with_classifier; +use crate::discovery::case_centric::inductive_miner_app::base_cases::base_cases::{find_base_case, BaseCases}; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::find_cut; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::apply_fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::splits::perform_split; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::{Parameter, Parameters}; +use crate::EventLog; + +mod cut_finder; +mod structures; +mod splits; +mod fallthrough; +mod base_cases; + + +/// Mines a process tree from the given event log using the Inductive Miner +/// with default parameter settings. +/// +/// This function initializes the default mining parameters, recursively +/// builds the process tree, and applies post-processing (folding) +/// if configured in the parameters. +/// +/// # Parameters +/// - `log`: The event log to mine. +/// - `event_log_classifier`: Classifier used to determine activity identities. +/// +/// # Returns +/// The root `ProcessNode` of the discovered process tree. 
+pub fn inductive_miner_default_parameters(log: EventLog, event_log_classifier: &EventLogClassifier) -> ProcessTree { + // uses default parameters while for mining the process tree model + let parameters = Parameter::generate_default_parameters(); + let node = build_tree(log, event_log_classifier, ¶meters, 0); + // node.fold(); // as default parameters contain to fold the process tree + ProcessTree::new(node) +} + + +/// Converts a detected cut into a corresponding process tree node. +/// +/// The event log is split according to the cut ([`perform_split`]), and for each resulting +/// sub-log the Inductive Miner is recursively applied. The resulting +/// subtrees become the children of a new process node labeled with +/// the cut's operator. +/// +/// # Parameters +/// - `cut`: The detected cut. +/// - `event_log_classifier`: Activity classifier. +/// - `log`: The event log to split. +/// - `parameters`: Mining parameters. +/// - `depth`: Current recursion depth - debug reasons +/// +/// # Returns +/// A `ProcessNode` representing the cut and its recursively mined children. +fn convert_cut_to_process_node<'a>(cut: Cut<'a>, event_log_classifier: &EventLogClassifier, log: EventLog, parameters: &Parameters, depth: usize) -> Node { + // extract operator and split the original event log + let operator = cut.get_operator(); + let split = perform_split(&log, event_log_classifier, cut); + + // acquire ownership of the split vector + let split = split.get_own(); + + // create new node + let mut cut_node = Node::new_operator(operator); + + // this could be done in parallel + for log in split{ + cut_node.add_child(build_tree(log, &event_log_classifier, parameters,depth +1)); + } + + // return new process node + cut_node +} + +/// Applies fallthrough strategies ([`apply_fallthrough`]) if no valid cut can be found. +/// +/// +/// Fallthroughs ensure that a process tree can always be constructed, +/// even if the log does not yield a structured cut. 
Depending on the +/// detected pattern, additional recursive mining steps may be performed. +/// +/// # Parameters +/// - `log`: The event log. +/// - `event_log_classifier`: Activity classifier. +/// - `parameters`: Mining parameters. +/// - `depth`: Current recursion depth - debug reasons +/// +/// # Returns +/// A `ProcessNode` representing the fallthrough model. +fn fallthrough_finder(log: EventLog, event_log_classifier: &EventLogClassifier, parameters: &Parameters, depth: usize) -> Node { + // default fallthrough + // We are getting a guaranteed fallthrough, default is flower model + match apply_fallthrough(log, event_log_classifier, parameters){ + #[allow(unused_mut)] + Fallthrough::EmptyTraces(mut node, log) | + Fallthrough::ActivityOncePerTrace(mut node, log) => { + node.add_child(build_tree(log, &event_log_classifier, parameters,depth+1)); + node + } + #[allow(unused_mut)] + Fallthrough::StrictTauLoop(mut node, log) | + Fallthrough::TauLoop(mut node, log) => { + if let Node::Operator(op) = &mut node{ + // replace the placeholder node at index 0 + op.children[0] = build_tree(log, event_log_classifier, parameters,depth+1); + } else { + panic!("TauLoop node is not an operator node.") + } + node + } + Fallthrough::ActivityConcurrent(mut node, filtered_out_log, split) => { + // the filtered out log are all the logs containing all traces and therefore all events where the chosen activity occurred + node.add_child(build_tree(filtered_out_log, event_log_classifier, parameters,depth+1)); + + // the split is already performed in the activity concurrent fall through to save one unnecessary find_cut iteration + let operator_type = split.get_operator().clone(); + let split = split.get_own(); + + let mut node = Node::new_operator(operator_type); + // this could be done in parallel + // every event log yields one process node + for log in split{ + // convert every log into one process node catching the behavior + node.add_child(build_tree(log, &event_log_classifier, 
parameters, depth+1)); + } + node + } + Fallthrough::FlowerModel(node) => { node} // not much to do, this is the default + Fallthrough::Return(_) => { // THis point should not be reached at all, as the flower model is the default + panic!("Fallthrough::Return in build tree function - must not happen"); + } + } + +} + +/// Core recursive function of the Inductive Miner. +/// +/// The algorithm proceeds as follows: +/// 1. Check for base cases (empty log or single activity): [`find_base_case`] +/// 2. If none apply, construct the directly-follows graph (DFG) [`DirectlyFollowsGraph::create_from_log`] +/// 3. Attempt to find a valid cut.#: [`find_cut`] +/// 4. If a cut is found, split the log and recurse on each sub-log: [`convert_cut_to_process_node`] +/// 5. Otherwise, apply a fallthrough strategy: [`fallthrough_finder`] +/// +/// # Parameters +/// - `log`: The event log to mine. +/// - `event_log_classifier`: Activity classifier. +/// - `parameters`: Mining parameters. +/// - `depth`: Current recursion depth. +/// +/// # Returns +/// The root `ProcessNode` of the mined (sub)tree. 
+pub fn build_tree(log: EventLog, event_log_classifier: &EventLogClassifier, parameters: &Parameters, depth: usize) -> Node{ + match find_base_case(&log, event_log_classifier){ + BaseCases::None => { + let dfg = discover_dfg_with_classifier(&log, event_log_classifier); + let cut = find_cut(&dfg, &log, event_log_classifier, parameters); // find cut, if there is some + if cut.is_some(){ + convert_cut_to_process_node(cut.unwrap(), event_log_classifier, log, parameters, depth) + } else { + fallthrough_finder(log, event_log_classifier, parameters, depth) + } + } + BaseCases::Empty => { + Node::new_leaf(None) + } + BaseCases::SingleActivity(activity) => { + Node::new_leaf(Some(activity)) + } + } + +} + + +#[cfg(test)] +mod tests { + + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::process_tree::{Node}; + use crate::core::process_models::process_tree::OperatorType::{ExclusiveChoice, Loop}; + use crate::discovery::case_centric::inductive_miner_app::{inductive_miner_default_parameters}; + use crate::event_log; + + #[test] + fn test_works_without_panic() { + let log = event_log!( + ["a", "b", "c", "d"], + ["a", "b", "c", "d", "e", "a", "b", "c", "d"], + ); + let event_log_classifier = EventLogClassifier::default(); + + let node = inductive_miner_default_parameters(log, &event_log_classifier); + assert!(node.is_valid()); + } + + #[test] + fn test_loop_over_same_activity(){ + let log = event_log!(["a", "a"]); + + + let node = inductive_miner_default_parameters(log, &EventLogClassifier::default()); + + let mut expected = Node::new_operator(Loop); + expected.add_child(Node::new_leaf(Some(String::from("a")))); + expected.add_child(Node::new_leaf(None)); + + assert!(node.is_valid()); + assert_eq!(node.root, expected); + } + + #[test] + fn test_complex_log(){ + let log = event_log![ + ["a", "b", "d"], + ["a", "d", "b"], + ["a", "b", "c", "a", "b"], + ["a", "d", "c", "a", "d"], + ["a", "b", "d", "c", "a", "d", "b"], + ["a", "d", 
"b", "c", "a", "b", "d"], + ]; + let node = inductive_miner_default_parameters(log, &EventLogClassifier::default()); + + assert!(node.is_valid()) + } + + + #[test] + fn test_loop_over_same_activity_with_empty_trace(){ + let log = event_log!( + [], + ["a", "a"], + ); + + let node = inductive_miner_default_parameters(log, &EventLogClassifier::default()); + + let mut expected_sub = Node::new_operator(Loop); + expected_sub.add_child(Node::new_leaf(Some(String::from("a")))); + expected_sub.add_child(Node::new_leaf(None)); + + let mut expected = Node::new_operator(ExclusiveChoice); + expected.add_child(Node::new_leaf(None)); + expected.add_child(expected_sub); + + + assert!(node.is_valid()); + assert_eq!(node.root, expected); + } + + #[test] + fn test_empty_trace_plus_base_case(){ + let log = event_log!(["a"],[]); + let node = inductive_miner_default_parameters(log, &EventLogClassifier::default()); + + let mut expected = Node::new_operator(ExclusiveChoice); + expected.add_child(Node::new_leaf(None)); + expected.add_child(Node::new_leaf(Some(String::from("a")))); + + assert!(node.is_valid()); + assert_eq!(node.root, expected); + } +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs new file mode 100644 index 0000000..f42e35a --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs @@ -0,0 +1,126 @@ +use std::borrow::Cow; +use std::collections::HashSet; +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::OperatorType::Concurrency; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; +use crate::EventLog; + +/// Splits an event log according to the partitions of an AND-Cut (concurrency cut). 
+/// +/// For each partition of the cut a new sub log is created, the traces belonging to these sublogs are retained events of the original trace, +/// those are filtered s.t. only events whose activity belongs to the partition are retained. +/// +/// The result is a vector of sub-logs, one per partition, that together form +/// the split required for recursive process tree discovery. +/// +/// # Returns +/// Some(split) if the cut struct is a valid and cut +/// None if the cut is not a valid and cut +/// +/// +/// # Notes +/// - event order within traces is preserved +/// - empty traces may occur if a trace contains no events from a partition +pub fn and_split<'a>(log: &EventLog, activity_classifier: &EventLogClassifier, cut: Cut<'a>) -> Option { + + // only perform split if the cut is of the type concurrent + if cut.get_operator() != Concurrency{ + return None; + } + + // result vector containing sub logs + let mut result: Vec = Vec::new(); + // the found partitions of the cut + let partitions: Vec>> = cut.get_own(); + + for partition in partitions.into_iter(){ + let mut new_log = log.clone_without_traces(); + + for trace in & log.traces{ + let mut new_trace = trace.clone_without_events(); + + for event in trace.events.iter(){ + let activity = activity_classifier.get_class_identity(event); + if partition.contains(activity.as_str()){ + new_trace.events.push(event.clone()); + } + } + new_log.traces.push(new_trace); + } + + result.push(new_log); + } + Some(Split::new(Concurrency, result)) +} + + +#[allow(unused_imports)] +mod test_and_split{ + use crate::core::chrono::Utc; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::concurrent::concurrent_cut_wrapper; + use crate::discovery::case_centric::inductive_miner_app::splits::concurrency::and_split; + use crate::{event_log, EventLog}; + + #[test] + fn 
test_simple_and_cut_and_split(){ + let time = Utc::now(); // need same timestamp attributes + let test_log = event_log!( + ["A";{"time:timestamp" => time.clone()}, "B";{"time:timestamp" => time.clone()}, "C"; {"time:timestamp" => time.clone()}], + ["A"; {"time:timestamp" => time.clone()}, "C"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}], + ["C"; {"time:timestamp" => time.clone()}, "A"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}], + ); + + let dfg = DirectlyFollowsGraph::discover(&test_log); + let cut = concurrent_cut_wrapper(&dfg, None); + assert!(cut.is_some()); + println!("{:?}", cut); + let split = and_split(&test_log,&EventLogClassifier::default(), cut.unwrap()); + assert!(split.is_some()); + let split = split.unwrap().get_own(); + println!("{}", split.len()); + + let log1 = event_log!(["A"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}], ["A"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}], ["A"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}]); + let log2 = event_log!(["C"; {"time:timestamp" => time.clone()}], ["C"; {"time:timestamp" => time.clone()}], ["C"; {"time:timestamp" => time.clone()}]); + + let mut b1 = false; + let mut b2 = false; + + + for log in split{ + if log == log1 && !b1{ + b1 = true; + } else if log == log2 && !b2{ + b2 = true; + } else { + assert!(false); + } + } + } + + #[test] + fn test(){ + let test_log = event_log!([], ["A", "B"], ["B", "A"]); + let dfg = DirectlyFollowsGraph::discover(&test_log); + let cut = concurrent_cut_wrapper(&dfg, None); + assert!(cut.is_some()); + let split = and_split(&test_log,&EventLogClassifier::default(), cut.unwrap()); + assert!(split.is_some()); + let split = split.unwrap().get_own(); + + + for log in split.into_iter().enumerate(){ + println!("Log: {}", log.0); + for t in log.1.traces.into_iter().enumerate(){ + println!("trace{}", t.0); + for e in 
t.1.events.into_iter().enumerate(){ + println!(" {}", e.0); + } + + } + } + + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusice_choice.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusice_choice.rs new file mode 100644 index 0000000..e0703db --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusice_choice.rs @@ -0,0 +1,218 @@ +use std::collections::HashMap; +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::OperatorType::ExclusiveChoice; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; +use crate::EventLog; + +/// This implementation follows the xor-split algorithm as implemented in +/// the ProM framework (`InductiveMiner`), originally written in Java. +/// +/// Reference: +/// - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: +/// "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." +/// Application of Concurrency to System Design (ACSD), 2013. +/// - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven +/// University of Technology, 09.05.2017 +/// - ProM source code: +/// https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/logsplitter/LogSplitterXorFiltering.java + + + +/// This functions splits an event log according to a provided valid xor cut. +/// +/// # Parameters +/// - 'log': the event log to split +/// - 'activity_classifier': the classifier to identify the activities in the events +/// - 'cut': the previously found sequence cut (check the operator) +/// +/// # Returns +/// - Some(Split) containing as many logs as the number of partitions in the split. 
+/// - None if the cut was not a sequence cut nor valid +pub fn xor_split<'a>(log: &EventLog, activity_classifier: &EventLogClassifier, cut: Cut<'a>) -> Option { + if cut.get_operator() != ExclusiveChoice || cut.is_empty() { + // if this is not the demanded operator, return none + return None; + } + let k = cut.len(); + + // get partitions from cut + let partition = cut.get_own(); + + // According to the pseudocode in "Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven + // University of Technology, 09.05.2017" the algorithm splits the log into several sublogs, by only adding the trace t_i to the sublog L_i + // if the partition p_i contains all events of t_i + + // assume a cut / partitions like {{A,B}, {C}, {D},{E}} + + // assign every activity an index for faster access later - activites in the same partition get the same index + // if the assumed cut is used, you would get a map like + //{ #activity -> index + // A -> 0, + // B -> 0, + // C -> 1, + // D -> 2, + // E -> 2 + // } + // + let mut activity_partition_idx_map = HashMap::new(); + for (idx, activity_set) in partition.iter().enumerate() { + for act in activity_set{ + // every unique activity gets another index + activity_partition_idx_map.insert(act.clone(), idx); + } + } + + // produce result vector with k empty logs + let mut result: Vec = (0..k).map(|_| EventLog::new()).collect(); + + // iterate over every tracce, for the example assume a trace [A,A, B, A, B,B] + for trace in log.traces.iter(){ + let mut counts = vec![0usize; k]; + + //count incidents of activities within a partition of the trace + // for the example trace above we would get a counts-vec : [6,0,0] as all events occur in the + // very first partition, the latter partitions contain no activity which occurs here + for event in trace.events.iter(){ + let activity = activity_classifier.get_class_identity(event); + if let Some(idx) = activity_partition_idx_map.get(activity.as_str()){ + if *idx >= counts.len(){ 
+ eprintln!("Length matches exactly index! index: {}, counts: {:?}\n activity: {}\n map{:?} ", *idx,counts, activity, activity_partition_idx_map); + } + counts[*idx] += 1; + } + } + + // get the partition, which contains the maximum occurrences in count + // for the example it is the partition at index 0 in count as 6 > 0 + let max = if trace.events.is_empty(){ + None + } else { + let mut max_idx = 0; // index of activity having most incidents + let mut max_val = 0; // actual activity with most incidents + + for (i, count) in counts.iter().enumerate(){ + // a tie within the same trace should not occur, because this is a xor cut (maybe in noisy loops??) + if *count > max_val{ + max_val = *count; + max_idx =i; + } + } + + Some(max_idx) + }; + + // build new sublog - iterate over all indexes, to keep empty traces in every possible sublog, if there is one + for sublog_idx in 0..k{ // iterate over partition size + + // only do this + if let Some(winning_partition) = max { + if winning_partition != sublog_idx { + // remove trace from this sublog + continue; + }// else we got the index of the activity within the trace which appears mostly + } // else trace is empty (max == None) + + let mut new_trace = trace.clone(); // clone current trace + // Filter events: keep only those + new_trace.events.retain(|e| { + // keep only the events of the trace, which appear in the winning partition + if let Some(act_idx) = activity_partition_idx_map.get(activity_classifier.get_class_identity(e).as_str()){ + sublog_idx == *act_idx + } else { + false + } + }); + + // push new trace to trace vec + result[sublog_idx].traces.push(new_trace); + } + } + Some(Split::new(ExclusiveChoice, result)) +} + +mod tests_xor_split{ + use std::collections::HashSet; + use crate::core::chrono::Utc; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::core::process_models::process_tree::OperatorType::ExclusiveChoice; + use 
crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::exclusive_choice::exclusive_choice_cut_wrapper; + use crate::discovery::case_centric::inductive_miner_app::splits::exclusice_choice::xor_split; + use crate::event_log; + + #[test] + fn test_basic(){ + let log = event_log!( + ["A", "A", "B", "e"], + ["C", "D"] + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = exclusive_choice_cut_wrapper(&dfg); + + assert!(cut.is_some()); + let cut = cut.unwrap(); + + let x = xor_split(&log, &EventLogClassifier::default(), cut); + assert!(x.is_some()); + let x = x.unwrap(); + assert_eq!(x.len(), 2); + } + + #[test] + fn test_only_empty_traces_and_cut(){ + let log = event_log!( + [], + [] + ); + + let mut cut = Vec::new(); + cut.push(HashSet::new()); + cut.push(HashSet::new()); + cut.push(HashSet::new()); + let cut = Cut::new(ExclusiveChoice, cut); + let x = xor_split(&log, &EventLogClassifier::default(), cut); + assert!(x.is_some()); + let x = x.unwrap().get_own(); + assert_eq!(x.len(), 3); // exactly 3 sublogs + for log in x{ + // each sublog has exactly 2 empty logs + assert_eq!(log.traces.len(), 2); + for trace in log.traces{ + assert!(trace.events.is_empty()) + } + } + } + + #[test] + fn test_leeman_example(){ + let time = Utc::now(); + let log = event_log!( + ["A";{"time:timestamp" => time.clone()}, "B";{"time:timestamp" => time.clone()}], + ["C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}] + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = exclusive_choice_cut_wrapper(&dfg); + assert!(cut.is_some()); + let cut = cut.unwrap(); + let x = xor_split(&log, &EventLogClassifier::default(), cut); + assert!(x.is_some()); + let x = x.unwrap().get_own(); + assert_eq!(x.len(), 2); + for log in x{ + if log.traces.len() == 1{ + if log.traces[0].events.len() == 2{ + assert_eq!(log, 
event_log!(["A";{"time:timestamp" => time.clone()}, "B";{"time:timestamp" => time.clone()}] {"concept:name" => 0},)); + } else { + assert_eq!(log, event_log!(["C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}]{"concept:name" => 1})); + } + } else { + // if there is not exactly one trace per log, sth is really wrong + assert!(false); + } + } + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs new file mode 100644 index 0000000..e97ebc5 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs @@ -0,0 +1,80 @@ +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::OperatorType; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::splits::concurrency::and_split; +use crate::discovery::case_centric::inductive_miner_app::splits::exclusice_choice::xor_split; +use crate::discovery::case_centric::inductive_miner_app::splits::redo_loop::loop_split; +use crate::discovery::case_centric::inductive_miner_app::splits::sequence::sequence_split; +use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; +use crate::EventLog; + +mod concurrency; +mod sequence; +mod exclusice_choice; +mod redo_loop; +pub mod split; + + +/// A wrapper for the actual split function. +/// +/// This function simply forwards its arguments to [`splitting`]. +/// +/// # Panic +/// This function panics if the provided cut somehow could not be handled by the splitting algorithm, +/// this should only be the case iff the operator of the cut finds no split operator. 
+pub fn perform_split<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut<'a>) -> Split{ + if let Some(split) = splitting(log, classifier, cut) { + split + } else { + panic!("No split function found for the cut operator.") + } + +} + + +/// Core Split function matching the cut operator to the matching split function. +/// +/// [`xor_split`] +/// +/// [`sequence_split`] +/// +/// [`and_split`] +/// +/// [`loop_split`] +fn splitting<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut<'a>) -> Option{ + // match the operator and perform the matching split + match cut.get_operator() { + OperatorType::ExclusiveChoice => { + xor_split(log, classifier, cut) + } + OperatorType::Sequence => { + sequence_split(log, classifier, cut) + } + OperatorType::Concurrency => { + and_split(log, classifier, cut) + } + OperatorType::Loop => { + loop_split(log, classifier, cut) + } + } +} + + +mod test_splits{ + use std::collections::HashSet; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::sequence_cut_wrapper; + use crate::discovery::case_centric::inductive_miner_app::splits::sequence::sequence_split; + use crate::event_log; + + #[test] + fn test_sequence_split() { + let log = event_log!(["a", "b", "c", "d"]); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(cut.is_some()); + let split = sequence_split(&log, &EventLogClassifier::default(), cut.unwrap()); + assert!(split.is_some()); + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs new file mode 100644 index 0000000..42f1471 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs @@ 
-0,0 +1,208 @@ +use std::collections::HashMap; +use crate::EventLog; +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::OperatorType::Loop; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; +/// Splits an event log according to the partition of a Loop-cut. +/// +/// Recall that a loop cut identifies a structure consisting of a main body (do-part) and at least one redo part. +/// The partitions of the cut represent activity sets that belong to different segments of the loop structure. +/// The first partition belongs to the do segment. +/// +/// Creates one sub log for each partition in the cut +/// Iterates over every trace, grouping activities to the same sub trace as long as they belong to the same partition. +/// If a partition changes the current sub trace is finalized and added to the sub log +/// +/// # Returns +/// Some(split) containing filtered traces +/// None if the cut is not a valid loop cut +/// +/// # Notes +/// - number of traces in each sublog may differ +/// - event order is preserved +/// - activities not encountered in any partition are being ignored + +pub fn loop_split<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut<'a>) -> Option { + if Loop != cut.get_operator() { + return None; + } + // Prologue - preparations + let k = cut.len(); + let mut result: Vec = Vec::with_capacity(k); + + // Create empty sublogs + for _ in 0..k { + result.push(log.clone_without_traces()); + } + // get partitions + let partitions = cut.get_own(); + + // Pre-map activities to partition index for fast lookup - just transfer activity to index of set + let mut activity_to_log_map = HashMap::new(); + for (i, part) in partitions.iter().enumerate() { + // at least two partitions, if more loops there can be more + for a in part { + activity_to_log_map.insert(a.clone(), i); + } + } + + // 
iterate over each trace of the original log + for trace in &log.traces { + //each sublogs gets one clean trace + let mut sub_trace = trace.clone_without_events(); + + let mut last_partition: Option = None; // init to None to signal the start of a new trace + + for event in &trace.events { + let activity = classifier.get_class_identity(event); + + // get the log index / the index of the partition the activity is part of (exactly one partition) + let Some(log_index) = activity_to_log_map.get(activity.as_str()) else { + eprintln!("Encountered unexpeceted activity {} in loop splitter using the following cut {:?}: on event log.", activity, partitions); + // if the activity is not in the block, this means that it's not part of the loop - it shouldn't be in here + continue; + }; + + if last_partition.is_some() && last_partition.unwrap() != *log_index { + // if the last partition is not the same as in the block index of the current activity, + // we need to create a new sub_trace and push the last one to the existing ones + + // as last_partition is some, we can just push the trace to the result log index at last partiton + result[last_partition.unwrap()].traces.push(sub_trace); + sub_trace = trace.clone_without_events(); + } + // At the current state, the event belongs to the subtrace of the log_index which + + // push current activity to sub_trace of block_index sublog + sub_trace.events.push(event.clone()); + // update the last partition + last_partition = Some(*log_index); + } + // at this point we have a sub_trace which is empty or contains at least one element, + // if the last_partition variable is set, there is at least one element in the log + if last_partition.is_some() { + result[last_partition.unwrap()].traces.push(sub_trace); + } else { + // trace is empty, nothing to do + } + } + + Some(Split::new(Loop, result)) +} + +#[allow(unused_imports)] +mod test_loop_split { + use crate::core::chrono::Utc; + use 
crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::loop_cut::redo_loop_cut_wrapper; + use crate::discovery::case_centric::inductive_miner_app::splits::redo_loop::loop_split; + use crate::{event, event_log, trace}; + use crate::EventLog; + + fn events_equal(log: &EventLog, o_log: &EventLog, event_log_classifier: EventLogClassifier) -> bool { + if log.traces.len() == o_log.traces.len() { + for (t0, t1) in log.traces.iter().zip(o_log.traces.iter()) { + if t0.events.len() == t1.events.len() { + for (e0,e1) in t0.events.iter().zip(t1.events.iter()) { + let a0 = event_log_classifier.get_class_identity(e0); + let a1 = event_log_classifier.get_class_identity(e1); + if a0 != a1 { + println!("Two activities did not match{:?}", (a0, a1)); + + return false; + } + } + } + } + return true; + } + false + } + #[test] + fn test_loop_split_leemans_example() { + let log = event_log!( + ["a", "b"], + ["a", "b", "c", "a", "b"], + ["a", "b", "c", "a", "b", "c", "a", "b"] + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = redo_loop_cut_wrapper(&dfg); + assert!(cut.is_some()); + let split = loop_split(&log, &EventLogClassifier::default(), cut.unwrap()); + assert!(split.is_some()); + let split = split.unwrap(); + assert_eq!(split.len(), 2); + + // created expected event logs + let do_log = event_log!( + ["a", "b"], + ["a", "b"], + ["a", "b"], + ["a", "b"], + ["a", "b"], + ["a", "b"] + ); + + let redo_log = event_log!(["c"], ["c"], ["c"]); + + for log in split.get_own() { + if log.traces.len() == 6 { + // expected length of 6 + assert!(events_equal(&log, &do_log, EventLogClassifier::default())); + } else if log.traces.len() == 3 { + // expected length of 3 + assert!(events_equal(&log, &redo_log, EventLogClassifier::default())); + } else { + assert!(false); + } + } + } + + #[test] + fn test_more_complex_loop() { + let log = event_log!( 
+ ["a", "b"], + ["a", "b", "c", "a", "b"], + ["a", "d", "b"], + ["a", "d", "b", "c", "a", "d", "b"], + ["a", "d", "b", "c", "a", "b"] + ); + + let do_log = event_log!( + ["a", "b"], + ["a", "b"], + ["a", "b"], + ["a", "d", "b"], + ["a", "d", "b"], + ["a", "d", "b"], + ["a", "d", "b"], + ["a", "b"] + ); + + let redo_log = event_log!(["c"], ["c"], ["c"]); + + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = redo_loop_cut_wrapper(&dfg); + assert!(cut.is_some()); + let split = loop_split(&log, &EventLogClassifier::default(), cut.unwrap()); + assert!(split.is_some()); + let split = split.unwrap(); + assert_eq!(split.len(), 2); + + for log in split.get_own() { + println!("{:#?}", log); + if log.traces.len() == do_log.traces.len() { + // expected length of 6 + assert!(events_equal(&log, &do_log, EventLogClassifier::default())); + } else if log.traces.len() == redo_log.traces.len() { + // expected length of 3 + assert!(events_equal(&log, &redo_log, EventLogClassifier::default())); + } else { + assert!(false); + } + } + } +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs new file mode 100644 index 0000000..4cbc4db --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs @@ -0,0 +1,233 @@ +/// This implementation follows the sequence-split algorithm as implemented in +/// the ProM framework (`InductiveMiner`), originally written in Java. +/// +/// Reference: +/// - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: +/// "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." +/// Application of Concurrency to System Design (ACSD), 2013. +/// - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. 
Thesis, Eindhoven +/// University of Technology, 09.05.2017 +/// - ProM source code: +/// https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/logsplitter/LogSplitterSequenceFiltering.java +use std::borrow::Cow; +use std::collections::HashSet; +use std::ops::Deref; +use crate::core::event_data::case_centric::{EventLogClassifier, Trace}; +use crate::core::process_models::process_tree::OperatorType::Sequence; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; +use crate::EventLog; + +/// Finds the position inside a trace for a split that most strongly matches a given activity partition. +/// +/// The events from `start_pos` to the end of the trace are scanned while a running cost is kept: +/// an event whose activity is in `ignore` leaves the cost unchanged, an event whose activity is +/// in `partition` lowers it by one, and any other event raises it by one. +/// +/// # Returns +/// The position directly after the event at which the running cost first reached its lowest +/// negative value, or `start_pos` if the cost never dropped below zero. +fn find_optimal_split( + trace: &Trace, + partition: &HashSet>, + start_pos: usize, + ignore: &HashSet, + classifier: &EventLogClassifier, +) -> usize { + let mut position_least_cost = start_pos; // default: split directly at the start position + let mut least_cost = 0; + let mut cost: i32 = 0; + let mut position = start_pos; + + // iterate through events of trace from start position to end + while position < trace.events.len() { + // get string activity attribute + let activity = classifier.get_class_identity(&trace.events[position]); + + if ignore.contains(&activity) { + // skip: contributes nothing to cost + } else if partition.contains(activity.as_str()) { + // event belongs to the partition: decrease cost + cost -= 1; + } else { + // neither ignored nor part of the partition: increase cost + cost += 1; + } + + position += 1; + + // strict comparison: on equal cost the earliest position is kept + if cost < least_cost { + least_cost = cost; + position_least_cost = position; + } + } + position_least_cost +} +/// Splits an event log according to the partitions of a sequence cut. +/// +/// # Returns +/// - Some(Split) containing as many logs as the number of partitions in the cut.
+/// - None if the cut is not a sequence cut +pub fn sequence_split<'a>( + log: &EventLog, + activity_classifier: &EventLogClassifier, + cut: Cut<'a>, +) -> Option { + + if cut.get_operator() != Sequence{ + return None; + } + // create results vec with one (still event-free) event log per partition + let k = cut.len(); + let mut result: Vec = Vec::with_capacity(k); + + for _ in 0..k { + // clone log structure - keeps the attributes of log and traces, but without events + let mut sub_log = log.clone_without_traces(); + for trace in &log.traces { + sub_log.traces.push(trace.clone_without_events()); + } + result.push(sub_log); + } + + // get partitions + let partitions = cut.get_own(); + for (trace_idx, trace) in log.traces.iter().enumerate() { + let mut curr_position = 0; + let mut ignore: HashSet = HashSet::new(); + + for (partition_idx, partition) in partitions.iter().enumerate() { + let new_postion = if partition_idx + 1 < k { + find_optimal_split( + trace, + partition, + curr_position, + &ignore, + activity_classifier, + ) + } else { + // only last partition gets here, it must finish the trace + trace.events.len() + }; + + // for positions in range [curr_position, new_postion) copy events that belong to the partition + + if new_postion > curr_position { + // destination trace: the trace with the same index inside the sub-log of this partition + let dest_trace = &mut result[partition_idx].traces[trace_idx]; + + for pos in curr_position..new_postion { + // get event and retrieve its activity + let event = &trace.events[pos]; + let activity = activity_classifier.get_class_identity(event); + + if partition.contains(activity.as_str()) { + dest_trace.events.push(event.clone()); + } + } + } + + // add activities of the current partition to the ignore set + for act in partition { + ignore.insert(act.deref().to_string()); + } + + // update position + curr_position = new_postion; + } + } + + Some(Split::new(Sequence, result)) +} + +#[allow(unused_imports)] +mod test_sequence_split { + use std::collections::HashSet; + use crate::core::chrono::Utc; +
use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::sequence_cut_wrapper; + use crate::discovery::case_centric::inductive_miner_app::splits::sequence::sequence_split; + use crate::{event, event_log}; + + #[test] + fn test_sequence_split() { + let time = Utc::now(); + let log = event_log!( + ["a"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}], + ["b"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}] + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(cut.is_some()); + let cut = cut.unwrap(); + + println!("Cut: {:?}", cut); + let split = sequence_split(&log, &EventLogClassifier::default(), cut); + assert!(split.is_some()); + + let split = split.unwrap().get_own(); + // to the actual split + let log1 = event_log!(["a"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}], ["b"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}]); + let log2 = event_log!(["c"; {"time:timestamp" => time.clone()}], ["c"; {"time:timestamp" => time.clone()}]); + + let mut b1 = false; + let mut b2 = false; + for log in split { + // make certain every log is only compared one time, as we don't know the order + if log == log1 && !b1 { + b1 = true; + } else if log == log2 && !b2 { + b2 = true; + } else { + // no matching log or multiple matchings -> immediately false + assert!(false); + } + } + } + + #[test] + fn test_sequence_split2() { + // this log contains a sequence cut, as b or c never reach an "a" + let time = Utc::now(); + let log = event_log!( + ["a"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}, "b"; 
{"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}], + ["a"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}] + ); + // we cut this log and sepreate the "a"s from "b's" and "c's" + // after definition the resulting sublogs contain only those elements which are also in the partition + // create expected logs + let log0 = event_log!(["b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}], ["c"; {"time:timestamp" => time.clone()}]); + let log1 = event_log!(["a"; {"time:timestamp" => time.clone()}], ["a"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}]); + + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(cut.is_some()); + let cut = cut.unwrap(); + let split = sequence_split(&log, &EventLogClassifier::default(), cut); + assert!(split.is_some()); + let split = split.unwrap().get_own(); + + assert_eq!(split.len(), 2); + + // check that both resulting logs match the expected sequence of activities + let mut b0 = false; + let mut b1 = false; + for log in split { + if log == log0 && !b0 { + b0 = true; + } else if log == log1 && !b1 { + b1 = true; + } + } + assert!(b1); + assert!(b0); + } + + + #[test] + fn test_sequence_split3() { + let log = event_log!(["a", "b", "c", "d"]); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(cut.is_some()); + let split = sequence_split(&log, &EventLogClassifier::default(), cut.unwrap()); + assert!(split.is_some()); + println!("{:?}", split.unwrap().get_own()); + + } + + +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs new file mode 100644 index 
0000000..5b8d015 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs @@ -0,0 +1,35 @@ +use crate::core::process_models::process_tree::OperatorType; +use crate::EventLog; + + +/// Helper struct to aggregate the returns of splitting algorithms. +/// +/// # Fields +/// - `operator`: the [`OperatorType`] defining the split type +/// - `sub_logs`: a vector containing all new logs +pub struct Split{ + operator: OperatorType, + sub_logs: Vec, +} + +impl Split{ + /// Creates a split from an operator and the sub-logs belonging to it. + pub fn new(operator: OperatorType, sub_logs: Vec) -> Split{ + Self{operator, sub_logs} + } + + /// Returns the number of sub-logs. + pub fn len(&self) -> usize { + self.sub_logs.len() + } + + /// Consumes the split and returns its sub-logs. + pub fn get_own(self) -> Vec{ + self.sub_logs + } + + /// Returns the operator of the split. + pub fn get_operator(&self) -> OperatorType { + self.operator + } + + /// Returns `true` if the split contains no sub-logs. + pub fn is_empty(&self) -> bool{ + self.sub_logs.is_empty() + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs new file mode 100644 index 0000000..39c34b3 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs @@ -0,0 +1,200 @@ +/// This implementation is inspired by the component structure in +/// the ProM framework (`InductiveMiner`), originally written in Java.
+/// +/// - ProM source code: +/// https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/helperclasses/graphs/IntComponents.java + + +use std::borrow::Cow; +use std::collections::{HashMap, HashSet}; + +/// Partition of a set of nodes into components that can be merged step by step. +#[derive(Debug)] +pub struct Components<'a> { + components: Vec, // component index of each node; the position of a node is looked up in `node2index` + node2index: HashMap, usize>, // position of every node in `components` + number_of_components: usize, // current number of components +} + + +impl<'a> Components<'a> { + /// Creates the components for the given nodes; initially every node forms its own component. + pub fn new(nodes: &[Cow<'a, str>]) -> Self { + let mut node2index = HashMap::new(); + // every node gets its own index in the beginning + for (i, n) in nodes.iter().enumerate() { + // clone is very cheap if cow is borrowed + node2index.insert(n.clone(), i); + } + + let len = nodes.len(); + Components { + components: (0..len).collect(), + node2index, + number_of_components: len, + } + } + + /// Creates the components from already existing partitions: every node of the i-th partition + /// is assigned the component index i. + pub fn from(partitions: &Vec>>) -> Self { + let mut node2index = HashMap::new(); + let mut node_number: usize = 0; + + // first pass: give every node a consecutive position + for part in partitions.iter() { + for act in part.iter() { + node2index.insert(act.clone(), node_number); + node_number += 1; + } + } + + let mut components = vec![0;node_number]; + + + // second pass: the positions handed out for a partition form a consecutive block, label it with the partition index + let mut node_number: usize = 0; + for (component_number, part) in partitions.iter().enumerate() { + for _ in part.iter(){ + components[node_number] = component_number; + node_number += 1; + } + } + + Self{components, node2index, number_of_components: partitions.len()} + + } + + + /// Returns the component index of `node`. + /// + /// # Panics + /// Panics if `node` is not contained in this structure. + pub fn component_of(&self, node: &str) -> usize { + self.components[self.node2index[node]] + } + + /// Returns `true` if `a` and `b` currently belong to the same component. + pub fn same_component(&self, a: &str, b: &str) -> bool { + self.component_of(a) == self.component_of(b) + } + + /// Merges the components that contain the nodes `a` and `b`. + pub fn merge_components_of(&mut self, a: &str, b: &str) { + let ca = self.component_of(a); + let cb = self.component_of(b); + self.merge_components(ca, cb); + } + + /// Merges component `ca` into component `cb` by relabelling every node of `ca`. + /// + /// The number of components is only decreased if at least one node was relabelled. + pub fn merge_components(&mut self, ca: usize, cb: usize) { + if ca == cb { + return; + } + let mut changed = false; + for comp in
self.components.iter_mut() { + if *comp == ca { + *comp = cb; + changed = true; + } + } + if changed { + self.number_of_components -= 1; + } + } + + pub fn get_components(&self) -> Vec>> { + let mut result: Vec>> = Vec::new(); + let mut map: HashMap = HashMap::new(); + let mut next_idx = 0; + + // assign normalized indexes: component labels are mapped to 0, 1, ... in order of first appearance + for comp in &self.components { + if !map.contains_key(comp) { + map.insert(*comp, next_idx); + result.push(HashSet::new()); + next_idx += 1; + } + } + + // fill components: put every node into the set of its normalized component + for (node, idx) in &self.node2index { + let comp = self.components[*idx]; + let part = map[&comp]; + result[part].insert(node.clone()); + } + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashSet; + + #[test] + fn test_initial_components() { + let nodes = vec!["A".into(), "B".into(), "C".into()]; + let c = Components::new(&nodes); + + assert_eq!(c.number_of_components, 3); + assert!(!c.same_component("A", "B")); + assert!(!c.same_component("B", "C")); + assert!(!c.same_component("A", "C")); + } + + #[test] + fn test_simple_merge() { + let nodes = vec!["A".into(), "B".into(), "C".into()]; + let mut c = Components::new(&nodes); + + c.merge_components_of("A", "B"); + + assert!(c.same_component("A", "B")); + assert!(!c.same_component("A", "C")); + + assert_eq!(c.number_of_components, 2); + } + + #[test] + fn test_chain_merge() { + let nodes = vec!["A".into(), "B".into(), "C".into(), "D".into()]; + let mut c = Components::new(&nodes); + + c.merge_components_of("A", "B"); + c.merge_components_of("B", "C"); + + // All A,B,C should be in the same component + assert!(c.same_component("A", "C")); + assert!(c.same_component("A", "B")); + assert!(c.same_component("B", "C")); + + // D remains separate + assert!(!c.same_component("A", "D")); + + assert_eq!(c.number_of_components, 2); + } + + #[test] + fn test_merge_same_component_does_not_decrease_count() { + let nodes = vec!["A".into(), "B".into()]; + let mut c = Components::new(&nodes); + +
c.merge_components_of("A", "B"); + assert_eq!(c.number_of_components, 1); + + // merging again should not decrease further + c.merge_components_of("A", "B"); + assert_eq!(c.number_of_components, 1); + } + + #[test] + fn test_get_components() { + let nodes = vec!["A".into(), "B".into(), "C".into(), "D".into()]; + let mut c = Components::new(&nodes); + + c.merge_components_of("A", "B"); + c.merge_components_of("C", "D"); + + let comps = c.get_components(); + + // each component should have 2 elements + let mut sets: Vec>> = comps.into_iter().collect(); + sets.sort_by_key(|s| s.len()); + + assert_eq!(sets.len(), 2); + + let first = &sets[0]; + let second = &sets[1]; + + assert!(first.contains("A") && first.contains("B") || first.contains("C") && first.contains("D")); + assert!(second.contains("A") && second.contains("B") || second.contains("C") && second.contains("D")); + } +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs new file mode 100644 index 0000000..fb94d66 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs @@ -0,0 +1,272 @@ +use crate::{event_log, trace, EventLog}; +use std::collections::{HashMap, HashSet}; +use crate::core::event_data::case_centric::{EventLogClassifier, Trace}; +use crate::core::process_models::dfg::Activity; + +type Index = usize; +type MinDist = usize; +type InterveningSet = HashSet; + +/// Stores for every activity, its minimum self-distance and the set of activities occurring +/// between two minium-distance instances of that activity. +pub struct MinimumSelfDistance { + minimum_distance_relation: HashMap)>, +} + +impl MinimumSelfDistance { + + /// Constructs the new minimum self-distance relation from a given log and classifier. 
+ pub fn new( + log: &EventLog, event_log_classifier: &EventLogClassifier) -> MinimumSelfDistance { + Self{minimum_distance_relation: Self::minimum_distances_interleave(log, event_log_classifier)} + } + + + /// Returns the minimum self-distance for a given activity and the set of activities occurring + /// between two minimum-distance instances of that activity. + /// + /// Returns `None` if no entry is stored for the activity. + pub fn get_minimum_distance(&self, activity: &str) -> Option<&(MinDist, HashSet)> { + self.minimum_distance_relation.get(activity) + } + + /// Collects the activities of all events located strictly between the positions `start` and + /// `end` of the given trace. + /// + /// Positions that lie beyond the end of the trace are skipped. + + fn extract_interleaving_activities( + start: Index, + end: Index, + trace: &Trace, + event_log_classifier: &EventLogClassifier, + ) -> HashSet { + let mut interleaving_activities = HashSet::new(); + for i in start + 1..end { + if let Some(event) = trace.events.get(i) { + interleaving_activities.insert(event_log_classifier.get_class_identity(event)); + } + } + + interleaving_activities + } + + /// Computes the minimum self-distance of every activity that occurs more than once in a single trace. + /// + /// Two activities 'a' and 'b' are in a minimum distance relation iff 'b' appears between two + /// minimum-distance executions of 'a'. + /// For each such activity, the smallest number of events between two consecutive executions is + /// determined, together with the set of activities observed in between at that minimum distance.
+ fn minimum_distances_trace( + trace: &Trace, + event_log_classifier: &EventLogClassifier, + ) -> HashMap { + let mut last_seen: HashMap = HashMap::new(); + let mut results: HashMap = HashMap::new(); + for (index, event) in trace.events.iter().enumerate() { + let activity = event_log_classifier.get_class_identity(event); + if let Some(last_index) = last_seen.get(&activity) { + // number of events strictly between the previous and the current occurrence + let dist = index - *last_index - 1; + if let Some((prev_dist, acts)) = results.get_mut(&activity) { + if *prev_dist > dist { + // the stored distance is larger than the current one, so the current one becomes the new minimum + *prev_dist = dist; + *acts = Self::extract_interleaving_activities( + *last_index, + index, + trace, + event_log_classifier, + ); + } else if *prev_dist == dist { + // same minimum distance found again: merge the intervening activities + acts.extend(Self::extract_interleaving_activities( + *last_index, + index, + trace, + event_log_classifier, + )); + } + // otherwise skip: the distance is greater than the one we got previously + } else { + // the first time we found a loop + results.insert( + activity.clone(), // clone, as `activity` is still needed for the `last_seen` update below + ( + dist, + Self::extract_interleaving_activities( + *last_index, + index, + trace, + event_log_classifier, + ), + ), + ); + } + + + } + // update the last seen index of this activity + last_seen.insert(activity, index); + + } + results + } + + /// Aggregates minimum self-distance information over all traces in the log. + /// + /// For each activity, the globally smallest self-distance is retained and + /// the intervening activity sets for equal minimum distances are merged.
+ fn minimum_distances_interleave(log: &EventLog, event_log_classifier: &EventLogClassifier) -> HashMap { + let mut results: HashMap = HashMap::new(); + + // Go through every trace + for trace in log.traces.iter(){ + for (activity, (dist, interleaving_acts)) in Self::minimum_distances_trace(trace, event_log_classifier) { + if let Some(( min_dist, interleaving_set)) = results.get_mut(&activity) { + if *min_dist > dist{ + *min_dist = dist; + *interleaving_set = interleaving_acts; + } else if *min_dist == dist { + interleaving_set.extend(interleaving_acts); + } else { + // skip if the new distance is greater tan the already saved distance + } + } else { + results.insert(activity, (dist, interleaving_acts)); + } + } + } + results + } +} + + +#[test] +fn test_extract_interleaving_activities() { + let t = trace!("a", "b", "c", "d", "e", "f"); + let s = MinimumSelfDistance::extract_interleaving_activities(0, 6, &t, &EventLogClassifier::default()); + assert_eq!(s, HashSet::from(["b".into(), "c".into(), "d".into(), "e".into(), "f".into()])); +} +#[test] +fn test_extract_from_empty_trace() { + let t = trace!(); + let s = MinimumSelfDistance::extract_interleaving_activities(0, 6, &t, &EventLogClassifier::default()); + assert!(s.is_empty());} + +// ------------ Tests using binary events +#[test] +fn test_one_loop_distance() { + let t = trace!("a", "b", "a"); + + let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 1); + assert!(r.get("a").unwrap().1.contains("b")); +} + +#[test] +fn test_loop_zero_distance(){ + let t = trace!("a","a"); + + let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 0); + assert!(r.get("a").unwrap().1.is_empty()); +} + +#[test] +fn test_retrieve_smaller_later_loop(){ + let t = trace!("a", "b", "b", "a", "b", "b", "b", "a", "b", "a"); + + let r = 
MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 1); + assert!(r.get("a").unwrap().1.contains("b")); + + // trivial, b should have 0 minimum self distance in this example + assert!(r.contains_key("b")); + assert_eq!(r.get("b").unwrap().0, 0); + assert!(r.get("b").unwrap().1.is_empty()); +} + + +// -------------------------------- Test using more than two different activities + +#[test] +fn test_complex_trace(){ + let t = trace!("a", "b", "d", "e", "a", "d", "g", "g", "d","b", "f", "a", "c"); + let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); + + // check if loops are contained + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 3); + assert_eq!(r.get("a").unwrap().1, HashSet::from(["b".into(), "d".into(), "e".into()])); + + + assert!(r.contains_key("b")); + assert_eq!(r.get("b").unwrap().0, 7); + assert_eq!(r.get("b").unwrap().1, HashSet::from(["a".into(), "e".into(), "d".into(), "g".into()])); + + assert!(!r.contains_key("c")); + + // special case, because there are two loops with same minimum distance two + assert!(r.contains_key("d")); + assert_eq!(r.get("d").unwrap().0, 2); + // merged activities + assert_eq!(r.get("d").unwrap().1, HashSet::from(["e".into(), "a".into(), "g".into()])); + + + // not appearing twice + assert!(!r.contains_key("e")); + assert!(!r.contains_key("f")); + + // only one trace where g follows after g + assert!(r.contains_key("g")); + assert_eq!(r.get("g").unwrap().0, 0); + assert!(r.get("g").unwrap().1.is_empty()); +} + + +#[test] +fn test_empty_log(){ + let log = event_log!(); + let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default()); + + assert!(r.is_empty()); +} + +#[test] +fn test_zero_loops_log(){ + let log = event_log!(["a", "a"], ["b", "b"]); + let r = MinimumSelfDistance::minimum_distances_interleave(&log, 
&EventLogClassifier::default()); + + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 0); + + assert!(r.contains_key("b")); + assert_eq!(r.get("b").unwrap().0, 0); +} + +#[test] +fn test_find_smaller_loop(){ + let log = event_log!(["a", "a"], ["a", "b", "a"]); + let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default()); + + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 0); + + assert!(!r.contains_key("b")); +} + +#[test] +fn test_merge_relations(){ + let log = event_log!(["a", "c", "a"], ["a", "b", "a"]); + let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default()); + + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 1); + assert_eq!(r.get("a").unwrap().1, HashSet::from(["b".into(), "c".into()])); +} + + + diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs new file mode 100644 index 0000000..0a3ede3 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs @@ -0,0 +1,3 @@ +pub mod parameter; +pub mod components; +pub mod minimum_self_distance; \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs new file mode 100644 index 0000000..8c4f135 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs @@ -0,0 +1,32 @@ +use std::collections::HashSet; + +/// A helper type aggregating parameters which user maybe want the inductive miner to adhere. 
+/// A `HashSet` is used so that every parameter is contained at most once. +pub type Parameters = HashSet; + + + +/// Helper enum to express which option shall be activated in the inductive miner +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Parameter{ + StrictSequenceCut, // apply strict sequence cut additionally to the 'ordinary' sequence cut + FoldTree, // automatically fold tree + MinimumSelfDistance, // consider minimum self distance while looking for concurrent cut + ApplyFallthrough, // apply fallthroughs (the Flower Model will always be applied) + //-------Ideas for additional parameters: + // Multiprocessing +} + + + +impl Parameter{ + + /// Generates a `HashSet` containing all default parameters, such that: + /// - Strict Sequence Cut is used + /// - Fallthroughs are applied + /// - Minimum Self Distance is calculated and used while looking for a concurrent cut + /// - Resulting Tree is folded + pub fn generate_default_parameters() -> Parameters{ + HashSet::from([Parameter::StrictSequenceCut, Parameter::FoldTree, Parameter::MinimumSelfDistance, Parameter::ApplyFallthrough]) + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/mod.rs b/process_mining/src/discovery/case_centric/mod.rs index 8c44111..72193bd 100644 --- a/process_mining/src/discovery/case_centric/mod.rs +++ b/process_mining/src/discovery/case_centric/mod.rs @@ -2,4 +2,6 @@ pub mod alphappp; +pub mod inductive_miner_app; + pub mod dfg; From 6ec38857671ad137cd7eefb4587775699a90f568 Mon Sep 17 00:00:00 2001 From: Fabian Sandkuhl Date: Thu, 12 Mar 2026 18:17:54 +0100 Subject: [PATCH 2/6] added more documentation --- .../base_cases/base_cases.rs | 49 ---- .../inductive_miner_app/base_cases/mod.rs | 52 ++++- .../cut_finder/concurrent.rs | 32 +-- .../inductive_miner_app/cut_finder/cut.rs | 27 +-- .../cut_finder/exclusive_choice.rs | 41 +--- .../cut_finder/loop_cut.rs | 52 +++-- .../inductive_miner_app/cut_finder/mod.rs | 4 +- .../cut_finder/sequence_cut.rs | 25 +--
.../fallthrough/activity_concurrent.rs | 16 +- .../fallthrough/activity_once_per_trace.rs | 35 +-- .../fallthrough/empty_traces.rs | 11 +- .../fallthrough/fallthrough.rs | 2 + .../fallthrough/flower_model.rs | 6 +- .../inductive_miner_app/fallthrough/mod.rs | 18 +- .../fallthrough/strict_tau_loop.rs | 36 ++- .../fallthrough/tau_loop.rs | 22 +- .../case_centric/inductive_miner_app/mod.rs | 6 +- .../inductive_miner_app/splits/concurrency.rs | 5 +- ...xclusice_choice.rs => exclusive_choice.rs} | 31 +-- .../inductive_miner_app/splits/mod.rs | 9 +- .../inductive_miner_app/splits/redo_loop.rs | 30 ++- .../inductive_miner_app/splits/sequence.rs | 31 +-- .../inductive_miner_app/splits/split.rs | 14 +- .../structures/components.rs | 65 +++--- .../structures/minimum_self_distance.rs | 211 ++++++++++-------- .../inductive_miner_app/structures/mod.rs | 1 + .../structures/parameter.rs | 7 +- 27 files changed, 461 insertions(+), 377 deletions(-) delete mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/base_cases.rs rename process_mining/src/discovery/case_centric/inductive_miner_app/splits/{exclusice_choice.rs => exclusive_choice.rs} (91%) diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/base_cases.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/base_cases.rs deleted file mode 100644 index 67e8d02..0000000 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/base_cases.rs +++ /dev/null @@ -1,49 +0,0 @@ -use crate::core::event_data::case_centric::EventLogClassifier; -use crate::EventLog; - -#[derive(Debug)] -pub enum BaseCases { - None, // No base case is found - Empty, // the event log is completely empty - SingleActivity(String) // just one activity in every single trace in the event log -} - - -/// Checks whether the base case single activity applies to the given event log. 
-/// The BaseCase applies if the event log only contains traces with precisely one event, -/// which must have the same activity attribute. -fn check_single_activity_case(log: &EventLog, classifier: &EventLogClassifier) -> Option { - let mut activity: Option = None; - for t in &log.traces{ - if t.events.len() != 1{ // catch empty traces - return None; - } - let act = classifier.get_class_identity(&t.events[0]); - if let Some(activity) = &activity{ - if act != *activity{ - return None; - } - } else { - activity = Some(act); - } - } - activity -} - -/// Checks whether a BaseCase applies to a given event log. -/// -/// There are two possible base cases: -/// - 'empty trace' where the entire event log consists of one single empty trace, -/// - 'single activity' where the entire event log consist of traces containing only one single event with the same activity attribute. -pub fn find_base_case(log: &EventLog, event_log_classifier: &EventLogClassifier) -> BaseCases { - - if log.traces.len() == 0{ - // this just checks for an empty event log, this means, even if there are only empty traces, this case case does not apply - BaseCases::Empty - } else if let Some(activity) = check_single_activity_case(log, event_log_classifier){ - BaseCases::SingleActivity(activity) - } else { - // no base case applied to this one - BaseCases::None - } -} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs index b8acedb..fe61edf 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs @@ -1 +1,51 @@ -pub mod base_cases; \ No newline at end of file +//! This module contains utilities for detecting the base cases 'Empty' and 'Single Activity' used in the Inductive Miner. 
+use crate::core::event_data::case_centric::EventLogClassifier; +use crate::EventLog; + + +/// Enum representing whether a base case was found and, if so, which one. +#[derive(Debug)] +pub enum BaseCases { + None, // No base case is found + Empty, // the event log is completely empty + SingleActivity(String) // just one activity in every single trace in the event log +} + +/// Checks whether the base case single activity applies to the given event log. +/// The BaseCase applies if the event log only contains traces with precisely one event, +/// which must have the same activity attribute. +/// +/// # Returns +/// - Some(activity) if every trace consists of exactly one event with that activity +/// - None otherwise, which includes an event log without any traces +fn check_single_activity_case(log: &EventLog, classifier: &EventLogClassifier) -> Option { + let mut activity: Option = None; + for t in &log.traces{ + if t.events.len() != 1{ // catch empty traces and traces with more than one event + return None; + } + let act = classifier.get_class_identity(&t.events[0]); + if let Some(activity) = &activity{ + if act != *activity{ + return None; + } + } else { + activity = Some(act); + } + } + activity +} + +/// Checks whether a BaseCase applies to a given event log. +/// +/// There are two possible base cases: +/// - 'empty', where the event log contains no traces at all (a log consisting only of empty traces does not count), +/// - 'single activity', where the entire event log consists of traces containing only one single event with the same activity attribute.
+pub fn find_base_case(log: &EventLog, event_log_classifier: &EventLogClassifier) -> BaseCases { + + if log.traces.len() == 0{ + // this just checks for an empty event log, this means, even if there are only empty traces, this case case does not apply + BaseCases::Empty + } else if let Some(activity) = check_single_activity_case(log, event_log_classifier){ + BaseCases::SingleActivity(activity) + } else { + // no base case applied to this one + BaseCases::None + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs index 97ba690..0834592 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs @@ -1,14 +1,17 @@ -/// This implementation (ot the function parallel cut) follows the parallel cut algorithm as implemented in -/// the ProM framework (`InductiveMiner`), originally written in Java. -/// -/// Reference: -/// - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: -/// "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." -/// Application of Concurrency to System Design (ACSD), 2013. -/// - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven -/// University of Technology, 09.05.2017 -/// - ProM source code: -/// https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/cutfinders/CutFinderIMConcurrent.java +//! Utility for detecting a concurrency cut in a given Directly Follows Graph. +//! +//! This implementation ports the parallel cut algorithm as implemented in +//! the ProM framework (`InductiveMiner`), originally written in Java. +//! +//! Reference: +//! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: +//! 
"Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." +//! Application of Concurrency to System Design (ACSD), 2013. +//! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven +//! University of Technology, 09.05.2017 +//! - ProM source code: +//! https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/cutfinders/CutFinderIMConcurrent.java + use std::borrow::Cow; use std::collections::HashSet; use crate::core::process_models::dfg::DirectlyFollowsGraph; @@ -192,6 +195,7 @@ pub fn concurrent_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>, mindist: Op } } +#[cfg(test)] mod test_parallel_cut { use std::borrow::Cow; use std::collections::{HashMap, HashSet}; @@ -227,7 +231,6 @@ mod test_parallel_cut { &event_log!(["a", "b", "c"], ["b", "a", "c"]) ); let cut = concurrent_cut(&dfg, &None); - println!("CUT {:?}", cut); } #[test] @@ -236,7 +239,6 @@ mod test_parallel_cut { &event_log!(["a", "b"], ["b", "a"]) ); let cut = concurrent_cut_wrapper(&dfg, None); - println!("CUT {:?}", cut); assert!(cut.is_some()); assert_eq!(cut.unwrap().len(), 2); } @@ -260,8 +262,8 @@ mod test_parallel_cut { let parts = cut.unwrap(); assert_eq!(parts.len(), 3); - let flattened: HashSet> = parts - .get_iter() + let flattened: HashSet> = parts.partitions + .iter() .flat_map(|p| p.iter().map(|s| s.clone())) .collect(); diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs index d0e338c..2ee50da 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs @@ -1,3 +1,4 @@ +//! This module contains a struct used for representing a found cut by specifying the found partitions and cut type. 
use std::borrow::Cow; use std::collections::HashSet; use crate::core::process_models::process_tree::OperatorType; @@ -7,8 +8,8 @@ use crate::core::process_models::process_tree::OperatorType; /// specific cut operator (e.g. sequence, xor etc.) #[derive(Debug, PartialEq)] pub struct Cut<'a>{ - operator: OperatorType, // define what operator this cut is about - partitions: Vec>>, + pub operator: OperatorType, // define what operator this cut is about + pub partitions: Vec>>, } impl<'a> Cut<'a>{ @@ -26,10 +27,6 @@ impl<'a> Cut<'a>{ self.partitions.len() } - /// Returns an iterator over the partitions of this cut. - pub fn get_iter(&self) -> std::slice::Iter<'_, HashSet>> { - self.partitions.iter() - } /// Consumes the cut and returns the partitions of this cut. pub fn get_own(self) -> Vec>> { @@ -46,21 +43,5 @@ impl<'a> Cut<'a>{ pub fn is_empty(&self) -> bool{ self.partitions.is_empty() } - - - - /// Converts this cut into an owned version with `'static` lifetime. - /// - /// All activity labels are cloned into owned `String`s. - /// This is useful when the cut must outlive the original event log data. 
- pub fn to_owned_cut(&self) ->Cut<'static>{ - let owned_partitions = self.partitions.iter().map(|partition|{ - partition.iter().map(|cow| Cow::Owned(cow.to_string())).collect() - }).collect::>>>(); - - Cut{ - operator: self.operator, - partitions: owned_partitions, - } - } + } \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs index 761e8b0..799d44f 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs @@ -1,22 +1,4 @@ -//** -// This Code is based on the paper: -// -// Discovering Block-Structured Process Models From Event Logs - A Constructive Approach -// by S.J.J. Leemans, D. Fahland, and W.M.P. van der Aalst -// -// -// The algorithm works by recursively identifying splits in the process behavior, -// constructing a hierarchical representation (in case of a process tree). -// -// There are typically four split conditions: -// -// 1. Exclusive choice (xor) -// 2. Sequence -// 3. Concurrent (parallel) -// 4. Loop -// -// If a split condition is matched, an accordingly named cut function is used to cut the log, -// the algorithm continues recursively. +//! 
Utility for detecting an exclusive choice cut in a given Directly Follows Graph use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; use std::borrow::Cow; @@ -127,12 +109,11 @@ pub fn exclusive_choice_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Op } -#[allow(unused_imports)] +#[cfg(test)] mod tests { - use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::dfg::DirectlyFollowsGraph; use crate::discovery::case_centric::inductive_miner_app::cut_finder::exclusive_choice::exclusive_choice_cut_wrapper; - use crate::{event, event_log, trace}; + use crate::event_log; #[test] @@ -156,8 +137,8 @@ mod tests { // Expect two components: {"b","d"} and {"e","c"} // assert_eq!(cut.len(), 2); - assert!(cut.get_iter().any(|comp| comp.contains("b"))); - assert!(cut.get_iter().any(|comp| comp.contains("c"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("b"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("c"))); } // Case 2: XOR with 3 different branches @@ -171,9 +152,9 @@ mod tests { // Expect three components: one with b, one with c, one with d assert_eq!(cut.len(), 3); - assert!(cut.get_iter().any(|comp| comp.contains("b"))); - assert!(cut.get_iter().any(|comp| comp.contains("c"))); - assert!(cut.get_iter().any(|comp| comp.contains("d"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("b"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("c"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("d"))); } // Case 3: No XOR (sequence only) @@ -206,9 +187,9 @@ mod tests { // Expect 3 disjoint components assert_eq!(cut.len(), 3); - assert!(cut.get_iter().any(|comp| comp.contains("a"))); - assert!(cut.get_iter().any(|comp| comp.contains("e"))); - assert!(cut.get_iter().any(|comp| comp.contains("f"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("a"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("e"))); + 
assert!(cut.partitions.iter().any(|comp| comp.contains("f"))); } #[test] diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs index c58f978..e3178ca 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs @@ -1,14 +1,18 @@ -/// This implementation follows the Loop cut finder algorithm as implemented in -/// the ProM framework (`InductiveMiner`), originally written in Java. -/// -/// Reference: -/// - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: -/// "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." -/// Application of Concurrency to System Design (ACSD), 2013. -/// - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven -/// University of Technology, 09.05.2017 -/// - ProM source code: -/// https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/cutfinders/CutFinderIMLoop.java +//! Utility for detecting a loop cut in a Directly Follows Graph. +//! +//! +//! # Implementation Notes +//! This implementation ports the Loop cut finder algorithm as implemented in +//! the ProM framework (`InductiveMiner`), originally written in Java. +//! +//! Reference: +//! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: +//! "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." +//! Application of Concurrency to System Design (ACSD), 2013. +//! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven +//! University of Technology, 09.05.2017 +//! - ProM source code: +//! 
https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/cutfinders/CutFinderIMLoop.java use std::borrow::Cow; use std::collections::HashSet; use crate::core::process_models::dfg::DirectlyFollowsGraph; @@ -141,10 +145,10 @@ pub fn redo_loop_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Option(dfg: &'a DirectlyFollowsGraph<'_>, log: &EventLog, event_log } } - +#[cfg(test)] mod test_cut_finder{ use std::collections::HashSet; use crate::{ diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs index 8b17dde..2d723ab 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs @@ -1,3 +1,5 @@ +//! Utility for detecting a sequence cut in a Directly Follows Graph. + use std::borrow::Cow; use std::collections::{HashMap, HashSet}; use crate::core::process_models::dfg::{Activity, DirectlyFollowsGraph}; @@ -60,22 +62,6 @@ fn reaches_any_transitive(a: &HashSet>, b: &HashSet>, } -/// Helper function which calculates whether every activity in a set a can reach every activity in another set b. -fn reaches_all_transitive(a: &HashSet>, b: &HashSet>, - idx_map: &HashMap, - matrix: &Vec>) -> bool { - for act_a in a { - for act_b in b { - if let (Some(&idx_a), Some(&idx_b)) = (idx_map.get(act_a.as_ref()), idx_map.get(act_b.as_ref())) { - if !matrix[idx_a][idx_b] { - return false; - } - } - } - } - true -} - /// Calculates Activity Sequences in a given Directly Follows Graph. /// Two activities are in sequence if they are neither mutually reachable nor mutually unreachable. 
@@ -176,16 +162,14 @@ pub fn sequence_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>, _parameters: } } -#[allow(unused_imports)] +#[cfg(test)] mod test_sequence_cut{ use std::borrow::Cow; use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::calc_sequences; use std::collections::HashSet; - use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::dfg::DirectlyFollowsGraph; - use crate::discovery::case_centric::dfg::discover_dfg; use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::{ sequence_cut_wrapper}; - use crate::{event_log, trace, event}; + use crate::{event_log}; #[test] fn test_single_activity(){ let dfg = DirectlyFollowsGraph::discover(&event_log!(["a"])); @@ -199,7 +183,6 @@ mod test_sequence_cut{ let input = event_log!(["a", "b", "c"], ["d"]); let dfg = DirectlyFollowsGraph::discover(&input); let result = sequence_cut_wrapper(&dfg, &HashSet::new()); - println!("{:?}", result); assert!(result.is_some()); assert_eq!(result.unwrap().get_own().len(), 3); diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs index dc7f073..aa91005 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs @@ -1,3 +1,14 @@ +//! Activity Concurrent fallthrough detection utilities. +//! +//! This module implements the **activity concurrent** fallthrough used by the inductive miner. +//! +//! The activity concurrent fallthrough assumes concurrent behavior when a single activity in the event log +//! can occur independently of the ordering of the other activities. In such a case, the activity is +//! considered to run in parallel with the remaining behavior of the log. +//! +//! 
When this pattern is detected, the activity is separated from the log and modeled as executing +//! concurrently with the rest of the process. + use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::{Node, OperatorType}; use crate::discovery::case_centric::dfg::discover_dfg_with_classifier; @@ -137,6 +148,7 @@ pub fn activity_concurrent_wrapper(log: EventLog, activity_concurrent(log, event_log_classifier, parameters) } +#[cfg(test)] mod test_activity_concurrent { use std::collections::HashSet; use crate::core::event_data::case_centric::EventLogClassifier; @@ -153,8 +165,6 @@ mod test_activity_concurrent { let a0 = event_log_classifier.get_class_identity(e0); let a1 = event_log_classifier.get_class_identity(e1); if a0 != a1 { - println!("Two activities did not match{:?}", (a0, a1)); - return false; } } @@ -192,7 +202,7 @@ mod test_activity_concurrent { assert!(events_equal(&log1, &ex1, &classifier)); assert!(events_equal(&log2, &ex2, &classifier)); let ActivityConcurrent(node, log1, split)= activity_concurrent(log, &classifier, &HashSet::new()) else { return assert!(false); }; - assert!(!log1.traces.is_empty() && !split.is_empty()); + assert!(!log1.traces.is_empty() && !split.sub_logs.is_empty()); let ex_node = Node::new_operator(OperatorType::Concurrency); assert_eq!(node, ex_node); diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs index b7da602..dbb1d0c 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs @@ -1,7 +1,15 @@ - +//! Activity once per trace detection utilities. +//! +//! This module implements the **activity once per trace** used by the inductive miner. +//! +//! 
The fallthrough applies when an activity occurs **exactly once in every trace of the event log**. +//! In this case, the activity is assumed to execute independently of the rest of the process. +//! +//! When such an activity is detected, it is removed from the event log and modeled as running in +//! parallel with the remaining behavior of the process. use std::collections::HashMap; use crate::core::event_data::case_centric::EventLogClassifier; -use crate::{event_log, EventLog}; +use crate::EventLog; use crate::core::process_models::process_tree::{Node, OperatorType}; use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{ActivityOncePerTrace, Return}; @@ -34,14 +42,6 @@ fn remove_activity_from_log( log } -#[test] -fn test_remove_activity_with_empty_trace() { - let log = event_log!([], ["a"], ["a", "b"]); - let r = remove_activity_from_log(log, &EventLogClassifier::default(), "a".to_string()); - - let expected = event_log!([], [], ["b"],); - assert_eq!(r, expected); -} /// Helper struct to count the occurrences of each activity in the whole log and in every trace. /// In 'trace_activities' each index corresponds to a trace at the same index in the event log. 
@@ -164,14 +164,25 @@ pub fn activity_once_per_trace_wrapper( activity_once_per_trace(log, event_log_classifier) } + +#[cfg(test)] mod test_activity_once_per_trace { use crate::{event_log, EventLog}; use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::{Node, OperatorType}; - use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_once_per_trace::activity_once_per_trace; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_once_per_trace::{activity_once_per_trace, remove_activity_from_log}; use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{ActivityOncePerTrace, Return}; + #[test] + fn test_remove_activity_with_empty_trace() { + let log = event_log!([], ["a"], ["b", "a"]); // b as first event intentionally to get the same timestamp as for the expected one + let r = remove_activity_from_log(log, &EventLogClassifier::default(), "a".to_string()); + + let expected = event_log!([], [], ["b"],); + assert_eq!(r, expected); + } + fn events_equal(log: &EventLog, o_log: &EventLog, event_log_classifier: &EventLogClassifier) -> bool { if log.traces.len() == o_log.traces.len() { for (t0, t1) in log.traces.iter().zip(o_log.traces.iter()) { @@ -180,8 +191,6 @@ mod test_activity_once_per_trace { let a0 = event_log_classifier.get_class_identity(e0); let a1 = event_log_classifier.get_class_identity(e1); if a0 != a1 { - println!("Two activities did not match{:?}", (a0, a1)); - return false; } } diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/empty_traces.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/empty_traces.rs index dbd278d..71cc9eb 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/empty_traces.rs +++ 
b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/empty_traces.rs @@ -1,3 +1,5 @@ +//! Empty traces fallthrough detection utilities. + use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::{Node, OperatorType}; use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; @@ -7,9 +9,10 @@ use crate::EventLog; /// Checks whether the empty traces fallthrough applies to a given log, /// it applies when the log contains empty traces. -/// If so a Process node with operator instance xor is returned, having an empty leaf as child if -/// there is at least one empty trace in the log, -/// the other non-empty logs have to processed in another recursion of the IM Algorithm. +/// +/// # Returns +/// - [EmptyTraces] if the event log contained empty traces +/// - [Return] if the event log contained no empty traces fn empty_traces(mut log: EventLog, _event_log_classifier: &EventLogClassifier) -> Fallthrough { let len_before = log.traces.len(); log.traces = log.traces.into_iter().filter(|trace| !trace.events.is_empty()).collect(); @@ -38,7 +41,7 @@ pub fn empty_traces_wrapper(log: EventLog, _event_log_classifier: &EventLogClass empty_traces(log, _event_log_classifier) } - +#[cfg(test)] mod test_empty_traces_ft{ use crate::{event_log, event}; use crate::core::event_data::case_centric::EventLogClassifier; diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/fallthrough.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/fallthrough.rs index 36e80df..572cae2 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/fallthrough.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/fallthrough.rs @@ -1,3 +1,5 @@ +//! 
Fallthrough labels + use std::mem::discriminant; use crate::core::process_models::process_tree::Node; use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/flower_model.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/flower_model.rs index 4567da5..0936bef 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/flower_model.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/flower_model.rs @@ -1,3 +1,7 @@ +//! Flower model fall through utilities. +//! +//! This module implements the flower model fallthrough as a last resort fallthrough. + use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::{Node, OperatorType}; use crate::discovery::case_centric::dfg::discover_dfg_with_classifier; @@ -35,7 +39,7 @@ pub fn flower_model(log: EventLog, event_log_classifier: &EventLogClassifier) -> FlowerModel(flower_node_root) } - +#[cfg(test)] mod test_flower_model { use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::{Node, OperatorType}; diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs index 35cc815..b2b39dd 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs @@ -1,3 +1,7 @@ +//! Fallthrough detection utilities for the Inductive Miner. +//! +//! This module contains utilities of the fallthrough rules used by the Inductive Miner when no +//! standard cut can be discovered in the event log. 
use crate::core::event_data::case_centric::EventLogClassifier; use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_concurrent::activity_concurrent_wrapper; use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_once_per_trace::activity_once_per_trace_wrapper; @@ -21,13 +25,13 @@ mod tau_loop; /// Applies the sequence of *fallthrough rules* used by the Inductive Miner to an event log. /// -/// This function iteratively evaluates predefined fallthroughs in the following order: -/// - Empty Traces -/// - Activity Once Per Trace -/// - Activity Concurrent -/// - Strict Tau Loop -/// - Tau Loop -/// - Flower Model +/// This function iteratively evaluates predefined fallthrough in the following order: +/// - [empty_traces] +/// - [activity_once_per_trace] +/// - [activity_concurrent] +/// - [strict_tau_loop] +/// - [tau_loop] +/// - [flower_model] /// /// Whether a Fallthrough is applied at all, is controlled by the provided parameters. /// Note, that the Flower Model is applied nevertheless. diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs index 7797d35..6b17cd6 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs @@ -1,3 +1,12 @@ +//! Strict tau loop fallthrough detection utilities. +//! +//! This module implements the **strict tau loop fallthrough** used by the Inductive Miner. +//! +//! A strict tau loop assumes that a new iteration of the process starts **only when a start activity +//! directly follows an end activity** within the same trace. Such a pattern suggest that the process +//! silently returned to the beginning of the workflow via a tau transition between iterations. 
+ + use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::Node; use crate::core::process_models::process_tree::OperatorType::Loop; @@ -7,6 +16,16 @@ use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthroug use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; use crate::EventLog; +/// Splits traces in the event log according to the semantics of a **strict tau loop fallthrough**. +/// +/// A trace is split whenever an **end activity** is immediately followed by a **start activity**. +/// This pattern indicates that one iteration of the process has completed and a new iteration begins +/// via an implicit silent transition. +/// +/// Empty traces may appear in the resulting log if a split occurs at the beginning of a trace segment. +/// +/// # Returns +/// A new [EventLog] where traces are split by the above described logic. fn split_log_according_to_strict_tau(log: EventLog, classifier: &EventLogClassifier) -> EventLog{ let dfg = discover_dfg_with_classifier(&log, classifier); let mut result_log = log.clone_without_traces(); @@ -44,7 +63,22 @@ fn split_log_according_to_strict_tau(log: EventLog, classifier: &EventLogClassif result_log } +/// Attempt to detect and apply the **strict tau loop fallthrough**. +/// The log is transformed by using [split_log_according_to_strict_tau]. +/// If the operation increased the number of traces in the log, it indicates that the traces contained +/// implicit restarts of the process. /// +/// In that case, a loop operator is constructed where: +/// +/// - the **do part** represents a single iteration of the process +/// - the **redo part** is a silent transition (tau) +/// +/// The resulting loop node and transformed event log are returned. 
+/// +/// # Returns +/// - [StrictTauLoop] if strict tau loop behavior is detected +/// - [Return] otherwise + fn strict_tau_loop(log: EventLog, classifier: &EventLogClassifier) -> Fallthrough { let k = log.traces.len(); let log = split_log_according_to_strict_tau(log, classifier); @@ -80,7 +114,7 @@ pub fn strict_tau_loop_wrapper(log: EventLog, classifier: &EventLogClassifier, _ } - +#[cfg(test)] mod test_strict_tau_loop{ use crate::core::event_data::case_centric::EventLogClassifier; use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs index b98d033..1cc1bec 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs @@ -1,3 +1,10 @@ +//! Tau loop fallthrough detection utilities. +//! +//! This module implements the **tau loop fallthrough** used by the Inductive Miner. +//! A tau loop is assumed when a trace appears to restart without an explicit visible transition +//! between the end of one iteration and the beginning of the next. + + use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::Node; use crate::core::process_models::process_tree::OperatorType::Loop; @@ -54,7 +61,18 @@ fn split_log_according_to_tau_loop(log: EventLog, classifier: &EventLogClassifie result_log } -/// Attempts to apply the 'tau_loop' Fallthrough by +/// Attempts to apply the 'tau_loop' fallthrough. +/// +/// The algorithm first splits the log using [split_log_according_to_tau_loop]. +/// If this operation increases the number of traces, it indicates that traces contained implicit +/// restarts. 
In that case, a loop operator is created where: +/// +/// - the **do part** represent one iteration of the process +/// - the **redo part** is a silent transition (tau) +/// +/// # Returns +/// - [Fallthrough::TauLoop] if the log split indicates loop behavior +/// - [Fallthrough::Return] if the log split indicates no loop behavior fn tau_loop(log: EventLog, classifier: &EventLogClassifier) -> Fallthrough { let k = log.traces.len(); let log = split_log_according_to_tau_loop(log, classifier); @@ -88,7 +106,7 @@ pub fn tau_loop_wrapper(log: EventLog, classifier: &EventLogClassifier, _:&Param } - +#[cfg(test)] mod test_tau_loop{ use crate::core::event_data::case_centric::EventLogClassifier; use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::TauLoop; diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs index 05a3e4f..2d31d0a 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs @@ -1,9 +1,9 @@ //! 
inductive miner discovery algorithm +use base_cases::{find_base_case, BaseCases}; use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::{Node, ProcessTree}; use crate::discovery::case_centric::dfg::discover_dfg_with_classifier; -use crate::discovery::case_centric::inductive_miner_app::base_cases::base_cases::{find_base_case, BaseCases}; use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; use crate::discovery::case_centric::inductive_miner_app::cut_finder::find_cut; use crate::discovery::case_centric::inductive_miner_app::fallthrough::apply_fallthrough; @@ -181,9 +181,9 @@ pub fn build_tree(log: EventLog, event_log_classifier: &EventLogClassifier, para mod tests { use crate::core::event_data::case_centric::EventLogClassifier; - use crate::core::process_models::process_tree::{Node}; + use crate::core::process_models::process_tree::Node; use crate::core::process_models::process_tree::OperatorType::{ExclusiveChoice, Loop}; - use crate::discovery::case_centric::inductive_miner_app::{inductive_miner_default_parameters}; + use crate::discovery::case_centric::inductive_miner_app::inductive_miner_default_parameters; use crate::event_log; #[test] diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs index f42e35a..4e94cdb 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs @@ -1,3 +1,4 @@ +//! Utility for splitting an event log according to a concurrency split. 
use std::borrow::Cow; use std::collections::HashSet; use crate::core::event_data::case_centric::EventLogClassifier; @@ -55,14 +56,14 @@ pub fn and_split<'a>(log: &EventLog, activity_classifier: &EventLogClassifier, c } -#[allow(unused_imports)] +#[cfg(test)] mod test_and_split{ use crate::core::chrono::Utc; use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::dfg::DirectlyFollowsGraph; use crate::discovery::case_centric::inductive_miner_app::cut_finder::concurrent::concurrent_cut_wrapper; use crate::discovery::case_centric::inductive_miner_app::splits::concurrency::and_split; - use crate::{event_log, EventLog}; + use crate::event_log; #[test] fn test_simple_and_cut_and_split(){ diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusice_choice.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs similarity index 91% rename from process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusice_choice.rs rename to process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs index e0703db..b02d41a 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusice_choice.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs @@ -1,3 +1,18 @@ +//! Utility for splitting an event log according to an exclusive choice cut. +//! +//! +//! # Implementation Notes +//! This implementation adopts the xor-split algorithm as implemented in +//! the ProM framework (`InductiveMiner`), originally written in Java. +//! +//! Reference: +//! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: +//! "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." +//! Application of Concurrency to System Design (ACSD), 2013. +//! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven +//! 
University of Technology, 09.05.2017 +//! - ProM source code: +//! https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/logsplitter/LogSplitterXorFiltering.java use std::collections::HashMap; use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::OperatorType::ExclusiveChoice; @@ -5,17 +20,6 @@ use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; use crate::EventLog; -/// This implementation follows the xor-split algorithm as implemented in -/// the ProM framework (`InductiveMiner`), originally written in Java. -/// -/// Reference: -/// - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: -/// "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." -/// Application of Concurrency to System Design (ACSD), 2013. -/// - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. 
Thesis, Eindhoven -/// University of Technology, 09.05.2017 -/// - ProM source code: -/// https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/logsplitter/LogSplitterXorFiltering.java @@ -131,6 +135,7 @@ pub fn xor_split<'a>(log: &EventLog, activity_classifier: &EventLogClassifier, c Some(Split::new(ExclusiveChoice, result)) } +#[cfg(test)] mod tests_xor_split{ use std::collections::HashSet; use crate::core::chrono::Utc; @@ -139,7 +144,7 @@ mod tests_xor_split{ use crate::core::process_models::process_tree::OperatorType::ExclusiveChoice; use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; use crate::discovery::case_centric::inductive_miner_app::cut_finder::exclusive_choice::exclusive_choice_cut_wrapper; - use crate::discovery::case_centric::inductive_miner_app::splits::exclusice_choice::xor_split; + use crate::discovery::case_centric::inductive_miner_app::splits::exclusive_choice::xor_split; use crate::event_log; #[test] @@ -158,7 +163,7 @@ mod tests_xor_split{ let x = xor_split(&log, &EventLogClassifier::default(), cut); assert!(x.is_some()); let x = x.unwrap(); - assert_eq!(x.len(), 2); + assert_eq!(x.sub_logs.len(), 2); } #[test] diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs index e97ebc5..867ef8e 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs @@ -1,8 +1,11 @@ +//! This module contains utilities for splitting an event log according to either exclusive choice, +//! sequence, loop or concurrency cut. 
+ use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::OperatorType; use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; use crate::discovery::case_centric::inductive_miner_app::splits::concurrency::and_split; -use crate::discovery::case_centric::inductive_miner_app::splits::exclusice_choice::xor_split; +use crate::discovery::case_centric::inductive_miner_app::splits::exclusive_choice::xor_split; use crate::discovery::case_centric::inductive_miner_app::splits::redo_loop::loop_split; use crate::discovery::case_centric::inductive_miner_app::splits::sequence::sequence_split; use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; @@ -10,7 +13,7 @@ use crate::EventLog; mod concurrency; mod sequence; -mod exclusice_choice; +mod exclusive_choice; mod redo_loop; pub mod split; @@ -59,7 +62,7 @@ fn splitting<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut<'a>) } } - +#[cfg(test)] mod test_splits{ use std::collections::HashSet; use crate::core::event_data::case_centric::EventLogClassifier; diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs index 42f1471..ac1aa04 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs @@ -1,3 +1,19 @@ +//! Utility for splitting a log according to a loop cut +//! +//! +//! # Implementation Notes +//! This implementation adopts the loop-splitting algorithm as implemented in +//! the ProM framework (`InductiveMiner`), originally written in Java. +//! +//! Reference: +//! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: +//! "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." +//! 
Application of Concurrency to System Design (ACSD), 2013. +//! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven +//! University of Technology, 09.05.2017 +//! - ProM source code: +//! https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/logsplitter/LogSplitterLoop.java + use std::collections::HashMap; use crate::EventLog; use crate::core::event_data::case_centric::EventLogClassifier; @@ -52,7 +68,7 @@ pub fn loop_split<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut< //each sublogs gets one clean trace let mut sub_trace = trace.clone_without_events(); - let mut last_partition: Option = None; // init to None to signal the start of a new trace + let mut last_partition: Option = None; // init to None to signal the start of a new trace for event in &trace.events { let activity = classifier.get_class_identity(event); @@ -91,14 +107,13 @@ pub fn loop_split<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut< Some(Split::new(Loop, result)) } -#[allow(unused_imports)] +#[cfg(test)] mod test_loop_split { - use crate::core::chrono::Utc; use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::dfg::DirectlyFollowsGraph; use crate::discovery::case_centric::inductive_miner_app::cut_finder::loop_cut::redo_loop_cut_wrapper; use crate::discovery::case_centric::inductive_miner_app::splits::redo_loop::loop_split; - use crate::{event, event_log, trace}; + use crate::event_log; use crate::EventLog; fn events_equal(log: &EventLog, o_log: &EventLog, event_log_classifier: EventLogClassifier) -> bool { @@ -109,8 +124,6 @@ mod test_loop_split { let a0 = event_log_classifier.get_class_identity(e0); let a1 = event_log_classifier.get_class_identity(e1); if a0 != a1 { - println!("Two activities did not match{:?}", (a0, a1)); - return false; } } @@ -134,7 +147,7 @@ mod test_loop_split { let split = loop_split(&log, 
&EventLogClassifier::default(), cut.unwrap()); assert!(split.is_some()); let split = split.unwrap(); - assert_eq!(split.len(), 2); + assert_eq!(split.sub_logs.len(), 2); // created expected event logs let do_log = event_log!( @@ -190,10 +203,9 @@ mod test_loop_split { let split = loop_split(&log, &EventLogClassifier::default(), cut.unwrap()); assert!(split.is_some()); let split = split.unwrap(); - assert_eq!(split.len(), 2); + assert_eq!(split.sub_logs.len(), 2); for log in split.get_own() { - println!("{:#?}", log); if log.traces.len() == do_log.traces.len() { // expected length of 6 assert!(events_equal(&log, &do_log, EventLogClassifier::default())); diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs index 4cbc4db..f1fef4b 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs @@ -1,14 +1,17 @@ -/// This implementation follows the sequence-split algorithm as implemented in -/// the ProM framework (`InductiveMiner`), originally written in Java. -/// -/// Reference: -/// - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: -/// "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." -/// Application of Concurrency to System Design (ACSD), 2013. -/// - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven -/// University of Technology, 09.05.2017 -/// - ProM source code: -/// https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/logsplitter/LogSplitterSequenceFiltering.javang +//! Utility for resolving sequence cuts into sequence splits. +//! +//! # Implementation Notes +//! Port of the sequence-split algorithm as implemented in +//! the ProM framework (`InductiveMiner`), originally written in Java. 
+//! +//! # Reference: +//! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: +//! "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." +//! Application of Concurrency to System Design (ACSD), 2013. +//! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven +//! University of Technology, 09.05.2017 +//! - ProM source code: +//! https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/logsplitter/LogSplitterSequenceFiltering.java use std::borrow::Cow; use std::collections::HashSet; use std::ops::Deref; @@ -131,7 +134,7 @@ pub fn sequence_split<'a>( Some(Split::new(Sequence, result)) } -#[allow(unused_imports)] +#[cfg(test)] mod test_sequence_split { use std::collections::HashSet; use crate::core::chrono::Utc; @@ -139,7 +142,7 @@ mod test_sequence_split { use crate::core::process_models::dfg::DirectlyFollowsGraph; use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::sequence_cut_wrapper; use crate::discovery::case_centric::inductive_miner_app::splits::sequence::sequence_split; - use crate::{event, event_log}; + use crate::event_log; #[test] fn test_sequence_split() { @@ -154,7 +157,6 @@ mod test_sequence_split { assert!(cut.is_some()); let cut = cut.unwrap(); - println!("Cut: {:?}", cut); let split = sequence_split(&log, &EventLogClassifier::default(), cut); assert!(split.is_some()); @@ -225,7 +227,6 @@ mod test_sequence_split { assert!(cut.is_some()); let split = sequence_split(&log, &EventLogClassifier::default(), cut.unwrap()); assert!(split.is_some()); - println!("{:?}", split.unwrap().get_own()); } diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs index 5b8d015..4921425 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs +++ 
b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs @@ -1,3 +1,4 @@ +//! This module contains the basic split used for representing found splits in the Inductive Miner Algorithm. use crate::core::process_models::process_tree::OperatorType; use crate::EventLog; @@ -8,19 +9,15 @@ use crate::EventLog; /// - 'operator' : ['ImOperator'] defining the split type /// - 'sub_logs': a vector containing all new logs pub struct Split{ - operator: OperatorType, - sub_logs: Vec, + pub operator: OperatorType, + pub sub_logs: Vec, } impl Split{ pub fn new(operator: OperatorType, sub_logs: Vec) -> Split{ Self{operator, sub_logs} } - - pub fn len(&self) -> usize { - self.sub_logs.len() - } - + pub fn get_own(self) -> Vec{ self.sub_logs } @@ -29,7 +26,4 @@ impl Split{ self.operator } - pub fn is_empty(&self) -> bool{ - self.sub_logs.is_empty() - } } \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs index 39c34b3..9042f8e 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs @@ -1,4 +1,18 @@ -/// This implementation is inspired by the component structure in +//! Component management utilities. +//! +//! This module provides a lightweight structure for maintaining connected components (partitions) of +//! a set of nodes. Each node belongs to exactly one component and components can be merged dynamically. +//! +//! The structure is primarily used to represent partitions of activities during algorithms for finding +//! cuts in event logs. +//! +//! # Implementation notes +//! +//! This is a port of the component structure implementation in the ProM framework (`InductiveMiner`), originally written in Java. +//! - ProM source code: +//! 
https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/helperclasses/graphs/IntComponents.java + +/// Port of the component structure implementation in /// the ProM framework (`InductiveMiner`), originally written in Java. /// /// - ProM source code: @@ -8,6 +22,13 @@ use std::borrow::Cow; use std::collections::{HashMap, HashSet}; + +/// Maintains a partition of nodes into components. +/// +/// Each node belongs to exactly one component. Components can be merged, queried and converted back +/// into explicit sets of nodes. +/// +/// Internally, nodes are mapped to integer indices to allow for efficient component operations. #[derive(Debug)] pub struct Components<'a> { components: Vec, // component index of each node, get node index from map @@ -17,6 +38,8 @@ pub struct Components<'a> { impl<'a> Components<'a> { + + /// Creates a new component structure where each node initially forms its own component. pub fn new(nodes: &[Cow<'a, str>]) -> Self { let mut node2index = HashMap::new(); // every node gets it own index in the beginning @@ -33,47 +56,31 @@ impl<'a> Components<'a> { } } - pub fn from(partitions: &Vec>>) -> Self { - let mut node2index = HashMap::new(); - let mut node_number: usize = 0; - - for part in partitions.iter() { - for act in part.iter() { - node2index.insert(act.clone(), node_number); - node_number += 1; - } - } - - let mut components = vec![0;node_number]; - - - let mut node_number: usize = 0; - for (component_number, part) in partitions.iter().enumerate() { - for _ in part.iter(){ - components[node_number] = component_number; - node_number += 1; - } - } - - Self{components, node2index, number_of_components: partitions.len()} - - } - + /// Returns the component index of a given node. + /// + /// Panics if the node is not contained in the component structure. 
pub fn component_of(&self, node: &str) -> usize { self.components[self.node2index[node]] } + /// Returns whether the nodes 'a' and 'b' are in the same component. pub fn same_component(&self, a: &str, b: &str) -> bool { self.component_of(a) == self.component_of(b) } + /// Merges the components containing the nodes 'a' and 'b'. + /// + /// If both nodes already are in the same component, the structure remains unchanged. pub fn merge_components_of(&mut self, a: &str, b: &str) { let ca = self.component_of(a); let cb = self.component_of(b); self.merge_components(ca, cb); } + /// Merge two components identified by their indices. + /// + /// All nodes belonging to the component 'ca' are reassigned to the component 'cb'. pub fn merge_components(&mut self, ca: usize, cb: usize) { if ca == cb { return; @@ -90,6 +97,10 @@ impl<'a> Components<'a> { } } + /// Returns the current partitioning of nodes as explicit sets. + /// + /// Each element of the returned vector represents a component containing the nodes belonging + /// to that component. pub fn get_components(&self) -> Vec>> { let mut result: Vec>> = Vec::new(); let mut map: HashMap = HashMap::new(); diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs index fb94d66..8521cb8 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs @@ -1,4 +1,7 @@ -use crate::{event_log, trace, EventLog}; +//! Utilities for determining the minimum self distance of a given activity in a given trace +//! or activity sequence. 
+ +use crate::EventLog; use std::collections::{HashMap, HashSet}; use crate::core::event_data::case_centric::{EventLogClassifier, Trace}; use crate::core::process_models::dfg::Activity; @@ -141,131 +144,141 @@ impl MinimumSelfDistance { } -#[test] -fn test_extract_interleaving_activities() { - let t = trace!("a", "b", "c", "d", "e", "f"); - let s = MinimumSelfDistance::extract_interleaving_activities(0, 6, &t, &EventLogClassifier::default()); - assert_eq!(s, HashSet::from(["b".into(), "c".into(), "d".into(), "e".into(), "f".into()])); -} -#[test] -fn test_extract_from_empty_trace() { - let t = trace!(); - let s = MinimumSelfDistance::extract_interleaving_activities(0, 6, &t, &EventLogClassifier::default()); - assert!(s.is_empty());} - -// ------------ Tests using binary events -#[test] -fn test_one_loop_distance() { - let t = trace!("a", "b", "a"); - - let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); - assert!(r.contains_key("a")); - assert_eq!(r.get("a").unwrap().0, 1); - assert!(r.get("a").unwrap().1.contains("b")); -} +#[cfg(test)] +mod test_min_dist{ + use std::collections::HashSet; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::discovery::case_centric::inductive_miner_app::structures::minimum_self_distance::MinimumSelfDistance; + use crate::{event_log, trace}; -#[test] -fn test_loop_zero_distance(){ - let t = trace!("a","a"); + #[test] + fn test_extract_interleaving_activities() { + let t = trace!("a", "b", "c", "d", "e", "f"); + let s = MinimumSelfDistance::extract_interleaving_activities(0, 6, &t, &EventLogClassifier::default()); + assert_eq!(s, HashSet::from(["b".into(), "c".into(), "d".into(), "e".into(), "f".into()])); + } + #[test] + fn test_extract_from_empty_trace() { + let t = trace!(); + let s = MinimumSelfDistance::extract_interleaving_activities(0, 6, &t, &EventLogClassifier::default()); + assert!(s.is_empty());} + + // ------------ Tests using binary events + #[test] + fn 
test_one_loop_distance() { + let t = trace!("a", "b", "a"); + + let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 1); + assert!(r.get("a").unwrap().1.contains("b")); + } - let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); - assert!(r.contains_key("a")); - assert_eq!(r.get("a").unwrap().0, 0); - assert!(r.get("a").unwrap().1.is_empty()); -} + #[test] + fn test_loop_zero_distance(){ + let t = trace!("a","a"); -#[test] -fn test_retrieve_smaller_later_loop(){ - let t = trace!("a", "b", "b", "a", "b", "b", "b", "a", "b", "a"); + let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 0); + assert!(r.get("a").unwrap().1.is_empty()); + } - let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); - assert!(r.contains_key("a")); - assert_eq!(r.get("a").unwrap().0, 1); - assert!(r.get("a").unwrap().1.contains("b")); + #[test] + fn test_retrieve_smaller_later_loop(){ + let t = trace!("a", "b", "b", "a", "b", "b", "b", "a", "b", "a"); - // trivial, b should have 0 minimum self distance in this example - assert!(r.contains_key("b")); - assert_eq!(r.get("b").unwrap().0, 0); - assert!(r.get("b").unwrap().1.is_empty()); -} + let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 1); + assert!(r.get("a").unwrap().1.contains("b")); + // trivial, b should have 0 minimum self distance in this example + assert!(r.contains_key("b")); + assert_eq!(r.get("b").unwrap().0, 0); + assert!(r.get("b").unwrap().1.is_empty()); + } -// -------------------------------- Test using more than two different activities -#[test] -fn test_complex_trace(){ - let t = trace!("a", "b", "d", "e", "a", "d", "g", "g", "d","b", "f", 
"a", "c"); - let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); + // -------------------------------- Test using more than two different activities - // check if loops are contained - assert!(r.contains_key("a")); - assert_eq!(r.get("a").unwrap().0, 3); - assert_eq!(r.get("a").unwrap().1, HashSet::from(["b".into(), "d".into(), "e".into()])); + #[test] + fn test_complex_trace(){ + let t = trace!("a", "b", "d", "e", "a", "d", "g", "g", "d","b", "f", "a", "c"); + let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default()); + // check if loops are contained + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 3); + assert_eq!(r.get("a").unwrap().1, HashSet::from(["b".into(), "d".into(), "e".into()])); - assert!(r.contains_key("b")); - assert_eq!(r.get("b").unwrap().0, 7); - assert_eq!(r.get("b").unwrap().1, HashSet::from(["a".into(), "e".into(), "d".into(), "g".into()])); - assert!(!r.contains_key("c")); + assert!(r.contains_key("b")); + assert_eq!(r.get("b").unwrap().0, 7); + assert_eq!(r.get("b").unwrap().1, HashSet::from(["a".into(), "e".into(), "d".into(), "g".into()])); - // special case, because there are two loops with same minimum distance two - assert!(r.contains_key("d")); - assert_eq!(r.get("d").unwrap().0, 2); - // merged activities - assert_eq!(r.get("d").unwrap().1, HashSet::from(["e".into(), "a".into(), "g".into()])); + assert!(!r.contains_key("c")); + // special case, because there are two loops with same minimum distance two + assert!(r.contains_key("d")); + assert_eq!(r.get("d").unwrap().0, 2); + // merged activities + assert_eq!(r.get("d").unwrap().1, HashSet::from(["e".into(), "a".into(), "g".into()])); - // not appearing twice - assert!(!r.contains_key("e")); - assert!(!r.contains_key("f")); - // only one trace where g follows after g - assert!(r.contains_key("g")); - assert_eq!(r.get("g").unwrap().0, 0); - assert!(r.get("g").unwrap().1.is_empty()); -} + // not 
appearing twice + assert!(!r.contains_key("e")); + assert!(!r.contains_key("f")); + // only one trace where g follows after g + assert!(r.contains_key("g")); + assert_eq!(r.get("g").unwrap().0, 0); + assert!(r.get("g").unwrap().1.is_empty()); + } -#[test] -fn test_empty_log(){ - let log = event_log!(); - let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default()); - assert!(r.is_empty()); -} + #[test] + fn test_empty_log(){ + let log = event_log!(); + let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default()); -#[test] -fn test_zero_loops_log(){ - let log = event_log!(["a", "a"], ["b", "b"]); - let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default()); + assert!(r.is_empty()); + } - assert!(r.contains_key("a")); - assert_eq!(r.get("a").unwrap().0, 0); + #[test] + fn test_zero_loops_log(){ + let log = event_log!(["a", "a"], ["b", "b"]); + let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default()); - assert!(r.contains_key("b")); - assert_eq!(r.get("b").unwrap().0, 0); -} + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 0); -#[test] -fn test_find_smaller_loop(){ - let log = event_log!(["a", "a"], ["a", "b", "a"]); - let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default()); + assert!(r.contains_key("b")); + assert_eq!(r.get("b").unwrap().0, 0); + } - assert!(r.contains_key("a")); - assert_eq!(r.get("a").unwrap().0, 0); + #[test] + fn test_find_smaller_loop(){ + let log = event_log!(["a", "a"], ["a", "b", "a"]); + let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default()); - assert!(!r.contains_key("b")); -} + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 0); + + assert!(!r.contains_key("b")); + } + + #[test] + fn test_merge_relations(){ + let log = event_log!(["a", "c", "a"], ["a", "b", "a"]); + let r = 
MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default()); + + assert!(r.contains_key("a")); + assert_eq!(r.get("a").unwrap().0, 1); + assert_eq!(r.get("a").unwrap().1, HashSet::from(["b".into(), "c".into()])); + } -#[test] -fn test_merge_relations(){ - let log = event_log!(["a", "c", "a"], ["a", "b", "a"]); - let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default()); - assert!(r.contains_key("a")); - assert_eq!(r.get("a").unwrap().0, 1); - assert_eq!(r.get("a").unwrap().1, HashSet::from(["b".into(), "c".into()])); } diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs index 0a3ede3..1c3eb51 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs @@ -1,3 +1,4 @@ +//! This module contains additional structures needed for the implementation of the Inductive Miner. pub mod parameter; pub mod components; pub mod minimum_self_distance; \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs index 8c4f135..9746001 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs @@ -1,3 +1,4 @@ +//! Parameter settings for controlling the inductive miner implementation use std::collections::HashSet; /// A helper type aggregating parameters which user maybe want the inductive miner to adhere. 
@@ -9,12 +10,12 @@ pub type Parameters = HashSet; /// Helper enum to express which option shall be activated in the inductive miner #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Parameter{ - StrictSequenceCut, // apply strict sequence cut additionally to the 'ordinary' sequence cut - FoldTree, // automatically fold tree MinimumSelfDistance, // consider minimum self distance while looking for concurrent cut ApplyFallthrough, // apply fallthrough's (Flower Model will always be applied //-------Ideas for additional parameters: // Multiprocessing + // StrictSequenceCut, // apply strict sequence cut additionally to the 'ordinary' sequence cut + // FoldTree, // automatically fold tree } @@ -27,6 +28,6 @@ impl Parameter{ /// - Minimum Self Distance is calculated and used during looking for a concurrent cut /// - Resulting Tree is folded pub fn generate_default_parameters() -> Parameters{ - HashSet::from([Parameter::StrictSequenceCut, Parameter::FoldTree, Parameter::MinimumSelfDistance, Parameter::ApplyFallthrough]) + HashSet::from([Parameter::MinimumSelfDistance, Parameter::ApplyFallthrough]) } } \ No newline at end of file From 30169f1897647c14fdc77b448d584f87d83c267e Mon Sep 17 00:00:00 2001 From: Fabian Sandkuhl Date: Mon, 18 May 2026 13:46:00 +0200 Subject: [PATCH 3/6] some fixes and fold operator --- .../process_tree/process_tree_struct.rs | 274 ++++++++++++++++++ .../cut_finder/concurrent.rs | 2 +- .../cut_finder/loop_cut.rs | 2 +- .../cut_finder/sequence_cut.rs | 8 +- .../fallthrough/activity_once_per_trace.rs | 3 +- .../case_centric/inductive_miner_app/mod.rs | 22 +- 6 files changed, 289 insertions(+), 22 deletions(-) diff --git a/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs b/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs index 934a2ff..50f41f2 100644 --- a/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs +++ 
b/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs @@ -64,6 +64,54 @@ impl Node { Node::Leaf(_) => true, } } + + /// Recursively folds this node by merging children that share the same + /// associative operator into the current node. + /// + /// The fold is applied **bottom-up**: children are folded first, and only + /// then the current node checks whether any of its (now-folded) children + /// can be inlined. + /// + /// **Example** – `SEQ(SEQ(a, b), c)` becomes `SEQ(a, b, c)`. + /// + /// Leaf nodes are returned unchanged. + pub fn fold(self) -> Self { + match self { + Node::Leaf(_) => self, + Node::Operator(op) => { + // Recursively fold all children first (bottom-up). + let folded_children: Vec = + op.children.into_iter().map(|child| child.fold()).collect(); + + // If the current operator is associative, inline any child + // that carries the same operator type. + let children = if op.operator_type.is_associative() { + let mut flattened = Vec::with_capacity(folded_children.len()); + for child in folded_children { + match child { + Node::Operator(ref inner) + if inner.operator_type == op.operator_type => + { + // Consume the child and move its children up. + if let Node::Operator(inner) = child { + flattened.extend(inner.children); + } + } + other => flattened.push(other), + } + } + flattened + } else { + folded_children + }; + + Node::Operator(Operator { + operator_type: op.operator_type, + children, + }) + } + } + } } /// @@ -81,6 +129,25 @@ pub enum OperatorType { Loop, } +impl OperatorType { + /// Returns `true` if this operator is associative. + /// + /// The associative operators are [`Sequence`](OperatorType::Sequence), + /// [`ExclusiveChoice`](OperatorType::ExclusiveChoice), and + /// [`Concurrency`](OperatorType::Concurrency). 
The [`Loop`](OperatorType::Loop) + /// operator is **not** associative because its first child (the body) and + /// subsequent children (the redo / exit branches) carry different semantic + /// roles, so merging nested loops would change the language. + pub fn is_associative(self) -> bool { + matches!( + self, + OperatorType::Sequence + | OperatorType::ExclusiveChoice + | OperatorType::Concurrency + ) + } +} + /// /// Object-centric process tree struct that contains [`Node`] as root /// @@ -98,6 +165,29 @@ impl ProcessTree { Self { root } } + /// Folds the process tree by merging nodes whose operator is associative. + /// + /// For the associative operators [`Sequence`](OperatorType::Sequence), + /// [`ExclusiveChoice`](OperatorType::ExclusiveChoice), and + /// [`Concurrency`](OperatorType::Concurrency) the following identity holds: + /// + /// ```text + /// OP(OP(a, b), c) ≡ OP(a, b, c) + /// ``` + /// + /// The fold is applied recursively bottom-up across the entire tree, so + /// arbitrarily deep chains of the same associative operator are fully + /// collapsed into a single flat node. + /// + /// [`Loop`](OperatorType::Loop) nodes are **not** folded because their + /// child positions carry different semantic roles. + /// + /// # Returns + /// A new [`ProcessTree`] with all associative operator chains collapsed. + pub fn fold(self) -> Self { + ProcessTree::new(self.root.fold()) + } + /// /// Returns `true` if all nodes have the right number of children and if all operators have /// eventually descendants that are leaves. @@ -252,6 +342,190 @@ mod tests { Leaf, Node, Operator, OperatorType, ProcessTree, }; + // ── folding tests ──────────────────────────────────────────────────────── + + #[test] + fn fold_flat_sequence_unchanged() { + // SEQ(a, b, c) has no nested SEQ — the tree must be returned as-is. 
+ let mut seq = Operator::new(OperatorType::Sequence); + seq.children.push(Node::new_leaf(Some("a".into()))); + seq.children.push(Node::new_leaf(Some("b".into()))); + seq.children.push(Node::new_leaf(Some("c".into()))); + let pt = ProcessTree::new(Node::Operator(seq)).fold(); + + let mut expected = Operator::new(OperatorType::Sequence); + expected.children.push(Node::new_leaf(Some("a".into()))); + expected.children.push(Node::new_leaf(Some("b".into()))); + expected.children.push(Node::new_leaf(Some("c".into()))); + assert_eq!(pt.root, Node::Operator(expected)); + } + + #[test] + fn fold_nested_sequence() { + // SEQ(SEQ(a, b), c) → SEQ(a, b, c) + let mut inner = Operator::new(OperatorType::Sequence); + inner.children.push(Node::new_leaf(Some("a".into()))); + inner.children.push(Node::new_leaf(Some("b".into()))); + + let mut outer = Operator::new(OperatorType::Sequence); + outer.children.push(Node::Operator(inner)); + outer.children.push(Node::new_leaf(Some("c".into()))); + + let pt = ProcessTree::new(Node::Operator(outer)).fold(); + + let mut expected = Operator::new(OperatorType::Sequence); + expected.children.push(Node::new_leaf(Some("a".into()))); + expected.children.push(Node::new_leaf(Some("b".into()))); + expected.children.push(Node::new_leaf(Some("c".into()))); + assert_eq!(pt.root, Node::Operator(expected)); + } + + #[test] + fn fold_deeply_nested_sequence() { + // SEQ(SEQ(SEQ(a, b), c), d) → SEQ(a, b, c, d) + let mut innermost = Operator::new(OperatorType::Sequence); + innermost.children.push(Node::new_leaf(Some("a".into()))); + innermost.children.push(Node::new_leaf(Some("b".into()))); + + let mut middle = Operator::new(OperatorType::Sequence); + middle.children.push(Node::Operator(innermost)); + middle.children.push(Node::new_leaf(Some("c".into()))); + + let mut outer = Operator::new(OperatorType::Sequence); + outer.children.push(Node::Operator(middle)); + outer.children.push(Node::new_leaf(Some("d".into()))); + + let pt = 
ProcessTree::new(Node::Operator(outer)).fold(); + + let mut expected = Operator::new(OperatorType::Sequence); + expected.children.push(Node::new_leaf(Some("a".into()))); + expected.children.push(Node::new_leaf(Some("b".into()))); + expected.children.push(Node::new_leaf(Some("c".into()))); + expected.children.push(Node::new_leaf(Some("d".into()))); + assert_eq!(pt.root, Node::Operator(expected)); + } + + #[test] + fn fold_xor_nested() { + // XOR(XOR(a, b), c) → XOR(a, b, c) + let mut inner = Operator::new(OperatorType::ExclusiveChoice); + inner.children.push(Node::new_leaf(Some("a".into()))); + inner.children.push(Node::new_leaf(Some("b".into()))); + + let mut outer = Operator::new(OperatorType::ExclusiveChoice); + outer.children.push(Node::Operator(inner)); + outer.children.push(Node::new_leaf(Some("c".into()))); + + let pt = ProcessTree::new(Node::Operator(outer)).fold(); + + let mut expected = Operator::new(OperatorType::ExclusiveChoice); + expected.children.push(Node::new_leaf(Some("a".into()))); + expected.children.push(Node::new_leaf(Some("b".into()))); + expected.children.push(Node::new_leaf(Some("c".into()))); + assert_eq!(pt.root, Node::Operator(expected)); + } + + #[test] + fn fold_concurrency_nested() { + // AND(AND(a, b), c) → AND(a, b, c) + let mut inner = Operator::new(OperatorType::Concurrency); + inner.children.push(Node::new_leaf(Some("a".into()))); + inner.children.push(Node::new_leaf(Some("b".into()))); + + let mut outer = Operator::new(OperatorType::Concurrency); + outer.children.push(Node::Operator(inner)); + outer.children.push(Node::new_leaf(Some("c".into()))); + + let pt = ProcessTree::new(Node::Operator(outer)).fold(); + + let mut expected = Operator::new(OperatorType::Concurrency); + expected.children.push(Node::new_leaf(Some("a".into()))); + expected.children.push(Node::new_leaf(Some("b".into()))); + expected.children.push(Node::new_leaf(Some("c".into()))); + assert_eq!(pt.root, Node::Operator(expected)); + } + + #[test] + fn 
fold_does_not_merge_different_operators() { + // SEQ(XOR(a, b), c) — different operator, must stay unchanged. + // Build two identical XOR nodes: one for the input, one for expected. + let make_inner = || { + let mut xor = Operator::new(OperatorType::ExclusiveChoice); + xor.children.push(Node::new_leaf(Some("a".into()))); + xor.children.push(Node::new_leaf(Some("b".into()))); + xor + }; + + let mut outer = Operator::new(OperatorType::Sequence); + outer.children.push(Node::Operator(make_inner())); + outer.children.push(Node::new_leaf(Some("c".into()))); + + let pt = ProcessTree::new(Node::Operator(outer)).fold(); + + let mut expected_outer = Operator::new(OperatorType::Sequence); + expected_outer.children.push(Node::Operator(make_inner())); + expected_outer.children.push(Node::new_leaf(Some("c".into()))); + assert_eq!(pt.root, Node::Operator(expected_outer)); + } + + #[test] + fn fold_does_not_merge_loop() { + // LOOP(LOOP(a, tau), tau) — Loop is not associative, must stay unchanged. + let make_inner = || { + let mut lp = Operator::new(OperatorType::Loop); + lp.children.push(Node::new_leaf(Some("a".into()))); + lp.children.push(Node::new_leaf(None)); + lp + }; + + let mut outer = Operator::new(OperatorType::Loop); + outer.children.push(Node::Operator(make_inner())); + outer.children.push(Node::new_leaf(None)); + + let pt = ProcessTree::new(Node::Operator(outer)).fold(); + + let mut expected = Operator::new(OperatorType::Loop); + expected.children.push(Node::Operator(make_inner())); + expected.children.push(Node::new_leaf(None)); + assert_eq!(pt.root, Node::Operator(expected)); + } + + #[test] + fn fold_mixed_tree() { + // SEQ( SEQ(a, b), LOOP(c, tau), SEQ(d, e) ) + // The two SEQ children get merged; the LOOP stays in place. 
+ // Result: SEQ(a, b, LOOP(c, tau), d, e) + let make_loop = || { + let mut lp = Operator::new(OperatorType::Loop); + lp.children.push(Node::new_leaf(Some("c".into()))); + lp.children.push(Node::new_leaf(None)); + lp + }; + + let mut seq1 = Operator::new(OperatorType::Sequence); + seq1.children.push(Node::new_leaf(Some("a".into()))); + seq1.children.push(Node::new_leaf(Some("b".into()))); + + let mut seq2 = Operator::new(OperatorType::Sequence); + seq2.children.push(Node::new_leaf(Some("d".into()))); + seq2.children.push(Node::new_leaf(Some("e".into()))); + + let mut root = Operator::new(OperatorType::Sequence); + root.children.push(Node::Operator(seq1)); + root.children.push(Node::Operator(make_loop())); + root.children.push(Node::Operator(seq2)); + + let pt = ProcessTree::new(Node::Operator(root)).fold(); + + let mut expected = Operator::new(OperatorType::Sequence); + expected.children.push(Node::new_leaf(Some("a".into()))); + expected.children.push(Node::new_leaf(Some("b".into()))); + expected.children.push(Node::Operator(make_loop())); + expected.children.push(Node::new_leaf(Some("d".into()))); + expected.children.push(Node::new_leaf(Some("e".into()))); + assert_eq!(pt.root, Node::Operator(expected)); + } + #[test] fn is_valid_test() { // SEQ() is not valid diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs index 0834592..6eff849 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs @@ -230,7 +230,7 @@ mod test_parallel_cut { let dfg = DirectlyFollowsGraph::discover( &event_log!(["a", "b", "c"], ["b", "a", "c"]) ); - let cut = concurrent_cut(&dfg, &None); + let _cut = concurrent_cut(&dfg, &None); } #[test] diff --git 
a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs index e3178ca..429e597 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs @@ -100,7 +100,7 @@ fn redo_loop_cut<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Vec(dfg: &'a DirectlyFollowsGraph<'_>, _parameters: return None; } - // at this point we could check whether the sequence satisfies the conditions for a strict sequence cut - // if there is more than one sequence block, a cut is found successfully - if sequences.len() > 1 { - Some(Cut::new(OperatorType::Sequence, sequences)) - } else { - None - } + Some(Cut::new(OperatorType::Sequence, sequences)) } #[cfg(test)] diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs index dbb1d0c..bf141e5 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs @@ -134,7 +134,8 @@ pub fn activity_once_per_trace( continue 'activity_loop; } } else { - // activity did not appear in the trace or in the event log at all + // activity did not appear in this trace → condition violated + continue 'activity_loop; } } // at this point the activity has appeared precisely one time in every trace diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs index 2d31d0a..76aee79 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs +++ 
b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs @@ -36,8 +36,7 @@ pub fn inductive_miner_default_parameters(log: EventLog, event_log_classifier: & // uses default parameters while for mining the process tree model let parameters = Parameter::generate_default_parameters(); let node = build_tree(log, event_log_classifier, ¶meters, 0); - // node.fold(); // as default parameters contain to fold the process tree - ProcessTree::new(node) + ProcessTree::new(node).fold() } @@ -114,20 +113,19 @@ fn fallthrough_finder(log: EventLog, event_log_classifier: &EventLogClassifier, node } Fallthrough::ActivityConcurrent(mut node, filtered_out_log, split) => { - // the filtered out log are all the logs containing all traces and therefore all events where the chosen activity occurred - node.add_child(build_tree(filtered_out_log, event_log_classifier, parameters,depth+1)); + // The AND-node already holds the concurrent activity as its first child. + // Build the sub-tree for the extracted (concurrent) activity. 
+ node.add_child(build_tree(filtered_out_log, event_log_classifier, parameters, depth + 1)); - // the split is already performed in the activity concurrent fall through to save one unnecessary find_cut iteration + // The split was already performed inside the fallthrough; add each + // resulting sub-log as a further grand-children let operator_type = split.get_operator().clone(); let split = split.get_own(); - - let mut node = Node::new_operator(operator_type); - // this could be done in parallel - // every event log yields one process node - for log in split{ - // convert every log into one process node catching the behavior - node.add_child(build_tree(log, &event_log_classifier, parameters, depth+1)); + let mut child = Node::new_operator(operator_type); + for log in split { + child.add_child(build_tree(log, event_log_classifier, parameters, depth + 1)); } + node.add_child(child); node } Fallthrough::FlowerModel(node) => { node} // not much to do, this is the default From d3ee472181cd2100ca21038d946b3232188ba069 Mon Sep 17 00:00:00 2001 From: Fabian Sandkuhl Date: Wed, 20 May 2026 15:41:53 +0200 Subject: [PATCH 4/6] deal with multiple taus in fold --- .../process_tree/process_tree_struct.rs | 83 +++++++++++++++---- .../case_centric/inductive_miner_app/mod.rs | 80 ++++++++++++++++++ 2 files changed, 148 insertions(+), 15 deletions(-) diff --git a/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs b/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs index 50f41f2..3d0b06b 100644 --- a/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs +++ b/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs @@ -85,7 +85,7 @@ impl Node { // If the current operator is associative, inline any child // that carries the same operator type. 
- let children = if op.operator_type.is_associative() { + let mut children = if op.operator_type.is_associative() { let mut flattened = Vec::with_capacity(folded_children.len()); for child in folded_children { match child { @@ -105,6 +105,21 @@ impl Node { folded_children }; + // XOR-specific: at most one tau (silent leaf) is semantically + // meaningful as a direct child. Remove duplicates introduced + // when EmptyTraces fallthrough shells are folded upward. + if op.operator_type == OperatorType::ExclusiveChoice { + let tau = Node::Leaf(Leaf { activity_label: LeafLabel::Tau }); + let mut tau_seen = false; + children.retain(|c| { + if *c == tau { + if tau_seen { return false; } + tau_seen = true; + } + true + }); + } + Node::Operator(Operator { operator_type: op.operator_type, children, @@ -114,6 +129,58 @@ impl Node { } } +impl std::fmt::Display for OperatorType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + OperatorType::Sequence => write!(f, "SEQ"), + OperatorType::ExclusiveChoice => write!(f, "XOR"), + OperatorType::Concurrency => write!(f, "AND"), + OperatorType::Loop => write!(f, "LOOP"), + } + } +} + +impl std::fmt::Display for LeafLabel { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LeafLabel::Activity(s) => write!(f, "{s}"), + LeafLabel::Tau => write!(f, "tau"), + } + } +} + +impl std::fmt::Display for Leaf { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.activity_label) + } +} + +impl std::fmt::Display for Node { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Node::Leaf(leaf) => write!(f, "{leaf}"), + Node::Operator(op) => write!(f, "{op}"), + } + } +} + +impl std::fmt::Display for Operator { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}(", self.operator_type)?; + for (i, child) in self.children.iter().enumerate() { + if i > 0 { write!(f, ", 
")?; } + write!(f, "{child}")?; + } + write!(f, ")") + } +} + +impl std::fmt::Display for ProcessTree { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.root) + } +} + /// /// Operator type enum for [`Operator`] /// @@ -167,23 +234,9 @@ impl ProcessTree { /// Folds the process tree by merging nodes whose operator is associative. /// - /// For the associative operators [`Sequence`](OperatorType::Sequence), - /// [`ExclusiveChoice`](OperatorType::ExclusiveChoice), and - /// [`Concurrency`](OperatorType::Concurrency) the following identity holds: - /// - /// ```text - /// OP(OP(a, b), c) ≡ OP(a, b, c) - /// ``` - /// /// The fold is applied recursively bottom-up across the entire tree, so /// arbitrarily deep chains of the same associative operator are fully /// collapsed into a single flat node. - /// - /// [`Loop`](OperatorType::Loop) nodes are **not** folded because their - /// child positions carry different semantic roles. - /// - /// # Returns - /// A new [`ProcessTree`] with all associative operator chains collapsed. pub fn fold(self) -> Self { ProcessTree::new(self.root.fold()) } diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs index 76aee79..aafe7a6 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs @@ -262,3 +262,83 @@ mod tests { assert_eq!(node.root, expected); } } + +/// Integration tests that require external test data files. 
+/// Run with: cargo test sepsis -- --nocapture +#[cfg(test)] +mod integration_tests { + use crate::core::event_data::case_centric::xes::import_xes::import_xes_path; + use crate::core::event_data::case_centric::xes::import_xes::XESImportOptions; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::discovery::case_centric::inductive_miner_app::inductive_miner_default_parameters; + use crate::test_utils::get_test_data_path; + + /// Mine the Sepsis Cases event log and print the resulting process tree + /// in canonical parenthesis notation (SEQ/XOR/AND/LOOP/tau). + /// + /// The output can be compared against pm4py: + /// python3 pm4py_sepsis_tree.py + /// + /// The test asserts: + /// 1. The log can be imported (1050 traces). + /// 2. The discovered tree passes is_valid(). + /// 3. Every activity in the log appears as a leaf in the tree. + #[test] + fn test_sepsis_tree_is_valid_and_print() { + let path = get_test_data_path() + .join("xes") + .join("Sepsis Cases - Event Log.xes.gz"); + + if !path.exists() { + eprintln!( + "Skipping Sepsis integration test: file not found at {:?}.\n\ + Download the test data from https://rwth-aachen.sciebo.de/s/4cvtTU3lLOgtxt1 \ + and place it under process_mining/test_data/xes/", + path + ); + return; + } + + let log = import_xes_path(&path, XESImportOptions::default()) + .expect("Failed to import Sepsis log"); + assert_eq!(log.traces.len(), 1050, "Expected 1050 traces"); + + let classifier = EventLogClassifier::default(); + let tree = inductive_miner_default_parameters(log.clone(), &classifier); + + // --- validity --- + assert!(tree.is_valid(), "Discovered tree is not valid"); + + // --- all activities present --- + let mut expected_activities: std::collections::HashSet = std::collections::HashSet::new(); + for trace in &log.traces { + for event in &trace.events { + expected_activities.insert(classifier.get_class_identity(event)); + } + } + let tree_leaves: std::collections::HashSet = tree + 
.find_all_leaves() + .into_iter() + .filter_map(|l| { + use crate::core::process_models::case_centric::process_tree::process_tree_struct::LeafLabel; + match &l.activity_label { + LeafLabel::Activity(s) => Some(s.clone()), + LeafLabel::Tau => None, + } + }) + .collect(); + + for act in &expected_activities { + assert!( + tree_leaves.contains(act), + "Activity '{act}' is missing from the process tree" + ); + } + + // --- print for manual comparison --- + println!("\n=== Rust Inductive Miner – Sepsis Cases ==="); + println!("Unique activities in log : {}", expected_activities.len()); + println!("Leaves in tree : {}", tree_leaves.len()); + println!("\nProcess tree (canonical):\n{}", tree); + } +} From d2b0687eb11a9aa65cec8b3001486ca54e50e87d Mon Sep 17 00:00:00 2001 From: Fabian Sandkuhl Date: Mon, 1 Jun 2026 10:53:09 +0200 Subject: [PATCH 5/6] Removed ports --- .../cut_finder/concurrent.rs | 148 +----------- .../cut_finder/loop_cut.rs | 94 +------- .../case_centric/inductive_miner_app/mod.rs | 80 ------- .../splits/exclusive_choice.rs | 120 ++-------- .../inductive_miner_app/splits/redo_loop.rs | 93 +------- .../inductive_miner_app/splits/sequence.rs | 110 +-------- .../structures/components.rs | 211 ------------------ .../inductive_miner_app/structures/mod.rs | 1 - 8 files changed, 33 insertions(+), 824 deletions(-) delete mode 100644 process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs index 6eff849..db96c18 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs @@ -1,118 +1,11 @@ //! Utility for detecting a concurrency cut in a given Directly Follows Graph. -//! -//! 
This implementation ports the parallel cut algorithm as implemented in -//! the ProM framework (`InductiveMiner`), originally written in Java. -//! -//! Reference: -//! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: -//! "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." -//! Application of Concurrency to System Design (ACSD), 2013. -//! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven -//! University of Technology, 09.05.2017 -//! - ProM source code: -//! https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/cutfinders/CutFinderIMConcurrent.java use std::borrow::Cow; use std::collections::HashSet; use crate::core::process_models::dfg::DirectlyFollowsGraph; use crate::core::process_models::process_tree::OperatorType; use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; -use crate::discovery::case_centric::inductive_miner_app::structures::components::Components; use crate::discovery::case_centric::inductive_miner_app::structures::minimum_self_distance::MinimumSelfDistance; -// Following the definition of an parallel cut, every element has to be either a starting activity or an end activity. -// Also, every element has to be connected to each other element - like a mesh - -// Example -// / -> A -> B -\ -// START -->| |-> END -// \ -> B -> A -/ -// . - - - -/// Ensures that every resulting component has both start and end activities, -/// a concurrent cut only makes sense if every isolated component can be entered or left independently. -/// -/// To do this, this functions categorizes each connected component into one of four categories: -/// (start & end, start only, end only, neither start nor end). -/// Every not start & end category components is merged with an arbitrary component (here the first one) -/// containing both start & end activities. 
-#[allow(dead_code)] -fn ensure_start_end_in_each<'a>( - dfg: &'a DirectlyFollowsGraph<'_>, - con_components: Vec>>, -) -> Option>>> { - // create for different classes of components - - let mut start_end = Vec::new(); - let mut start = Vec::new(); - let mut end = Vec::new(); - let mut neither = Vec::new(); - - for component in con_components { - let has_start = component - .iter() - .any(|act| dfg.start_activities.contains(act.as_ref())); - let has_end = component - .iter() - .any(|act| dfg.end_activities.contains(act.as_ref())); - - match (has_start, has_end) { - (true, true) => { - // components which have both start and end activities - start_end.push(component); - } - (true, false) => { - // components which contain start and no end activity - start.push(component); - } - (false, true) => { - // components which contains no start but end activities - end.push(component); - } - (false, false) => { - // neither start nor end activities in this components - neither.push(component); - } - } - } - - // no component with start and end -> no parallel cut - if start_end.len() == 0 { - return None; - } - - // Start building final components - let mut result = start_end; - - loop { - match (start.pop(), end.pop()) { - // combine start-only and end-only components - (Some(mut start), Some(end)) => { - start.extend(end); - result.push(start); - } - - (Some(start), None) => { - // add remaining start only components to any component - (&mut result[0]).extend(start); - } - (None, Some(end)) => { - // add remaining end only components to any component - (&mut result[0]).extend(end); - } - (None, None) => { - // add components that have neither start nor end - for component in neither { - (&mut result[0]).extend(component) - } - // no components left -> break the loop - break; - } - } - } - Some(result) -} ///Partitions activities into components, such that activities in different components can occur /// concurrently. 
Two activities are in the same component if they are not bidirectionally reachable. @@ -123,46 +16,7 @@ fn ensure_start_end_in_each<'a>( /// /// # Parameters fn concurrent_cut<'a>(dfg: &'a DirectlyFollowsGraph<'_>, mindist: &Option) -> Option>>> { - let activities: Vec> = dfg.activities.keys().map(|act| Cow::from(act)).collect(); - if activities.is_empty() { - return None; - } - - // merge activities into components (based on which other activities are reachable) - let mut components = Components::new(&activities); - - for (i, act1) in activities.iter().enumerate() { - for (j, act2) in activities.iter().enumerate() { - // do not do that for the same activity - if i < j && !components.same_component(act1, act2) { - // merge only bidirectional activities - let has_a1_a2 = dfg.contains_df_relation((act1.clone(), act2.clone())); - let has_a2_a1 = dfg.contains_df_relation((act2.clone(), act1.clone())); - - if !has_a1_a2 || !has_a2_a1 { - components.merge_components_of(act1, act2); - } - } - } - } - - // optional minimum self distance - if let Some(mindist) = mindist { - for activity1 in activities.iter(){ - if let Some(mindist) = mindist.get_minimum_distance(activity1){ - for activity2 in &mindist.1{ - components.merge_components_of(activity1, activity2.as_str()); - } - } - } - } - - let components = components.get_components(); - if components.len() > 1 { - ensure_start_end_in_each(dfg, components) - } else { - None - } + todo!() } diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs index 429e597..33d4631 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs @@ -1,24 +1,10 @@ //! Utility for detecting a loop cut in a Directly Follows Graph. -//! -//! -//! # Implementation Notes -//! 
This implementation ports the Loop cut finder algorithm as implemented in -//! the ProM framework (`InductiveMiner`), originally written in Java. -//! -//! Reference: -//! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: -//! "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." -//! Application of Concurrency to System Design (ACSD), 2013. -//! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven -//! University of Technology, 09.05.2017 -//! - ProM source code: -//! https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/cutfinders/CutFinderIMLoop.java + use std::borrow::Cow; use std::collections::HashSet; use crate::core::process_models::dfg::DirectlyFollowsGraph; use crate::core::process_models::process_tree::OperatorType; use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; -use crate::discovery::case_centric::inductive_miner_app::structures::components::Components; /// Attempts to find a loop cut in a given Directly Follows Graph (DFG). 
@@ -40,83 +26,7 @@ use crate::discovery::case_centric::inductive_miner_app::structures::components: /// # Panic /// Panics if the dfg contains no start activity fn redo_loop_cut<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Vec>> { - // activities - let nodes: Vec> = dfg.activities.iter().map(|(act, _)| Cow::from(act)).collect(); - let mut components = Components::new(&nodes); - - // start element as pivot element -> safe unwrap as there has to be at least one start element - let pivot = dfg.start_activities.iter().next().unwrap(); - for start in &dfg.start_activities { - components.merge_components_of(pivot, start); - } - for end in &dfg.end_activities { - components.merge_components_of(pivot, end); - } - - // merge inner components - for ((v0, v1), _) in &dfg.directly_follows_relations { - let v0_is_start = dfg.start_activities.contains(v0.as_ref()); - let v0_is_end = dfg.end_activities.contains(v0.as_ref()); - let v1_is_start = dfg.start_activities.contains(v1.as_ref()); - - if !v0_is_start { - if !v0_is_end { - if !v1_is_start { - components.merge_components_of(v0, v1); - } - } - } else if v0_is_end { - components.merge_components_of(v0, v1); - } - } - - // create sub end and sub start activities - let mut sub_end_activities = HashSet::new(); - let mut sub_start_activities = HashSet::new(); - - // sort edges into components - for ((v0, v1), _) in &dfg.directly_follows_relations { - if components.same_component(&v0, &v1) { - sub_start_activities.insert(v0); - sub_end_activities.insert(v1); - } - } - - // check if sub-end-activities are connected to all start activities - for sub_end in sub_end_activities { - for start in &dfg.start_activities { - if components.same_component(sub_end, start) { - break; - } - if !dfg.contains_df_relation((sub_end.clone(), start.into())) { - components.merge_components_of(sub_end, start); - break; - } - } - } - - for sub_start in sub_start_activities { - for end_activity in dfg.end_activities.iter() { - if 
components.same_component(&sub_start, &end_activity) { - break; - } - if !dfg.contains_df_relation((end_activity.clone(), sub_start.to_string())) { - components.merge_components_of(sub_start, end_activity); - break; - } - } - } - - // reorder so that pivot comes first - let mut partition = components.get_components(); - let pivot = Cow::Owned(pivot.to_string()); - if let Some(pos) = partition.iter().position(|set| set.contains(&pivot)) { - partition.swap(0, pos); - } - - partition - - // check whether those sub component belongs to the do or the redo + todo!() } /// Attempts to find a Loop cut in a given DFG. diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs index aafe7a6..76aee79 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs @@ -262,83 +262,3 @@ mod tests { assert_eq!(node.root, expected); } } - -/// Integration tests that require external test data files. -/// Run with: cargo test sepsis -- --nocapture -#[cfg(test)] -mod integration_tests { - use crate::core::event_data::case_centric::xes::import_xes::import_xes_path; - use crate::core::event_data::case_centric::xes::import_xes::XESImportOptions; - use crate::core::event_data::case_centric::EventLogClassifier; - use crate::discovery::case_centric::inductive_miner_app::inductive_miner_default_parameters; - use crate::test_utils::get_test_data_path; - - /// Mine the Sepsis Cases event log and print the resulting process tree - /// in canonical parenthesis notation (SEQ/XOR/AND/LOOP/tau). - /// - /// The output can be compared against pm4py: - /// python3 pm4py_sepsis_tree.py - /// - /// The test asserts: - /// 1. The log can be imported (1050 traces). - /// 2. The discovered tree passes is_valid(). - /// 3. Every activity in the log appears as a leaf in the tree. 
- #[test] - fn test_sepsis_tree_is_valid_and_print() { - let path = get_test_data_path() - .join("xes") - .join("Sepsis Cases - Event Log.xes.gz"); - - if !path.exists() { - eprintln!( - "Skipping Sepsis integration test: file not found at {:?}.\n\ - Download the test data from https://rwth-aachen.sciebo.de/s/4cvtTU3lLOgtxt1 \ - and place it under process_mining/test_data/xes/", - path - ); - return; - } - - let log = import_xes_path(&path, XESImportOptions::default()) - .expect("Failed to import Sepsis log"); - assert_eq!(log.traces.len(), 1050, "Expected 1050 traces"); - - let classifier = EventLogClassifier::default(); - let tree = inductive_miner_default_parameters(log.clone(), &classifier); - - // --- validity --- - assert!(tree.is_valid(), "Discovered tree is not valid"); - - // --- all activities present --- - let mut expected_activities: std::collections::HashSet = std::collections::HashSet::new(); - for trace in &log.traces { - for event in &trace.events { - expected_activities.insert(classifier.get_class_identity(event)); - } - } - let tree_leaves: std::collections::HashSet = tree - .find_all_leaves() - .into_iter() - .filter_map(|l| { - use crate::core::process_models::case_centric::process_tree::process_tree_struct::LeafLabel; - match &l.activity_label { - LeafLabel::Activity(s) => Some(s.clone()), - LeafLabel::Tau => None, - } - }) - .collect(); - - for act in &expected_activities { - assert!( - tree_leaves.contains(act), - "Activity '{act}' is missing from the process tree" - ); - } - - // --- print for manual comparison --- - println!("\n=== Rust Inductive Miner – Sepsis Cases ==="); - println!("Unique activities in log : {}", expected_activities.len()); - println!("Leaves in tree : {}", tree_leaves.len()); - println!("\nProcess tree (canonical):\n{}", tree); - } -} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs 
b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs index b02d41a..07eab43 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs @@ -1,18 +1,5 @@ //! Utility for splitting an event log according to an exclusive choice cut. -//! -//! -//! # Implementation Notes -//! This implementation adopts the xor-split algorithm as implemented in -//! the ProM framework (`InductiveMiner`), originally written in Java. -//! -//! Reference: -//! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: -//! "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." -//! Application of Concurrency to System Design (ACSD), 2013. -//! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven -//! University of Technology, 09.05.2017 -//! - ProM source code: -//! https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/logsplitter/LogSplitterXorFiltering.java + use std::collections::HashMap; use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::OperatorType::ExclusiveChoice; @@ -20,115 +7,44 @@ use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; use crate::EventLog; - - - /// This functions splits an event log according to a provided valid xor cut. -/// -/// # Parameters -/// - 'log': the event log to split -/// - 'activity_classifier': the classifier to identify the activities in the events -/// - 'cut': the previously found sequence cut (check the operator) -/// -/// # Returns -/// - Some(Split) containing as many logs as the number of partitions in the split. 
-/// - None if the cut was not a sequence cut nor valid pub fn xor_split<'a>(log: &EventLog, activity_classifier: &EventLogClassifier, cut: Cut<'a>) -> Option { if cut.get_operator() != ExclusiveChoice || cut.is_empty() { - // if this is not the demanded operator, return none return None; } + let k = cut.len(); - - // get partitions from cut let partition = cut.get_own(); + let mut result: Vec = vec![log.clone_without_traces(); k]; - // According to the pseudocode in "Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven - // University of Technology, 09.05.2017" the algorithm splits the log into several sublogs, by only adding the trace t_i to the sublog L_i - // if the partition p_i contains all events of t_i - - // assume a cut / partitions like {{A,B}, {C}, {D},{E}} - - // assign every activity an index for faster access later - activites in the same partition get the same index - // if the assumed cut is used, you would get a map like - //{ #activity -> index - // A -> 0, - // B -> 0, - // C -> 1, - // D -> 2, - // E -> 2 - // } - // - let mut activity_partition_idx_map = HashMap::new(); - for (idx, activity_set) in partition.iter().enumerate() { - for act in activity_set{ - // every unique activity gets another index - activity_partition_idx_map.insert(act.clone(), idx); - } - } - - // produce result vector with k empty logs - let mut result: Vec = (0..k).map(|_| EventLog::new()).collect(); - - // iterate over every tracce, for the example assume a trace [A,A, B, A, B,B] - for trace in log.traces.iter(){ + for trace in log.traces.iter() { let mut counts = vec![0usize; k]; - //count incidents of activities within a partition of the trace - // for the example trace above we would get a counts-vec : [6,0,0] as all events occur in the - // very first partition, the latter partitions contain no activity which occurs here - for event in trace.events.iter(){ + for event in trace.events.iter() { let activity = 
activity_classifier.get_class_identity(event); - if let Some(idx) = activity_partition_idx_map.get(activity.as_str()){ - if *idx >= counts.len(){ - eprintln!("Length matches exactly index! index: {}, counts: {:?}\n activity: {}\n map{:?} ", *idx,counts, activity, activity_partition_idx_map); - } - counts[*idx] += 1; + if let Some(idx) = partition.iter().position(|p| p.contains(activity.as_str())) { + counts[idx] += 1; } } - // get the partition, which contains the maximum occurrences in count - // for the example it is the partition at index 0 in count as 6 > 0 - let max = if trace.events.is_empty(){ + let target_idx = if trace.events.is_empty() { None } else { - let mut max_idx = 0; // index of activity having most incidents - let mut max_val = 0; // actual activity with most incidents - - for (i, count) in counts.iter().enumerate(){ - // a tie within the same trace should not occur, because this is a xor cut (maybe in noisy loops??) - if *count > max_val{ - max_val = *count; - max_idx =i; - } - } - - Some(max_idx) + counts.iter().enumerate().max_by_key(|&(_, count)| count).map(|(i, _)| i) }; - // build new sublog - iterate over all indexes, to keep empty traces in every possible sublog, if there is one - for sublog_idx in 0..k{ // iterate over partition size - - // only do this - if let Some(winning_partition) = max { - if winning_partition != sublog_idx { - // remove trace from this sublog + for sublog_idx in 0..k { + if let Some(winning) = target_idx { + if winning != sublog_idx { continue; - }// else we got the index of the activity within the trace which appears mostly - } // else trace is empty (max == None) - - let mut new_trace = trace.clone(); // clone current trace - // Filter events: keep only those - new_trace.events.retain(|e| { - // keep only the events of the trace, which appear in the winning partition - if let Some(act_idx) = activity_partition_idx_map.get(activity_classifier.get_class_identity(e).as_str()){ - sublog_idx == *act_idx - } else { - 
false } + } + + let mut new_trace = trace.clone(); + new_trace.events.retain(|e| { + let act = activity_classifier.get_class_identity(e); + partition[sublog_idx].contains(act.as_str()) }); - - // push new trace to trace vec result[sublog_idx].traces.push(new_trace); } } diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs index ac1aa04..40928bc 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs @@ -1,18 +1,4 @@ //! Utility for splitting a log according to a loop cut -//! -//! -//! # Implementation Notes -//! This implementation adopts the loop-splitting algorithm as implemented in -//! the ProM framework (`InductiveMiner`), originally written in Java. -//! -//! Reference: -//! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: -//! "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." -//! Application of Concurrency to System Design (ACSD), 2013. -//! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven -//! University of Technology, 09.05.2017 -//! - ProM source code: -//! https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/logsplitter/LogSplitterLoop.java use std::collections::HashMap; use crate::EventLog; @@ -20,89 +6,18 @@ use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::OperatorType::Loop; use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; -/// Splits an event log according to the partition of a Loop-cut. 
-/// -/// Recall that a loop cut identifies a structure consisting of a main body (do-part) and at least one redo part. -/// The partitions of the cut represent activity sets that belong to different segments of the loop structure. -/// The first partition belongs to the do segment. -/// -/// Creates one sub log for each partition in the cut -/// Iterates over every trace, grouping activities to the same sub trace as long as they belong to the same partition. -/// If a partition changes the current sub trace is finalized and added to the sub log -/// -/// # Returns -/// Some(split) containing filtered traces -/// None if the cut is not a valid loop cut -/// -/// # Notes -/// - number of traces in each sublog may differ -/// - event order is preserved -/// - activities not encountered in any partition are being ignored +/// Splits an event log according to the partition of a Loop-cut. pub fn loop_split<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut<'a>) -> Option { if Loop != cut.get_operator() { return None; } - // Prologue - preparations + let k = cut.len(); - let mut result: Vec = Vec::with_capacity(k); - - // Create empty sublogs - for _ in 0..k { - result.push(log.clone_without_traces()); - } - // get partitions + let mut result: Vec = vec![log.clone_without_traces(); k]; let partitions = cut.get_own(); - // Pre-map activities to partition index for fast lookup - just transfer activity to index of set - let mut activity_to_log_map = HashMap::new(); - for (i, part) in partitions.iter().enumerate() { - // at least two partitions, if more loops there can be more - for a in part { - activity_to_log_map.insert(a.clone(), i); - } - } - - // iterate over each trace of the original log - for trace in &log.traces { - //each sublogs gets one clean trace - let mut sub_trace = trace.clone_without_events(); - - let mut last_partition: Option = None; // init too None to signal the start of a new trace - - for event in &trace.events { - let activity = 
classifier.get_class_identity(event); - - // get the log index / the index of the partition the activity is part of (exactly one partition) - let Some(log_index) = activity_to_log_map.get(activity.as_str()) else { - eprintln!("Encountered unexpeceted activity {} in loop splitter using the following cut {:?}: on event log.", activity, partitions); - // if the activity is not in the block, this means that it's not part of the loop - it shouldn't be in here - continue; - }; - - if last_partition.is_some() && last_partition.unwrap() != *log_index { - // if the last partition is not the same as in the block index of the current activity, - // we need to create a new sub_trace and push the last one to the existing ones - - // as last_partition is some, we can just push the trace to the result log index at last partiton - result[last_partition.unwrap()].traces.push(sub_trace); - sub_trace = trace.clone_without_events(); - } - // At the current state, the event belongs to the subtrace of the log_index which - - // push current activity to sub_trace of block_index sublog - sub_trace.events.push(event.clone()); - // update the last partition - last_partition = Some(*log_index); - } - // at this point we have a sub_trace which is empty or contains at least one element, - // if the last_partition variable is set, there is at least one element in the log - if last_partition.is_some() { - result[last_partition.unwrap()].traces.push(sub_trace); - } else { - // trace is empty, nothing to do - } - } + todo!(); Some(Split::new(Loop, result)) } diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs index f1fef4b..184cea1 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs @@ -1,8 +1,5 @@ //! 
Utility for resolving sequence cuts into sequence splits. //! -//! # Implementation Notes -//! Port of the sequence-split algorithm as implemented in -//! the ProM framework (`InductiveMiner`), originally written in Java. //! //! # Reference: //! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: @@ -10,53 +7,14 @@ //! Application of Concurrency to System Design (ACSD), 2013. //! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven //! University of Technology, 09.05.2017 -//! - ProM source code: -//! https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/framework/logsplitter/LogSplitterSequenceFiltering.javang + use std::borrow::Cow; -use std::collections::HashSet; -use std::ops::Deref; -use crate::core::event_data::case_centric::{EventLogClassifier, Trace}; +use crate::core::event_data::case_centric::EventLogClassifier; use crate::core::process_models::process_tree::OperatorType::Sequence; use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; use crate::EventLog; -/// Finds the postion inside a trace for a split, that most strongly matches a given activity partition. 
-fn find_optimal_split( - trace: &Trace, - partition: &HashSet>, - start_pos: usize, - ignore: &HashSet, - classifier: &EventLogClassifier, -) -> usize { - let mut position_least_cost = start_pos; // default - let mut least_cost = 0; - let mut cost: i32 = 0; - let mut position = start_pos; - - // iterate through events of trace from start position to end - while position < trace.events.len() { - // get string activity attribute - let activity = classifier.get_class_identity(&trace.events[position]); - - if ignore.contains(&activity) { - // skip: contributes nothing to cost - } else if partition.contains(activity.as_str()) { - // decrease cost - cost -= 1; - } else { - cost += 1; - } - - position += 1; - - if cost < least_cost { - least_cost = cost; - position_least_cost = position; - } - } - position_least_cost -} /// Splits an event log according to the partitions of a sequence cut. /// /// # Returns @@ -67,69 +25,17 @@ pub fn sequence_split<'a>( activity_classifier: &EventLogClassifier, cut: Cut<'a>, ) -> Option { - - if cut.get_operator() != Sequence{ + if cut.get_operator() != Sequence { return None; } - // create results vec with empty event logs - let k = cut.len(); - let mut result: Vec = Vec::with_capacity(k); - - for _ in 0..k { - // clone log structure - safe attributes of log and traces, but without events - let mut sub_log = log.clone_without_traces(); - for trace in &log.traces { - sub_log.traces.push(trace.clone_without_events()); - } - result.push(sub_log); - } - // get partitions + let k = cut.len(); let partitions = cut.get_own(); - for (trace_idx, trace) in log.traces.iter().enumerate() { - let mut curr_position = 0; - let mut ignore: HashSet = HashSet::new(); - - for (partition_idx, partition) in partitions.iter().enumerate() { - let new_postion = if partition_idx + 1 < k { - find_optimal_split( - trace, - partition, - curr_position, - &ignore, - activity_classifier, - ) - } else { - // only last partition gets here, it must finish the trace - 
trace.events.len() - }; - - // for positions in range [curr_postion, new_position) copy events that belong to the partition + + // Create k empty sublogs + let mut result: Vec = vec![log.clone_without_traces(); k]; - if new_postion > curr_position { - // destination trace in result[i] for trace_idx - let dest_trace = &mut result[partition_idx].traces[trace_idx]; - - for pos in curr_position..new_postion { - // get trace and retrieve activity - let event = &trace.events[pos]; - let activity = activity_classifier.get_class_identity(event); - - if partition.contains(activity.as_str()) { - dest_trace.events.push(event.clone()); - } - } - } - - // add events from current partition to ignore set - for act in partition { - ignore.insert(act.deref().to_string()); - } - - // update position - curr_position = new_postion; - } - } + todo!(); Some(Split::new(Sequence, result)) } diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs deleted file mode 100644 index 9042f8e..0000000 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/components.rs +++ /dev/null @@ -1,211 +0,0 @@ -//! Component management utilities. -//! -//! This module provides a lightweight structure for maintaining connected components (partitions) of -//! a set of nodes. Each node belongs to exactly one component and components can be merged dynamically. -//! -//! The structure is primarily used to represent partitions of activities during algorithms for finding -//! cuts in event logs. -//! -//! # Implementation notes -//! -//! This is a port of the component structure implementation in the ProM framework ((`InductiveMiner`), originally written in Java. -//! - ProM source code: -//! 
https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/helperclasses/graphs/IntComponents.java - -/// Port of the component structure implementation in -/// the ProM framework (`InductiveMiner`), originally written in Java. -/// -/// - ProM source code: -/// https://github.com/promworkbench/InductiveMiner/blob/main/src/org/processmining/plugins/inductiveminer2/helperclasses/graphs/IntComponents.java - - -use std::borrow::Cow; -use std::collections::{HashMap, HashSet}; - - -/// Maintains a partition of nodes into components. -/// -/// Each node belongs to exactly one component. Components can be merged, queried and converted back -/// into explicit sets of nodes. -/// -/// Internally, nodes are mapped to integer indices to allow for efficient component operations. -#[derive(Debug)] -pub struct Components<'a> { - components: Vec, // component index of each node, get node index from map - node2index: HashMap, usize>, // index of every node in components - number_of_components: usize, -} - - -impl<'a> Components<'a> { - - /// Creates a new component structure where each node initially forms its own component. - pub fn new(nodes: &[Cow<'a, str>]) -> Self { - let mut node2index = HashMap::new(); - // every node gets it own index in the beginning - for (i, n) in nodes.iter().enumerate() { - // clone is very cheap if cow is borrowed - node2index.insert(n.clone(), i); - } - - let len = nodes.len(); - Components { - components: (0..len).collect(), - node2index, - number_of_components: len, - } - } - - - /// Returns the component index of a given node. - /// - /// Panics if the node is not contained in the component structure. - pub fn component_of(&self, node: &str) -> usize { - self.components[self.node2index[node]] - } - - /// Returns whether the nodes 'a' and 'b' are in the same component. 
- pub fn same_component(&self, a: &str, b: &str) -> bool { - self.component_of(a) == self.component_of(b) - } - - /// Merges the components containing the nodes 'a' and 'b'. - /// - /// If both nodes already are in the same component, the structure remains unchanged. - pub fn merge_components_of(&mut self, a: &str, b: &str) { - let ca = self.component_of(a); - let cb = self.component_of(b); - self.merge_components(ca, cb); - } - - /// Merge two components identified by their indices. - /// - /// All nodes belonging to the component 'ca' are reassigned to the component 'cb'. - pub fn merge_components(&mut self, ca: usize, cb: usize) { - if ca == cb { - return; - } - let mut changed = false; - for comp in self.components.iter_mut() { - if *comp == ca { - *comp = cb; - changed = true; - } - } - if changed { - self.number_of_components -= 1; - } - } - - /// Returns the current partitioning of nodes as explicit sets. - /// - /// Each element of the returned vector represents a component containing the nodes belonging - /// to that component. 
- pub fn get_components(&self) -> Vec>> { - let mut result: Vec>> = Vec::new(); - let mut map: HashMap = HashMap::new(); - let mut next_idx = 0; - - // assign normalized indexes - for comp in &self.components { - if !map.contains_key(comp) { - map.insert(*comp, next_idx); - result.push(HashSet::new()); - next_idx += 1; - } - } - - // fill components - for (node, idx) in &self.node2index { - let comp = self.components[*idx]; - let part = map[&comp]; - result[part].insert(node.clone()); - } - result - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::collections::HashSet; - - #[test] - fn test_initial_components() { - let nodes = vec!["A".into(), "B".into(), "C".into()]; - let c = Components::new(&nodes); - - assert_eq!(c.number_of_components, 3); - assert!(!c.same_component("A", "B")); - assert!(!c.same_component("B", "C")); - assert!(!c.same_component("A", "C")); - } - - #[test] - fn test_simple_merge() { - let nodes = vec!["A".into(), "B".into(), "C".into()]; - let mut c = Components::new(&nodes); - - c.merge_components_of("A", "B"); - - assert!(c.same_component("A", "B")); - assert!(!c.same_component("A", "C")); - - assert_eq!(c.number_of_components, 2); - } - - #[test] - fn test_chain_merge() { - let nodes = vec!["A".into(), "B".into(), "C".into(), "D".into()]; - let mut c = Components::new(&nodes); - - c.merge_components_of("A", "B"); - c.merge_components_of("B", "C"); - - // All A,B,C should be in the same component - assert!(c.same_component("A", "C")); - assert!(c.same_component("A", "B")); - assert!(c.same_component("B", "C")); - - // D remains separate - assert!(!c.same_component("A", "D")); - - assert_eq!(c.number_of_components, 2); - } - - #[test] - fn test_merge_same_component_does_not_decrease_count() { - let nodes = vec!["A".into(), "B".into()]; - let mut c = Components::new(&nodes); - - c.merge_components_of("A", "B"); - assert_eq!(c.number_of_components, 1); - - // merging again should not decrease further - c.merge_components_of("A", 
"B"); - assert_eq!(c.number_of_components, 1); - } - - #[test] - fn test_get_components() { - let nodes = vec!["A".into(), "B".into(), "C".into(), "D".into()]; - let mut c = Components::new(&nodes); - - c.merge_components_of("A", "B"); - c.merge_components_of("C", "D"); - - let comps = c.get_components(); - - // each component should have 2 elements - let mut sets: Vec>> = comps.into_iter().collect(); - sets.sort_by_key(|s| s.len()); - - assert_eq!(sets.len(), 2); - - let first = &sets[0]; - let second = &sets[1]; - - assert!(first.contains("A") && first.contains("B") || first.contains("C") && first.contains("D")); - assert!(second.contains("A") && second.contains("B") || second.contains("C") && second.contains("D")); - } -} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs index 1c3eb51..7a8aa5d 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs @@ -1,4 +1,3 @@ //! This module contains additional structures needed for the implementation of the Inductive Miner. 
pub mod parameter; -pub mod components; pub mod minimum_self_distance; \ No newline at end of file From c76e1f4bb8ec76daa9f1b4416a04c1c637d08b86 Mon Sep 17 00:00:00 2001 From: Fabian Sandkuhl Date: Mon, 1 Jun 2026 10:58:51 +0200 Subject: [PATCH 6/6] xor-split to be safe --- .../splits/exclusive_choice.rs | 31 +------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs index 07eab43..bcc823c 100644 --- a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs @@ -17,37 +17,8 @@ pub fn xor_split<'a>(log: &EventLog, activity_classifier: &EventLogClassifier, c let partition = cut.get_own(); let mut result: Vec = vec![log.clone_without_traces(); k]; - for trace in log.traces.iter() { - let mut counts = vec![0usize; k]; + todo!(); - for event in trace.events.iter() { - let activity = activity_classifier.get_class_identity(event); - if let Some(idx) = partition.iter().position(|p| p.contains(activity.as_str())) { - counts[idx] += 1; - } - } - - let target_idx = if trace.events.is_empty() { - None - } else { - counts.iter().enumerate().max_by_key(|&(_, count)| count).map(|(i, _)| i) - }; - - for sublog_idx in 0..k { - if let Some(winning) = target_idx { - if winning != sublog_idx { - continue; - } - } - - let mut new_trace = trace.clone(); - new_trace.events.retain(|e| { - let act = activity_classifier.get_class_identity(e); - partition[sublog_idx].contains(act.as_str()) - }); - result[sublog_idx].traces.push(new_trace); - } - } Some(Split::new(ExclusiveChoice, result)) }