diff --git a/process_mining/.idea/.gitignore b/process_mining/.idea/.gitignore new file mode 100644 index 0000000..ab1f416 --- /dev/null +++ b/process_mining/.idea/.gitignore @@ -0,0 +1,10 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Ignored default folder with query files +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/process_mining/.idea/vcs.xml b/process_mining/.idea/vcs.xml new file mode 100644 index 0000000..6c0b863 --- /dev/null +++ b/process_mining/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs b/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs index 543f39e..3d0b06b 100644 --- a/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs +++ b/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs @@ -14,7 +14,7 @@ pub enum LeafLabel { /// /// Node in a process tree /// -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, PartialEq)] pub enum Node { /// Operator node of a process tree Operator(Operator), @@ -64,12 +64,127 @@ impl Node { Node::Leaf(_) => true, } } + + /// Recursively folds this node by merging children that share the same + /// associative operator into the current node. + /// + /// The fold is applied **bottom-up**: children are folded first, and only + /// then the current node checks whether any of its (now-folded) children + /// can be inlined. + /// + /// **Example** – `SEQ(SEQ(a, b), c)` becomes `SEQ(a, b, c)`. + /// + /// Leaf nodes are returned unchanged. + pub fn fold(self) -> Self { + match self { + Node::Leaf(_) => self, + Node::Operator(op) => { + // Recursively fold all children first (bottom-up). 
+ let folded_children: Vec = + op.children.into_iter().map(|child| child.fold()).collect(); + + // If the current operator is associative, inline any child + // that carries the same operator type. + let mut children = if op.operator_type.is_associative() { + let mut flattened = Vec::with_capacity(folded_children.len()); + for child in folded_children { + match child { + Node::Operator(ref inner) + if inner.operator_type == op.operator_type => + { + // Consume the child and move its children up. + if let Node::Operator(inner) = child { + flattened.extend(inner.children); + } + } + other => flattened.push(other), + } + } + flattened + } else { + folded_children + }; + + // XOR-specific: at most one tau (silent leaf) is semantically + // meaningful as a direct child. Remove duplicates introduced + // when EmptyTraces fallthrough shells are folded upward. + if op.operator_type == OperatorType::ExclusiveChoice { + let tau = Node::Leaf(Leaf { activity_label: LeafLabel::Tau }); + let mut tau_seen = false; + children.retain(|c| { + if *c == tau { + if tau_seen { return false; } + tau_seen = true; + } + true + }); + } + + Node::Operator(Operator { + operator_type: op.operator_type, + children, + }) + } + } + } +} + +impl std::fmt::Display for OperatorType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + OperatorType::Sequence => write!(f, "SEQ"), + OperatorType::ExclusiveChoice => write!(f, "XOR"), + OperatorType::Concurrency => write!(f, "AND"), + OperatorType::Loop => write!(f, "LOOP"), + } + } +} + +impl std::fmt::Display for LeafLabel { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LeafLabel::Activity(s) => write!(f, "{s}"), + LeafLabel::Tau => write!(f, "tau"), + } + } +} + +impl std::fmt::Display for Leaf { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.activity_label) + } +} + +impl std::fmt::Display for Node { + fn fmt(&self, f: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Node::Leaf(leaf) => write!(f, "{leaf}"), + Node::Operator(op) => write!(f, "{op}"), + } + } +} + +impl std::fmt::Display for Operator { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}(", self.operator_type)?; + for (i, child) in self.children.iter().enumerate() { + if i > 0 { write!(f, ", ")?; } + write!(f, "{child}")?; + } + write!(f, ")") + } +} + +impl std::fmt::Display for ProcessTree { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.root) + } } /// /// Operator type enum for [`Operator`] /// -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)] pub enum OperatorType { /// Sequence operator Sequence, @@ -81,6 +196,25 @@ pub enum OperatorType { Loop, } +impl OperatorType { + /// Returns `true` if this operator is associative. + /// + /// The associative operators are [`Sequence`](OperatorType::Sequence), + /// [`ExclusiveChoice`](OperatorType::ExclusiveChoice), and + /// [`Concurrency`](OperatorType::Concurrency). The [`Loop`](OperatorType::Loop) + /// operator is **not** associative because its first child (the body) and + /// subsequent children (the redo / exit branches) carry different semantic + /// roles, so merging nested loops would change the language. + pub fn is_associative(self) -> bool { + matches!( + self, + OperatorType::Sequence + | OperatorType::ExclusiveChoice + | OperatorType::Concurrency + ) + } +} + /// /// Object-centric process tree struct that contains [`Node`] as root /// @@ -98,6 +232,15 @@ impl ProcessTree { Self { root } } + /// Folds the process tree by merging nodes whose operator is associative. + /// + /// The fold is applied recursively bottom-up across the entire tree, so + /// arbitrarily deep chains of the same associative operator are fully + /// collapsed into a single flat node. 
+ pub fn fold(self) -> Self { + ProcessTree::new(self.root.fold()) + } + /// /// Returns `true` if all nodes have the right number of children and if all operators have /// eventually descendants that are leaves. @@ -176,6 +319,7 @@ impl ProcessTree { /// An operator node in a process tree /// #[derive(Debug, Serialize, Deserialize)] +#[derive(PartialEq)] pub struct Operator { /// The [`OperatorType`] of the tree itself pub operator_type: OperatorType, @@ -221,6 +365,7 @@ impl Operator { /// /// A leaf in a process tree /// +#[derive(PartialEq)] pub struct Leaf { /// The silent or non-silent activity label [`LeafLabel`] pub activity_label: LeafLabel, @@ -250,6 +395,190 @@ mod tests { Leaf, Node, Operator, OperatorType, ProcessTree, }; + // ── folding tests ──────────────────────────────────────────────────────── + + #[test] + fn fold_flat_sequence_unchanged() { + // SEQ(a, b, c) has no nested SEQ — the tree must be returned as-is. + let mut seq = Operator::new(OperatorType::Sequence); + seq.children.push(Node::new_leaf(Some("a".into()))); + seq.children.push(Node::new_leaf(Some("b".into()))); + seq.children.push(Node::new_leaf(Some("c".into()))); + let pt = ProcessTree::new(Node::Operator(seq)).fold(); + + let mut expected = Operator::new(OperatorType::Sequence); + expected.children.push(Node::new_leaf(Some("a".into()))); + expected.children.push(Node::new_leaf(Some("b".into()))); + expected.children.push(Node::new_leaf(Some("c".into()))); + assert_eq!(pt.root, Node::Operator(expected)); + } + + #[test] + fn fold_nested_sequence() { + // SEQ(SEQ(a, b), c) → SEQ(a, b, c) + let mut inner = Operator::new(OperatorType::Sequence); + inner.children.push(Node::new_leaf(Some("a".into()))); + inner.children.push(Node::new_leaf(Some("b".into()))); + + let mut outer = Operator::new(OperatorType::Sequence); + outer.children.push(Node::Operator(inner)); + outer.children.push(Node::new_leaf(Some("c".into()))); + + let pt = ProcessTree::new(Node::Operator(outer)).fold(); + + let 
#[test]
fn fold_deeply_nested_sequence() {
    // SEQ(SEQ(SEQ(a, b), c), d) must collapse to SEQ(a, b, c, d).
    let leaf = |label: &str| Node::new_leaf(Some(label.into()));
    let seq = |children: Vec<Node>| {
        let mut op = Operator::new(OperatorType::Sequence);
        op.children.extend(children);
        Node::Operator(op)
    };

    let innermost = seq(vec![leaf("a"), leaf("b")]);
    let middle = seq(vec![innermost, leaf("c")]);
    let outer = seq(vec![middle, leaf("d")]);

    let folded = ProcessTree::new(outer).fold();

    let expected = seq(vec![leaf("a"), leaf("b"), leaf("c"), leaf("d")]);
    assert_eq!(folded.root, expected);
}
expected.children.push(Node::new_leaf(Some("c".into()))); + assert_eq!(pt.root, Node::Operator(expected)); + } + + #[test] + fn fold_concurrency_nested() { + // AND(AND(a, b), c) → AND(a, b, c) + let mut inner = Operator::new(OperatorType::Concurrency); + inner.children.push(Node::new_leaf(Some("a".into()))); + inner.children.push(Node::new_leaf(Some("b".into()))); + + let mut outer = Operator::new(OperatorType::Concurrency); + outer.children.push(Node::Operator(inner)); + outer.children.push(Node::new_leaf(Some("c".into()))); + + let pt = ProcessTree::new(Node::Operator(outer)).fold(); + + let mut expected = Operator::new(OperatorType::Concurrency); + expected.children.push(Node::new_leaf(Some("a".into()))); + expected.children.push(Node::new_leaf(Some("b".into()))); + expected.children.push(Node::new_leaf(Some("c".into()))); + assert_eq!(pt.root, Node::Operator(expected)); + } + + #[test] + fn fold_does_not_merge_different_operators() { + // SEQ(XOR(a, b), c) — different operator, must stay unchanged. + // Build two identical XOR nodes: one for the input, one for expected. + let make_inner = || { + let mut xor = Operator::new(OperatorType::ExclusiveChoice); + xor.children.push(Node::new_leaf(Some("a".into()))); + xor.children.push(Node::new_leaf(Some("b".into()))); + xor + }; + + let mut outer = Operator::new(OperatorType::Sequence); + outer.children.push(Node::Operator(make_inner())); + outer.children.push(Node::new_leaf(Some("c".into()))); + + let pt = ProcessTree::new(Node::Operator(outer)).fold(); + + let mut expected_outer = Operator::new(OperatorType::Sequence); + expected_outer.children.push(Node::Operator(make_inner())); + expected_outer.children.push(Node::new_leaf(Some("c".into()))); + assert_eq!(pt.root, Node::Operator(expected_outer)); + } + + #[test] + fn fold_does_not_merge_loop() { + // LOOP(LOOP(a, tau), tau) — Loop is not associative, must stay unchanged. 
#[test]
fn fold_mixed_tree() {
    // Input:    SEQ(SEQ(a, b), LOOP(c, tau), SEQ(d, e))
    // Expected: SEQ(a, b, LOOP(c, tau), d, e)
    // Both nested SEQ nodes are inlined, the LOOP keeps its position and shape.
    let leaf = |label: &str| Node::new_leaf(Some(label.into()));
    let operator = |operator_type: OperatorType, children: Vec<Node>| {
        let mut op = Operator::new(operator_type);
        op.children.extend(children);
        Node::Operator(op)
    };
    let redo_loop = || operator(OperatorType::Loop, vec![leaf("c"), Node::new_leaf(None)]);

    let input = operator(
        OperatorType::Sequence,
        vec![
            operator(OperatorType::Sequence, vec![leaf("a"), leaf("b")]),
            redo_loop(),
            operator(OperatorType::Sequence, vec![leaf("d"), leaf("e")]),
        ],
    );

    let folded = ProcessTree::new(input).fold();

    let expected = operator(
        OperatorType::Sequence,
        vec![leaf("a"), leaf("b"), redo_loop(), leaf("d"), leaf("e")],
    );
    assert_eq!(folded.root, expected);
}
is_valid_test() { // SEQ() is not valid diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs new file mode 100644 index 0000000..fe61edf --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs @@ -0,0 +1,51 @@ +//! This module contains utilities for detecting the base cases 'Empty' and 'Single Activity' used in the Inductive Miner. +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::EventLog; + + +/// Enum Representing whether and if so which type of base case was found. +#[derive(Debug)] +pub enum BaseCases { + None, // No base case is found + Empty, // the event log is completely empty + SingleActivity(String) // just one activity in every single trace in the event log +} + +/// Checks whether the base case single activity applies to the given event log. +/// The BaseCase applies if the event log only contains traces with precisely one event, +/// which must have the same activity attribute. +fn check_single_activity_case(log: &EventLog, classifier: &EventLogClassifier) -> Option { + let mut activity: Option = None; + for t in &log.traces{ + if t.events.len() != 1{ // catch empty traces + return None; + } + let act = classifier.get_class_identity(&t.events[0]); + if let Some(activity) = &activity{ + if act != *activity{ + return None; + } + } else { + activity = Some(act); + } + } + activity +} + +/// Checks whether a BaseCase applies to a given event log. +/// +/// There are two possible base cases: +/// - 'empty trace' where the entire event log consists of one single empty trace, +/// - 'single activity' where the entire event log consist of traces containing only one single event with the same activity attribute. 
+pub fn find_base_case(log: &EventLog, event_log_classifier: &EventLogClassifier) -> BaseCases { + + if log.traces.len() == 0{ + // this just checks for an empty event log, this means, even if there are only empty traces, this case case does not apply + BaseCases::Empty + } else if let Some(activity) = check_single_activity_case(log, event_log_classifier){ + BaseCases::SingleActivity(activity) + } else { + // no base case applied to this one + BaseCases::None + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs new file mode 100644 index 0000000..db96c18 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs @@ -0,0 +1,244 @@ +//! Utility for detecting a concurrency cut in a given Directly Follows Graph. + +use std::borrow::Cow; +use std::collections::HashSet; +use crate::core::process_models::dfg::DirectlyFollowsGraph; +use crate::core::process_models::process_tree::OperatorType; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::structures::minimum_self_distance::MinimumSelfDistance; + +///Partitions activities into components, such that activities in different components can occur +/// concurrently. Two activities are in the same component if they are not bidirectionally reachable. +/// +/// Optionally, a minimum self distance constraint can further restrict concurrency, by +/// forcing activities, which are in a minimum self distance relation with other activities, +/// into the same component. +/// +/// # Parameters +fn concurrent_cut<'a>(dfg: &'a DirectlyFollowsGraph<'_>, mindist: &Option) -> Option>>> { + todo!() +} + + + +/// Examines whether in a given Directly Follows Graph a concurrent cut can be applied. 
+/// +/// Public wrapper for [`concurrent_cut`] +/// +/// # Parameters +/// - 'dfg': the directly follows Graph which shall be examined +/// - 'mindist': Optional a minimum self distance constraint can be applied, by providing a Minimum self distance struct. +/// # Returns +/// - a cut struct containing at least 2 components of concurrent activities +/// - None, otherwise (this means a concurrent cut can not be applied) +pub fn concurrent_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>, mindist: Option) -> Option> { + // if there are not start or end activities, there is no cut + if dfg.start_activities.is_empty() || dfg.end_activities.is_empty() { + return None; + } + + let result = concurrent_cut(dfg, &mindist); + if let Some(result) = result { + if result.len() <= 1 { + None + } else { + Some(Cut::new(OperatorType::Concurrency, result)) + } + } else { + None + } +} + +#[cfg(test)] +mod test_parallel_cut { + use std::borrow::Cow; + use std::collections::{HashMap, HashSet}; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::concurrent::{concurrent_cut, concurrent_cut_wrapper}; + use crate::event_log; + + #[test] + fn test_leeman_example() { + let log = event_log!( + ["a", "b", "c"], + ["a", "c", "b"], + ["c", "a", "b"] + ); + let dfg = &DirectlyFollowsGraph::discover(&log); + let cut = concurrent_cut_wrapper(&dfg, None); + assert!(cut.is_some()); + let mut partitions = cut.unwrap().get_own(); + // sort to ensure order + partitions.sort_by(|x,y| x.len().cmp(&y.len())); + assert_eq!( + partitions, + Vec::from([ + HashSet::from(["c".into()]), + HashSet::from(["a".into(), "b".into()]) + ]) + ); + } + + #[test] + fn test_parallel_cut_with_trailing_activity() { + let dfg = DirectlyFollowsGraph::discover( + &event_log!(["a", "b", "c"], ["b", "a", "c"]) + ); + let _cut = concurrent_cut(&dfg, &None); + } + + #[test] + fn test_easy_parallel_cut_wrapper() { + let dfg = 
#[test]
fn test_three_branch_parallel() {
    // Every ordered pair of a, b and c occurs as a two-event trace.
    let dfg = DirectlyFollowsGraph::discover(&event_log!(
        ["a", "b"],
        ["b", "c"],
        ["c", "a"],
        ["a", "c"],
        ["b", "a"],
        ["c", "b"]
    ));

    let cut = concurrent_cut_wrapper(&dfg, None);
    assert!(cut.is_some());

    let parts = cut.unwrap();
    assert_eq!(parts.len(), 3);

    // Union of all partitions; every activity has to show up in one of them.
    let flattened: HashSet<_> = parts
        .partitions
        .iter()
        .flat_map(|partition| partition.iter().cloned())
        .collect();

    assert!(flattened.contains("a"));
    assert!(flattened.contains("b"));
    assert!(flattened.contains("c"));
}
// everything is end and start activity + dfg.start_activities = HashSet::from(["a".to_string(), "b".to_string(), "c".to_string()]); + dfg.end_activities = HashSet::from(["a".to_string(), "b".to_string(), "c".to_string()]); + assert!(concurrent_cut_wrapper(&dfg, None).is_some()); + + // no ending or start at b -> AND cut + dfg.start_activities = HashSet::from(["a".to_string(), "c".to_string()]); + dfg.end_activities = HashSet::from(["a".to_string(), "c".to_string()]); + let cut = concurrent_cut_wrapper(&dfg, None); + assert!(cut.is_some()); + assert_eq!(cut.unwrap().len(), 2); + } + + #[test] + fn test_perfect_parallel_three_branches() { + // all permutations of a, b, c to allow full bidirectional behavior + let log = event_log!( + ["a", "b", "c"], + ["a", "c", "b"], + ["b", "a", "c"], + ["b", "c", "a"], + ["c", "a", "b"], + ["c", "b", "a"] + ); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = concurrent_cut_wrapper(&dfg, None); + assert!(cut.is_some()); + assert_eq!(cut.unwrap().get_own().len(), 3); + } + + #[test] + fn test_sequence_cut() { + let log = event_log!( + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"] + ); + let dfg = DirectlyFollowsGraph::discover(&log); + + assert!(concurrent_cut_wrapper(&dfg, None).is_none()); + } + + #[test] + fn test_xor_cut() { + let log = event_log!(["a", "b"], ["c", "d"], ["a", "b"]); + let dfg = DirectlyFollowsGraph::discover(&log); + + // XOR-Components would be {a, b} and {c, d} + // Parallel Cut has to be None as there are no edges between {a,b} and {c,d} + assert!(concurrent_cut_wrapper(&dfg, None).is_none()); + } + + #[test] + fn test_noisy_parallel_fails_without_filter() { + let log = event_log!( + // ("a", "b", "c"), // b-> c missing + ["b", "a", "c"], + ["a", "c", "b"], + //("b", "c", "a"), // c-> a missing + ["c", "b", "a"] //o_trace!("c", "a", "b") is missing -> no edge c -> a + ); + // there are two edges missing c->a and b-> c, therefore there is no bidirectional relation in any case + let dfg = 
DirectlyFollowsGraph::discover(&log); + + assert!(concurrent_cut_wrapper(&dfg, None).is_none()); + } + + #[test] + fn test_loop_cut() { + let log = event_log!( + ["a"], // Start + ["a", "b", "a"], // Loop + ["a", "b", "a", "b", "a"] // Loop + ); + let dfg = DirectlyFollowsGraph::discover(&log); + + assert!(concurrent_cut_wrapper(&dfg, None).is_none()); + } +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs new file mode 100644 index 0000000..2ee50da --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs @@ -0,0 +1,47 @@ +//! This module contains a struct used for representing a found cut by specifying the found partitions and cut type. +use std::borrow::Cow; +use std::collections::HashSet; +use crate::core::process_models::process_tree::OperatorType; + +/// Represents a cut typically found by the inductive miner in a directly follows graph. +/// A 'Cut' partitions activities of a graph or log into disjoints sets, according to a +/// specific cut operator (e.g. sequence, xor etc.) +#[derive(Debug, PartialEq)] +pub struct Cut<'a>{ + pub operator: OperatorType, // define what operator this cut is about + pub partitions: Vec>>, +} + +impl<'a> Cut<'a>{ + + /// Creates a new cut with the given Operator and partitions. + /// + /// The caller must ensure that partitions form a valid cut according to the chosen operator. + pub fn new(operator: OperatorType, partitions: Vec>>) -> Cut<'a>{ + Self{operator, partitions} + } + + + /// Returns the number of partitions in this cut. + pub fn len(&self) -> usize { + self.partitions.len() + } + + + /// Consumes the cut and returns the partitions of this cut. 
+ pub fn get_own(self) -> Vec>> { + self.partitions + } + + /// Returns the operator associated with this cut + pub fn get_operator(&self) -> OperatorType { + self.operator // possible due to copy trait + } + + + /// Returns true if this cut contains no partitions. + pub fn is_empty(&self) -> bool{ + self.partitions.is_empty() + } + +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs new file mode 100644 index 0000000..799d44f --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs @@ -0,0 +1,213 @@ +//! Utility for detecting an exclusive choice cut in a given Directly Follows Graph + +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use std::borrow::Cow; +use std::collections::{HashMap, HashSet, VecDeque}; +use crate::core::process_models::dfg::DirectlyFollowsGraph; +use crate::core::process_models::process_tree::OperatorType; + +/// Calculates all connected components of a Graph. +/// For this it starts from every unvisited activity a Breadth First Search over the Graph. 
/// Calculates all connected components of a graph.
///
/// A breadth-first search over `adjacent` is started from every activity in
/// `activities` that has not been visited yet; all activities reached by one
/// search form one component. An activity without an entry in `adjacent` ends up
/// in a component of its own.
///
/// # Parameters
/// - `activities`: the nodes of the graph; only the keys are used
/// - `adjacent`: neighbours of each activity
///
/// # Returns
/// A vector containing all connected components.
// NOTE(review): the generic arguments of this signature were not visible in the
// reviewed text; they are restored from how the values are used — confirm.
fn calc_connected_components<'a, V>(
    activities: &'a HashMap<String, V>,
    adjacent: HashMap<Cow<'a, str>, HashSet<Cow<'a, str>>>,
) -> Vec<HashSet<Cow<'a, str>>> {
    let mut visited: HashSet<Cow<'a, str>> = HashSet::new();
    let mut components: Vec<HashSet<Cow<'a, str>>> = Vec::new();

    for node in activities.keys() {
        let start: Cow<'a, str> = Cow::from(node);
        // `insert` returns `false` if the node already belongs to an earlier component.
        if !visited.insert(start.clone()) {
            continue;
        }

        let mut component = HashSet::new();
        let mut queue = VecDeque::from([start]);

        while let Some(current) = queue.pop_front() {
            if let Some(neighbors) = adjacent.get(&current) {
                for neighbor in neighbors {
                    // Mark on enqueue so that every node is queued at most once.
                    if visited.insert(neighbor.clone()) {
                        queue.push_back(neighbor.clone());
                    }
                }
            }
            component.insert(current);
        }

        components.push(component);
    }

    components
}
+pub fn calculate_undirected_adjacency_matrix<'a>( + dfg: &DirectlyFollowsGraph<'a>, +) -> HashMap, HashSet>> { + let mut adjacent = HashMap::new(); + + for ((a1, a2), _) in &dfg.directly_follows_relations { + // insert both directions + adjacent + .entry(a1.clone()) + .or_insert(HashSet::new()) + .insert(a2.clone()); + adjacent + .entry(a2.clone()) + .or_insert(HashSet::new()) + .insert(a1.clone()); + } + adjacent +} + + +/// Attempts to find an exclusive choice cut in the given Directly Follows Graph, by calculating the connected components of the Graph. +/// +/// Public wrapper for [`calc_connected_components`] +/// +/// # Returns +/// Some(cut) containing the partitions/ connected components found, otherwise None. +#[allow(dead_code)] +pub fn exclusive_choice_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Option> { + // no start or end activity results in no cut + if dfg.start_activities.is_empty() || dfg.end_activities.is_empty() { + return None; + } + + let components = + calc_connected_components(&dfg.activities, calculate_undirected_adjacency_matrix(dfg)); + + // XOR cut only if > 1 disjoint component + if components.len() > 1 { + Some(Cut::new(OperatorType::ExclusiveChoice, components)) + } else { + None + } +} + + +#[cfg(test)] +mod tests { + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::exclusive_choice::exclusive_choice_cut_wrapper; + use crate::event_log; + + + #[test] + fn test_exclusive_choice_cut_2() { + let log = event_log!(["a", "b"], ["e"]); + let dfg: DirectlyFollowsGraph<'_> = DirectlyFollowsGraph::discover(&log); + let result = exclusive_choice_cut_wrapper(&dfg); + assert!(result.is_some()); + assert_eq!(result.unwrap().len(), 2); + } + + // Case 1: Clear XOR between b and c + // Traces: start -> b -> d OR start -> c -> d + #[test] + fn xor_cut_simple_two_branches() { + let log = event_log!(["b", "d"], ["c", "e"]); + let dfg = 
DirectlyFollowsGraph::discover(&log); + + let cut = exclusive_choice_cut_wrapper(&dfg).unwrap(); + + // Expect two components: {"b","d"} and {"e","c"} + // + assert_eq!(cut.len(), 2); + assert!(cut.partitions.iter().any(|comp| comp.contains("b"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("c"))); + } + + // Case 2: XOR with 3 different branches + // Traces: start -> b -> e, start -> c -> f, start -> d -> g + #[test] + fn xor_cut_three_way_branch() { + let log = event_log!(["b", "e"], ["c", "f"], ["d", "g"]); + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = exclusive_choice_cut_wrapper(&dfg).unwrap(); + + // Expect three components: one with b, one with c, one with d + assert_eq!(cut.len(), 3); + assert!(cut.partitions.iter().any(|comp| comp.contains("b"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("c"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("d"))); + } + + // Case 3: No XOR (sequence only) + // Traces: a -> b -> c (repeated) + #[test] + fn no_xor_cut_sequence() { + let log = event_log!( + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"] + ); + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = exclusive_choice_cut_wrapper(&dfg); + + // Should be None because it’s just a sequence + assert!(cut.is_none()); + } + + // Case 4: Single-event traces -> XOR between start activities + // Traces: ["a"], ["e"], ["f"] + #[test] + fn xor_cut_multiple_single_events() { + let log = event_log!(["a"], ["e"], ["f"]); + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = exclusive_choice_cut_wrapper(&dfg).unwrap(); + + // Expect 3 disjoint components + assert_eq!(cut.len(), 3); + assert!(cut.partitions.iter().any(|comp| comp.contains("a"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("e"))); + assert!(cut.partitions.iter().any(|comp| comp.contains("f"))); + } + + #[test] + fn greater_test() { + let log = event_log!(["a", "b", "c"], ["e", 
"f"]); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = exclusive_choice_cut_wrapper(&dfg); + + assert!(cut.is_some()); + } + + #[test] + fn test_parallel_log_no_cut() { + let log = event_log!(["a", "b"], ["b", "a"]); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = exclusive_choice_cut_wrapper(&dfg); + + // This is a parallel cut, not an exclusive choice cut + assert!(cut.is_none()); + } +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs new file mode 100644 index 0000000..33d4631 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs @@ -0,0 +1,210 @@ +//! Utility for detecting a loop cut in a Directly Follows Graph. + +use std::borrow::Cow; +use std::collections::HashSet; +use crate::core::process_models::dfg::DirectlyFollowsGraph; +use crate::core::process_models::process_tree::OperatorType; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; + + +/// Attempts to find a loop cut in a given Directly Follows Graph (DFG). +/// +/// The algorithm groups activities into connected components by using a union-find like structure. +/// +/// 1. Selects a pivot activity from the sets of start activities. +/// 2. Merges all start and end activities with component of pivot. +/// 3. Merges internal activities (no start nor end activity) based on the edges in the DFG, excluding +/// edges that would violate redo-loop semantic. +/// 4. Merges components based on certain rules about their connectivity +/// +/// +/// The resulting vector represents the activity partitions of the +/// candidate redo-loop cut. The first partition corresponds to the +/// component containing the pivot (the "do" part), +/// and the remaining partitions correspond to the "redo" part(s). 
+/// +/// # Panic +/// Panics if the dfg contains no start activity +fn redo_loop_cut<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Vec>> { + todo!() +} + +/// Attempts to find a Loop cut in a given DFG. +/// +/// Public wrapper for [`redo_loop_cut`] +/// +/// #Returns +/// Some(cut) if a loop cut has successfully been discovered, None otherwise +pub fn redo_loop_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Option>{ + + // only possible if there are start and end activities + if dfg.start_activities.is_empty() || dfg.end_activities.is_empty() { + return None; + } + + // calculate do-redo loop components + let components = redo_loop_cut(dfg); + + // a cut is found if there is more than one component + if components.len() > 1{ + Some(Cut::new(OperatorType::Loop, components)) + } else { + None + } + + +} + +#[cfg(test)] +mod test_redo_loop_cut{ + use std::collections::HashMap; + use crate::event_log; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::core::process_models::process_tree::OperatorType; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; + use super::*; + + #[test] + fn test_redo_on_single_activity(){ + let log = event_log!( + ["a", "c"], + ["a", "c", "b", "a", "c"] + ); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = redo_loop_cut(&dfg); + + assert_eq!(cut.len(), 2); + assert!(cut[0].contains("a") && cut[0].contains("c")); + assert!(cut[1].contains("b")); + } + + #[test] + fn test_no_loop() { + let log = event_log!( + ["a", "b", "c"], + ["a", "b", "c"], + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = redo_loop_cut(&dfg); + + assert_eq!(cut.len(), 1); + assert!(cut[0].contains("a") && cut[0].contains("b") && cut[0].contains("c")); + } + + #[test] + fn test_multi_activity_redo() { + let log = event_log!( + ["a", "c"], + ["a", "c", "b", "d", "a", "c"], + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = redo_loop_cut(&dfg); + + 
assert_eq!(cut.len(), 2); + + let do_group = &cut[0]; + let redo_group = &cut[1]; + + assert!(do_group.contains("a") && do_group.contains("c")); + assert!(redo_group.contains("b") && redo_group.contains("d")); + } + + #[test] + fn test_nested_loops_only_outer_cut() { + let log = event_log!( + ["s", "a", "c", "e"], + ["s", "a", "c", "b", "a", "c", "e"], // inner loop + ["s", "a", "c", "e", "g", "s", "a", "c", "e"], + ["s", "a", "c", "b", "a", "c", "b", "a", "c", "e"], + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + + + let cut = redo_loop_cut(&dfg); + + assert_eq!(cut.len(), 2); + + assert!(cut[1].contains("g")); + assert!(cut[0].contains("a") && cut[0].contains("c")); + } + + + #[test] + fn test_complex_test(){ + let mut dfg = DirectlyFollowsGraph::new(); + dfg.activities = HashMap::from([("a".to_string(), 1), ("b".to_string(), 1),("c".to_string(), 1)]); + dfg.directly_follows_relations = + HashMap::from([ + (("a".into(),"b".into()),1), + (("b".into(),"a".into()),1), + (("b".into(),"c".into()),1), + (("c".into(),"b".into()),1), + (("c".into(),"a".into()),1), + (("a".into(),"c".into()),1), + ] + ); + dfg.start_activities = HashSet::from(["a".to_string(), "b".to_string()]); + dfg.end_activities = HashSet::from(["c".to_string()]); + } + + + + #[test] + fn test_double_loop(){ + let log = event_log!( + ["a", "b"], + ["a", "b", "c", "a", "b"], + ["a", "b", "d", "a", "b"], + ["a", "b", "d", "a", "b", "a", "b", "c", "a", "b"], + ["a", "b", "c", "a", "b", "a", "b", "d", "a", "b"] + ); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = redo_loop_cut_wrapper(&dfg); + assert!(cut.is_some()); + let cut = cut.unwrap(); + assert_eq!(cut.len(), 3); + assert_eq!(cut.operator, OperatorType::Loop); + assert!( + cut.partitions == vec![HashSet::from(["a".into(), "b".into()]), + HashSet::from(["c".into()]), + HashSet::from(["d".into()])] + || + cut.partitions == vec![HashSet::from(["a".into(), "b".into()]), + HashSet::from(["d".into()]), + 
HashSet::from(["c".into()])] + + ); + + } + + #[test] + fn test_loop_over_parallel(){ + let log = event_log!( + ["a", "b"], + ["a", "b", "c", "a", "b"], + ["a", "d", "b"], + ["a", "d", "b", "c", "a", "d", "b" ], + ["a", "d", "b", "c", "a", "b" ] + ); + let dfg = DirectlyFollowsGraph::discover(&log); + + let cut = redo_loop_cut_wrapper(&dfg); + + assert!(cut.is_some()); + let expectations = Cut::new(OperatorType::Loop, + vec![ + HashSet::from( + ["a".into(), "b".into(), "d".into()]), + HashSet::from(["c".into()])] + ); + + assert_eq!(cut.unwrap(), expectations); + } + + +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/mod.rs new file mode 100644 index 0000000..917ac72 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/mod.rs @@ -0,0 +1,80 @@ +//! This module contains algorithms for detecting a cut in a given Directly Follows Graph. 
+ +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::dfg::DirectlyFollowsGraph; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::concurrent::concurrent_cut_wrapper; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::exclusive_choice::exclusive_choice_cut_wrapper; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::loop_cut::redo_loop_cut_wrapper; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::sequence_cut_wrapper; +use crate::discovery::case_centric::inductive_miner_app::structures::minimum_self_distance::MinimumSelfDistance; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::{Parameter, Parameters}; +use crate::EventLog; + +pub mod exclusive_choice; +pub mod cut; +pub mod sequence_cut; +pub mod concurrent; +pub mod loop_cut; + + + + +/// Attempts to find a valid cut in the given DirectlyFollowsGraph, by evaluating possible cut types +/// in the following strict order: +/// 1. exclusive choice cut [`exclusive_choice_cut_wrapper`] +/// 2. Sequence cut [`sequence_cut_wrapper`] +/// 3. Concurrent / AND cut [`concurrent_cut_wrapper`] +/// 4. Loop cut [`redo_loop_cut_wrapper`] +/// +/// # Returns +/// - Some([`Cut`]) containing the first detected cut according to the strict order. 
+/// - None otherwise +pub fn find_cut<'a>(dfg: &'a DirectlyFollowsGraph<'_>, log: &EventLog, event_log_classifier: &EventLogClassifier, parameters: &Parameters) -> Option>{ + // if any cut is found in the presented order, return the first one + if let Some(cut) = exclusive_choice_cut_wrapper(dfg){ + Some(cut) + } else if let Some(cut) = sequence_cut_wrapper(dfg, parameters){ + Some(cut) + } else { + // check whether minimum self distance shall be used + let mindist = if parameters.contains(&Parameter::MinimumSelfDistance) { + Some(MinimumSelfDistance::new(log, event_log_classifier)) + } else { None }; + + if let Some(cut) = concurrent_cut_wrapper(dfg, mindist) { + Some(cut) + } else if let Some(cut) = redo_loop_cut_wrapper(dfg) { + Some(cut) + } else { + None // if no cut is found return none + } + } +} + +#[cfg(test)] +mod test_cut_finder{ + use std::collections::HashSet; + use crate::{ + discovery::case_centric::dfg::discover_dfg, + core::event_data::case_centric::EventLogClassifier, + event_log, + discovery::case_centric::inductive_miner_app::cut_finder::find_cut + }; + + #[test] + fn test_log_with_no_cut(){ + let log = event_log!( + ["a", "b", "c", "d"], + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + let dfg = discover_dfg(&log); + let cut = find_cut(&dfg, &log, &EventLogClassifier::default(), &HashSet::new()); + assert!(cut.is_none()); + } + + +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs new file mode 100644 index 0000000..997f9fb --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs @@ -0,0 +1,295 @@ +//! Utility for detecting a sequence cut in a Directly Follows Graph. 
+ +use std::borrow::Cow; +use std::collections::{HashMap, HashSet}; +use crate::core::process_models::dfg::{Activity, DirectlyFollowsGraph}; +use crate::core::process_models::process_tree::OperatorType; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; + +/// Calculate transitiv reachability using Floyd Warshall +fn compute_reachability_matrix(dfg: &DirectlyFollowsGraph<'_>) -> (HashMap, Vec>) { + let activities = dfg.activities.iter().map(|(a,_)| a.clone()).collect::>(); + let n = activities.len(); + let mut map = HashMap::new(); + + // Activity_string -> index + for (i, act) in activities.iter().enumerate() { + map.insert(act.clone(), i); + } + + // initialize matrix + let mut matrix = vec![vec![false; n]; n]; + + // mark direct edges + for ((a, b), _) in &dfg.directly_follows_relations{ + if let (Some(idx_a), Some(idx_b)) = (map.get(a.as_ref()), map.get(b.as_ref())) { + matrix[*idx_a][*idx_b] = true; + } + } + + // Floyd Warshall + for k in 0..n { + for i in 0..n{ + for j in 0..n{ + // only update if cell isn't already true + matrix[i][j] = matrix[i][j] || (matrix[i][k] && matrix[k][j]); + } + } + } + + (map, matrix) +} + +/// Helper function which calculates whether a set of activities a can reach another set of activities b. +/// +/// # Returns +/// - 'true' if at least one activity in a can transitively reach any activity in b +fn reaches_any_transitive(a: &HashSet>, b: &HashSet>, + idx_map: &HashMap, + matrix: &Vec> +) -> bool { + for act_a in a { + for act_b in b { + if let (Some(&idx_a), Some(&idx_b)) = (idx_map.get(act_a.as_ref()), idx_map.get(act_b.as_ref())) { + if matrix[idx_a][idx_b] { + return true; + } + } + } + } + false +} + + + +/// Calculates Activity Sequences in a given Directly Follows Graph. +/// Two activities are in sequence if they are neither mutually reachable nor mutually unreachable. 
///
/// # Returns
/// A vector of activity partitions representing a candidate sequence cut.
/// Each hashset contains the activity labels belonging to the same sequence block.
/// The partitions are ordered s.t. for any 'i < j', activities in partitions\[i] can (transitively)
/// reach activities in partitions\[j].
///
/// # Panics
/// Panics while sorting if two of the final partitions are mutually reachable or mutually
/// unreachable; the merge phase is meant to rule that out.
fn calc_sequences<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Vec<HashSet<Cow<'a, str>>> {
    let (idx_map, matrix) = compute_reachability_matrix(dfg);

    // Initialize each activity with its own partition
    let mut partitions: Vec<HashSet<Cow<'a, str>>> = dfg
        .activities
        .keys()
        .map(|a| HashSet::from([a.into()]))
        .collect();

    // 1. Merge partitions until a full pass makes no change
    let mut changed = true;
    while changed {
        changed = false;
        // iterate over all pairs and merge mutually reachable or mutually unreachable partitions
        let mut i = 0;
        while i < partitions.len() {
            // pairs (j, i) with j < i were already examined in this pass
            let mut j = i + 1;
            while j < partitions.len() {
                // get the current working partitions
                let p_a = &partitions[i];
                let p_b = &partitions[j];

                // true if at least one activity of the first set reaches one of the second
                let a_reaches_b = reaches_any_transitive(p_a, p_b, &idx_map, &matrix);
                let b_reaches_a = reaches_any_transitive(p_b, p_a, &idx_map, &matrix);

                // Merge if:
                // 1. Mutually reachable (Loop)
                // 2. Mutually unreachable (Exclusive Choice / Parallelism)
                if (a_reaches_b && b_reaches_a) || (!a_reaches_b && !b_reaches_a) {
                    // Merge the whole partition j into partition i
                    let part_j = partitions.remove(j);
                    partitions[i].extend(part_j);
                    // the grown partition may now relate differently to partitions that were
                    // already checked, so another full pass is required
                    changed = true;
                    // Don't increment j, as the vector shrunk
                } else {
                    // proceed with the next partition
                    j += 1;
                }
            }
            i += 1;
        }
    }

    // 2. Sort partitions to form the candidate sequence
    partitions.sort_by(|p1, p2| {
        let p1_to_p2 = reaches_any_transitive(p1, p2, &idx_map, &matrix);
        let p2_to_p1 = reaches_any_transitive(p2, p1, &idx_map, &matrix);
        if p1_to_p2 && !p2_to_p1 { // p1 -> p2 but not p2 -> p1
            std::cmp::Ordering::Less
        } else if !p1_to_p2 && p2_to_p1 { // p2 -> p1 but not p1 -> p2
            std::cmp::Ordering::Greater
        } else { // mutually reachable or not reachable - should not happen at all
            panic!("Partitions are in sequence cut are nevertheless mutually reachable or not reachable");
        }
    });

    partitions
}

/// Public wrapper for [`calc_sequences`].
///
/// This function simply forwards its arguments to
/// `calc_sequences` and returns Some(cut) if a cut is found, otherwise None.
/// A cut is found when there is more than one sequence block.
///
/// If a [`strict_sequence_cut`] should be applied, this has to be set in a [`Parameter`]
/// (NOTE(review): `_parameters` is currently unused by this function).
pub fn sequence_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>, _parameters: &Parameters) -> Option<Cut<'a>> {
    // calculate sequence blocks
    let sequences = calc_sequences(dfg);

    // if there is more than one sequence block, a cut is found successfully
    (sequences.len() > 1).then(|| Cut::new(OperatorType::Sequence, sequences))
}

#[cfg(test)]
mod test_sequence_cut{
    use std::borrow::Cow;
    use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::calc_sequences;
    use std::collections::HashSet;
    use crate::core::process_models::dfg::DirectlyFollowsGraph;
    use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::{ sequence_cut_wrapper};
    use crate::{event_log};
    #[test]
    fn test_single_activity(){
        let dfg = DirectlyFollowsGraph::discover(&event_log!(["a"]));
        let cut = calc_sequences(&dfg);
        let expected = vec![HashSet::from([Cow::from("a".to_string())])];
        assert_eq!(cut, expected);
    }

    #[test]
    fn test_exclusive_choice_cut(){
        let input = event_log!(["a", "b", "c"],
["d"]); + let dfg = DirectlyFollowsGraph::discover(&input); + let result = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(result.is_some()); + assert_eq!(result.unwrap().get_own().len(), 3); + + } + #[test] + fn test_simple_sequence(){ + let input = event_log!(["a", "b", "c"]); + let dfg = DirectlyFollowsGraph::discover(&input); + let result = calc_sequences(&dfg); + let expected = vec![HashSet::from(["a".into()]) , HashSet::from(["b".into()]), HashSet::from(["c".into()])]; + assert_eq!(expected, result) + } + + + #[test] + fn test_leemans_example(){ + let input = event_log!(["a", "c", "d"], ["b", "c", "e "]); + let dfg = DirectlyFollowsGraph::discover(&input); + println!("{:?}", calc_sequences(&dfg)); + let result = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(result.is_some()); + let result = result.unwrap(); + println!("{:?}", result); + assert_eq!(result.get_own().len(), 3); + } + + + #[test] + fn test_sequence_with_internal_parallelism() { + // Log: A -> (B || C) -> D + // Traces: A->B->C->D, A->C->B->D + let dfg = DirectlyFollowsGraph::discover(&event_log!( + ["A", "B", "C", "D"], + ["A", "C", "B", "D"] + )); + + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()).unwrap(); + let expected: Vec>> = vec![ + HashSet::from(["A".into()]), + HashSet::from(["B".into(), "C".into()]), + HashSet::from(["D".into()]), + ]; + + assert_eq!(cut.get_own(), expected); + } + + #[test] + fn test_parallel_branches_no_sequence_cut() { + // Log: A -> B and A -> C in parallel + let dfg = DirectlyFollowsGraph::discover(&event_log!( + ["B", "C"], + ["C", "B"] + )); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(cut.is_none()); + } + + #[test] + fn test_xor_branch_sequence_cut() { + // Log: A -> B -> D OR A -> C -> D + let dfg = DirectlyFollowsGraph::discover(&event_log!( + ["A", "B", "D"], + ["A", "C", "D"], + )); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(cut.is_some()); + let cut = cut.unwrap(); + let expected: Vec>> = 
vec![HashSet::from(["A".into()]) , HashSet::from(["B".into(), "C".into()]), HashSet::from(["D".into()])]; + assert_eq!(cut.get_own(), expected); + + } + + + #[test] + fn test_with_loop(){ + let dfg = DirectlyFollowsGraph::discover(&event_log!( + ["B", "C"], + ["C", "B"], + ["B", "C", "E", "F", "B", "C"], + ["C", "B", "E", "F", "B", "C"], + ["B", "C", "E", "F", "C", "B"], + ["C", "B", "E", "F", "B", "C", "E", "F", "C", "B"], + )); + assert!(sequence_cut_wrapper(&dfg, &HashSet::new()).is_none()); + } + + + #[test] + fn test_triangle_cut() { + let dfg = DirectlyFollowsGraph::discover(&event_log!( + ["A", "C"], + ["B", "C", "D"], + ["B", "D"] + )); + + + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + + if let Some(c) = cut { + assert_eq!(c.get_own() , Vec::from([HashSet::from(["A".into(), "B".into()]) , HashSet::from(["C".into()]), HashSet::from(["D".into()])])); + } + } + + + #[test] + fn test_strict_sequence_cut_wrapper(){ + let log = event_log!( + ["a", "b", "c"], + ["a", "c"], + ); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()).unwrap(); + println!("{:?}", cut); + } + +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs new file mode 100644 index 0000000..aa91005 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs @@ -0,0 +1,213 @@ +//! Activity Concurrent fallthrough detection utilities. +//! +//! This module implements the **activity concurrent** fallthrough used by the inductive miner. +//! +//! The activity concurrent fallthrough assumes concurrent behavior when a single activity in the event log +//! can occur independently of the ordering of the other activities. In such a case, the activity is +//! 
considered to run in parallel with the remaining behavior of the log. +//! +//! When this pattern is detected, the activity is separated from the log and modeled as executing +//! concurrently with the rest of the process. + +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::{Node, OperatorType}; +use crate::discovery::case_centric::dfg::discover_dfg_with_classifier; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::find_cut; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{ActivityConcurrent, Return}; +use crate::discovery::case_centric::inductive_miner_app::splits::perform_split; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; +use crate::EventLog; + +/// Filters an event log by removing all events whose activity matches a pivot. +/// +/// The function splits the input log into two logs: +/// - one log containing the original traces **without** the pivot activity +/// - one log containing traces consisting only of the filtered-out pivot events +/// +/// The number of traces is preserved in both logs. +/// +/// # Returns +/// A tuple `(filtered_out_log, filtered_log)` where: +/// +/// - `filtered_out_log` contains only the removed pivot events (possibly empty traces). +/// - `filtered_log` contains the original behavior without the pivot events. 
+fn filter_out_activity( + log: EventLog, + event_log_classifier: &EventLogClassifier, + pivot: String, +) -> (EventLog, EventLog) { + let mut filtered_log = log.clone_without_traces(); // the logs containing the filtered activities + let mut filtered_out_log = log.clone_without_traces(); // the log containing left behavior + + for trace in log.traces { + // get the trace length + let len_t = trace.events.len(); + + // do the same for the traces again + let mut new_trace = trace.clone_without_events(); + let mut other_new_trace = trace.clone_without_events(); + + // need the option for initialization purpose, this option marks whether the element was actually contained in the trace + let mut pivot_event = None; // if set the activity was actually contained in this trace + + // check on every event in this trace + for event in trace.events { + let other = event_log_classifier.get_class_identity(&event); + if pivot != other { + new_trace.events.push(event); + } else if pivot_event.is_none() { + // set the pivot event + pivot_event = Some(event) + } + } + + // check whether the event was actually part of the trace + if pivot_event.is_some() { + // if so push the trace, as it is (excluding the left out events) + let event = pivot_event.unwrap(); + // push the pivot event as often as it has been filtered out (maybe use a counter here) + for _ in 0..(len_t - new_trace.events.len()) { + other_new_trace.events.push(event.clone()); + } + // push the filtered logs + filtered_log.traces.push(new_trace); + filtered_out_log.traces.push(other_new_trace); + } else { + // new trace equals the trace from before, therefore we should not push the empty lg (right?) + filtered_log.traces.push(new_trace); + + + //mind that empty traces are being pushed too + filtered_out_log.traces.push(other_new_trace); + + } + + } + + (filtered_out_log, filtered_log) +} + +/// Attempts to detect an *activity concurrent* fall-through pattern. 
+/// +/// This fall through iteratively removes one activity at a time +/// (starting with the most frequent one) and checks whether the remaining logs yield any valid cut. +/// If removing the activity yields a valid cut, the activity is considered concurrent to the rest of the process. +/// +/// The split operations is performed on a valid cut as well, for efficiency reasons. +/// +/// # Returns +/// - 'ActivityConcurrent(...)' enum if a concurrent activity is detected, containing the constructed concurrency node, the log of removed activity instances and the already performed split. +/// - 'Return(log)' the original log without changes +fn activity_concurrent( + log: EventLog, + event_log_classifier: &EventLogClassifier, + parameters: &Parameters) -> Fallthrough { + let dfg = discover_dfg_with_classifier(&log, event_log_classifier); + + // get the activities and transform into a vector + let mut activities: Vec<(String,u32)> = dfg.activities.clone().into_iter().collect(); + // sort by cardinality (descending) + (&mut activities).sort_by(|a,b| a.1.partial_cmp(&b.1).unwrap()); // safe unwrap as working with u32 here + + // now leave out one activity after another and try to find a cut + for (activity, _) in activities.into_iter().rev() { + // remove activity from this log + let (filtered_out_log, filtered_log) = + filter_out_activity(log.clone(), event_log_classifier, activity); + + // build a dfg in order to use already established find_cut method + let dfg = discover_dfg_with_classifier(&filtered_log, event_log_classifier); + match find_cut(&dfg, &filtered_log, event_log_classifier, parameters) { + None => continue, // leave out another activity (if another is left) + Some(cut) => { + // do the split here + let split = perform_split(&filtered_log, event_log_classifier, cut); + + // create a node without children, as this has to be processed in the more high level functions + let node = Node::new_operator(OperatorType::Concurrency); + + // return if a cut is 
found + return ActivityConcurrent(node, filtered_out_log, split); + } + } + } + + // default return + Return(log) +} + +/// Public wrapper for [`activity_concurrent`]. +/// +/// This function simply forwards its arguments to +/// `activity_concurrent` and exists for consistency +/// with other fall-through detection wrappers. +pub fn activity_concurrent_wrapper(log: EventLog, + event_log_classifier: &EventLogClassifier, + parameters: &Parameters) -> Fallthrough { + activity_concurrent(log, event_log_classifier, parameters) +} + +#[cfg(test)] +mod test_activity_concurrent { + use std::collections::HashSet; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::{event_log, EventLog}; + use crate::core::process_models::process_tree::{Node, OperatorType}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_concurrent::{activity_concurrent, filter_out_activity}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::ActivityConcurrent; + + fn events_equal(log: &EventLog, o_log: &EventLog, event_log_classifier: &EventLogClassifier) -> bool { + if log.traces.len() == o_log.traces.len() { + for (t0, t1) in log.traces.iter().zip(o_log.traces.iter()) { + if t0.events.len() == t1.events.len() { + for (e0,e1) in t0.events.iter().zip(t1.events.iter()) { + let a0 = event_log_classifier.get_class_identity(e0); + let a1 = event_log_classifier.get_class_identity(e1); + if a0 != a1 { + return false; + } + } + } + } + return true; + } + false + } + + #[test] + fn test_filter_out_activity_and_activity_concurrent_yield_same_result() { + let log = event_log!( + ["a", "b", "c", "d"], + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + // mind the empty trace + let ex1 = event_log!(["b"], ["b"],[], ["b"]); + + let ex2 = event_log!( + ["a", "c", "d"], + ["d","a"], + ["a", "d", "c"], + ["c", "d"], + ); + + let classifier = EventLogClassifier::default(); + + let (log1, log2) = + 
filter_out_activity(log.clone(), &EventLogClassifier::default(), "b".to_string()); + + assert!(events_equal(&log1, &ex1, &classifier)); + assert!(events_equal(&log2, &ex2, &classifier)); + let ActivityConcurrent(node, log1, split)= activity_concurrent(log, &classifier, &HashSet::new()) else { return assert!(false); }; + assert!(!log1.traces.is_empty() && !split.sub_logs.is_empty()); + let ex_node = Node::new_operator(OperatorType::Concurrency); + assert_eq!(node, ex_node); + + } + + + +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs new file mode 100644 index 0000000..bf141e5 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs @@ -0,0 +1,339 @@ +//! Activity once per trace detection utilities. +//! +//! This module implements the **activity once per trace** used by the inductive miner. +//! +//! The fallthrough applies when an activity occurs **exactly once in every trace of the event log**. +//! In this case, the activity is assumed to execute independently of the rest of the process. +//! +//! When such an activity is detected, it is removed from the event log and modeled as running in +//! parallel with the remaining behavior of the process. 
+use std::collections::HashMap; +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::EventLog; +use crate::core::process_models::process_tree::{Node, OperatorType}; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{ActivityOncePerTrace, Return}; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; + +/// This function iterates over every event of every trace and removes the specified event +fn remove_activity_from_log( + mut log: EventLog, + event_log_classifier: &EventLogClassifier, + activity: String, +) -> EventLog { + log.traces = log + .traces + .into_iter() + .map(|mut trace| { + trace.events = trace + .events + .into_iter() + .filter(|event| { + let other = event_log_classifier.get_class_identity(event); + activity != other + }) + .collect(); + trace + }) + .collect(); + + // experimental, what if we only retain traces not empty? + // log.traces.retain(|trace| {trace.events.len() > 0}); + log +} + + +/// Helper struct to count the occurrences of each activity in the whole log and in every trace. +/// In 'trace_activities' each index corresponds to a trace at the same index in the event log. +/// The 'activities' member contains information about how often every activity occurs in the whole event log. +struct ActivityTraceCounter { + activities: HashMap, + trace_activities: Vec>, +} + +impl ActivityTraceCounter { + /// Counts how often every activity of the event log occurs in every trace and in the whole + /// event log. 
+ fn new(log: &EventLog, event_log_classifier: &EventLogClassifier) -> ActivityTraceCounter { + let mut activities = HashMap::new(); + let mut trace_activities = Vec::with_capacity(log.traces.len()); + + for (i, trace) in log.traces.iter().enumerate() { + trace_activities.push(HashMap::new()); + for event in &trace.events { + let activity = event_log_classifier.get_class_identity(event); + // update activities + if let Some(count) = activities.get_mut(&activity) { + *count += 1; + } else { + activities.insert(activity.clone(), 1); + } + + if let Some(count) = trace_activities[i].get_mut(&activity) { + *count += 1; + } else { + trace_activities[i].insert(activity, 1); + } + } + } + + ActivityTraceCounter { + activities, + trace_activities, + } + } + + /// Consume the object and returns the activity count as well as the vector containing the activity count for every trace. + fn get(self) -> (HashMap, Vec>) { + (self.activities, self.trace_activities) + } +} + +fn cleanup_log( + log: EventLog, + event_log_classifier: &EventLogClassifier, + activity: String, +) -> Fallthrough { + let log = remove_activity_from_log(log, event_log_classifier, activity.clone()); + + let mut node = Node::new_operator(OperatorType::Concurrency); + let activity_leaf = Node::new_leaf(Some(activity)); + node.add_child(activity_leaf); + + ActivityOncePerTrace(node,log) +} + +///This fall through applies if an activity occurs once in every trace of the log. 
+/// In case this applies to multiple ones an arbitrary is chosen (with the lowest cardinality) +pub fn activity_once_per_trace( + log: EventLog, + event_log_classifier: &EventLogClassifier, +) -> Fallthrough { + let k = log.traces.len(); + // count how often every activity occurs in the event log and in every trace + let (activities, trace_activities) = + ActivityTraceCounter::new(&log, event_log_classifier).get(); + let mut activities: Vec<(String, usize)> = activities.into_iter().collect(); // transform to vector in order to sort the activities according to cardinality + + // Sort the activities by cardinality + (&mut activities).sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); // safe unwrap as we compare u32 to other u32 + + // set result to none (for now) + let mut result: Option = None; + + // reverse iterate over the activities, as the activities with more occurrences are more likely to appear precisely once every trace + 'activity_loop: for (activity, cardinality) in activities.into_iter().rev() { + // activity has to appear precisely once in every trace, therefore skip if it does not appear as often as we have traces + if cardinality != k { + continue 'activity_loop; + } + for trace in &trace_activities { + // has to appear precisely one time + if let Some(count) = trace.get(&activity) { + if *count != 1 { + continue 'activity_loop; + } + } else { + // activity did not appear in this trace → condition violated + continue 'activity_loop; + } + } + // at this point the activity has appeared precisely one time in every trace + result = Some(activity); + break 'activity_loop; + } + + // check result of activity loop + if result.is_some() { + cleanup_log(log, event_log_classifier, result.unwrap()) + } else { + // does not apply - return the event log to be used in other fallthrough cases + Return(log) + } +} + +/// Public wrapper for [`activity_once_per_trace`]. 
+/// +/// This function simply forwards its arguments to +/// `activity_once_per_trace` and exists for consistency +/// with other fall-through detection wrappers. +pub fn activity_once_per_trace_wrapper( + log: EventLog, + event_log_classifier: &EventLogClassifier, + _: &Parameters, +) -> Fallthrough { + activity_once_per_trace(log, event_log_classifier) +} + + +#[cfg(test)] +mod test_activity_once_per_trace { + use crate::{event_log, EventLog}; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::process_tree::{Node, OperatorType}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_once_per_trace::{activity_once_per_trace, remove_activity_from_log}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{ActivityOncePerTrace, Return}; + + #[test] + fn test_remove_activity_with_empty_trace() { + let log = event_log!([], ["a"], ["b", "a"]); // b as first event intentionally to get the same timestamp as for the expected one + let r = remove_activity_from_log(log, &EventLogClassifier::default(), "a".to_string()); + + let expected = event_log!([], [], ["b"],); + assert_eq!(r, expected); + } + + fn events_equal(log: &EventLog, o_log: &EventLog, event_log_classifier: &EventLogClassifier) -> bool { + if log.traces.len() == o_log.traces.len() { + for (t0, t1) in log.traces.iter().zip(o_log.traces.iter()) { + if t0.events.len() == t1.events.len() { + for (e0,e1) in t0.events.iter().zip(t1.events.iter()) { + let a0 = event_log_classifier.get_class_identity(e0); + let a1 = event_log_classifier.get_class_identity(e1); + if a0 != a1 { + return false; + } + } + } + } + return true; + } + false + } + + #[test] + /// The example as defined in Robust Process Mining with Guarantees + fn leeman_example() { + let log = event_log!( + ["a", "b", "c", "d"], + ["d", "a", 
"b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + let result = activity_once_per_trace(log, &EventLogClassifier::default()); + match result { + ActivityOncePerTrace(node, log) => { + let expected_log = event_log!( + ["a", "b", "c"], + ["a", "b"], + ["a", "c"], + ["b", "c"], + ); + assert!(events_equal(&log, &expected_log, &EventLogClassifier::default())); + + let mut expected_node = Node::new_operator(OperatorType::Concurrency); + expected_node.add_child(Node::new_leaf(Some(String::from("d")))); + + assert_eq!(node, expected_node); + } + _ => assert!(false), + } + } + + #[test] + /// Assert that the function returns none if there is no activity once in every trace, but almost + fn test_log_with_no_ft() { + // fist case - first trace + let log = event_log!( + ["a", "b", "c"], // here i removed the 'd' + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + let Return(expected_log) = activity_once_per_trace(log.clone(), &EventLogClassifier::default()) + else { + return assert!(false); + }; + + let log1 = event_log!( + ["a", "b", "c", "d"], + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c"], // now the d is missing here + ); + + assert!(events_equal(&log, &expected_log, &EventLogClassifier::default())); + + let Return(log2) = activity_once_per_trace(log1.clone(), &EventLogClassifier::default()) + else { + return assert!(false); + }; + assert!(events_equal(&log1, &log2, &EventLogClassifier::default())); + } + + #[test] + fn test_with_multiple_activities_appearing_once() { + let log = event_log!( + ["a", "b", "c", "d"], // here i removed the 'd' + ["d", "a", "b", "c"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + let ActivityOncePerTrace(process_node, log) = + activity_once_per_trace(log, &EventLogClassifier::default()) + else { + return assert!(false); + }; + + let expected_log = event_log!( + ["a", "b", "d"], + ["d", "a", "b"], + ["a", "d"], + ["b", "d"], + ); + let expected_log2 = event_log!( + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "c"], + ["b", 
"c"], + ); + + // it really is arbitrary whether c or d is chosen + assert!(events_equal(&log, &expected_log, &EventLogClassifier::default()) || + events_equal(&log, &expected_log2, &EventLogClassifier::default())); + + + let mut expected_node = Node::new_operator(OperatorType::Concurrency); + expected_node.add_child(Node::new_leaf(Some(String::from("c")))); + + let mut expected_node2 = Node::new_operator(OperatorType::Concurrency); + expected_node2.add_child(Node::new_leaf(Some(String::from("d")))); + + assert!(process_node == expected_node || process_node == expected_node2) + } + + #[test] + fn test_two_activites_in_trace() { + let log = event_log!( + ["a", "b", "c", "d"], + ["d", "a", "b", "d"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + let Return(log1) = activity_once_per_trace(log.clone(), &EventLogClassifier::default()) + else { + return assert!(false); + }; + + assert!(events_equal(&log, &log1, &EventLogClassifier::default())); + } + + #[test] + fn test_with_empty_log() { + let log = event_log!(["a", "b"], []); + // the fallthrough should not find anything, as there is a trace containing no element + let Return(_) = activity_once_per_trace(log.clone(), &EventLogClassifier::default()) else { + return assert!(false); + }; + + let log2 = event_log!(["a", "b"]); + let r = activity_once_per_trace(log2, &EventLogClassifier::default()); + assert!(r.same_enum_variant(&Fallthrough::ActivityOncePerTrace( + Node::new_operator(OperatorType::Concurrency), + event_log!() + ))); + } +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/empty_traces.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/empty_traces.rs new file mode 100644 index 0000000..71cc9eb --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/empty_traces.rs @@ -0,0 +1,125 @@ +//! Empty traces fallthrough detection utilities. 
+ +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::{Node, OperatorType}; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{EmptyTraces, Return}; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; +use crate::EventLog; + +/// Checks whether the empty traces fallthrough applies to a given log, +/// it applies when the log contains empty traces. +/// +/// # Returns +/// - [EmptyTraces] if the event log contained empty traces +/// - [Return] if the event log contained no empty traces + fn empty_traces(mut log: EventLog, _event_log_classifier: &EventLogClassifier) -> Fallthrough { + let len_before = log.traces.len(); + log.traces = log.traces.into_iter().filter(|trace| !trace.events.is_empty()).collect(); + + if len_before != log.traces.len(){ + // if the len of the trace has changed in the meantime, this means there are some traces lost, + // due to that they have been empty + + // return a Process node together with the resulting unprocessed traces of the event log + + let mut node = Node::new_operator(OperatorType::ExclusiveChoice); + node.add_child(Node::new_leaf(None)); + EmptyTraces(node, log) + } else { + // otherwise this fallthrough does not apply + Return(log) + } +} + +/// Public wrapper for [`empty_traces`]. +/// +/// This function simply forwards its arguments to +/// `empty_traces` and exists for consistency +/// with other fall-through detection wrappers. 
+pub fn empty_traces_wrapper(log: EventLog, _event_log_classifier: &EventLogClassifier, _: &Parameters) -> Fallthrough { + empty_traces(log, _event_log_classifier) +} + +#[cfg(test)] +mod test_empty_traces_ft{ + use crate::{event_log, event}; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::process_tree::{Node, OperatorType}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::empty_traces::empty_traces; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{EmptyTraces, Return}; + + #[test] + /// test the simplest case, the log should retrun a xor-node with one child from type empty and + /// a log containing the only not empty trace + fn test_empty_traces(){ + let log = event_log!( + [], + [], + [], + ["a"], + [], + ); + + let EmptyTraces(node, log) = empty_traces(log, &EventLogClassifier::default()) else { return assert!(false); }; + assert_eq!(log.traces.len(), 1); + assert_eq!(log.traces[0].events.len(), 1); + assert_eq!(log.traces[0].events[0], event!("a")); + + let mut expected_node = Node::new_operator(OperatorType::ExclusiveChoice); + expected_node.add_child(Node::new_leaf(None)); + + assert_eq!(node, expected_node); + } + + #[test] + /// Assert that an event log + fn test_not_empty_traces(){ + let log = event_log!( + ["a"], + ["b"], + ["f"], + ["a"], + ["g"], + ); + + let Return(log1) = empty_traces(log.clone(), &EventLogClassifier::default()) else { return assert!(false); }; + + assert_eq!(log, log1); + } + + #[test] + /// assert that an empty event log results in no result ('None'), + /// as this is the basecase + fn test_empty_log(){ + let log = event_log!(); + let res = empty_traces(log.clone(), &EventLogClassifier::default()); + match res { + Return(log1) => assert_eq!(log, log1), + _ => assert!(false), + } + } + + + #[test] + fn test_log_only_empty_traces(){ + let log = event_log!( + [], [], [] + ); + + let res = empty_traces(log, 
&EventLogClassifier::default()); + match res { + EmptyTraces(node,log1) => { + assert_eq!(log1.traces.len(), 0); + assert_eq!(log1, event_log!()); + let mut expected_node = Node::new_operator(OperatorType::ExclusiveChoice); + expected_node.add_child(Node::new_leaf(None)); + assert_eq!(node, expected_node); + }, + _ => assert!(false), + } + + } + + +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/fallthrough.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/fallthrough.rs new file mode 100644 index 0000000..572cae2 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/fallthrough.rs @@ -0,0 +1,39 @@ +//! Fallthrough labels + +use std::mem::discriminant; +use crate::core::process_models::process_tree::Node; +use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; +use crate::EventLog; + +/// Represents the result of attempting to apply a fall-through rule. +/// +/// Each variant corresponds to a specific fall-through strategy and +/// contains the resulting [`Node`], i.e. Operator-type and children if any, together with the +/// event log(s) derived during its application. +/// +/// If no fall-through rule is applicable, the `Return` variant is used. +/// In this case, the original event log is returned unchanged. 
/// Represents the result of attempting to apply a fall-through rule.
///
/// Each variant corresponds to a specific fall-through strategy and
/// contains the resulting [`Node`], i.e. Operator-type and children if any, together with the
/// event log(s) derived during its application.
///
/// If no fall-through rule is applicable, the `Return` variant is used.
/// In this case, the event log is handed back to the caller so that the next rule can be tried.
///
/// Not to be confused with [`FallThroughLabel`]
pub enum Fallthrough {
    /// The log contained empty traces: an exclusive-choice node with a single silent leaf,
    /// plus the log without its empty traces.
    EmptyTraces(Node, EventLog),
    /// An activity occurred exactly once in every trace: a concurrency node with that activity
    /// as leaf, plus the log from which the activity was removed.
    ActivityOncePerTrace(Node, EventLog),
    /// Result of the activity-concurrent fallthrough.
    // NOTE(review): the producer lives in `activity_concurrent`; presumably the `Split`
    // describes how the log was partitioned - confirm there.
    ActivityConcurrent(Node, EventLog, Split),
    /// A strict tau loop was detected: a loop node with two silent leaves (the first one is a
    /// placeholder for the do part), plus the log split into single iterations.
    StrictTauLoop(Node, EventLog),
    /// A tau loop was detected: a loop node with two silent leaves (the first one is a
    /// placeholder for the do part), plus the log split into single iterations.
    TauLoop(Node, EventLog),
    /// Last resort: a loop over the concurrency of all activities with a silent redo part.
    FlowerModel(Node),
    /// The attempted fall-through did not apply; carries the event log back to the caller.
    Return(EventLog),
}

impl Fallthrough {
    /// Returns `true` if `self` and `other` are the same enum variant.
    ///
    /// Only the discriminants are compared; the payloads (nodes, logs) are ignored.
    pub fn same_enum_variant(&self, other: &Self) -> bool {
        discriminant(self) == discriminant(other)
    }
}
+/// This FT should only be applied if the event log does not contain any empty trace +pub fn flower_model(log: EventLog, event_log_classifier: &EventLogClassifier) -> Fallthrough { + let dfg = discover_dfg_with_classifier(&log, event_log_classifier); + + // get all activities in the directly follows graph + let mut activities: Vec = dfg.activities.iter().map(|(a,_)| a.clone()).collect(); + + // sort activities to allow for a defined behavior or so + (&mut activities).sort(); + + // create a concurrency relation over all non-empty activities + let mut sub_tree = Node::new_operator(OperatorType::Concurrency); + + // add a leaf for each activity + for activity in activities { + sub_tree.add_child(Node::new_leaf(Some(activity))); + } + + // flower root + let mut flower_node_root = Node::new_operator(OperatorType::Loop); + // first child of flower model is a concurrency relation over all non-empty activities - do part + flower_node_root.add_child(sub_tree); + + // add silent transition as second child - redo part + flower_node_root.add_child(Node::new_leaf(None)); + + FlowerModel(flower_node_root) +} + +#[cfg(test)] +mod test_flower_model { + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::process_tree::{Node, OperatorType}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::flower_model::flower_model; + use crate::event_log; + + #[test] + fn test_basic_flower_model_leemans(){ + let log = event_log!( + ["a", "b", "c", "d"], + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + let flower = flower_model(log, &EventLogClassifier::default()); + + // do part consist of all activities in a concurrency relation + let mut expected_sub_flower = Node::new_operator(OperatorType::Concurrency); + expected_sub_flower.add_child(Node::new_leaf(Some(String::from("a")))); + 
expected_sub_flower.add_child(Node::new_leaf(Some(String::from("b")))); + expected_sub_flower.add_child(Node::new_leaf(Some(String::from("c")))); + expected_sub_flower.add_child(Node::new_leaf(Some(String::from("d")))); + + // build expected flower model + let mut expected_flower = Node::new_operator(OperatorType::Loop); + expected_flower.add_child(expected_sub_flower); + + // the redo part is just a silent transition + expected_flower.add_child(Node::new_leaf(None)); + + + + if let Fallthrough::FlowerModel(flower) = flower { + assert_eq!(expected_flower, flower); + } else { + assert!(false); + } + + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs new file mode 100644 index 0000000..b2b39dd --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs @@ -0,0 +1,77 @@ +//! Fallthrough detection utilities for the Inductive Miner. +//! +//! This module contains utilities of the fallthrough rules used by the Inductive Miner when no +//! standard cut can be discovered in the event log. 
+use crate::core::event_data::case_centric::EventLogClassifier; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_concurrent::activity_concurrent_wrapper; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_once_per_trace::activity_once_per_trace_wrapper; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::empty_traces::empty_traces_wrapper; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::flower_model::flower_model; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::strict_tau_loop::strict_tau_loop_wrapper; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::tau_loop::tau_loop_wrapper; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::{Parameter, Parameters}; +use crate::EventLog; + +mod activity_concurrent; +pub mod fallthrough; +mod activity_once_per_trace; +mod empty_traces; +mod flower_model; +mod strict_tau_loop; +mod tau_loop; + + + +/// Applies the sequence of *fallthrough rules* used by the Inductive Miner to an event log. +/// +/// This function iteratively evaluates predefined fallthrough in the following order: +/// - [empty_traces] +/// - [activity_once_per_trace] +/// - [activity_concurrent] +/// - [strict_tau_loop] +/// - [tau_loop] +/// - [flower_model] +/// +/// Whether a Fallthrough is applied at all, is controlled by the provided parameters. +/// Note, that the Flower Model is applied nevertheless. +/// +/// # Parameters +/// - log: The event log to which a Fallthrough rules are applied. 
+/// - event_log_classifier: classifier to identify activities in event log events +/// - parameters: the provided parameters +/// +/// # Returns +/// A `Fallthrough` value representing either: +/// - a discovered process model produced by a fallthrough, or +/// - the flower model if no fallthrough applies or fallthroughs are disabled. +pub fn apply_fallthrough( + mut log: EventLog, + event_log_classifier: &EventLogClassifier, + parameters: &Parameters, +) -> Fallthrough { + let funcs: Vec Fallthrough> = vec![ + empty_traces_wrapper, + activity_once_per_trace_wrapper, + activity_concurrent_wrapper, + strict_tau_loop_wrapper, + tau_loop_wrapper, + ]; + + // check if Fallthrough shall be applied by provided parameters + if parameters.contains(&Parameter::ApplyFallthrough){ + // iterate over all fall throughs + for apply_fallthrough in funcs { + let ft = apply_fallthrough(log, event_log_classifier, parameters); + if let Fallthrough::Return(returned_log) = ft { + log = returned_log; + continue; + } else { + return ft; + } + } + } // else the flower model is applied + + // last possible Option: Flower Model + flower_model(log, event_log_classifier) +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs new file mode 100644 index 0000000..6b17cd6 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs @@ -0,0 +1,273 @@ +//! Strict tau loop fallthrough detection utilities. +//! +//! This module implements the **strict tau loop fallthrough** used by the Inductive Miner. +//! +//! A strict tau loop assumes that a new iteration of the process starts **only when a start activity +//! directly follows an end activity** within the same trace. Such a pattern suggest that the process +//! 
silently returned to the beginning of the workflow via a tau transition between iterations. + + +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::Node; +use crate::core::process_models::process_tree::OperatorType::Loop; +use crate::discovery::case_centric::dfg::discover_dfg_with_classifier; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{Return, StrictTauLoop}; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; +use crate::EventLog; + +/// Splits traces in the event log according to the semantics of a **strict tau loop fallthrough**. +/// +/// A trace is split whenever an **end activity** is immediately followed by a **start activity**. +/// This pattern indicates that one iteration of the process has completed and a new iteration begins +/// via an implicit silent transition. +/// +/// Empty traces may appear in the resulting log if a split occurs at the beginning of a trace segment. +/// +/// # Returns +/// A new [EventLog] where traces are split by the above described logic. 
+fn split_log_according_to_strict_tau(log: EventLog, classifier: &EventLogClassifier) -> EventLog{ + let dfg = discover_dfg_with_classifier(&log, classifier); + let mut result_log = log.clone_without_traces(); + + + for trace in log.traces{ + let mut last_event_was_end = false; + let mut new_trace = trace.clone_without_events(); + + + for event in trace.events{ + let activity = classifier.get_class_identity(&event); + + // check condition + if last_event_was_end && dfg.start_activities.contains(&activity){ + // condition satisfied, the last activity was an end activity, this one is a start, + // we need to split the current trace at this point right now + let help_trace = new_trace.clone_without_events(); + result_log.traces.push(new_trace); + new_trace = help_trace; + } + + // push event to new_trace + new_trace.events.push(event); + + // if this activity is an end activity set the according flag + last_event_was_end = dfg.end_activities.contains(&activity); + + } + + // if the trace hasn't been pushed, we need to push it now -- this includes empty traces + result_log.traces.push(new_trace); + } + // we need to iterate through the entire log and split a trace if after an end activity an start activity appears + result_log +} + +/// Attempt to detect and apply the **strict tau loop fallthrough**. +/// The log is transformed by using [split_log_according_to_strict_tau]. +/// If the operation increased the number of traces in the log, it indicates that the traces contained +/// implicit restarts of the process. +/// +/// In that case, a loop operator is constructed where: +/// +/// - the **do part** represents a single iteration of the process +/// - the **redo part** +/// +/// The resulting loop node and transformed event log are returned. 
+/// +/// # Returns +/// - [StrictTauLoop] if stric loop behavior is detected +/// - [Return] is a silent transition + +fn strict_tau_loop(log: EventLog, classifier: &EventLogClassifier) -> Fallthrough { + let k = log.traces.len(); + let log = split_log_according_to_strict_tau(log, classifier); + + if k < log.traces.len(){ + let mut node = Node::new_operator(Loop); + node.add_child(Node::new_leaf(None)); // temporary at index 0 + node.add_child(Node::new_leaf(None)); // redo part is silent + + + StrictTauLoop( + // first return a process node with the required structure + node, + // secondly return the new event log + log + ) + } else if k > log.traces.len(){ + panic!("Original log contains more traces, than the log split according to strict tau.") + }else { + // default return + Return(log) + } + +} + +/// Public wrapper for [`strict_tau_loop`]. +/// +/// This function simply forwards its arguments to +/// `strict_tau_loop` and exists for consistency +/// with other fall-through detection wrappers. 
+pub fn strict_tau_loop_wrapper(log: EventLog, classifier: &EventLogClassifier, _:&Parameters) -> Fallthrough { + strict_tau_loop(log, classifier) +} + + +#[cfg(test)] +mod test_strict_tau_loop{ + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{Return, StrictTauLoop}; + use crate::{event_log, EventLog}; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::strict_tau_loop::strict_tau_loop; + + + fn cmp_logs(log: Fallthrough, expected: EventLog){ + let classifier = EventLogClassifier::default(); + assert!(if let StrictTauLoop(_, log) = log { + log.traces.len() == expected.traces.len() && !log.traces.iter().zip(expected.traces.iter()).any(|(t0,t1)| + t0.events.len() != t1.events.len() || t0.events.iter().zip(t1.events.iter()).any(|(e0,e1)| { + classifier.get_class_identity(e0) != classifier.get_class_identity(e1) + }) + ) + } else { + false + }) + } + #[test] + fn test_split(){ + let log = event_log!( + ["a", "b", "c", "d"], + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + let expected_log = event_log!( + ["a", "b", "c"], + ["d"], + ["d"], + ["a", "b"], + ["a", "d", "c"], + ["b", "c"], + ["d"] + ); + + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + + + } + + #[test] + fn strict_tau_loop_simple_split() { + let log = event_log!( + ["a", "b", "c", "a", "c"], // contains c (end) followed by a (start) -> split + ); + + // Splitting at c|a -> two traces: "a b c" and "a d" + // L.len() = 1, L1.len() = 2 => strict tau-loop discovered + let expected_log = event_log!( + ["a", "b", "c"], + ["a", "c"], + ); + + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + + } + + + #[test] + fn strict_tau_multiple_splits_in_trace() { + let log = event_log!( + // start set will contain 
"a" (first event of every trace if all traces start with a), + // end set will contain "c" (last events), + // here we have "... c a ... c a ..." -> two splits -> three traces after split + ["a", "b", "c", "a", "b", "c", "a", "b", "c"], + ); + + // Splits at each c|a produce three identical traces "a b c" + // L.len() = 1, L1.len() = 3 => tau-loop discovered + let expected_log = event_log!( + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"], + ); + + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + + + } + + #[test] + fn strict_tau_no_split() { + let log = event_log!( + ["a", "b", "c"], // starts with a, ends with c + ["d", "e"], // starts with d, ends with e + ["f", "g", "h"] // starts with f, ends with h + ); + + // start set = {a, d, f}, end set = {c, e, h} + // There is no occurrence inside any trace of (c|e|h) followed immediately by (a|d|f) + // => L1.len() == L.len() -> no tau-loop found + let expected_log = event_log!( + ["a", "b", "c"], + ["d", "e"], + ["f", "g", "h"] + ); + + if let Return(log) = strict_tau_loop(log, &EventLogClassifier::default()){ + assert_eq!(log, expected_log); + } + + } + + #[test] + fn strict_tau_start_end_overlap() { + let log = event_log!( + ["a", "b", "a", "c", "a"], // start set contains "a", end set contains "a" + ["c", "d"] // trivial trace starting and ending with a + ); + let expected_log = event_log!( + ["a", "b", "a"], // prefix up to first split + ["c", "a"], // remainder after that split + ["c", "d"], // original second trace unchanged + ); + + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + + } + + #[test] + fn strict_tau_single_trace_to_many() { + let log = event_log!( + ["x", "a", "b", "a", "x", "y", "a"], // suppose start set includes x and end set includes a + ); + let expected_log = event_log!( + ["x", "a", "b", "a"], + ["x", "y", "a"], + ); + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + } + + + // 7) Edge case: 
traces of length 1 where start==end; adjacent repetition inside a longer trace causes multiple tiny splits + #[test] + fn strict_tau_length_one_traces_and_adjacent_repeats() { + let log = event_log!( + ["a"], // start/end = a + ["a", "a", "b", "a", "a"], // many a|a adjacencies + ); + + // start set = {a}, end set = {a, a} => {a} + // split at every a|a adjacency inside second trace -> many fragments + // One reasonable expected L1 (fragmenting around adjacent a's) could be: + let expected_log = event_log!( + ["a"], // first trace unchanged + ["a"], // fragment from leading 'a' in second trace + ["a", "b", "a"], // middle fragment + ["a"], // trailing fragment + ); + cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log); + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs new file mode 100644 index 0000000..1cc1bec --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs @@ -0,0 +1,149 @@ +//! Tau loop fallthrough detection utilities. +//! +//! This module implements the **tau loop fallthrough** used by the Inductive Miner. +//! A tau loop is assumed when a trace appears to restart without an explicit visible transition +//! between the end of one iteration and the beginning of the next. 
+ + +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::Node; +use crate::core::process_models::process_tree::OperatorType::Loop; +use crate::discovery::case_centric::dfg::discover_dfg_with_classifier; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::Return; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters; +use crate::EventLog; + +/// Splits the event log according to the semantics of the `tau_loop` fall-through. +/// +/// Each trace is split at every occurrence of a *start activity* +/// Whenever a start activity appears and the current subtrace is +/// non-empty, a new trace is created. +/// +/// Empty traces are not inserted into the resulting log +/// +/// # Returns +/// A new 'Eventlog' in which traces are split at occurrences of start activities. +/// The total number of traces may increase +/// if loop behavior is detected. 
+ +fn split_log_according_to_tau_loop(log: EventLog, classifier: &EventLogClassifier) -> EventLog{ + // simply split a trace at the occurrence of any starting activity + let dfg = discover_dfg_with_classifier(&log, classifier); + let mut result_log = log.clone_without_traces(); + + + for trace in log.traces{ + let mut new_trace = trace.clone_without_events(); + + + for event in trace.events{ + let activity = classifier.get_class_identity(&event); + + + // check condition + if dfg.start_activities.contains(&activity) && !new_trace.events.is_empty(){ + // condition satisfied, this activity is a start activity + let help_trace = new_trace.clone_without_events(); + result_log.traces.push(new_trace); + new_trace = help_trace; + } + + new_trace.events.push(event); + } + + // if the trace hasn't been pushed, we need to push it now, but exclude empty traces + if !new_trace.events.is_empty(){ + result_log.traces.push(new_trace); + } + } + // we need to iterate through the entire log and split a trace if after an end activity an start activity appears + result_log +} + +/// Attempts to apply the 'tau_loop' fallthrough. +/// +/// The algorithm first splits the log using [split_log_according_to_tau_loop]. +/// If this operation increases the number of traces, it indicates that traces contained implicit +/// restarts. 
In that case, a loop operator is created where: +/// +/// - the **do part** represent one iteration of the process +/// - the **redo part** is a silent transition (tau) +/// +/// # Returns +/// - [Fallthrough::TauLoop] if the log split indicates loop behavior +/// - [Fallthrough::Return] if the log split indicates no loop behavior +fn tau_loop(log: EventLog, classifier: &EventLogClassifier) -> Fallthrough { + let k = log.traces.len(); + let log = split_log_according_to_tau_loop(log, classifier); + + if k < log.traces.len(){ + + let mut node = Node::new_operator(Loop); + node.add_child(Node::new_leaf(None)); // placeholder transition, will be replaced + node.add_child(Node::new_leaf(None)); // silent transition as redo part + Fallthrough::TauLoop( + // first return a process node with the required structure + node, + log + ) + } else if k > log.traces.len(){ + panic!("Original log contains more traces, than the log split according to strict tau.") + }else { + // default return + Return(log) + } + +} + +/// Public wrapper for [`tau_loop`]. +/// +/// This function simply forwards its arguments to +/// `tau_loop` and exists for consistency +/// with other fall-through detection wrappers. 
+pub fn tau_loop_wrapper(log: EventLog, classifier: &EventLogClassifier, _:&Parameters) -> Fallthrough { + tau_loop(log, classifier) +} + + +#[cfg(test)] +mod test_tau_loop{ + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::TauLoop; + use crate::discovery::case_centric::inductive_miner_app::fallthrough::tau_loop::tau_loop; + use crate::{event_log, EventLog}; + + fn equal_events(log: &EventLog, o_log: &EventLog, classifier: &EventLogClassifier) -> bool { + log.traces.len() == o_log.traces.len() && !log.traces.iter().zip(o_log.traces.iter()).any(|(t, o)| { + t.events.len() != o.events.len() || t.events.iter().zip(o.events.iter()).any(|(e0,e1)| { + classifier.get_class_identity(e0) != classifier.get_class_identity(e1) + }) + }) + } + #[test] + fn test_split(){ + let log = event_log!( + ["a", "b", "c", "d"], // here i removed the 'd' + ["d", "a", "b"], + ["a", "d", "c"], + ["b", "c", "d"], + ); + + let expected_log = event_log!( + ["a"], + ["b", "c"], + ["d"], + ["d"], + ["a"], + ["b"], + ["a"], + ["d", "c"], + ["b", "c"], + ["d"] + ); + + let TauLoop(_node, log)= tau_loop(log, &EventLogClassifier::default()) else { return assert!(false);}; + + assert!(equal_events(&log, &expected_log, &EventLogClassifier::default())); + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs new file mode 100644 index 0000000..76aee79 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs @@ -0,0 +1,264 @@ +//! 
inductive miner discovery algorithm + +use base_cases::{find_base_case, BaseCases}; +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::{Node, ProcessTree}; +use crate::discovery::case_centric::dfg::discover_dfg_with_classifier; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::find_cut; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::apply_fallthrough; +use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough; +use crate::discovery::case_centric::inductive_miner_app::splits::perform_split; +use crate::discovery::case_centric::inductive_miner_app::structures::parameter::{Parameter, Parameters}; +use crate::EventLog; + +mod cut_finder; +mod structures; +mod splits; +mod fallthrough; +mod base_cases; + + +/// Mines a process tree from the given event log using the Inductive Miner +/// with default parameter settings. +/// +/// This function initializes the default mining parameters, recursively +/// builds the process tree, and applies post-processing (folding) +/// if configured in the parameters. +/// +/// # Parameters +/// - `log`: The event log to mine. +/// - `event_log_classifier`: Classifier used to determine activity identities. +/// +/// # Returns +/// The root `ProcessNode` of the discovered process tree. +pub fn inductive_miner_default_parameters(log: EventLog, event_log_classifier: &EventLogClassifier) -> ProcessTree { + // uses default parameters while for mining the process tree model + let parameters = Parameter::generate_default_parameters(); + let node = build_tree(log, event_log_classifier, ¶meters, 0); + ProcessTree::new(node).fold() +} + + +/// Converts a detected cut into a corresponding process tree node. 
+/// +/// The event log is split according to the cut ([`perform_split`]), and for each resulting +/// sub-log the Inductive Miner is recursively applied. The resulting +/// subtrees become the children of a new process node labeled with +/// the cut's operator. +/// +/// # Parameters +/// - `cut`: The detected cut. +/// - `event_log_classifier`: Activity classifier. +/// - `log`: The event log to split. +/// - `parameters`: Mining parameters. +/// - `depth`: Current recursion depth - debug reasons +/// +/// # Returns +/// A `ProcessNode` representing the cut and its recursively mined children. +fn convert_cut_to_process_node<'a>(cut: Cut<'a>, event_log_classifier: &EventLogClassifier, log: EventLog, parameters: &Parameters, depth: usize) -> Node { + // extract operator and split the original event log + let operator = cut.get_operator(); + let split = perform_split(&log, event_log_classifier, cut); + + // acquire ownership of the split vector + let split = split.get_own(); + + // create new node + let mut cut_node = Node::new_operator(operator); + + // this could be done in parallel + for log in split{ + cut_node.add_child(build_tree(log, &event_log_classifier, parameters,depth +1)); + } + + // return new process node + cut_node +} + +/// Applies fallthrough strategies ([`apply_fallthrough`]) if no valid cut can be found. +/// +/// +/// Fallthroughs ensure that a process tree can always be constructed, +/// even if the log does not yield a structured cut. Depending on the +/// detected pattern, additional recursive mining steps may be performed. +/// +/// # Parameters +/// - `log`: The event log. +/// - `event_log_classifier`: Activity classifier. +/// - `parameters`: Mining parameters. +/// - `depth`: Current recursion depth - debug reasons +/// +/// # Returns +/// A `ProcessNode` representing the fallthrough model. 
+fn fallthrough_finder(log: EventLog, event_log_classifier: &EventLogClassifier, parameters: &Parameters, depth: usize) -> Node { + // default fallthrough + // We are getting a guaranteed fallthrough, default is flower model + match apply_fallthrough(log, event_log_classifier, parameters){ + #[allow(unused_mut)] + Fallthrough::EmptyTraces(mut node, log) | + Fallthrough::ActivityOncePerTrace(mut node, log) => { + node.add_child(build_tree(log, &event_log_classifier, parameters,depth+1)); + node + } + #[allow(unused_mut)] + Fallthrough::StrictTauLoop(mut node, log) | + Fallthrough::TauLoop(mut node, log) => { + if let Node::Operator(op) = &mut node{ + // replace the placeholder node at index 0 + op.children[0] = build_tree(log, event_log_classifier, parameters,depth+1); + } else { + panic!("TauLoop node is not an operator node.") + } + node + } + Fallthrough::ActivityConcurrent(mut node, filtered_out_log, split) => { + // The AND-node already holds the concurrent activity as its first child. + // Build the sub-tree for the extracted (concurrent) activity. + node.add_child(build_tree(filtered_out_log, event_log_classifier, parameters, depth + 1)); + + // The split was already performed inside the fallthrough; add each + // resulting sub-log as a further grand-children + let operator_type = split.get_operator().clone(); + let split = split.get_own(); + let mut child = Node::new_operator(operator_type); + for log in split { + child.add_child(build_tree(log, event_log_classifier, parameters, depth + 1)); + } + node.add_child(child); + node + } + Fallthrough::FlowerModel(node) => { node} // not much to do, this is the default + Fallthrough::Return(_) => { // THis point should not be reached at all, as the flower model is the default + panic!("Fallthrough::Return in build tree function - must not happen"); + } + } + +} + +/// Core recursive function of the Inductive Miner. +/// +/// The algorithm proceeds as follows: +/// 1. 
Check for base cases (empty log or single activity): [`find_base_case`] +/// 2. If none apply, construct the directly-follows graph (DFG) [`DirectlyFollowsGraph::create_from_log`] +/// 3. Attempt to find a valid cut.#: [`find_cut`] +/// 4. If a cut is found, split the log and recurse on each sub-log: [`convert_cut_to_process_node`] +/// 5. Otherwise, apply a fallthrough strategy: [`fallthrough_finder`] +/// +/// # Parameters +/// - `log`: The event log to mine. +/// - `event_log_classifier`: Activity classifier. +/// - `parameters`: Mining parameters. +/// - `depth`: Current recursion depth. +/// +/// # Returns +/// The root `ProcessNode` of the mined (sub)tree. +pub fn build_tree(log: EventLog, event_log_classifier: &EventLogClassifier, parameters: &Parameters, depth: usize) -> Node{ + match find_base_case(&log, event_log_classifier){ + BaseCases::None => { + let dfg = discover_dfg_with_classifier(&log, event_log_classifier); + let cut = find_cut(&dfg, &log, event_log_classifier, parameters); // find cut, if there is some + if cut.is_some(){ + convert_cut_to_process_node(cut.unwrap(), event_log_classifier, log, parameters, depth) + } else { + fallthrough_finder(log, event_log_classifier, parameters, depth) + } + } + BaseCases::Empty => { + Node::new_leaf(None) + } + BaseCases::SingleActivity(activity) => { + Node::new_leaf(Some(activity)) + } + } + +} + + +#[cfg(test)] +mod tests { + + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::process_tree::Node; + use crate::core::process_models::process_tree::OperatorType::{ExclusiveChoice, Loop}; + use crate::discovery::case_centric::inductive_miner_app::inductive_miner_default_parameters; + use crate::event_log; + + #[test] + fn test_works_without_panic() { + let log = event_log!( + ["a", "b", "c", "d"], + ["a", "b", "c", "d", "e", "a", "b", "c", "d"], + ); + let event_log_classifier = EventLogClassifier::default(); + + let node = inductive_miner_default_parameters(log, 
&event_log_classifier); + assert!(node.is_valid()); + } + + #[test] + fn test_loop_over_same_activity(){ + let log = event_log!(["a", "a"]); + + + let node = inductive_miner_default_parameters(log, &EventLogClassifier::default()); + + let mut expected = Node::new_operator(Loop); + expected.add_child(Node::new_leaf(Some(String::from("a")))); + expected.add_child(Node::new_leaf(None)); + + assert!(node.is_valid()); + assert_eq!(node.root, expected); + } + + #[test] + fn test_complex_log(){ + let log = event_log![ + ["a", "b", "d"], + ["a", "d", "b"], + ["a", "b", "c", "a", "b"], + ["a", "d", "c", "a", "d"], + ["a", "b", "d", "c", "a", "d", "b"], + ["a", "d", "b", "c", "a", "b", "d"], + ]; + let node = inductive_miner_default_parameters(log, &EventLogClassifier::default()); + + assert!(node.is_valid()) + } + + + #[test] + fn test_loop_over_same_activity_with_empty_trace(){ + let log = event_log!( + [], + ["a", "a"], + ); + + let node = inductive_miner_default_parameters(log, &EventLogClassifier::default()); + + let mut expected_sub = Node::new_operator(Loop); + expected_sub.add_child(Node::new_leaf(Some(String::from("a")))); + expected_sub.add_child(Node::new_leaf(None)); + + let mut expected = Node::new_operator(ExclusiveChoice); + expected.add_child(Node::new_leaf(None)); + expected.add_child(expected_sub); + + + assert!(node.is_valid()); + assert_eq!(node.root, expected); + } + + #[test] + fn test_empty_trace_plus_base_case(){ + let log = event_log!(["a"],[]); + let node = inductive_miner_default_parameters(log, &EventLogClassifier::default()); + + let mut expected = Node::new_operator(ExclusiveChoice); + expected.add_child(Node::new_leaf(None)); + expected.add_child(Node::new_leaf(Some(String::from("a")))); + + assert!(node.is_valid()); + assert_eq!(node.root, expected); + } +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs new 
file mode 100644 index 0000000..4e94cdb --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs @@ -0,0 +1,127 @@ +//! Utility for splitting an event log according to a concurrency split. +use std::borrow::Cow; +use std::collections::HashSet; +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::OperatorType::Concurrency; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; +use crate::EventLog; + +/// Splits an event log according to the partitions of an AND-Cut (concurrency cut). +/// +/// For each partition of the cut a new sub log is created, the traces belonging to these sublogs are retained events of the original trace, +/// those are filtered s.t. only events whose activity belongs to the partition are retained. +/// +/// The result is a vector of sub-logs, one per partition, that together form +/// the split required for recursive process tree discovery. 
+/// +/// # Returns +/// Some(split) if the cut struct is a valid and cut +/// None if the cut is not a valid and cut +/// +/// +/// # Notes +/// - event order within traces is preserved +/// - empty traces may occur if a trace contains no events from a partition +pub fn and_split<'a>(log: &EventLog, activity_classifier: &EventLogClassifier, cut: Cut<'a>) -> Option { + + // only perform split if the cut is of the type concurrent + if cut.get_operator() != Concurrency{ + return None; + } + + // result vector containing sub logs + let mut result: Vec = Vec::new(); + // the found partitions of the cut + let partitions: Vec>> = cut.get_own(); + + for partition in partitions.into_iter(){ + let mut new_log = log.clone_without_traces(); + + for trace in & log.traces{ + let mut new_trace = trace.clone_without_events(); + + for event in trace.events.iter(){ + let activity = activity_classifier.get_class_identity(event); + if partition.contains(activity.as_str()){ + new_trace.events.push(event.clone()); + } + } + new_log.traces.push(new_trace); + } + + result.push(new_log); + } + Some(Split::new(Concurrency, result)) +} + + +#[cfg(test)] +mod test_and_split{ + use crate::core::chrono::Utc; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::concurrent::concurrent_cut_wrapper; + use crate::discovery::case_centric::inductive_miner_app::splits::concurrency::and_split; + use crate::event_log; + + #[test] + fn test_simple_and_cut_and_split(){ + let time = Utc::now(); // need same timestamp attributes + let test_log = event_log!( + ["A";{"time:timestamp" => time.clone()}, "B";{"time:timestamp" => time.clone()}, "C"; {"time:timestamp" => time.clone()}], + ["A"; {"time:timestamp" => time.clone()}, "C"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}], + ["C"; {"time:timestamp" => time.clone()}, "A"; 
{"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}], + ); + + let dfg = DirectlyFollowsGraph::discover(&test_log); + let cut = concurrent_cut_wrapper(&dfg, None); + assert!(cut.is_some()); + println!("{:?}", cut); + let split = and_split(&test_log,&EventLogClassifier::default(), cut.unwrap()); + assert!(split.is_some()); + let split = split.unwrap().get_own(); + println!("{}", split.len()); + + let log1 = event_log!(["A"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}], ["A"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}], ["A"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}]); + let log2 = event_log!(["C"; {"time:timestamp" => time.clone()}], ["C"; {"time:timestamp" => time.clone()}], ["C"; {"time:timestamp" => time.clone()}]); + + let mut b1 = false; + let mut b2 = false; + + + for log in split{ + if log == log1 && !b1{ + b1 = true; + } else if log == log2 && !b2{ + b2 = true; + } else { + assert!(false); + } + } + } + + #[test] + fn test(){ + let test_log = event_log!([], ["A", "B"], ["B", "A"]); + let dfg = DirectlyFollowsGraph::discover(&test_log); + let cut = concurrent_cut_wrapper(&dfg, None); + assert!(cut.is_some()); + let split = and_split(&test_log,&EventLogClassifier::default(), cut.unwrap()); + assert!(split.is_some()); + let split = split.unwrap().get_own(); + + + for log in split.into_iter().enumerate(){ + println!("Log: {}", log.0); + for t in log.1.traces.into_iter().enumerate(){ + println!("trace{}", t.0); + for e in t.1.events.into_iter().enumerate(){ + println!(" {}", e.0); + } + + } + } + + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs new file mode 100644 index 0000000..bcc823c --- /dev/null +++ 
b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs @@ -0,0 +1,110 @@ +//! Utility for splitting an event log according to an exclusive choice cut. + +use std::collections::HashMap; +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::OperatorType::ExclusiveChoice; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; +use crate::EventLog; + +/// This functions splits an event log according to a provided valid xor cut. +pub fn xor_split<'a>(log: &EventLog, activity_classifier: &EventLogClassifier, cut: Cut<'a>) -> Option { + if cut.get_operator() != ExclusiveChoice || cut.is_empty() { + return None; + } + + let k = cut.len(); + let partition = cut.get_own(); + let mut result: Vec = vec![log.clone_without_traces(); k]; + + todo!(); + + Some(Split::new(ExclusiveChoice, result)) +} + +#[cfg(test)] +mod tests_xor_split{ + use std::collections::HashSet; + use crate::core::chrono::Utc; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::core::process_models::process_tree::OperatorType::ExclusiveChoice; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::exclusive_choice::exclusive_choice_cut_wrapper; + use crate::discovery::case_centric::inductive_miner_app::splits::exclusive_choice::xor_split; + use crate::event_log; + + #[test] + fn test_basic(){ + let log = event_log!( + ["A", "A", "B", "e"], + ["C", "D"] + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = exclusive_choice_cut_wrapper(&dfg); + + assert!(cut.is_some()); + let cut = cut.unwrap(); + + let x = xor_split(&log, &EventLogClassifier::default(), cut); + assert!(x.is_some()); + let x = x.unwrap(); + 
assert_eq!(x.sub_logs.len(), 2); + } + + #[test] + fn test_only_empty_traces_and_cut(){ + let log = event_log!( + [], + [] + ); + + let mut cut = Vec::new(); + cut.push(HashSet::new()); + cut.push(HashSet::new()); + cut.push(HashSet::new()); + let cut = Cut::new(ExclusiveChoice, cut); + let x = xor_split(&log, &EventLogClassifier::default(), cut); + assert!(x.is_some()); + let x = x.unwrap().get_own(); + assert_eq!(x.len(), 3); // exactly 3 sublogs + for log in x{ + // each sublog has exactly 2 empty logs + assert_eq!(log.traces.len(), 2); + for trace in log.traces{ + assert!(trace.events.is_empty()) + } + } + } + + #[test] + fn test_leeman_example(){ + let time = Utc::now(); + let log = event_log!( + ["A";{"time:timestamp" => time.clone()}, "B";{"time:timestamp" => time.clone()}], + ["C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}] + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = exclusive_choice_cut_wrapper(&dfg); + assert!(cut.is_some()); + let cut = cut.unwrap(); + let x = xor_split(&log, &EventLogClassifier::default(), cut); + assert!(x.is_some()); + let x = x.unwrap().get_own(); + assert_eq!(x.len(), 2); + for log in x{ + if log.traces.len() == 1{ + if log.traces[0].events.len() == 2{ + assert_eq!(log, event_log!(["A";{"time:timestamp" => time.clone()}, "B";{"time:timestamp" => time.clone()}] {"concept:name" => 0},)); + } else { + assert_eq!(log, event_log!(["C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}]{"concept:name" => 1})); + } + } else { + // if there is not exactly one trace per log, sth is really wrong + assert!(false); + } + } + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs new file mode 100644 index 0000000..867ef8e --- /dev/null +++ 
b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs @@ -0,0 +1,83 @@ +//! This module contains utilities for splitting an event log according to either exclusive choice, +//! sequence, loop or concurrency cut. + +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::OperatorType; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::splits::concurrency::and_split; +use crate::discovery::case_centric::inductive_miner_app::splits::exclusive_choice::xor_split; +use crate::discovery::case_centric::inductive_miner_app::splits::redo_loop::loop_split; +use crate::discovery::case_centric::inductive_miner_app::splits::sequence::sequence_split; +use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; +use crate::EventLog; + +mod concurrency; +mod sequence; +mod exclusive_choice; +mod redo_loop; +pub mod split; + + +/// A wrapper for the actual split function. +/// +/// This function simply forwards its arguments to [`splitting`]. +/// +/// # Panic +/// This function panics if the provided cut somehow could not be handled by the splitting algorithm, +/// this should only be the case iff the operator of the cut finds no split operator. +pub fn perform_split<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut<'a>) -> Split{ + if let Some(split) = splitting(log, classifier, cut) { + split + } else { + panic!("No split function found for the cut operator.") + } + +} + + +/// Core Split function matching the cut operator to the matching split function. 
+/// +/// [`xor_split`] +/// +/// [`sequence_split`] +/// +/// [`and_split`] +/// +/// [`loop_split`] +fn splitting<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut<'a>) -> Option{ + // match the operator and perform the matching split + match cut.get_operator() { + OperatorType::ExclusiveChoice => { + xor_split(log, classifier, cut) + } + OperatorType::Sequence => { + sequence_split(log, classifier, cut) + } + OperatorType::Concurrency => { + and_split(log, classifier, cut) + } + OperatorType::Loop => { + loop_split(log, classifier, cut) + } + } +} + +#[cfg(test)] +mod test_splits{ + use std::collections::HashSet; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::sequence_cut_wrapper; + use crate::discovery::case_centric::inductive_miner_app::splits::sequence::sequence_split; + use crate::event_log; + + #[test] + fn test_sequence_split() { + let log = event_log!(["a", "b", "c", "d"]); + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(cut.is_some()); + let split = sequence_split(&log, &EventLogClassifier::default(), cut.unwrap()); + assert!(split.is_some()); + } +} \ No newline at end of file diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs new file mode 100644 index 0000000..40928bc --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs @@ -0,0 +1,135 @@ +//! 
Utility for splitting a log according to a loop cut + +use std::collections::HashMap; +use crate::EventLog; +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::OperatorType::Loop; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; + +/// Splits an event log according to the partition of a Loop-cut. +pub fn loop_split<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut<'a>) -> Option { + if Loop != cut.get_operator() { + return None; + } + + let k = cut.len(); + let mut result: Vec = vec![log.clone_without_traces(); k]; + let partitions = cut.get_own(); + + todo!(); + + Some(Split::new(Loop, result)) +} + +#[cfg(test)] +mod test_loop_split { + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::loop_cut::redo_loop_cut_wrapper; + use crate::discovery::case_centric::inductive_miner_app::splits::redo_loop::loop_split; + use crate::event_log; + use crate::EventLog; + + fn events_equal(log: &EventLog, o_log: &EventLog, event_log_classifier: EventLogClassifier) -> bool { + if log.traces.len() == o_log.traces.len() { + for (t0, t1) in log.traces.iter().zip(o_log.traces.iter()) { + if t0.events.len() == t1.events.len() { + for (e0,e1) in t0.events.iter().zip(t1.events.iter()) { + let a0 = event_log_classifier.get_class_identity(e0); + let a1 = event_log_classifier.get_class_identity(e1); + if a0 != a1 { + return false; + } + } + } + } + return true; + } + false + } + #[test] + fn test_loop_split_leemans_example() { + let log = event_log!( + ["a", "b"], + ["a", "b", "c", "a", "b"], + ["a", "b", "c", "a", "b", "c", "a", "b"] + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = redo_loop_cut_wrapper(&dfg); + assert!(cut.is_some()); + let split 
= loop_split(&log, &EventLogClassifier::default(), cut.unwrap()); + assert!(split.is_some()); + let split = split.unwrap(); + assert_eq!(split.sub_logs.len(), 2); + + // created expected event logs + let do_log = event_log!( + ["a", "b"], + ["a", "b"], + ["a", "b"], + ["a", "b"], + ["a", "b"], + ["a", "b"] + ); + + let redo_log = event_log!(["c"], ["c"], ["c"]); + + for log in split.get_own() { + if log.traces.len() == 6 { + // expected length of 6 + assert!(events_equal(&log, &do_log, EventLogClassifier::default())); + } else if log.traces.len() == 3 { + // expected length of 3 + assert!(events_equal(&log, &redo_log, EventLogClassifier::default())); + } else { + assert!(false); + } + } + } + + #[test] + fn test_more_complex_loop() { + let log = event_log!( + ["a", "b"], + ["a", "b", "c", "a", "b"], + ["a", "d", "b"], + ["a", "d", "b", "c", "a", "d", "b"], + ["a", "d", "b", "c", "a", "b"] + ); + + let do_log = event_log!( + ["a", "b"], + ["a", "b"], + ["a", "b"], + ["a", "d", "b"], + ["a", "d", "b"], + ["a", "d", "b"], + ["a", "d", "b"], + ["a", "b"] + ); + + let redo_log = event_log!(["c"], ["c"], ["c"]); + + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = redo_loop_cut_wrapper(&dfg); + assert!(cut.is_some()); + let split = loop_split(&log, &EventLogClassifier::default(), cut.unwrap()); + assert!(split.is_some()); + let split = split.unwrap(); + assert_eq!(split.sub_logs.len(), 2); + + for log in split.get_own() { + if log.traces.len() == do_log.traces.len() { + // expected length of 6 + assert!(events_equal(&log, &do_log, EventLogClassifier::default())); + } else if log.traces.len() == redo_log.traces.len() { + // expected length of 3 + assert!(events_equal(&log, &redo_log, EventLogClassifier::default())); + } else { + assert!(false); + } + } + } +} diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs new file mode 100644 index 
0000000..184cea1 --- /dev/null +++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs @@ -0,0 +1,140 @@ +//! Utility for resolving sequence cuts into sequence splits. +//! +//! +//! # Reference: +//! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.: +//! "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach." +//! Application of Concurrency to System Design (ACSD), 2013. +//! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven +//! University of Technology, 09.05.2017 + +use std::borrow::Cow; +use crate::core::event_data::case_centric::EventLogClassifier; +use crate::core::process_models::process_tree::OperatorType::Sequence; +use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut; +use crate::discovery::case_centric::inductive_miner_app::splits::split::Split; +use crate::EventLog; + +/// Splits an event log according to the partitions of a sequence cut. +/// +/// # Returns +/// - Some(Split) containing as many logs as the number of partitions in the split. 
+/// - None if the cut was not a sequence cut nor valid +pub fn sequence_split<'a>( + log: &EventLog, + activity_classifier: &EventLogClassifier, + cut: Cut<'a>, +) -> Option { + if cut.get_operator() != Sequence { + return None; + } + + let k = cut.len(); + let partitions = cut.get_own(); + + // Create k empty sublogs + let mut result: Vec = vec![log.clone_without_traces(); k]; + + todo!(); + + Some(Split::new(Sequence, result)) +} + +#[cfg(test)] +mod test_sequence_split { + use std::collections::HashSet; + use crate::core::chrono::Utc; + use crate::core::event_data::case_centric::EventLogClassifier; + use crate::core::process_models::dfg::DirectlyFollowsGraph; + use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::sequence_cut_wrapper; + use crate::discovery::case_centric::inductive_miner_app::splits::sequence::sequence_split; + use crate::event_log; + + #[test] + fn test_sequence_split() { + let time = Utc::now(); + let log = event_log!( + ["a"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}], + ["b"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}] + ); + + let dfg = DirectlyFollowsGraph::discover(&log); + let cut = sequence_cut_wrapper(&dfg, &HashSet::new()); + assert!(cut.is_some()); + let cut = cut.unwrap(); + + let split = sequence_split(&log, &EventLogClassifier::default(), cut); + assert!(split.is_some()); + + let split = split.unwrap().get_own(); + // to the actual split + let log1 = event_log!(["a"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}], ["b"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}]); + let log2 = event_log!(["c"; {"time:timestamp" => time.clone()}], ["c"; {"time:timestamp" => time.clone()}]); + + let mut b1 = false; + let mut b2 = false; + for log in split { + // make certain every log is only compared one 
time, as we don't know the order
+            if log == log1 && !b1 {
+                b1 = true;
+            } else if log == log2 && !b2 {
+                b2 = true;
+            } else {
+                // no matching log or multiple matchings -> immediately false
+                assert!(false);
+            }
+        }
+    }
+
+    #[test]
+    fn test_sequence_split2() {
+        // this log contains a sequence cut, as b or c never reach an "a"
+        let time = Utc::now();
+        let log = event_log!(
+            ["a"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}],
+            ["a"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}]
+        );
+        // we cut this log and separate the "a"s from the "b"s and "c"s
+        // by definition the resulting sublogs contain only those elements which are also in the partition
+        // create expected logs
+        let log0 = event_log!(["b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}], ["c"; {"time:timestamp" => time.clone()}]);
+        let log1 = event_log!(["a"; {"time:timestamp" => time.clone()}], ["a"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}]);
+
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        let cut = sequence_cut_wrapper(&dfg, &HashSet::new());
+        assert!(cut.is_some());
+        let cut = cut.unwrap();
+        let split = sequence_split(&log, &EventLogClassifier::default(), cut);
+        assert!(split.is_some());
+        let split = split.unwrap().get_own();
+
+        assert_eq!(split.len(), 2);
+
+        // check that both resulting logs match the expected sequence of activities
+        let mut b0 = false;
+        let mut b1 = false;
+        for log in split {
+            if log == log0 && !b0 {
+                b0 = true;
+            } else if log == log1 && !b1 {
+                b1 = true;
+            }
+        }
+        assert!(b1);
+        assert!(b0);
+    }
+
+
+    #[test]
+    fn test_sequence_split3() {
+        let log = event_log!(["a", "b", "c", "d"]);
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        let cut = sequence_cut_wrapper(&dfg, &HashSet::new());
+        assert!(cut.is_some());
+        let split = sequence_split(&log, &EventLogClassifier::default(), cut.unwrap());
+        assert!(split.is_some());
+
+    }
+
+
+}
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs
new file mode 100644
index 0000000..4921425
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs
@@ -0,0 +1,29 @@
+//! This module contains the basic split used for representing found splits in the Inductive Miner Algorithm.
+use crate::core::process_models::process_tree::OperatorType;
+use crate::EventLog;
+
+/// Helper struct to aggregate the returns of splitting algorithms: the [`OperatorType`] of the
+/// applied split together with the sub-logs it produced.
+pub struct Split {
+    /// Operator type defining the kind of split.
+    pub operator: OperatorType,
+    /// All new logs created by the split.
+    pub sub_logs: Vec<EventLog>,
+}
+
+impl Split {
+    /// Creates a new [`Split`] from an operator type and the sub-logs belonging to it.
+    pub fn new(operator: OperatorType, sub_logs: Vec<EventLog>) -> Split {
+        Self { operator, sub_logs }
+    }
+
+    /// Consumes the split and hands out ownership of its sub-logs.
+    pub fn get_own(self) -> Vec<EventLog> {
+        self.sub_logs
+    }
+
+    /// Returns the operator type of this split.
+    pub fn get_operator(&self) -> OperatorType {
+        self.operator
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs
new file mode 100644
index 0000000..8521cb8
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs
@@ -0,0 +1,285 @@
+//! Utilities for determining the minimum self distance of a given activity in a given trace
+//! or activity sequence.
+
+use crate::EventLog;
+use std::collections::{HashMap, HashSet};
+use crate::core::event_data::case_centric::{EventLogClassifier, Trace};
+use crate::core::process_models::dfg::Activity;
+
+type Index = usize;
+type MinDist = usize;
+type InterveningSet = HashSet<Activity>;
+
+/// Stores, for every activity, its minimum self-distance and the set of activities occurring
+/// between two minimum-distance instances of that activity.
+pub struct MinimumSelfDistance {
+    minimum_distance_relation: HashMap<Activity, (MinDist, HashSet<Activity>)>,
+}
+
+impl MinimumSelfDistance {
+
+    /// Constructs the minimum self-distance relation from a given log and classifier.
+    pub fn new(
+        log: &EventLog, event_log_classifier: &EventLogClassifier) -> MinimumSelfDistance {
+        Self {
+            minimum_distance_relation: Self::minimum_distances_interleave(log, event_log_classifier),
+        }
+    }
+
+
+    /// Returns the minimum self-distance for a given activity and the set of activities occurring
+    /// between two minimum-distance instances of that activity.
+    ///
+    /// Returns `None` if the relation holds no entry for `activity`.
+    pub fn get_minimum_distance(&self, activity: &str) -> Option<&(MinDist, HashSet<Activity>)> {
+        self.minimum_distance_relation.get(activity)
+    }
+
+    /// Collects the class identities of all events located strictly between the positions
+    /// `start` and `end` of `trace`.
+    ///
+    /// Positions that lie outside the trace are ignored, so an `end` beyond the last event
+    /// simply collects up to the end of the trace.
+    fn extract_interleaving_activities(
+        start: Index,
+        end: Index,
+        trace: &Trace,
+        event_log_classifier: &EventLogClassifier,
+    ) -> HashSet<Activity> {
+        // `take(end)` drops everything from `end` onwards, `skip(start + 1)` drops `start` and
+        // everything before it; what remains are exactly the positions start+1..end.
+        trace
+            .events
+            .iter()
+            .take(end)
+            .skip(start + 1)
+            .map(|event| event_log_classifier.get_class_identity(event))
+            .collect()
+    }
+
+    /// Two activities 'a' and 'b' are in a minimum distance relation iff 'b' appears between two
+    /// minimum distance executions of a.
+    ///
+    /// Computes, for every activity that occurs at least twice in `trace`, the smallest number of
+    /// events between two of its consecutive occurrences, together with the set of activities
+    /// observed in between at that minimum distance. When several gaps share the minimum, their
+    /// activity sets are merged. Activities that occur only once get no entry.
+    fn minimum_distances_trace(
+        trace: &Trace,
+        event_log_classifier: &EventLogClassifier,
+    ) -> HashMap<Activity, (MinDist, InterveningSet)> {
+        // position of the most recent occurrence of every activity seen so far
+        let mut last_seen: HashMap<Activity, Index> = HashMap::new();
+        let mut results: HashMap<Activity, (MinDist, InterveningSet)> = HashMap::new();
+        for (index, event) in trace.events.iter().enumerate() {
+            let activity = event_log_classifier.get_class_identity(event);
+            if let Some(&last_index) = last_seen.get(&activity) {
+                // number of events strictly between the two occurrences
+                let dist = index - last_index - 1;
+                // built lazily: only needed when this gap is a new or an equal minimum
+                let between = || {
+                    Self::extract_interleaving_activities(
+                        last_index,
+                        index,
+                        trace,
+                        event_log_classifier,
+                    )
+                };
+                if let Some((prev_dist, acts)) = results.get_mut(&activity) {
+                    if dist < *prev_dist {
+                        // the current gap is shorter than the best one so far, so it replaces it
+                        *prev_dist = dist;
+                        *acts = between();
+                    } else if dist == *prev_dist {
+                        // another gap of the same minimum length: merge its activities
+                        acts.extend(between());
+                    }
+                    // otherwise the current gap is longer than the known minimum and is skipped
+                } else {
+                    // the first time we found a loop
+                    // clone, as `activity` is still needed for `last_seen` below
+                    results.insert(activity.clone(), (dist, between()));
+                }
+            }
+            // update the last seen index of this activity
+            last_seen.insert(activity, index);
+        }
+        results
+    }
+
+    /// Aggregates minimum self-distance information over all traces in the log.
+    ///
+    /// For each activity, the globally smallest self-distance is retained and
+    /// the intervening activity sets for equal minimum distances are merged.
+    fn minimum_distances_interleave(
+        log: &EventLog,
+        event_log_classifier: &EventLogClassifier,
+    ) -> HashMap<Activity, (MinDist, InterveningSet)> {
+        use std::collections::hash_map::Entry;
+
+        let mut results: HashMap<Activity, (MinDist, InterveningSet)> = HashMap::new();
+
+        // Go through every trace and fold its per-trace result into the global one
+        for trace in log.traces.iter() {
+            for (activity, (dist, interleaving_acts)) in
+                Self::minimum_distances_trace(trace, event_log_classifier)
+            {
+                match results.entry(activity) {
+                    Entry::Occupied(mut slot) => {
+                        let (min_dist, interleaving_set) = slot.get_mut();
+                        if dist < *min_dist {
+                            // strictly smaller distance: replace the stored minimum
+                            *min_dist = dist;
+                            *interleaving_set = interleaving_acts;
+                        } else if dist == *min_dist {
+                            // same minimum distance: merge the intervening activities
+                            interleaving_set.extend(interleaving_acts);
+                        }
+                        // skip if the new distance is greater than the already saved distance
+                    }
+                    Entry::Vacant(slot) => {
+                        slot.insert((dist, interleaving_acts));
+                    }
+                }
+            }
+        }
+        results
+    }
+}
+
+
+#[cfg(test)]
+mod test_min_dist{
+    use std::collections::HashSet;
+    use crate::core::event_data::case_centric::EventLogClassifier;
+    use crate::discovery::case_centric::inductive_miner_app::structures::minimum_self_distance::MinimumSelfDistance;
+    use crate::{event_log, trace};
+
+    #[test]
+    fn test_extract_interleaving_activities() {
+        let t = trace!("a", "b", "c", "d", "e", "f");
+        let s = MinimumSelfDistance::extract_interleaving_activities(0, 6, &t, &EventLogClassifier::default());
+        assert_eq!(s, HashSet::from(["b".into(), "c".into(), "d".into(), "e".into(), "f".into()]));
+    }
+    #[test]
+    fn test_extract_from_empty_trace() {
+        let t = trace!();
+        let s = MinimumSelfDistance::extract_interleaving_activities(0, 6, &t, &EventLogClassifier::default());
+        assert!(s.is_empty());
+    }
+
+    // ------------ Tests using only two different activities
+    #[test]
+    fn test_one_loop_distance() {
+        let t = trace!("a", "b", "a");
+
+        let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default());
+        assert!(r.contains_key("a"));
+        assert_eq!(r.get("a").unwrap().0, 1);
+        assert!(r.get("a").unwrap().1.contains("b"));
+    }
+
+    #[test]
+    fn test_loop_zero_distance(){
+        let t = trace!("a","a");
+
+        let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default());
+        assert!(r.contains_key("a"));
+        assert_eq!(r.get("a").unwrap().0, 0);
+        assert!(r.get("a").unwrap().1.is_empty());
+    }
+
+    #[test]
+    fn test_retrieve_smaller_later_loop(){
+        let t = trace!("a", "b", "b", "a", "b", "b", "b", "a", "b", "a");
+
+        let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default());
+        assert!(r.contains_key("a"));
+        assert_eq!(r.get("a").unwrap().0, 1);
+        assert!(r.get("a").unwrap().1.contains("b"));
+
+        // trivial, b should have 0 minimum self distance in this example
+        assert!(r.contains_key("b"));
+        assert_eq!(r.get("b").unwrap().0, 0);
+        assert!(r.get("b").unwrap().1.is_empty());
+    }
+
+
+    // -------------------------------- Test using more than two different activities
+
+    #[test]
+    fn test_complex_trace(){
+        let t = trace!("a", "b", "d", "e", "a", "d", "g", "g", "d","b", "f", "a", "c");
+        let r = MinimumSelfDistance::minimum_distances_trace(&t, &EventLogClassifier::default());
+
+        // check if loops are contained
+        assert!(r.contains_key("a"));
+        assert_eq!(r.get("a").unwrap().0, 3);
+        assert_eq!(r.get("a").unwrap().1, HashSet::from(["b".into(), "d".into(), "e".into()]));
+
+
+        assert!(r.contains_key("b"));
+        assert_eq!(r.get("b").unwrap().0, 7);
+        assert_eq!(r.get("b").unwrap().1, HashSet::from(["a".into(), "e".into(), "d".into(), "g".into()]));
+
+        assert!(!r.contains_key("c"));
+
+        // special case: there are two loops sharing the same minimum distance of two
+        assert!(r.contains_key("d"));
+        assert_eq!(r.get("d").unwrap().0, 2);
+        // merged activities
+        assert_eq!(r.get("d").unwrap().1, HashSet::from(["e".into(), "a".into(), "g".into()]));
+
+
+        // not appearing twice
+        assert!(!r.contains_key("e"));
+        assert!(!r.contains_key("f"));
+
+        // g directly follows g exactly once
+        assert!(r.contains_key("g"));
+        assert_eq!(r.get("g").unwrap().0, 0);
+        assert!(r.get("g").unwrap().1.is_empty());
+    }
+
+
+    #[test]
+    fn test_empty_log(){
+        let log = event_log!();
+        let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default());
+
+        assert!(r.is_empty());
+    }
+
+    #[test]
+    fn test_zero_loops_log(){
+        let log = event_log!(["a", "a"], ["b", "b"]);
+        let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default());
+
+        assert!(r.contains_key("a"));
+        assert_eq!(r.get("a").unwrap().0, 0);
+
+        assert!(r.contains_key("b"));
+        assert_eq!(r.get("b").unwrap().0, 0);
+    }
+
+    #[test]
+    fn test_find_smaller_loop(){
+        let log = event_log!(["a", "a"], ["a", "b", "a"]);
+        let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default());
+
+        assert!(r.contains_key("a"));
+        assert_eq!(r.get("a").unwrap().0, 0);
+
+        assert!(!r.contains_key("b"));
+    }
+
+    #[test]
+    fn test_merge_relations(){
+        let log = event_log!(["a", "c", "a"], ["a", "b", "a"]);
+        let r = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default());
+
+        assert!(r.contains_key("a"));
+        assert_eq!(r.get("a").unwrap().0, 1);
+        assert_eq!(r.get("a").unwrap().1, HashSet::from(["b".into(), "c".into()]));
+    }
+
+
+}
+
+
+
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs
new file mode 100644
index 0000000..7a8aa5d
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs
@@ -0,0 +1,3 @@
+//! This module contains additional structures needed for the implementation of the Inductive Miner.
+pub mod parameter;
+pub mod minimum_self_distance;
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs
new file mode 100644
index 0000000..9746001
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs
@@ -0,0 +1,33 @@
+//! Parameter settings for controlling the inductive miner implementation
+use std::collections::HashSet;
+
+/// A helper type aggregating the parameters a user may want the inductive miner to adhere to.
+/// A `HashSet` is used so that every parameter is contained at most once.
+pub type Parameters = HashSet<Parameter>;
+
+
+
+/// Helper enum to express which option shall be activated in the inductive miner
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum Parameter{
+    /// Consider the minimum self distance while looking for a concurrent cut.
+    MinimumSelfDistance,
+    /// Apply fallthroughs (the Flower Model will always be applied).
+    ApplyFallthrough,
+    //-------Ideas for additional parameters:
+    // Multiprocessing
+    // StrictSequenceCut, // apply strict sequence cut additionally to the 'ordinary' sequence cut
+    // FoldTree, // automatically fold tree
+}
+
+
+
+impl Parameter{
+
+    /// Generates a `HashSet` containing all default parameters, i.e.:
+    /// - [`Parameter::MinimumSelfDistance`]: the minimum self distance is calculated and used
+    ///   while looking for a concurrent cut
+    /// - [`Parameter::ApplyFallthrough`]: fallthroughs are applied
+    pub fn generate_default_parameters() -> Parameters{
+        HashSet::from([Parameter::MinimumSelfDistance, Parameter::ApplyFallthrough])
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/mod.rs b/process_mining/src/discovery/case_centric/mod.rs
index 8c44111..72193bd 100644
--- a/process_mining/src/discovery/case_centric/mod.rs
+++ b/process_mining/src/discovery/case_centric/mod.rs
@@ -2,4 +2,6 @@ pub
mod alphappp; +pub mod inductive_miner_app; + pub mod dfg;