diff --git a/process_mining/.idea/.gitignore b/process_mining/.idea/.gitignore
new file mode 100644
index 0000000..ab1f416
--- /dev/null
+++ b/process_mining/.idea/.gitignore
@@ -0,0 +1,10 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Ignored default folder with query files
+/queries/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/process_mining/.idea/vcs.xml b/process_mining/.idea/vcs.xml
new file mode 100644
index 0000000..6c0b863
--- /dev/null
+++ b/process_mining/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs b/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs
index 543f39e..3d0b06b 100644
--- a/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs
+++ b/process_mining/src/core/process_models/case_centric/process_tree/process_tree_struct.rs
@@ -14,7 +14,7 @@ pub enum LeafLabel {
///
/// Node in a process tree
///
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub enum Node {
/// Operator node of a process tree
Operator(Operator),
@@ -64,12 +64,127 @@ impl Node {
Node::Leaf(_) => true,
}
}
+
+    /// Flattens nested occurrences of the same associative operator into a
+    /// single n-ary operator node.
+    ///
+    /// The fold runs **bottom-up**: every child is folded first, and then each
+    /// folded child carrying the same operator type as this node is replaced,
+    /// in place, by its own children (child order is preserved).
+    ///
+    /// **Example** – `SEQ(SEQ(a, b), c)` becomes `SEQ(a, b, c)`.
+    ///
+    /// Leaves are returned unchanged, and non-associative operators keep their
+    /// nesting. On `XOR` nodes, only the first direct `tau` child is kept;
+    /// later `tau` children are redundant alternatives and are removed.
+    pub fn fold(self) -> Self {
+        let op = match self {
+            Node::Leaf(_) => return self,
+            Node::Operator(op) => op,
+        };
+        let operator_type = op.operator_type;
+
+        // Fold the children first so nested chains collapse in a single pass.
+        let folded: Vec<Node> = op.children.into_iter().map(Node::fold).collect();
+
+        // An associative operator absorbs the children of any child that
+        // carries the same operator type.
+        let mut children = if operator_type.is_associative() {
+            let mut flattened = Vec::with_capacity(folded.len());
+            for child in folded {
+                match child {
+                    // Binding by move in the guarded arm makes a second
+                    // destructuring of `child` unnecessary.
+                    Node::Operator(inner) if inner.operator_type == operator_type => {
+                        flattened.extend(inner.children);
+                    }
+                    other => flattened.push(other),
+                }
+            }
+            flattened
+        } else {
+            folded
+        };
+
+        // XOR only: several `tau` alternatives are equivalent to one, so keep
+        // the first direct `tau` child and drop later ones. Duplicates can
+        // appear once nested XOR nodes have been inlined above.
+        if operator_type == OperatorType::ExclusiveChoice {
+            let mut tau_seen = false;
+            children.retain(|child| {
+                let is_tau = matches!(
+                    child,
+                    Node::Leaf(Leaf { activity_label: LeafLabel::Tau })
+                );
+                let duplicate = is_tau && tau_seen;
+                tau_seen |= is_tau;
+                !duplicate
+            });
+        }
+
+        Node::Operator(Operator {
+            operator_type,
+            children,
+        })
+    }
+}
+
+impl std::fmt::Display for OperatorType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            OperatorType::Sequence => "SEQ",
+            OperatorType::ExclusiveChoice => "XOR",
+            OperatorType::Concurrency => "AND",
+            OperatorType::Loop => "LOOP",
+        })
+    }
+}
+
+impl std::fmt::Display for LeafLabel {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            LeafLabel::Activity(name) => f.write_str(name),
+            LeafLabel::Tau => f.write_str("tau"),
+        }
+    }
+}
+
+impl std::fmt::Display for Leaf {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        std::fmt::Display::fmt(&self.activity_label, f)
+    }
+}
+
+impl std::fmt::Display for Node {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Node::Leaf(leaf) => std::fmt::Display::fmt(leaf, f),
+            Node::Operator(op) => std::fmt::Display::fmt(op, f),
+        }
+    }
+}
+
+impl std::fmt::Display for Operator {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}(", self.operator_type)?;
+        for (idx, child) in self.children.iter().enumerate() {
+            let separator = if idx == 0 { "" } else { ", " };
+            write!(f, "{separator}{child}")?;
+        }
+        f.write_str(")")
+    }
+}
+
+impl std::fmt::Display for ProcessTree {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        std::fmt::Display::fmt(&self.root, f)
+    }
}
///
/// Operator type enum for [`Operator`]
///
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)]
pub enum OperatorType {
/// Sequence operator
Sequence,
@@ -81,6 +196,25 @@ pub enum OperatorType {
Loop,
}
+impl OperatorType {
+    /// Reports whether nested nodes of this operator may be merged into one.
+    ///
+    /// [`Sequence`](OperatorType::Sequence),
+    /// [`ExclusiveChoice`](OperatorType::ExclusiveChoice) and
+    /// [`Concurrency`](OperatorType::Concurrency) are associative.
+    /// [`Loop`](OperatorType::Loop) is not: its first child (the body) plays a
+    /// different role than the remaining children (redo / exit), so flattening
+    /// nested loops would change the language of the tree.
+    pub fn is_associative(self) -> bool {
+        match self {
+            OperatorType::Sequence
+            | OperatorType::ExclusiveChoice
+            | OperatorType::Concurrency => true,
+            OperatorType::Loop => false,
+        }
+    }
+}
+
///
/// Object-centric process tree struct that contains [`Node`] as root
///
@@ -98,6 +232,15 @@ impl ProcessTree {
Self { root }
}
+    /// Returns the tree with every chain of nested, identical associative
+    /// operators flattened.
+    ///
+    /// Delegates to [`Node::fold`] on the root, which works recursively from
+    /// the leaves upward across the whole tree.
+    pub fn fold(self) -> Self {
+        Self { root: self.root.fold() }
+    }
+
///
/// Returns `true` if all nodes have the right number of children and if all operators have
/// eventually descendants that are leaves.
@@ -176,6 +319,7 @@ impl ProcessTree {
/// An operator node in a process tree
///
#[derive(Debug, Serialize, Deserialize)]
+#[derive(PartialEq)]
pub struct Operator {
/// The [`OperatorType`] of the tree itself
pub operator_type: OperatorType,
@@ -221,6 +365,7 @@ impl Operator {
///
/// A leaf in a process tree
///
+#[derive(PartialEq)]
pub struct Leaf {
/// The silent or non-silent activity label [`LeafLabel`]
pub activity_label: LeafLabel,
@@ -250,6 +395,190 @@ mod tests {
Leaf, Node, Operator, OperatorType, ProcessTree,
};
+ // ── folding tests ────────────────────────────────────────────────────────
+
+    #[test]
+    fn fold_flat_sequence_unchanged() {
+        // A flat SEQ(a, b, c) contains nothing to inline, so folding must
+        // hand back a structurally identical tree.
+        let build = || {
+            let mut seq = Operator::new(OperatorType::Sequence);
+            for label in ["a", "b", "c"] {
+                seq.children.push(Node::new_leaf(Some(label.into())));
+            }
+            Node::Operator(seq)
+        };
+
+        let folded = ProcessTree::new(build()).fold();
+        assert_eq!(folded.root, build());
+    }
+
+    #[test]
+    fn fold_nested_sequence() {
+        // Input:    SEQ(SEQ(a, b), c)
+        // Expected: SEQ(a, b, c)
+        let leaf = |label: &str| Node::new_leaf(Some(label.into()));
+
+        let mut nested = Operator::new(OperatorType::Sequence);
+        nested.children.extend([leaf("a"), leaf("b")]);
+
+        let mut root = Operator::new(OperatorType::Sequence);
+        root.children.extend([Node::Operator(nested), leaf("c")]);
+
+        let mut flat = Operator::new(OperatorType::Sequence);
+        flat.children.extend([leaf("a"), leaf("b"), leaf("c")]);
+
+        let folded = ProcessTree::new(Node::Operator(root)).fold();
+
+        assert_eq!(folded.root, Node::Operator(flat));
+    }
+
+    #[test]
+    fn fold_deeply_nested_sequence() {
+        // Input:    SEQ(SEQ(SEQ(a, b), c), d)
+        // Expected: SEQ(a, b, c, d)
+        let leaf = |label: &str| Node::new_leaf(Some(label.into()));
+
+        // Build the chain inside-out.
+        let mut level3 = Operator::new(OperatorType::Sequence);
+        level3.children.extend([leaf("a"), leaf("b")]);
+
+        let mut level2 = Operator::new(OperatorType::Sequence);
+        level2.children.extend([Node::Operator(level3), leaf("c")]);
+
+        let mut level1 = Operator::new(OperatorType::Sequence);
+        level1.children.extend([Node::Operator(level2), leaf("d")]);
+
+        let mut flat = Operator::new(OperatorType::Sequence);
+        flat.children
+            .extend([leaf("a"), leaf("b"), leaf("c"), leaf("d")]);
+
+        let folded = ProcessTree::new(Node::Operator(level1)).fold();
+
+        assert_eq!(folded.root, Node::Operator(flat));
+    }
+
+    #[test]
+    fn fold_xor_nested() {
+        // Input:    XOR(XOR(a, b), c)
+        // Expected: XOR(a, b, c)
+        let leaf = |label: &str| Node::new_leaf(Some(label.into()));
+
+        let mut nested = Operator::new(OperatorType::ExclusiveChoice);
+        nested.children.extend([leaf("a"), leaf("b")]);
+
+        let mut root = Operator::new(OperatorType::ExclusiveChoice);
+        root.children.extend([Node::Operator(nested), leaf("c")]);
+
+        let mut flat = Operator::new(OperatorType::ExclusiveChoice);
+        flat.children.extend([leaf("a"), leaf("b"), leaf("c")]);
+
+        let folded = ProcessTree::new(Node::Operator(root)).fold();
+
+        assert_eq!(folded.root, Node::Operator(flat));
+    }
+
+    #[test]
+    fn fold_concurrency_nested() {
+        // Input:    AND(AND(a, b), c)
+        // Expected: AND(a, b, c)
+        let leaf = |label: &str| Node::new_leaf(Some(label.into()));
+
+        let mut nested = Operator::new(OperatorType::Concurrency);
+        nested.children.extend([leaf("a"), leaf("b")]);
+
+        let mut root = Operator::new(OperatorType::Concurrency);
+        root.children.extend([Node::Operator(nested), leaf("c")]);
+
+        let mut flat = Operator::new(OperatorType::Concurrency);
+        flat.children.extend([leaf("a"), leaf("b"), leaf("c")]);
+
+        let folded = ProcessTree::new(Node::Operator(root)).fold();
+
+        assert_eq!(folded.root, Node::Operator(flat));
+    }
+
+    #[test]
+    fn fold_does_not_merge_different_operators() {
+        // SEQ(XOR(a, b), c): the child operator differs from its parent, so
+        // folding must leave the tree exactly as it was built.
+        let leaf = |label: &str| Node::new_leaf(Some(label.into()));
+
+        // The same tree is needed twice: once as input, once as expectation.
+        let build = || {
+            let mut choice = Operator::new(OperatorType::ExclusiveChoice);
+            choice.children.extend([leaf("a"), leaf("b")]);
+
+            let mut seq = Operator::new(OperatorType::Sequence);
+            seq.children.extend([Node::Operator(choice), leaf("c")]);
+
+            Node::Operator(seq)
+        };
+
+        let folded = ProcessTree::new(build()).fold();
+        let expected = build();
+
+        assert_eq!(folded.root, expected);
+    }
+
+    #[test]
+    fn fold_does_not_merge_loop() {
+        // LOOP(LOOP(a, tau), tau): LOOP is not associative, so the nested
+        // loop must survive folding untouched.
+        let build = || {
+            let mut inner = Operator::new(OperatorType::Loop);
+            inner.children.push(Node::new_leaf(Some("a".into())));
+            inner.children.push(Node::new_leaf(None));
+
+            let mut outer = Operator::new(OperatorType::Loop);
+            outer.children.push(Node::Operator(inner));
+            outer.children.push(Node::new_leaf(None));
+
+            Node::Operator(outer)
+        };
+
+        let folded = ProcessTree::new(build()).fold();
+        let expected = build();
+
+        assert_eq!(folded.root, expected);
+    }
+
+    #[test]
+    fn fold_mixed_tree() {
+        // Input:    SEQ(SEQ(a, b), LOOP(c, tau), SEQ(d, e))
+        // Expected: SEQ(a, b, LOOP(c, tau), d, e)
+        // Both nested sequences are inlined at their original positions,
+        // while the loop between them is kept as a subtree.
+        let leaf = |label: &str| Node::new_leaf(Some(label.into()));
+
+        // LOOP(c, tau) occurs in the input and in the expectation.
+        let loop_node = || {
+            let mut lp = Operator::new(OperatorType::Loop);
+            lp.children.extend([leaf("c"), Node::new_leaf(None)]);
+            Node::Operator(lp)
+        };
+
+        let mut head = Operator::new(OperatorType::Sequence);
+        head.children.extend([leaf("a"), leaf("b")]);
+
+        let mut tail = Operator::new(OperatorType::Sequence);
+        tail.children.extend([leaf("d"), leaf("e")]);
+
+        let mut input = Operator::new(OperatorType::Sequence);
+        input.children.extend([
+            Node::Operator(head),
+            loop_node(),
+            Node::Operator(tail),
+        ]);
+
+        let mut flat = Operator::new(OperatorType::Sequence);
+        flat.children
+            .extend([leaf("a"), leaf("b"), loop_node(), leaf("d"), leaf("e")]);
+
+        let folded = ProcessTree::new(Node::Operator(input)).fold();
+        assert_eq!(folded.root, Node::Operator(flat));
+    }
+
#[test]
fn is_valid_test() {
// SEQ() is not valid
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs
new file mode 100644
index 0000000..fe61edf
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/base_cases/mod.rs
@@ -0,0 +1,51 @@
+//! This module contains utilities for detecting the base cases 'Empty' and 'Single Activity' used in the Inductive Miner.
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::EventLog;
+
+
+/// Result of the base-case check: which base case, if any, applies to an event log.
+#[derive(Debug)]
+pub enum BaseCases {
+ None, // no base case applies
+ Empty, // the event log contains no traces at all
+ SingleActivity(String) // every trace consists of exactly one event, all with this same activity
+}
+
+/// Checks whether the 'single activity' base case applies to `log`.
+/// Returns the shared class identity if every trace holds exactly one event and
+/// all of them are classified identically; `None` otherwise (also for no traces).
+fn check_single_activity_case(log: &EventLog, classifier: &EventLogClassifier) -> Option<String> {
+    let mut activity: Option<String> = None;
+    for trace in &log.traces {
+        // Empty traces and traces with several events rule the base case out.
+        let event = match trace.events.as_slice() {
+            [event] => event,
+            _ => return None,
+        };
+        let act = classifier.get_class_identity(event);
+        match &activity {
+            Some(expected) if *expected != act => return None,
+            Some(_) => {}
+            None => activity = Some(act),
+        }
+    }
+    activity
+}
+
+/// Checks whether a base case applies to the given event log.
+///
+/// There are two possible base cases:
+/// - 'empty': the event log contains no traces at all (a log made up solely of empty traces does not count),
+/// - 'single activity': every trace contains exactly one event, and all these events share the same activity.
+pub fn find_base_case(log: &EventLog, event_log_classifier: &EventLogClassifier) -> BaseCases {
+    // Only a log without any trace is 'empty'; a log of empty traces yields `BaseCases::None`.
+    if log.traces.is_empty() {
+        return BaseCases::Empty;
+    }
+
+    match check_single_activity_case(log, event_log_classifier) {
+        Some(activity) => BaseCases::SingleActivity(activity),
+        // Neither base case applies.
+        None => BaseCases::None,
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs
new file mode 100644
index 0000000..db96c18
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/concurrent.rs
@@ -0,0 +1,244 @@
+//! Utility for detecting a concurrency cut in a given Directly Follows Graph.
+
+use std::borrow::Cow;
+use std::collections::HashSet;
+use crate::core::process_models::dfg::DirectlyFollowsGraph;
+use crate::core::process_models::process_tree::OperatorType;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+use crate::discovery::case_centric::inductive_miner_app::structures::minimum_self_distance::MinimumSelfDistance;
+
+/// Intended to partition the activities of `dfg` into components such that activities in different
+/// components can occur concurrently; activities that are not connected in both directions are meant to share a component.
+///
+/// With `mindist` given, activities that are in a minimum self distance relation with other
+/// activities are additionally meant to be forced into the same component.
+///
+/// # Panics
+/// Not implemented yet: the body is `todo!()`, so every call currently panics.
+fn concurrent_cut<'a>(dfg: &'a DirectlyFollowsGraph<'_>, mindist: &Option) -> Option>>> {
+ todo!()
+}
+
+
+
+/// Examines whether a concurrency cut can be applied to a given Directly Follows Graph.
+///
+/// Public wrapper for [`concurrent_cut`].
+///
+/// # Parameters
+/// - `dfg`: the Directly Follows Graph to examine
+/// - `mindist`: optional minimum self distance structure, handed on to [`concurrent_cut`]
+/// # Returns
+/// - `Some(cut)` with at least two partitions of concurrent activities
+/// - `None` if `dfg` lacks start or end activities, or if fewer than two partitions are found
+/// # Panics
+/// Currently panics for every `dfg` with start and end activities, since [`concurrent_cut`] is still `todo!()`.
+pub fn concurrent_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>, mindist: Option) -> Option> {
+    // A graph without start activities or without end activities admits no cut.
+    let lacks_boundary = dfg.start_activities.is_empty() || dfg.end_activities.is_empty();
+    if lacks_boundary {
+        return None;
+    }
+
+    // Fewer than two partitions are reported as "no cut".
+    match concurrent_cut(dfg, &mindist) {
+        Some(partitions) if partitions.len() > 1 => {
+            Some(Cut::new(OperatorType::Concurrency, partitions))
+        }
+        _ => None,
+    }
+}
+
+#[cfg(test)]
+mod test_parallel_cut {
+ use std::borrow::Cow;
+ use std::collections::{HashMap, HashSet};
+ use crate::core::process_models::dfg::DirectlyFollowsGraph;
+ use crate::discovery::case_centric::inductive_miner_app::cut_finder::concurrent::{concurrent_cut, concurrent_cut_wrapper};
+ use crate::event_log;
+
+ #[test]
+ fn test_leeman_example() {
+ let log = event_log!(
+ ["a", "b", "c"],
+ ["a", "c", "b"],
+ ["c", "a", "b"]
+ );
+ let dfg = &DirectlyFollowsGraph::discover(&log);
+ let cut = concurrent_cut_wrapper(&dfg, None);
+ assert!(cut.is_some());
+ let mut partitions = cut.unwrap().get_own();
+ // sort by partition size so the comparison below does not depend on the returned order
+ partitions.sort_by(|x,y| x.len().cmp(&y.len()));
+ assert_eq!(
+ partitions,
+ Vec::from([
+ HashSet::from(["c".into()]),
+ HashSet::from(["a".into(), "b".into()])
+ ])
+ );
+ }
+
+ #[test]
+ fn test_parallel_cut_with_trailing_activity() {
+ let dfg = DirectlyFollowsGraph::discover(
+ &event_log!(["a", "b", "c"], ["b", "a", "c"])
+ );
+ let _cut = concurrent_cut(&dfg, &None);
+ }
+
+ #[test]
+ fn test_easy_parallel_cut_wrapper() {
+ let dfg = DirectlyFollowsGraph::discover(
+ &event_log!(["a", "b"], ["b", "a"])
+ );
+ let cut = concurrent_cut_wrapper(&dfg, None);
+ assert!(cut.is_some());
+ assert_eq!(cut.unwrap().len(), 2);
+ }
+
+ #[test]
+ fn test_three_branch_parallel() {
+ let dfg = DirectlyFollowsGraph::discover(
+ &event_log!(
+ ["a", "b"],
+ ["b", "c"],
+ ["c", "a"],
+ ["a", "c"],
+ ["b", "a"],
+ ["c", "b"]
+ )
+ );
+
+ let cut = concurrent_cut_wrapper(&dfg, None);
+ assert!(cut.is_some());
+
+ let parts = cut.unwrap();
+ assert_eq!(parts.len(), 3);
+
+ let flattened: HashSet> = parts.partitions
+ .iter()
+ .flat_map(|p| p.iter().map(|s| s.clone()))
+ .collect();
+
+ assert!(flattened.contains("a"));
+ assert!(flattened.contains("b"));
+ assert!(flattened.contains("c"));
+ }
+
+ #[test]
+ fn test_sequence_cut_in_parallel() {
+ let dfg = DirectlyFollowsGraph::discover(
+ &event_log!(["a", "b", "c"], ["a", "d", "c"])
+ );
+ assert!(concurrent_cut_wrapper(&dfg, None).is_none());
+ }
+
+ #[test]
+ fn test_hard_parallel_cut_multiple_starts_and_endings() {
+ let mut dfg = DirectlyFollowsGraph::new();
+ dfg.activities = HashMap::from([("a".into(), 1), ("b".into(), 2), ("c".into(), 3)]);
+
+ dfg.start_activities = HashSet::from(["a".into()]);
+ dfg.end_activities = HashSet::from(["c".into(), "b".into()]);
+ dfg.directly_follows_relations = HashMap::from([
+ (("a".into(), "b".into()), 1),
+ (("b".into(), "a".into()), 1),
+ // a <-> c
+ (("a".into(), "c".into()), 1),
+ (("c".into(), "a".into()), 1),
+ // c <-> b
+ (("b".into(), "c".into()), 1),
+ (("c".into(), "b".into()), 1),
+ ]);
+
+ assert!(concurrent_cut_wrapper(&dfg, None).is_none());
+
+ // two start activities (a, b) but only a single end activity (c)
+ dfg.start_activities = HashSet::from(["a".to_string(), "b".to_string()]);
+ dfg.end_activities = HashSet::from(["c".to_string()]);
+ assert!(concurrent_cut_wrapper(&dfg, None).is_none());
+
+ // b is now both a start and an end activity
+ dfg.end_activities = HashSet::from(["c".to_string(), "b".to_string()]);
+ assert!(concurrent_cut_wrapper(&dfg, None).is_some());
+
+ // every activity is both a start and an end activity
+ dfg.start_activities = HashSet::from(["a".to_string(), "b".to_string(), "c".to_string()]);
+ dfg.end_activities = HashSet::from(["a".to_string(), "b".to_string(), "c".to_string()]);
+ assert!(concurrent_cut_wrapper(&dfg, None).is_some());
+
+ // b is neither a start nor an end activity; an AND cut with two partitions is still expected
+ dfg.start_activities = HashSet::from(["a".to_string(), "c".to_string()]);
+ dfg.end_activities = HashSet::from(["a".to_string(), "c".to_string()]);
+ let cut = concurrent_cut_wrapper(&dfg, None);
+ assert!(cut.is_some());
+ assert_eq!(cut.unwrap().len(), 2);
+ }
+
+ #[test]
+ fn test_perfect_parallel_three_branches() {
+ // all six permutations of a, b, c, so every pair of activities follows each other in both directions
+ let log = event_log!(
+ ["a", "b", "c"],
+ ["a", "c", "b"],
+ ["b", "a", "c"],
+ ["b", "c", "a"],
+ ["c", "a", "b"],
+ ["c", "b", "a"]
+ );
+ let dfg = DirectlyFollowsGraph::discover(&log);
+ let cut = concurrent_cut_wrapper(&dfg, None);
+ assert!(cut.is_some());
+ assert_eq!(cut.unwrap().get_own().len(), 3);
+ }
+
+ #[test]
+ fn test_sequence_cut() {
+ let log = event_log!(
+ ["a", "b", "c"],
+ ["a", "b", "c"],
+ ["a", "b", "c"]
+ );
+ let dfg = DirectlyFollowsGraph::discover(&log);
+
+ assert!(concurrent_cut_wrapper(&dfg, None).is_none());
+ }
+
+ #[test]
+ fn test_xor_cut() {
+ let log = event_log!(["a", "b"], ["c", "d"], ["a", "b"]);
+ let dfg = DirectlyFollowsGraph::discover(&log);
+
+ // The XOR components would be {a, b} and {c, d}.
+ // No concurrency cut is expected, as no edges connect {a, b} with {c, d}.
+ assert!(concurrent_cut_wrapper(&dfg, None).is_none());
+ }
+
+ #[test]
+ fn test_noisy_parallel_fails_without_filter() {
+ let log = event_log!(
+ // omitted trace: a, b, c (so the edges a -> b and b -> c never occur)
+ ["b", "a", "c"],
+ ["a", "c", "b"],
+ // omitted trace: b, c, a (so the edge c -> a never occurs)
+ ["c", "b", "a"] // the trace c, a, b is omitted as well
+ );
+ // only b -> a, a -> c and c -> b occur, so no pair of activities is connected in both directions
+ let dfg = DirectlyFollowsGraph::discover(&log);
+
+ assert!(concurrent_cut_wrapper(&dfg, None).is_none());
+ }
+
+ #[test]
+ fn test_loop_cut() {
+ let log = event_log!(
+ ["a"], // a on its own
+ ["a", "b", "a"], // a, then one repetition via b
+ ["a", "b", "a", "b", "a"] // a, then two repetitions via b
+ );
+ let dfg = DirectlyFollowsGraph::discover(&log);
+
+ assert!(concurrent_cut_wrapper(&dfg, None).is_none());
+ }
+}
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs
new file mode 100644
index 0000000..2ee50da
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/cut.rs
@@ -0,0 +1,47 @@
+//! This module contains a struct used for representing a found cut by specifying the found partitions and cut type.
+use std::borrow::Cow;
+use std::collections::HashSet;
+use crate::core::process_models::process_tree::OperatorType;
+
+/// A cut of a directly follows graph: an operator plus the activity partitions it induces.
+#[derive(Debug, PartialEq)]
+pub struct Cut<'a> {
+    /// The operator (e.g. sequence, xor) that this cut is about.
+    pub operator: OperatorType,
+    /// The activity sets the cut splits the graph or log into, one set per partition.
+    pub partitions: Vec<HashSet<Cow<'a, str>>>,
+}
+
+impl<'a> Cut<'a> {
+    /// Creates a new cut from an operator and its partitions.
+    ///
+    /// Nothing is validated here: the values are stored as given, so the
+    /// caller must ensure that `partitions` form a valid cut for the chosen
+    /// operator.
+    pub fn new(operator: OperatorType, partitions: Vec<HashSet<Cow<'a, str>>>) -> Cut<'a> {
+        Self { operator, partitions }
+    }
+
+    /// Returns the number of partitions in this cut.
+    pub fn len(&self) -> usize {
+        self.partitions.len()
+    }
+
+    /// Consumes the cut and returns its partitions.
+    pub fn get_own(self) -> Vec<HashSet<Cow<'a, str>>> {
+        self.partitions
+    }
+
+    /// Returns the operator associated with this cut.
+    ///
+    /// The operator is handed out by value because [`OperatorType`] is `Copy`.
+    pub fn get_operator(&self) -> OperatorType {
+        self.operator
+    }
+
+    /// Returns `true` if this cut contains no partitions, i.e. if
+    /// [`Cut::len`] is zero.
+    pub fn is_empty(&self) -> bool {
+        self.partitions.is_empty()
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs
new file mode 100644
index 0000000..799d44f
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/exclusive_choice.rs
@@ -0,0 +1,213 @@
+//! Utility for detecting an exclusive choice cut in a given Directly Follows Graph
+
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+use std::borrow::Cow;
+use std::collections::{HashMap, HashSet, VecDeque};
+use crate::core::process_models::dfg::DirectlyFollowsGraph;
+use crate::core::process_models::process_tree::OperatorType;
+
+/// Calculates all connected components of a Graph.
+/// For this it starts from every unvisited activity a Breadth First Search over the Graph.
+///
+/// # Returns
+/// A vector containing all connected components
+fn calc_connected_components<'a>(
+ activities: &'a HashMap,
+ adjacent: HashMap, HashSet>>,
+) -> Vec>> {
+ // visited nodes
+ let mut visited: HashSet> = HashSet::new();
+ // components (if cut)
+ let mut components: Vec>> = Vec::new();
+
+ // iterate over every activity
+ for node in activities.keys() {
+ let node = Cow::from(node);
+ if !visited.contains(&node) {
+ // search in components
+
+ // components of the components xd
+ let mut comp = HashSet::new();
+ let mut queue = VecDeque::new();
+
+ // mark node as already visited
+ visited.insert(node.clone());
+ // Push starting node
+ queue.push_back(node);
+ // Explore connected component by looking at every edge of this activity
+ while let Some(current) = queue.pop_front() {
+ // the starting node is ofc the first node of this nodes component
+ comp.insert(current.clone());
+
+ // insert every other node which is reachable and has not already been visited
+ if let Some(neighbors) = adjacent.get(¤t) {
+ for neighbor in neighbors {
+ if !visited.contains(neighbor) {
+ visited.insert(neighbor.clone());
+ queue.push_back(neighbor.clone());
+ }
+ }
+ }
+ }
+ components.push(comp);
+ }
+ }
+
+ components
+}
+
+/// Builds an undirected adjacency map for the given Directly Follows Graph.
+///
+/// Each directly-follows relation `(a1, a2)` is recorded in both directions,
+/// regardless of its frequency. Only direct neighbours are stored; transitive
+/// reachability is not computed.
+/// A self-loop `(a, a)` makes `a` a neighbour of itself.
+///
+/// # Returns
+/// A hash map from each activity to the set of its neighbours.
+///
+/// Note: activities that occur in no directly-follows relation get no entry.
+pub fn calculate_undirected_adjacency_matrix<'a>(
+    dfg: &DirectlyFollowsGraph<'a>,
+) -> HashMap<Cow<'a, str>, HashSet<Cow<'a, str>>> {
+    let mut adjacent: HashMap<_, HashSet<_>> = HashMap::new();
+
+    // Only the endpoints matter here, so iterate over the keys alone.
+    for (a1, a2) in dfg.directly_follows_relations.keys() {
+        // Record the relation in both directions; `or_default` creates the
+        // neighbour set on first use.
+        adjacent.entry(a1.clone()).or_default().insert(a2.clone());
+        adjacent.entry(a2.clone()).or_default().insert(a1.clone());
+    }
+
+    adjacent
+}
+
+
+/// Attempts to find an exclusive choice cut in the given Directly Follows Graph.
+///
+/// Public wrapper for `calc_connected_components`: the partitions of the cut
+/// are the connected components of the graph, computed on the undirected
+/// adjacency map of `dfg`.
+///
+/// # Returns
+/// `Some(cut)` holding the components if there are at least two of them;
+/// `None` otherwise, or if the graph has no start or no end activities.
+#[allow(dead_code)]
+pub fn exclusive_choice_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Option<Cut<'a>> {
+    // A graph without start activities or without end activities admits no cut.
+    if dfg.start_activities.is_empty() || dfg.end_activities.is_empty() {
+        return None;
+    }
+
+    let adjacent = calculate_undirected_adjacency_matrix(dfg);
+    let components = calc_connected_components(&dfg.activities, adjacent);
+
+    // An XOR cut needs more than one component; a single one separates nothing.
+    let found_cut = components.len() > 1;
+    found_cut.then(|| Cut::new(OperatorType::ExclusiveChoice, components))
+}
+
+
+#[cfg(test)]
+mod tests {
+ use crate::core::process_models::dfg::DirectlyFollowsGraph;
+ use crate::discovery::case_centric::inductive_miner_app::cut_finder::exclusive_choice::exclusive_choice_cut_wrapper;
+ use crate::event_log;
+
+
+ #[test]
+ fn test_exclusive_choice_cut_2() {
+ let log = event_log!(["a", "b"], ["e"]);
+ let dfg: DirectlyFollowsGraph<'_> = DirectlyFollowsGraph::discover(&log);
+ let result = exclusive_choice_cut_wrapper(&dfg);
+ assert!(result.is_some());
+ assert_eq!(result.unwrap().len(), 2);
+ }
+
+ // Case 1: XOR between two branches
+ // Traces: b -> d OR c -> e
+ #[test]
+ fn xor_cut_simple_two_branches() {
+ let log = event_log!(["b", "d"], ["c", "e"]);
+ let dfg = DirectlyFollowsGraph::discover(&log);
+
+ let cut = exclusive_choice_cut_wrapper(&dfg).unwrap();
+
+ // Expect two components: {"b","d"} and {"c","e"};
+ // only the membership of b and c is checked below.
+ assert_eq!(cut.len(), 2);
+ assert!(cut.partitions.iter().any(|comp| comp.contains("b")));
+ assert!(cut.partitions.iter().any(|comp| comp.contains("c")));
+ }
+
+ // Case 2: XOR with 3 different branches
+ // Traces: b -> e, c -> f, d -> g
+ #[test]
+ fn xor_cut_three_way_branch() {
+ let log = event_log!(["b", "e"], ["c", "f"], ["d", "g"]);
+ let dfg = DirectlyFollowsGraph::discover(&log);
+
+ let cut = exclusive_choice_cut_wrapper(&dfg).unwrap();
+
+ // Expect three components: one with b, one with c, one with d
+ assert_eq!(cut.len(), 3);
+ assert!(cut.partitions.iter().any(|comp| comp.contains("b")));
+ assert!(cut.partitions.iter().any(|comp| comp.contains("c")));
+ assert!(cut.partitions.iter().any(|comp| comp.contains("d")));
+ }
+
+ // Case 3: No XOR (sequence only)
+ // Traces: a -> b -> c (five identical traces)
+ #[test]
+ fn no_xor_cut_sequence() {
+ let log = event_log!(
+ ["a", "b", "c"],
+ ["a", "b", "c"],
+ ["a", "b", "c"],
+ ["a", "b", "c"],
+ ["a", "b", "c"]
+ );
+ let dfg = DirectlyFollowsGraph::discover(&log);
+
+ let cut = exclusive_choice_cut_wrapper(&dfg);
+
+ // Should be None because the log is a pure sequence
+ assert!(cut.is_none());
+ }
+
+ // Case 4: Single-event traces -> XOR between start activities
+ // Traces: ["a"], ["e"], ["f"]
+ #[test]
+ fn xor_cut_multiple_single_events() {
+ let log = event_log!(["a"], ["e"], ["f"]);
+ let dfg = DirectlyFollowsGraph::discover(&log);
+
+ let cut = exclusive_choice_cut_wrapper(&dfg).unwrap();
+
+ // Expect 3 disjoint components
+ assert_eq!(cut.len(), 3);
+ assert!(cut.partitions.iter().any(|comp| comp.contains("a")));
+ assert!(cut.partitions.iter().any(|comp| comp.contains("e")));
+ assert!(cut.partitions.iter().any(|comp| comp.contains("f")));
+ }
+
+ #[test]
+ fn greater_test() {
+ let log = event_log!(["a", "b", "c"], ["e", "f"]);
+ let dfg = DirectlyFollowsGraph::discover(&log);
+ let cut = exclusive_choice_cut_wrapper(&dfg);
+
+ assert!(cut.is_some());
+ }
+
+ #[test]
+ fn test_parallel_log_no_cut() {
+ let log = event_log!(["a", "b"], ["b", "a"]);
+ let dfg = DirectlyFollowsGraph::discover(&log);
+ let cut = exclusive_choice_cut_wrapper(&dfg);
+
+ // a and b follow each other in both directions, so no exclusive choice cut is expected
+ assert!(cut.is_none());
+ }
+}
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs
new file mode 100644
index 0000000..33d4631
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/loop_cut.rs
@@ -0,0 +1,210 @@
+//! Utility for detecting a loop cut in a Directly Follows Graph.
+
+use std::borrow::Cow;
+use std::collections::HashSet;
+use crate::core::process_models::dfg::DirectlyFollowsGraph;
+use crate::core::process_models::process_tree::OperatorType;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+
+
+/// Attempts to find a loop cut in a given Directly Follows Graph (DFG).
+///
+/// The algorithm groups activities into connected components by using a union-find structure:
+///
+/// 1. Selects a pivot activity from the set of start activities.
+/// 2. Merges all start and end activities with the component of the pivot.
+/// 3. Merges activities along the edges of the DFG, excluding edges that would violate
+///    redo-loop semantics: an edge whose source is a start (but not an end) activity always
+///    merges, an edge between two activities that are neither start nor end merges, while an
+///    edge leaving an end activity or entering a start activity never merges.
+/// 4. Moves every remaining component into the pivot component if one of its activities
+///    leaves towards another component without being connected to *all* start activities, or
+///    is entered from another component without being reachable from *all* end activities.
+///
+/// The resulting vector represents the activity partitions of the
+/// candidate redo-loop cut. The first partition corresponds to the
+/// component containing the pivot (the "do" part),
+/// and the remaining partitions correspond to the "redo" part(s), ordered by their
+/// alphabetically smallest activity so that the output is deterministic.
+///
+/// # Panics
+/// Panics if none of the start activities of the DFG is one of its activities
+/// (in particular if the DFG contains no start activity at all).
+fn redo_loop_cut<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Vec<HashSet<Cow<'a, str>>> {
+    /// Returns the representative of the component containing `node` (with path halving).
+    fn find(parent: &mut [usize], mut node: usize) -> usize {
+        while parent[node] != node {
+            parent[node] = parent[parent[node]];
+            node = parent[node];
+        }
+        node
+    }
+
+    /// Merges the components containing `left` and `right`.
+    fn union(parent: &mut [usize], left: usize, right: usize) {
+        let (left_root, right_root) = (find(parent, left), find(parent, right));
+        if left_root != right_root {
+            parent[right_root] = left_root;
+        }
+    }
+
+    // Sorting the names gives every activity a stable index (looked up by binary search) and
+    // makes the result independent of the hash map iteration order.
+    let mut names: Vec<&'a str> = dfg.activities.keys().map(String::as_str).collect();
+    names.sort_unstable();
+    let index_of = |name: &str| names.binary_search(&name).ok();
+
+    let starts: Vec<usize> = dfg
+        .start_activities
+        .iter()
+        .filter_map(|name| index_of(name.as_str()))
+        .collect();
+    let ends: Vec<usize> = dfg
+        .end_activities
+        .iter()
+        .filter_map(|name| index_of(name.as_str()))
+        .collect();
+
+    let mut is_start = vec![false; names.len()];
+    for &start in &starts {
+        is_start[start] = true;
+    }
+    let mut is_end = vec![false; names.len()];
+    for &end in &ends {
+        is_end[end] = true;
+    }
+
+    // Edges as index pairs; edges touching unknown activities are ignored.
+    let edges: HashSet<(usize, usize)> = dfg
+        .directly_follows_relations
+        .keys()
+        .filter_map(|(source, target)| {
+            let source: &str = source.as_ref();
+            let target: &str = target.as_ref();
+            Some((index_of(source)?, index_of(target)?))
+        })
+        .collect();
+
+    // 1. pivot selection
+    let pivot = *starts
+        .iter()
+        .min()
+        .expect("a loop cut requires at least one start activity");
+
+    // 2. all start and end activities belong to the "do" part
+    let mut parent: Vec<usize> = (0..names.len()).collect();
+    for &activity in starts.iter().chain(&ends) {
+        union(&mut parent, pivot, activity);
+    }
+
+    // 3. merge along edges that cannot cross between the "do" and a "redo" part
+    for &(source, target) in &edges {
+        if is_start[source] {
+            // a redo part cannot be entered from a start activity that is not an end activity
+            if !is_end[source] {
+                union(&mut parent, source, target);
+            }
+        } else if !is_end[source] && !is_start[target] {
+            // edge inside a sub-component
+            union(&mut parent, source, target);
+        }
+    }
+
+    // 4. check the edges that still cross component borders
+    let mut crossing: Vec<(usize, usize)> = Vec::new();
+    for &(source, target) in &edges {
+        if find(&mut parent, source) != find(&mut parent, target) {
+            crossing.push((source, target));
+        }
+    }
+    for &(exit, entry) in &crossing {
+        // an activity leaving a redo part has to be connected to all start activities
+        for &start in &starts {
+            if find(&mut parent, exit) == find(&mut parent, start) {
+                break; // already part of the "do" part
+            }
+            if !edges.contains(&(exit, start)) {
+                union(&mut parent, start, exit);
+                break;
+            }
+        }
+        // an activity entering a redo part has to be reachable from all end activities
+        for &end in &ends {
+            if find(&mut parent, entry) == find(&mut parent, end) {
+                break; // already part of the "do" part
+            }
+            if !edges.contains(&(end, entry)) {
+                union(&mut parent, end, entry);
+                break;
+            }
+        }
+    }
+
+    // Collect the components; slot 0 is reserved for the component of the pivot.
+    let mut roots = vec![find(&mut parent, pivot)];
+    let mut partitions: Vec<HashSet<Cow<'a, str>>> = vec![HashSet::new()];
+    for (index, &name) in names.iter().enumerate() {
+        let root = find(&mut parent, index);
+        let known_slot = roots.iter().position(|&known| known == root);
+        let slot = match known_slot {
+            Some(slot) => slot,
+            None => {
+                roots.push(root);
+                partitions.push(HashSet::new());
+                roots.len() - 1
+            }
+        };
+        partitions[slot].insert(Cow::Borrowed(name));
+    }
+
+    partitions
+}
+
+/// Attempts to find a loop cut in a given DFG.
+///
+/// Public wrapper for [`redo_loop_cut`].
+///
+/// # Returns
+/// `Some(cut)` with operator [`OperatorType::Loop`] if the DFG splits into more than one
+/// component, `None` otherwise. A DFG without start activities or without end activities
+/// never has a loop cut and yields `None` without running the detection.
+pub fn redo_loop_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Option<Cut<'a>> {
+    // only possible if there are start and end activities
+    if dfg.start_activities.is_empty() || dfg.end_activities.is_empty() {
+        return None;
+    }
+
+    // calculate the do/redo components; a single component means there is nothing to cut
+    let components = redo_loop_cut(dfg);
+    (components.len() > 1).then(|| Cut::new(OperatorType::Loop, components))
+}
+
+#[cfg(test)]
+mod test_redo_loop_cut {
+    use std::collections::HashMap;
+    use crate::event_log;
+    use crate::core::process_models::dfg::DirectlyFollowsGraph;
+    use crate::core::process_models::process_tree::OperatorType;
+    use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+    use super::*;
+
+    /// A single activity `b` forms the redo part between two runs of `a -> c`.
+    #[test]
+    fn test_redo_on_single_activity() {
+        let log = event_log!(
+            ["a", "c"],
+            ["a", "c", "b", "a", "c"]
+        );
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        let cut = redo_loop_cut(&dfg);
+
+        assert_eq!(cut.len(), 2);
+        assert!(cut[0].contains("a") && cut[0].contains("c"));
+        assert!(cut[1].contains("b"));
+    }
+
+    /// A plain sequence collapses into one component.
+    #[test]
+    fn test_no_loop() {
+        let log = event_log!(
+            ["a", "b", "c"],
+            ["a", "b", "c"],
+        );
+        let dfg = DirectlyFollowsGraph::discover(&log);
+
+        let cut = redo_loop_cut(&dfg);
+
+        assert_eq!(cut.len(), 1);
+        assert!(cut[0].contains("a") && cut[0].contains("b") && cut[0].contains("c"));
+    }
+
+    /// The redo part may consist of several activities (`b -> d`).
+    #[test]
+    fn test_multi_activity_redo() {
+        let log = event_log!(
+            ["a", "c"],
+            ["a", "c", "b", "d", "a", "c"],
+        );
+        let dfg = DirectlyFollowsGraph::discover(&log);
+
+        let cut = redo_loop_cut(&dfg);
+
+        assert_eq!(cut.len(), 2);
+
+        let do_group = &cut[0];
+        let redo_group = &cut[1];
+
+        assert!(do_group.contains("a") && do_group.contains("c"));
+        assert!(redo_group.contains("b") && redo_group.contains("d"));
+    }
+
+    /// Only the outer loop (redo over `g`) is cut; the inner loop stays inside the do part.
+    #[test]
+    fn test_nested_loops_only_outer_cut() {
+        let log = event_log!(
+            ["s", "a", "c", "e"],
+            ["s", "a", "c", "b", "a", "c", "e"], // inner loop
+            ["s", "a", "c", "e", "g", "s", "a", "c", "e"],
+            ["s", "a", "c", "b", "a", "c", "b", "a", "c", "e"],
+        );
+        let dfg = DirectlyFollowsGraph::discover(&log);
+
+        let cut = redo_loop_cut(&dfg);
+
+        assert_eq!(cut.len(), 2);
+
+        assert!(cut[1].contains("g"));
+        assert!(cut[0].contains("a") && cut[0].contains("c"));
+    }
+
+    /// Hand-built, fully connected DFG in which every activity is a start or an end activity.
+    #[test]
+    fn test_complex_test() {
+        let mut dfg = DirectlyFollowsGraph::new();
+        dfg.activities = HashMap::from([
+            ("a".to_string(), 1),
+            ("b".to_string(), 1),
+            ("c".to_string(), 1),
+        ]);
+        dfg.directly_follows_relations = HashMap::from([
+            (("a".into(), "b".into()), 1),
+            (("b".into(), "a".into()), 1),
+            (("b".into(), "c".into()), 1),
+            (("c".into(), "b".into()), 1),
+            (("c".into(), "a".into()), 1),
+            (("a".into(), "c".into()), 1),
+        ]);
+        dfg.start_activities = HashSet::from(["a".to_string(), "b".to_string()]);
+        dfg.end_activities = HashSet::from(["c".to_string()]);
+
+        // All start and end activities are merged with the pivot, which covers every activity
+        // here: a single component remains and therefore no loop cut exists.
+        assert!(redo_loop_cut_wrapper(&dfg).is_none());
+    }
+
+    /// Two alternative redo parts (`c` and `d`) yield three partitions.
+    #[test]
+    fn test_double_loop() {
+        let log = event_log!(
+            ["a", "b"],
+            ["a", "b", "c", "a", "b"],
+            ["a", "b", "d", "a", "b"],
+            ["a", "b", "d", "a", "b", "a", "b", "c", "a", "b"],
+            ["a", "b", "c", "a", "b", "a", "b", "d", "a", "b"]
+        );
+        let dfg = DirectlyFollowsGraph::discover(&log);
+
+        let cut = redo_loop_cut_wrapper(&dfg).expect("a loop cut should be found");
+
+        assert_eq!(cut.len(), 3);
+        assert_eq!(cut.operator, OperatorType::Loop);
+        // the order of the two redo parts is not fixed
+        assert!(
+            cut.partitions == vec![HashSet::from(["a".into(), "b".into()]),
+                                   HashSet::from(["c".into()]),
+                                   HashSet::from(["d".into()])]
+            ||
+            cut.partitions == vec![HashSet::from(["a".into(), "b".into()]),
+                                   HashSet::from(["d".into()]),
+                                   HashSet::from(["c".into()])]
+        );
+    }
+
+    /// The do part contains an optional activity `d`; `c` is the redo part.
+    #[test]
+    fn test_loop_over_parallel() {
+        let log = event_log!(
+            ["a", "b"],
+            ["a", "b", "c", "a", "b"],
+            ["a", "d", "b"],
+            ["a", "d", "b", "c", "a", "d", "b"],
+            ["a", "d", "b", "c", "a", "b"]
+        );
+        let dfg = DirectlyFollowsGraph::discover(&log);
+
+        let cut = redo_loop_cut_wrapper(&dfg);
+
+        assert!(cut.is_some());
+        let expectations = Cut::new(
+            OperatorType::Loop,
+            vec![
+                HashSet::from(["a".into(), "b".into(), "d".into()]),
+                HashSet::from(["c".into()]),
+            ],
+        );
+
+        assert_eq!(cut.unwrap(), expectations);
+    }
+}
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/mod.rs
new file mode 100644
index 0000000..917ac72
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/mod.rs
@@ -0,0 +1,80 @@
+//! This module contains algorithms for detecting a cut in a given Directly Follows Graph.
+
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::dfg::DirectlyFollowsGraph;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::concurrent::concurrent_cut_wrapper;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::exclusive_choice::exclusive_choice_cut_wrapper;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::loop_cut::redo_loop_cut_wrapper;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::sequence_cut_wrapper;
+use crate::discovery::case_centric::inductive_miner_app::structures::minimum_self_distance::MinimumSelfDistance;
+use crate::discovery::case_centric::inductive_miner_app::structures::parameter::{Parameter, Parameters};
+use crate::EventLog;
+
+pub mod exclusive_choice;
+pub mod cut;
+pub mod sequence_cut;
+pub mod concurrent;
+pub mod loop_cut;
+
+
+
+
+/// Attempts to find a valid cut in the given DirectlyFollowsGraph, by evaluating possible cut types
+/// in the following strict order:
+/// 1. Exclusive choice cut [`exclusive_choice_cut_wrapper`]
+/// 2. Sequence cut [`sequence_cut_wrapper`]
+/// 3. Concurrent / AND cut [`concurrent_cut_wrapper`]
+/// 4. Loop cut [`redo_loop_cut_wrapper`]
+///
+/// Each detector only runs if all previous ones found nothing. The minimum self distance is
+/// computed from `log` only if the concurrent cut is actually tried and
+/// [`Parameter::MinimumSelfDistance`] is contained in `parameters`.
+///
+/// # Returns
+/// - Some([`Cut`]) containing the first detected cut according to the strict order.
+/// - None otherwise
+pub fn find_cut<'a>(
+    dfg: &'a DirectlyFollowsGraph<'_>,
+    log: &EventLog,
+    event_log_classifier: &EventLogClassifier,
+    parameters: &Parameters,
+) -> Option<Cut<'a>> {
+    exclusive_choice_cut_wrapper(dfg)
+        .or_else(|| sequence_cut_wrapper(dfg, parameters))
+        .or_else(|| {
+            // check whether minimum self distance shall be used
+            let mindist = parameters
+                .contains(&Parameter::MinimumSelfDistance)
+                .then(|| MinimumSelfDistance::new(log, event_log_classifier));
+            concurrent_cut_wrapper(dfg, mindist)
+        })
+        .or_else(|| redo_loop_cut_wrapper(dfg))
+}
+
+#[cfg(test)]
+mod test_cut_finder {
+    use std::collections::HashSet;
+    use crate::{
+        discovery::case_centric::dfg::discover_dfg,
+        core::event_data::case_centric::EventLogClassifier,
+        event_log,
+        discovery::case_centric::inductive_miner_app::cut_finder::find_cut
+    };
+
+    /// None of the cut detectors is expected to succeed on this log.
+    #[test]
+    fn test_log_with_no_cut() {
+        let tangled_log = event_log!(
+            ["a", "b", "c", "d"],
+            ["d", "a", "b"],
+            ["a", "d", "c"],
+            ["b", "c", "d"],
+        );
+        let graph = discover_dfg(&tangled_log);
+
+        // default classifier, no optional parameters
+        let outcome = find_cut(&graph, &tangled_log, &EventLogClassifier::default(), &HashSet::new());
+
+        assert!(outcome.is_none());
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs
new file mode 100644
index 0000000..997f9fb
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/cut_finder/sequence_cut.rs
@@ -0,0 +1,295 @@
+//! Utility for detecting a sequence cut in a Directly Follows Graph.
+
+use std::borrow::Cow;
+use std::collections::{HashMap, HashSet};
+use crate::core::process_models::dfg::{Activity, DirectlyFollowsGraph};
+use crate::core::process_models::process_tree::OperatorType;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters;
+
+/// Calculates the transitive reachability between all activities of the DFG
+/// (Floyd-Warshall style transitive closure).
+///
+/// # Returns
+/// A tuple `(index_map, matrix)`:
+/// - `index_map` assigns every activity of the DFG its row/column index in `matrix`
+/// - `matrix[i][j]` is `true` if activity `j` can be reached from activity `i` over at least
+///   one directly-follows edge. An activity only reaches itself if it lies on a cycle.
+fn compute_reachability_matrix(dfg: &DirectlyFollowsGraph<'_>) -> (HashMap<Activity, usize>, Vec<Vec<bool>>) {
+    // activity name -> index
+    let map: HashMap<Activity, usize> = dfg
+        .activities
+        .keys()
+        .cloned()
+        .enumerate()
+        .map(|(index, activity)| (activity, index))
+        .collect();
+    let n = map.len();
+
+    // initialize matrix
+    let mut matrix = vec![vec![false; n]; n];
+
+    // mark direct edges; edges touching activities unknown to the DFG are ignored
+    for ((a, b), _) in &dfg.directly_follows_relations {
+        if let (Some(idx_a), Some(idx_b)) = (map.get(a.as_ref()), map.get(b.as_ref())) {
+            matrix[*idx_a][*idx_b] = true;
+        }
+    }
+
+    // Floyd-Warshall: allow k as an intermediate activity on the way from i to j
+    for k in 0..n {
+        for i in 0..n {
+            // without a path i -> k nothing can change in row i for this k
+            if !matrix[i][k] {
+                continue;
+            }
+            for j in 0..n {
+                if matrix[k][j] {
+                    matrix[i][j] = true;
+                }
+            }
+        }
+    }
+
+    (map, matrix)
+}
+
+/// Helper function which calculates whether a set of activities `a` can reach another set of
+/// activities `b`.
+///
+/// `idx_map` maps an activity name to its row/column in the reachability `matrix`
+/// (row = source, column = target). Activities missing from `idx_map` are ignored.
+///
+/// # Returns
+/// - `true` if at least one activity in `a` can transitively reach any activity in `b`
+/// - `false` otherwise, in particular if one of the sets is empty
+fn reaches_any_transitive(
+    a: &HashSet<Cow<'_, str>>,
+    b: &HashSet<Cow<'_, str>>,
+    idx_map: &HashMap<Activity, usize>,
+    matrix: &[Vec<bool>],
+) -> bool {
+    // resolve the target columns once instead of once per source activity
+    let targets: Vec<usize> = b
+        .iter()
+        .filter_map(|act_b| idx_map.get(act_b.as_ref()).copied())
+        .collect();
+
+    a.iter()
+        .filter_map(|act_a| idx_map.get(act_a.as_ref()))
+        .any(|&idx_a| targets.iter().any(|&idx_b| matrix[idx_a][idx_b]))
+}
+
+
+
+/// Calculates activity sequences in a given Directly Follows Graph.
+/// Two activities are in sequence if they are neither mutually reachable nor mutually unreachable,
+/// i.e. exactly one of them can (transitively) reach the other.
+///
+/// Every activity starts in its own partition. Two partitions are merged as long as they are
+/// mutually reachable or mutually unreachable; the remaining partitions are sorted by
+/// reachability.
+///
+/// # Returns
+/// A vector of activity partitions representing a candidate sequence cut.
+/// Each hashset contains the activity labels belonging to the same sequence block.
+/// The partitions are ordered s.t. for any `i < j`, activities in `partitions[i]` can (transitively)
+/// reach activities in `partitions[j]`.
+///
+/// # Panics
+/// Panics if the final sort compares two partitions that are mutually reachable or mutually
+/// unreachable. The merge phase is meant to rule this out.
+fn calc_sequences<'a>(dfg: &'a DirectlyFollowsGraph<'_>) -> Vec<HashSet<Cow<'a, str>>> {
+    let (idx_map, matrix) = compute_reachability_matrix(dfg);
+
+    // 1. Initialize each activity with its own partition
+    let mut partitions: Vec<HashSet<Cow<'a, str>>> = dfg
+        .activities
+        .keys()
+        .map(|a| {
+            let mut s = HashSet::new();
+            s.insert(a.into());
+            s
+        })
+        .collect();
+
+    // repeat the pairwise pass until no merge happens any more
+    let mut changed = true;
+    while changed {
+        changed = false;
+        // iterate over all pairs of partitions and merge those that are mutually reachable
+        // or mutually unreachable
+        let mut i = 0;
+        while i < partitions.len() {
+            // the check is symmetric, so only pairs with j > i have to be visited
+            let mut j = i + 1;
+            while j < partitions.len() {
+                // get the current working partitions
+                let p_a = &partitions[i];
+                let p_b = &partitions[j];
+
+                // true if at least one activity of the first partition reaches at least one
+                // activity of the second partition
+                let a_reaches_b = reaches_any_transitive(p_a, p_b, &idx_map, &matrix);
+                let b_reaches_a = reaches_any_transitive(p_b, p_a, &idx_map, &matrix);
+
+                // Merge if:
+                // 1. Mutually reachable (Loop)
+                // 2. Mutually unreachable (Exclusive Choice / Parallelism)
+                if (a_reaches_b && b_reaches_a) || (!a_reaches_b && !b_reaches_a) {
+                    // Merge the whole partition j into partition i
+                    let part_j = partitions.remove(j);
+                    partitions[i].extend(part_j);
+                    // partition i grew, so partitions that were already visited may now relate
+                    // differently to it: another full pass is required
+                    changed = true;
+                    // Don't increment j, as the vector shrunk
+                } else {
+                    // proceed with the next partition
+                    j += 1;
+                }
+            }
+            i += 1;
+        }
+    }
+
+    // 2. Sort partitions to form the candidate sequence
+    partitions.sort_by(|p1, p2| {
+        let p1_to_p2 = reaches_any_transitive(p1, p2, &idx_map, &matrix);
+        let p2_to_p1 = reaches_any_transitive(p2, p1, &idx_map, &matrix);
+        if p1_to_p2 && !p2_to_p1 { // p1 -> p2 but not p2 -> p1
+            std::cmp::Ordering::Less
+        } else if !p1_to_p2 && p2_to_p1 { // p2 -> p1 but not p1 -> p2
+            std::cmp::Ordering::Greater
+        } else { // mutually reachable or not reachable - should not happen at all
+            panic!("Partitions are in sequence cut are nevertheless mutually reachable or not reachable");
+        }
+    });
+
+    partitions
+}
+
+/// Public wrapper for [`calc_sequences`].
+///
+/// Computes the sequence blocks of the DFG and wraps them into a [`Cut`] with operator
+/// [`OperatorType::Sequence`].
+///
+/// `_parameters` is accepted for interface consistency with the other cut finders but is
+/// currently ignored (e.g. no strict sequence cut is applied).
+///
+/// # Returns
+/// `Some(cut)` if the DFG splits into more than one sequence block, `None` otherwise.
+pub fn sequence_cut_wrapper<'a>(dfg: &'a DirectlyFollowsGraph<'_>, _parameters: &Parameters) -> Option<Cut<'a>> {
+    // calculate sequence blocks
+    let sequences = calc_sequences(dfg);
+
+    // a cut is only found if there is more than one sequence block
+    (sequences.len() > 1).then(|| Cut::new(OperatorType::Sequence, sequences))
+}
+
+#[cfg(test)]
+mod test_sequence_cut {
+    use std::borrow::Cow;
+    use std::collections::HashSet;
+    use crate::core::process_models::dfg::DirectlyFollowsGraph;
+    use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::{calc_sequences, sequence_cut_wrapper};
+    use crate::event_log;
+
+    /// A single activity forms exactly one block.
+    #[test]
+    fn test_single_activity() {
+        let dfg = DirectlyFollowsGraph::discover(&event_log!(["a"]));
+        let cut = calc_sequences(&dfg);
+        let expected = vec![HashSet::from([Cow::from("a".to_string())])];
+        assert_eq!(cut, expected);
+    }
+
+    /// The isolated activity `d` is merged into one of the blocks of the sequence a, b, c.
+    #[test]
+    fn test_exclusive_choice_cut() {
+        let input = event_log!(["a", "b", "c"], ["d"]);
+        let dfg = DirectlyFollowsGraph::discover(&input);
+        let result = sequence_cut_wrapper(&dfg, &HashSet::new());
+        assert!(result.is_some());
+        assert_eq!(result.unwrap().get_own().len(), 3);
+    }
+
+    #[test]
+    fn test_simple_sequence() {
+        let input = event_log!(["a", "b", "c"]);
+        let dfg = DirectlyFollowsGraph::discover(&input);
+        let result = calc_sequences(&dfg);
+        let expected = vec![
+            HashSet::from(["a".into()]),
+            HashSet::from(["b".into()]),
+            HashSet::from(["c".into()]),
+        ];
+        assert_eq!(expected, result)
+    }
+
+    /// Expected blocks: {a, b}, {c}, {d, e}.
+    #[test]
+    fn test_leemans_example() {
+        let input = event_log!(["a", "c", "d"], ["b", "c", "e"]);
+        let dfg = DirectlyFollowsGraph::discover(&input);
+        let result = sequence_cut_wrapper(&dfg, &HashSet::new());
+        assert!(result.is_some());
+        assert_eq!(result.unwrap().get_own().len(), 3);
+    }
+
+    #[test]
+    fn test_sequence_with_internal_parallelism() {
+        // Log: A -> (B || C) -> D
+        // Traces: A->B->C->D, A->C->B->D
+        let dfg = DirectlyFollowsGraph::discover(&event_log!(
+            ["A", "B", "C", "D"],
+            ["A", "C", "B", "D"]
+        ));
+
+        let cut = sequence_cut_wrapper(&dfg, &HashSet::new()).unwrap();
+        let expected: Vec<HashSet<Cow<'_, str>>> = vec![
+            HashSet::from(["A".into()]),
+            HashSet::from(["B".into(), "C".into()]),
+            HashSet::from(["D".into()]),
+        ];
+
+        assert_eq!(cut.get_own(), expected);
+    }
+
+    #[test]
+    fn test_parallel_branches_no_sequence_cut() {
+        // Log: B and C in both orders, i.e. mutually reachable -> a single block
+        let dfg = DirectlyFollowsGraph::discover(&event_log!(
+            ["B", "C"],
+            ["C", "B"]
+        ));
+        let cut = sequence_cut_wrapper(&dfg, &HashSet::new());
+        assert!(cut.is_none());
+    }
+
+    #[test]
+    fn test_xor_branch_sequence_cut() {
+        // Log: A -> B -> D OR A -> C -> D
+        let dfg = DirectlyFollowsGraph::discover(&event_log!(
+            ["A", "B", "D"],
+            ["A", "C", "D"],
+        ));
+        let cut = sequence_cut_wrapper(&dfg, &HashSet::new());
+        assert!(cut.is_some());
+        let cut = cut.unwrap();
+        let expected: Vec<HashSet<Cow<'_, str>>> = vec![
+            HashSet::from(["A".into()]),
+            HashSet::from(["B".into(), "C".into()]),
+            HashSet::from(["D".into()]),
+        ];
+        assert_eq!(cut.get_own(), expected);
+    }
+
+    /// The redo part E -> F makes all activities mutually reachable: no sequence cut.
+    #[test]
+    fn test_with_loop() {
+        let dfg = DirectlyFollowsGraph::discover(&event_log!(
+            ["B", "C"],
+            ["C", "B"],
+            ["B", "C", "E", "F", "B", "C"],
+            ["C", "B", "E", "F", "B", "C"],
+            ["B", "C", "E", "F", "C", "B"],
+            ["C", "B", "E", "F", "B", "C", "E", "F", "C", "B"],
+        ));
+        assert!(sequence_cut_wrapper(&dfg, &HashSet::new()).is_none());
+    }
+
+    /// A and B cannot reach each other and are merged; both reach C, which reaches D.
+    #[test]
+    fn test_triangle_cut() {
+        let dfg = DirectlyFollowsGraph::discover(&event_log!(
+            ["A", "C"],
+            ["B", "C", "D"],
+            ["B", "D"]
+        ));
+
+        // a missing cut must fail the test instead of passing silently
+        let cut = sequence_cut_wrapper(&dfg, &HashSet::new()).expect("a sequence cut should be found");
+
+        assert_eq!(
+            cut.get_own(),
+            Vec::from([
+                HashSet::from(["A".into(), "B".into()]),
+                HashSet::from(["C".into()]),
+                HashSet::from(["D".into()]),
+            ])
+        );
+    }
+
+    /// `b` is optional between `a` and `c`; the three activities are still strictly ordered.
+    #[test]
+    fn test_strict_sequence_cut_wrapper() {
+        let log = event_log!(
+            ["a", "b", "c"],
+            ["a", "c"],
+        );
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        let cut = sequence_cut_wrapper(&dfg, &HashSet::new()).unwrap();
+        let expected: Vec<HashSet<Cow<'_, str>>> = vec![
+            HashSet::from(["a".into()]),
+            HashSet::from(["b".into()]),
+            HashSet::from(["c".into()]),
+        ];
+        assert_eq!(cut.get_own(), expected);
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs
new file mode 100644
index 0000000..aa91005
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_concurrent.rs
@@ -0,0 +1,213 @@
+//! Activity Concurrent fallthrough detection utilities.
+//!
+//! This module implements the **activity concurrent** fallthrough used by the inductive miner.
+//!
+//! The activity concurrent fallthrough assumes concurrent behavior when a single activity in the event log
+//! can occur independently of the ordering of the other activities. In such a case, the activity is
+//! considered to run in parallel with the remaining behavior of the log.
+//!
+//! When this pattern is detected, the activity is separated from the log and modeled as executing
+//! concurrently with the rest of the process.
+
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::process_tree::{Node, OperatorType};
+use crate::discovery::case_centric::dfg::discover_dfg_with_classifier;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::find_cut;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{ActivityConcurrent, Return};
+use crate::discovery::case_centric::inductive_miner_app::splits::perform_split;
+use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters;
+use crate::EventLog;
+
+/// Filters an event log by removing all events whose activity matches a pivot.
+///
+/// The function splits the input log into two logs:
+/// - one log containing the original traces **without** the pivot activity
+/// - one log containing traces consisting only of the filtered-out pivot events
+///
+/// The number of traces is preserved in both logs: a trace without any pivot event
+/// contributes an empty trace to the log of removed events, and a trace consisting only of
+/// pivot events contributes an empty trace to the remaining log. The removed events are the
+/// original events of the trace, in their original order.
+///
+/// # Returns
+/// A tuple `(filtered_out_log, filtered_log)` where:
+///
+/// - `filtered_out_log` contains only the removed pivot events (possibly empty traces).
+/// - `filtered_log` contains the original behavior without the pivot events.
+fn filter_out_activity(
+    log: EventLog,
+    event_log_classifier: &EventLogClassifier,
+    pivot: String,
+) -> (EventLog, EventLog) {
+    let mut filtered_log = log.clone_without_traces(); // the remaining behavior
+    let mut filtered_out_log = log.clone_without_traces(); // the removed pivot events
+
+    for trace in log.traces {
+        // both result traces keep the data of the trace, but start without events
+        let mut remaining_trace = trace.clone_without_events();
+        let mut pivot_trace = trace.clone_without_events();
+
+        // route every event to exactly one of the two traces
+        for event in trace.events {
+            if pivot != event_log_classifier.get_class_identity(&event) {
+                remaining_trace.events.push(event);
+            } else {
+                pivot_trace.events.push(event);
+            }
+        }
+
+        // mind that empty traces are being pushed too, so both logs stay aligned
+        filtered_log.traces.push(remaining_trace);
+        filtered_out_log.traces.push(pivot_trace);
+    }
+
+    (filtered_out_log, filtered_log)
+}
+
+/// Attempts to detect an *activity concurrent* fall-through pattern.
+///
+/// This fall through iteratively removes one activity at a time
+/// (starting with the most frequent one, ties broken by activity name) and checks whether the
+/// remaining log yields any valid cut.
+/// If removing the activity yields a valid cut, the activity is considered concurrent to the rest of the process.
+///
+/// The split operation is performed on the found cut right away, for efficiency reasons.
+///
+/// # Returns
+/// - `ActivityConcurrent(...)` if a concurrent activity is detected, containing the constructed
+///   (still childless) concurrency node, the log of removed activity instances and the
+///   already performed split.
+/// - `Return(log)` with the original log if no activity qualifies.
+fn activity_concurrent(
+    log: EventLog,
+    event_log_classifier: &EventLogClassifier,
+    parameters: &Parameters,
+) -> Fallthrough {
+    let dfg = discover_dfg_with_classifier(&log, event_log_classifier);
+
+    // get the activities together with their frequency
+    let mut activities: Vec<(String, u32)> = dfg
+        .activities
+        .iter()
+        .map(|(name, count)| (name.clone(), *count))
+        .collect();
+    // most frequent first; the name decides ties so that the result does not depend on the
+    // iteration order of the hash map
+    activities.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
+
+    // now leave out one activity after another and try to find a cut
+    for (activity, _) in activities {
+        // remove activity from this log
+        let (filtered_out_log, filtered_log) =
+            filter_out_activity(log.clone(), event_log_classifier, activity);
+
+        // build a dfg in order to use the already established find_cut method
+        let filtered_dfg = discover_dfg_with_classifier(&filtered_log, event_log_classifier);
+        if let Some(cut) = find_cut(&filtered_dfg, &filtered_log, event_log_classifier, parameters) {
+            // do the split here
+            let split = perform_split(&filtered_log, event_log_classifier, cut);
+
+            // create a node without children, as this has to be processed in the more high level functions
+            let node = Node::new_operator(OperatorType::Concurrency);
+
+            return ActivityConcurrent(node, filtered_out_log, split);
+        }
+        // otherwise leave out another activity (if another is left)
+    }
+
+    // default return
+    Return(log)
+}
+
+/// Public entry point of the activity concurrent fall-through.
+///
+/// All arguments are handed to [`activity_concurrent`] unchanged and its result is returned
+/// as is; the function only exists so that this fall-through offers a wrapper like the
+/// other fall-through detections do.
+pub fn activity_concurrent_wrapper(
+    log: EventLog,
+    event_log_classifier: &EventLogClassifier,
+    parameters: &Parameters,
+) -> Fallthrough {
+    activity_concurrent(log, event_log_classifier, parameters)
+}
+
+#[cfg(test)]
+mod test_activity_concurrent {
+    use std::collections::HashSet;
+    use crate::core::event_data::case_centric::EventLogClassifier;
+    use crate::{event_log, EventLog};
+    use crate::core::process_models::process_tree::{Node, OperatorType};
+    use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_concurrent::{activity_concurrent, filter_out_activity};
+    use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::ActivityConcurrent;
+
+    /// Compares two logs by the class identity of their events only.
+    ///
+    /// Returns `true` if both logs have the same number of traces, every pair of traces has
+    /// the same number of events and all events agree pairwise under the classifier.
+    fn events_equal(log: &EventLog, o_log: &EventLog, event_log_classifier: &EventLogClassifier) -> bool {
+        log.traces.len() == o_log.traces.len()
+            && log.traces.iter().zip(o_log.traces.iter()).all(|(t0, t1)| {
+                // traces of different length can never be equal
+                t0.events.len() == t1.events.len()
+                    && t0.events.iter().zip(t1.events.iter()).all(|(e0, e1)| {
+                        event_log_classifier.get_class_identity(e0)
+                            == event_log_classifier.get_class_identity(e1)
+                    })
+            })
+    }
+
+    #[test]
+    fn test_filter_out_activity_and_activity_concurrent_yield_same_result() {
+        let log = event_log!(
+            ["a", "b", "c", "d"],
+            ["d", "a", "b"],
+            ["a", "d", "c"],
+            ["b", "c", "d"],
+        );
+
+        // mind the empty trace
+        let ex1 = event_log!(["b"], ["b"], [], ["b"]);
+
+        let ex2 = event_log!(
+            ["a", "c", "d"],
+            ["d", "a"],
+            ["a", "d", "c"],
+            ["c", "d"],
+        );
+
+        let classifier = EventLogClassifier::default();
+
+        let (log1, log2) = filter_out_activity(log.clone(), &classifier, "b".to_string());
+
+        assert!(events_equal(&log1, &ex1, &classifier));
+        assert!(events_equal(&log2, &ex2, &classifier));
+
+        let ActivityConcurrent(node, removed_log, split) =
+            activity_concurrent(log, &classifier, &HashSet::new())
+        else {
+            panic!("expected the activity concurrent fall-through to apply");
+        };
+        assert!(!removed_log.traces.is_empty() && !split.sub_logs.is_empty());
+        let ex_node = Node::new_operator(OperatorType::Concurrency);
+        assert_eq!(node, ex_node);
+    }
+}
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs
new file mode 100644
index 0000000..bf141e5
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/activity_once_per_trace.rs
@@ -0,0 +1,339 @@
+//! Activity once per trace detection utilities.
+//!
+ //! This module implements the **activity once per trace fallthrough** used by the inductive miner.
+//!
+//! The fallthrough applies when an activity occurs **exactly once in every trace of the event log**.
+//! In this case, the activity is assumed to execute independently of the rest of the process.
+//!
+//! When such an activity is detected, it is removed from the event log and modeled as running in
+//! parallel with the remaining behavior of the process.
+use std::collections::HashMap;
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::EventLog;
+use crate::core::process_models::process_tree::{Node, OperatorType};
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{ActivityOncePerTrace, Return};
+use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters;
+
+ /// Removes every event whose class identity equals `activity` from all traces of `log`.
+ ///
+ /// Traces that become empty are kept in the log; only their events are filtered.
+ ///
+ /// # Parameters
+ /// - `log`: the event log to filter (consumed and returned)
+ /// - `event_log_classifier`: classifier used to determine the activity of an event
+ /// - `activity`: the activity to remove
+ fn remove_activity_from_log(
+     mut log: EventLog,
+     event_log_classifier: &EventLogClassifier,
+     activity: String,
+ ) -> EventLog {
+     for trace in &mut log.traces {
+         // filter in place instead of rebuilding the event vector of every trace
+         trace
+             .events
+             .retain(|event| event_log_classifier.get_class_identity(event) != activity);
+     }
+     log
+ }
+
+
+ /// Helper struct to count the occurrences of each activity in the whole log and in every trace.
+ ///
+ /// - `activities` maps every activity to the number of its occurrences in the whole event log.
+ /// - `trace_activities[i]` maps every activity to the number of its occurrences in the trace
+ ///   at index `i` of the event log.
+ struct ActivityTraceCounter {
+     activities: HashMap<String, usize>,
+     trace_activities: Vec<HashMap<String, usize>>,
+ }
+
+ impl ActivityTraceCounter {
+     /// Counts how often every activity of the event log occurs in every trace and in the whole
+     /// event log. Activities are identified via `event_log_classifier`.
+     fn new(log: &EventLog, event_log_classifier: &EventLogClassifier) -> ActivityTraceCounter {
+         let mut activities: HashMap<String, usize> = HashMap::new();
+         let mut trace_activities = Vec::with_capacity(log.traces.len());
+
+         for trace in &log.traces {
+             let mut counts: HashMap<String, usize> = HashMap::new();
+             for event in &trace.events {
+                 let activity = event_log_classifier.get_class_identity(event);
+                 // the entry API performs the lookup and the insertion in one step
+                 *activities.entry(activity.clone()).or_insert(0) += 1;
+                 *counts.entry(activity).or_insert(0) += 1;
+             }
+             // pushed in trace order, so the index matches the index of the trace in the log
+             trace_activities.push(counts);
+         }
+
+         ActivityTraceCounter {
+             activities,
+             trace_activities,
+         }
+     }
+
+     /// Consumes the object and returns the activity count of the whole log as well as the
+     /// vector containing the activity count for every trace.
+     fn get(self) -> (HashMap<String, usize>, Vec<HashMap<String, usize>>) {
+         (self.activities, self.trace_activities)
+     }
+ }
+
+ /// Builds the result of a successful *activity once per trace* fallthrough.
+ ///
+ /// Returns [`ActivityOncePerTrace`] holding a concurrency operator whose only child is a leaf
+ /// labelled with `activity`, together with `log` after all events of `activity` were removed.
+ fn cleanup_log(
+     log: EventLog,
+     event_log_classifier: &EventLogClassifier,
+     activity: String,
+ ) -> Fallthrough {
+     // concurrency operator with the detected activity as its only child
+     let mut concurrency = Node::new_operator(OperatorType::Concurrency);
+     concurrency.add_child(Node::new_leaf(Some(activity.clone())));
+
+     let remaining_log = remove_activity_from_log(log, event_log_classifier, activity);
+
+     ActivityOncePerTrace(concurrency, remaining_log)
+ }
+
+ /// This fallthrough applies if an activity occurs exactly once in every trace of the log.
+ ///
+ /// If several activities qualify, the lexicographically smallest one is chosen, which makes
+ /// the result independent of the iteration order of the underlying hash map.
+ ///
+ /// # Returns
+ /// - [`ActivityOncePerTrace`] with a concurrency operator holding the detected activity as a
+ ///   leaf and the log without that activity, if the fallthrough applies
+ /// - [`Return`] with the unchanged log otherwise, so it can be used by other fallthroughs
+ pub fn activity_once_per_trace(
+     log: EventLog,
+     event_log_classifier: &EventLogClassifier,
+ ) -> Fallthrough {
+     let k = log.traces.len();
+     // count how often every activity occurs in the event log and in every trace
+     let (activities, trace_activities) =
+         ActivityTraceCounter::new(&log, event_log_classifier).get();
+
+     let candidate: Option<String> = activities
+         .into_iter()
+         // an activity occurring once per trace occurs exactly `k` times in the whole log,
+         // so all other activities can be discarded without inspecting the traces
+         .filter(|(_, cardinality)| *cardinality == k)
+         .map(|(activity, _)| activity)
+         // the total count alone is not sufficient (e.g. twice in one trace, missing in another)
+         .filter(|activity| {
+             trace_activities
+                 .iter()
+                 .all(|trace| trace.get(activity) == Some(&1))
+         })
+         .min();
+
+     match candidate {
+         Some(activity) => cleanup_log(log, event_log_classifier, activity),
+         // does not apply - return the event log to be used in other fallthrough cases
+         None => Return(log),
+     }
+ }
+
+ /// Public wrapper for [`activity_once_per_trace`].
+ ///
+ /// Forwards `log` and `event_log_classifier` to `activity_once_per_trace`. The
+ /// [`Parameters`] argument is ignored; it is only accepted so that this function has the
+ /// same signature as the other fall-through detection wrappers.
+ pub fn activity_once_per_trace_wrapper(
+ log: EventLog,
+ event_log_classifier: &EventLogClassifier,
+ _: &Parameters,
+ ) -> Fallthrough {
+ activity_once_per_trace(log, event_log_classifier)
+ }
+
+
+#[cfg(test)]
+mod test_activity_once_per_trace {
+ use crate::{event_log, EventLog};
+ use crate::core::event_data::case_centric::EventLogClassifier;
+ use crate::core::process_models::process_tree::{Node, OperatorType};
+ use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_once_per_trace::{activity_once_per_trace, remove_activity_from_log};
+ use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough;
+ use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{ActivityOncePerTrace, Return};
+
+ #[test]
+ fn test_remove_activity_with_empty_trace() {
+     // b as first event intentionally to get the same timestamp as for the expected one
+     let input = event_log!([], ["a"], ["b", "a"]);
+     let filtered =
+         remove_activity_from_log(input, &EventLogClassifier::default(), "a".to_string());
+
+     // traces that become empty have to stay in the log
+     let expected = event_log!([], [], ["b"],);
+     assert_eq!(filtered, expected);
+ }
+
+ /// Compares two event logs activity by activity under the given classifier.
+ ///
+ /// Returns `true` only if both logs contain the same number of traces, every pair of traces
+ /// at the same index contains the same number of events, and every pair of events at the
+ /// same position has the same class identity. Only class identities are compared.
+ fn events_equal(log: &EventLog, o_log: &EventLog, event_log_classifier: &EventLogClassifier) -> bool {
+     // `zip` silently truncates to the shorter side, so the lengths are checked explicitly.
+     if log.traces.len() != o_log.traces.len() {
+         return false;
+     }
+     log.traces.iter().zip(o_log.traces.iter()).all(|(t0, t1)| {
+         // traces of different length are never equal
+         t0.events.len() == t1.events.len()
+             && t0.events.iter().zip(t1.events.iter()).all(|(e0, e1)| {
+                 event_log_classifier.get_class_identity(e0)
+                     == event_log_classifier.get_class_identity(e1)
+             })
+     })
+ }
+
+ #[test]
+ /// The example as defined in Robust Process Mining with Guarantees
+ fn leeman_example() {
+     let log = event_log!(
+         ["a", "b", "c", "d"],
+         ["d", "a", "b"],
+         ["a", "d", "c"],
+         ["b", "c", "d"],
+     );
+
+     // 'd' is the only activity occurring exactly once in each of the four traces
+     let ActivityOncePerTrace(node, remaining_log) =
+         activity_once_per_trace(log, &EventLogClassifier::default())
+     else {
+         return assert!(false);
+     };
+
+     let expected_log = event_log!(
+         ["a", "b", "c"],
+         ["a", "b"],
+         ["a", "c"],
+         ["b", "c"],
+     );
+     assert!(events_equal(&remaining_log, &expected_log, &EventLogClassifier::default()));
+
+     let mut expected_node = Node::new_operator(OperatorType::Concurrency);
+     expected_node.add_child(Node::new_leaf(Some(String::from("d"))));
+     assert_eq!(node, expected_node);
+ }
+
+ #[test]
+ /// Assert that the fallthrough does not apply (`Return`) if an activity occurs once in
+ /// almost every trace, but is missing in one of them
+ fn test_log_with_no_ft() {
+     // first case - the 'd' is missing in the first trace
+     let log = event_log!(
+         ["a", "b", "c"],
+         ["d", "a", "b"],
+         ["a", "d", "c"],
+         ["b", "c", "d"],
+     );
+     let first_result = activity_once_per_trace(log.clone(), &EventLogClassifier::default());
+     let Return(returned_log) = first_result else {
+         return assert!(false);
+     };
+     // the log has to be handed back unchanged
+     assert!(events_equal(&log, &returned_log, &EventLogClassifier::default()));
+
+     // second case - the 'd' is missing in the last trace
+     let log1 = event_log!(
+         ["a", "b", "c", "d"],
+         ["d", "a", "b"],
+         ["a", "d", "c"],
+         ["b", "c"],
+     );
+     let second_result = activity_once_per_trace(log1.clone(), &EventLogClassifier::default());
+     let Return(log2) = second_result else {
+         return assert!(false);
+     };
+     assert!(events_equal(&log1, &log2, &EventLogClassifier::default()));
+ }
+
+ #[test]
+ fn test_with_multiple_activities_appearing_once() {
+     // both 'c' and 'd' occur exactly once in every trace
+     let log = event_log!(
+         ["a", "b", "c", "d"],
+         ["d", "a", "b", "c"],
+         ["a", "d", "c"],
+         ["b", "c", "d"],
+     );
+     let ActivityOncePerTrace(process_node, log) =
+         activity_once_per_trace(log, &EventLogClassifier::default())
+     else {
+         return assert!(false);
+     };
+
+     // remaining log if 'c' has been removed
+     let without_c = event_log!(
+         ["a", "b", "d"],
+         ["d", "a", "b"],
+         ["a", "d"],
+         ["b", "d"],
+     );
+     // remaining log if 'd' has been removed
+     let without_d = event_log!(
+         ["a", "b", "c"],
+         ["a", "b", "c"],
+         ["a", "c"],
+         ["b", "c"],
+     );
+
+     // it really is arbitrary whether c or d is chosen
+     let matches_without_c = events_equal(&log, &without_c, &EventLogClassifier::default());
+     let matches_without_d = events_equal(&log, &without_d, &EventLogClassifier::default());
+     assert!(matches_without_c || matches_without_d);
+
+     let mut node_with_c = Node::new_operator(OperatorType::Concurrency);
+     node_with_c.add_child(Node::new_leaf(Some(String::from("c"))));
+
+     let mut node_with_d = Node::new_operator(OperatorType::Concurrency);
+     node_with_d.add_child(Node::new_leaf(Some(String::from("d"))));
+
+     assert!(process_node == node_with_c || process_node == node_with_d)
+ }
+
+ #[test]
+ fn test_two_activites_in_trace() {
+     // 'd' occurs in every trace, but twice in the second one -> the fallthrough must not apply
+     let log = event_log!(
+         ["a", "b", "c", "d"],
+         ["d", "a", "b", "d"],
+         ["a", "d", "c"],
+         ["b", "c", "d"],
+     );
+     let result = activity_once_per_trace(log.clone(), &EventLogClassifier::default());
+     let Return(returned_log) = result else {
+         return assert!(false);
+     };
+
+     // the log has to be handed back unchanged
+     assert!(events_equal(&log, &returned_log, &EventLogClassifier::default()));
+ }
+
+ #[test]
+ fn test_with_empty_log() {
+     // the fallthrough should not find anything, as there is a trace containing no element
+     let log_with_empty_trace = event_log!(["a", "b"], []);
+     let first_result =
+         activity_once_per_trace(log_with_empty_trace.clone(), &EventLogClassifier::default());
+     let Return(_) = first_result else {
+         return assert!(false);
+     };
+
+     // with a single trace every activity of that trace occurs exactly once per trace
+     let single_trace_log = event_log!(["a", "b"]);
+     let second_result = activity_once_per_trace(single_trace_log, &EventLogClassifier::default());
+     // only the variant is compared, the payload of the reference value is irrelevant
+     let reference = Fallthrough::ActivityOncePerTrace(
+         Node::new_operator(OperatorType::Concurrency),
+         event_log!()
+     );
+     assert!(second_result.same_enum_variant(&reference));
+ }
+}
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/empty_traces.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/empty_traces.rs
new file mode 100644
index 0000000..71cc9eb
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/empty_traces.rs
@@ -0,0 +1,125 @@
+//! Empty traces fallthrough detection utilities.
+
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::process_tree::{Node, OperatorType};
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{EmptyTraces, Return};
+use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters;
+use crate::EventLog;
+
+ /// Checks whether the empty traces fallthrough applies to a given log;
+ /// it applies when the log contains empty traces.
+ ///
+ /// The classifier is not needed for this check and is ignored.
+ ///
+ /// # Returns
+ /// - [EmptyTraces] if the event log contained empty traces: an exclusive-choice operator
+ ///   with a single silent leaf, together with the log without its empty traces
+ /// - [Return] with the unchanged log if the event log contained no empty traces
+ fn empty_traces(mut log: EventLog, _event_log_classifier: &EventLogClassifier) -> Fallthrough {
+     let len_before = log.traces.len();
+     // drop all empty traces in place
+     log.traces.retain(|trace| !trace.events.is_empty());
+
+     if log.traces.len() == len_before {
+         // no trace has been removed, hence this fallthrough does not apply
+         return Return(log);
+     }
+
+     // some traces were lost because they were empty: return a process node together with
+     // the remaining, still unprocessed traces of the event log
+     let mut node = Node::new_operator(OperatorType::ExclusiveChoice);
+     node.add_child(Node::new_leaf(None));
+     EmptyTraces(node, log)
+ }
+
+ /// Public wrapper for [`empty_traces`].
+ ///
+ /// Forwards `log` and the classifier to `empty_traces`. The [`Parameters`] argument is
+ /// ignored; it is only accepted so that this function has the same signature as the
+ /// other fall-through detection wrappers.
+ pub fn empty_traces_wrapper(log: EventLog, _event_log_classifier: &EventLogClassifier, _: &Parameters) -> Fallthrough {
+ empty_traces(log, _event_log_classifier)
+ }
+
+#[cfg(test)]
+mod test_empty_traces_ft{
+ use crate::{event_log, event};
+ use crate::core::event_data::case_centric::EventLogClassifier;
+ use crate::core::process_models::process_tree::{Node, OperatorType};
+ use crate::discovery::case_centric::inductive_miner_app::fallthrough::empty_traces::empty_traces;
+ use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{EmptyTraces, Return};
+
+ #[test]
+ /// Test the simplest case: the function should return a xor-node with one silent leaf as
+ /// child and a log containing only the single non-empty trace
+ fn test_empty_traces() {
+     let log = event_log!(
+         [],
+         [],
+         [],
+         ["a"],
+         [],
+     );
+
+     let result = empty_traces(log, &EventLogClassifier::default());
+     let EmptyTraces(node, remaining_log) = result else {
+         return assert!(false);
+     };
+
+     // only the trace ["a"] survives
+     assert_eq!(remaining_log.traces.len(), 1);
+     assert_eq!(remaining_log.traces[0].events.len(), 1);
+     assert_eq!(remaining_log.traces[0].events[0], event!("a"));
+
+     let mut expected_node = Node::new_operator(OperatorType::ExclusiveChoice);
+     expected_node.add_child(Node::new_leaf(None));
+     assert_eq!(node, expected_node);
+ }
+
+ #[test]
+ /// Assert that an event log without any empty trace is returned unchanged
+ fn test_not_empty_traces() {
+     let log = event_log!(
+         ["a"],
+         ["b"],
+         ["f"],
+         ["a"],
+         ["g"],
+     );
+
+     let result = empty_traces(log.clone(), &EventLogClassifier::default());
+     let Return(returned_log) = result else {
+         return assert!(false);
+     };
+
+     assert_eq!(log, returned_log);
+ }
+
+ #[test]
+ /// Assert that an empty event log does not trigger the fallthrough (`Return`),
+ /// as this is the base case
+ fn test_empty_log() {
+     let log = event_log!();
+     let Return(returned_log) = empty_traces(log.clone(), &EventLogClassifier::default()) else {
+         return assert!(false);
+     };
+     assert_eq!(log, returned_log);
+ }
+
+
+ #[test]
+ fn test_log_only_empty_traces() {
+     let log = event_log!(
+         [], [], []
+     );
+
+     let result = empty_traces(log, &EventLogClassifier::default());
+     let EmptyTraces(node, remaining_log) = result else {
+         return assert!(false);
+     };
+
+     // every trace was empty, so nothing is left in the log
+     assert_eq!(remaining_log.traces.len(), 0);
+     assert_eq!(remaining_log, event_log!());
+
+     let mut expected_node = Node::new_operator(OperatorType::ExclusiveChoice);
+     expected_node.add_child(Node::new_leaf(None));
+     assert_eq!(node, expected_node);
+ }
+
+
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/fallthrough.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/fallthrough.rs
new file mode 100644
index 0000000..572cae2
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/fallthrough.rs
@@ -0,0 +1,39 @@
+//! Fallthrough labels
+
+use std::mem::discriminant;
+use crate::core::process_models::process_tree::Node;
+use crate::discovery::case_centric::inductive_miner_app::splits::split::Split;
+use crate::EventLog;
+
+ /// Represents the result of attempting to apply a fall-through rule.
+ ///
+ /// Each variant corresponds to a specific fall-through strategy and
+ /// contains the resulting [`Node`], i.e. Operator-type and children if any, together with the
+ /// event log(s) derived during its application.
+ ///
+ /// If no fall-through rule is applicable, the `Return` variant is used.
+ /// In this case, the original event log is returned unchanged.
+ ///
+ /// Not to be confused with [`FallThroughLabel`]
+ pub enum Fallthrough {
+ /// The log contained empty traces: the resulting node and the log without its empty traces.
+ EmptyTraces(Node, EventLog),
+ /// An activity occurred exactly once in every trace: the resulting node and the log
+ /// without the events of that activity.
+ ActivityOncePerTrace(Node, EventLog),
+ /// Result of the activity concurrent fallthrough: the resulting node, an event log and
+ /// the [`Split`] produced during its application.
+ ActivityConcurrent(Node, EventLog, Split),
+ /// Result of the strict tau loop fallthrough: the resulting loop node and the log whose
+ /// traces have been split.
+ StrictTauLoop(Node, EventLog),
+ /// Result of the tau loop fallthrough: the resulting node and the derived event log.
+ TauLoop(Node, EventLog),
+ /// Result of the flower model fallthrough: the root node of the flower model.
+ FlowerModel(Node),
+ /// No fall-through rule applied: the event log is handed back to the caller.
+ Return(EventLog),
+ }
+
+ impl Fallthrough {
+     /// Returns `true` if `self` and `other` are the same variant of [`Fallthrough`].
+     ///
+     /// Only the variant is compared; the data carried by the variants is ignored.
+     pub fn same_enum_variant(&self, other: &Self) -> bool {
+         let own_variant = discriminant(self);
+         let other_variant = discriminant(other);
+         own_variant == other_variant
+     }
+ }
+
+
+
+
+
+
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/flower_model.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/flower_model.rs
new file mode 100644
index 0000000..0936bef
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/flower_model.rs
@@ -0,0 +1,84 @@
+//! Flower model fall through utilities.
+//!
+//! This module implements the flower model fallthrough as a last resort fallthrough.
+
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::process_tree::{Node, OperatorType};
+use crate::discovery::case_centric::dfg::discover_dfg_with_classifier;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::FlowerModel;
+use crate::EventLog;
+
+ /// This is the last resort of the fallthroughs of the inductive miner.
+ /// This FT should only be applied if the event log does not contain any empty trace.
+ ///
+ /// The returned tree is a loop operator whose do part is a concurrency operator over all
+ /// activities of the directly-follows graph of `log` (sorted by name) and whose redo part
+ /// is a silent leaf.
+ ///
+ /// # Returns
+ /// Always [`FlowerModel`] holding the root of the described tree.
+ pub fn flower_model(log: EventLog, event_log_classifier: &EventLogClassifier) -> Fallthrough {
+     let dfg = discover_dfg_with_classifier(&log, event_log_classifier);
+
+     // get all activities in the directly follows graph; sorted to get a deterministic
+     // order of the children
+     let mut activities: Vec<String> = dfg.activities.iter().map(|(a, _)| a.clone()).collect();
+     activities.sort();
+
+     // create a concurrency relation over all activities, one leaf per activity
+     // NOTE(review): flower models are commonly built with an exclusive choice over the
+     // activities; a concurrency operator requires every activity in each iteration -
+     // confirm that this is intended.
+     let mut sub_tree = Node::new_operator(OperatorType::Concurrency);
+     for activity in activities {
+         sub_tree.add_child(Node::new_leaf(Some(activity)));
+     }
+
+     // flower root: do part is the concurrency relation, redo part is a silent transition
+     let mut flower_node_root = Node::new_operator(OperatorType::Loop);
+     flower_node_root.add_child(sub_tree);
+     flower_node_root.add_child(Node::new_leaf(None));
+
+     FlowerModel(flower_node_root)
+ }
+
+#[cfg(test)]
+mod test_flower_model {
+ use crate::core::event_data::case_centric::EventLogClassifier;
+ use crate::core::process_models::process_tree::{Node, OperatorType};
+ use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough;
+ use crate::discovery::case_centric::inductive_miner_app::fallthrough::flower_model::flower_model;
+ use crate::event_log;
+
+ #[test]
+ fn test_basic_flower_model_leemans() {
+     let log = event_log!(
+         ["a", "b", "c", "d"],
+         ["d", "a", "b"],
+         ["a", "d", "c"],
+         ["b", "c", "d"],
+     );
+
+     let result = flower_model(log, &EventLogClassifier::default());
+
+     // do part consists of all activities in a concurrency relation
+     let mut expected_do_part = Node::new_operator(OperatorType::Concurrency);
+     for activity in ["a", "b", "c", "d"] {
+         expected_do_part.add_child(Node::new_leaf(Some(String::from(activity))));
+     }
+
+     // build expected flower model; the redo part is just a silent transition
+     let mut expected_flower = Node::new_operator(OperatorType::Loop);
+     expected_flower.add_child(expected_do_part);
+     expected_flower.add_child(Node::new_leaf(None));
+
+     let Fallthrough::FlowerModel(flower) = result else {
+         return assert!(false);
+     };
+     assert_eq!(expected_flower, flower);
+ }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs
new file mode 100644
index 0000000..b2b39dd
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/mod.rs
@@ -0,0 +1,77 @@
+//! Fallthrough detection utilities for the Inductive Miner.
+//!
+//! This module contains utilities of the fallthrough rules used by the Inductive Miner when no
+//! standard cut can be discovered in the event log.
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_concurrent::activity_concurrent_wrapper;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::activity_once_per_trace::activity_once_per_trace_wrapper;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::empty_traces::empty_traces_wrapper;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::flower_model::flower_model;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::strict_tau_loop::strict_tau_loop_wrapper;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::tau_loop::tau_loop_wrapper;
+use crate::discovery::case_centric::inductive_miner_app::structures::parameter::{Parameter, Parameters};
+use crate::EventLog;
+
+mod activity_concurrent;
+pub mod fallthrough;
+mod activity_once_per_trace;
+mod empty_traces;
+mod flower_model;
+mod strict_tau_loop;
+mod tau_loop;
+
+
+
+ /// Applies the sequence of *fallthrough rules* used by the Inductive Miner to an event log.
+ ///
+ /// This function evaluates the predefined fallthroughs in the following order and returns
+ /// the result of the first one that applies:
+ /// - [empty_traces]
+ /// - [activity_once_per_trace]
+ /// - [activity_concurrent]
+ /// - [strict_tau_loop]
+ /// - [tau_loop]
+ /// - [flower_model]
+ ///
+ /// Whether the fallthroughs before the flower model are tried at all is controlled by the
+ /// provided parameters ([`Parameter::ApplyFallthrough`]).
+ /// Note that the flower model is applied regardless of the parameters.
+ ///
+ /// # Parameters
+ /// - log: The event log to which the fallthrough rules are applied.
+ /// - event_log_classifier: classifier to identify activities in event log events
+ /// - parameters: the provided parameters
+ ///
+ /// # Returns
+ /// A `Fallthrough` value representing either:
+ /// - a discovered process model produced by a fallthrough, or
+ /// - the flower model if no fallthrough applies or fallthroughs are disabled.
+ pub fn apply_fallthrough(
+     mut log: EventLog,
+     event_log_classifier: &EventLogClassifier,
+     parameters: &Parameters,
+ ) -> Fallthrough {
+     // check if fallthroughs shall be applied by provided parameters
+     if parameters.contains(&Parameter::ApplyFallthrough) {
+         // all wrappers share one signature, which allows them to be tried in order
+         let rules: [fn(EventLog, &EventLogClassifier, &Parameters) -> Fallthrough; 5] = [
+             empty_traces_wrapper,
+             activity_once_per_trace_wrapper,
+             activity_concurrent_wrapper,
+             strict_tau_loop_wrapper,
+             tau_loop_wrapper,
+         ];
+
+         for rule in rules {
+             match rule(log, event_log_classifier, parameters) {
+                 // the rule did not apply and handed the log back - try the next one
+                 Fallthrough::Return(returned_log) => log = returned_log,
+                 applied => return applied,
+             }
+         }
+     } // else the flower model is applied
+
+     // last possible option: flower model
+     flower_model(log, event_log_classifier)
+ }
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs
new file mode 100644
index 0000000..6b17cd6
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/strict_tau_loop.rs
@@ -0,0 +1,273 @@
+//! Strict tau loop fallthrough detection utilities.
+//!
+//! This module implements the **strict tau loop fallthrough** used by the Inductive Miner.
+//!
+//! A strict tau loop assumes that a new iteration of the process starts **only when a start activity
+//! directly follows an end activity** within the same trace. Such a pattern suggest that the process
+//! silently returned to the beginning of the workflow via a tau transition between iterations.
+
+
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::process_tree::Node;
+use crate::core::process_models::process_tree::OperatorType::Loop;
+use crate::discovery::case_centric::dfg::discover_dfg_with_classifier;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{Return, StrictTauLoop};
+use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters;
+use crate::EventLog;
+
+ /// Splits traces in the event log according to the semantics of a **strict tau loop fallthrough**.
+ ///
+ /// A trace is split whenever an **end activity** is immediately followed by a **start activity**
+ /// (both taken from the directly-follows graph of `log`). This pattern indicates that one
+ /// iteration of the process has completed and a new iteration begins via an implicit silent
+ /// transition.
+ ///
+ /// A split never creates an empty trace, because it only happens after at least one event
+ /// has been added to the current segment. Traces that are already empty in `log` are kept.
+ ///
+ /// # Returns
+ /// A new [EventLog] where traces are split by the above described logic.
+ fn split_log_according_to_strict_tau(log: EventLog, classifier: &EventLogClassifier) -> EventLog {
+     let dfg = discover_dfg_with_classifier(&log, classifier);
+     let mut split_log = log.clone_without_traces();
+
+     for trace in log.traces {
+         // segment of the current trace that is being filled with events
+         let mut segment = trace.clone_without_events();
+         let mut previous_was_end = false;
+
+         for event in trace.events {
+             let activity = classifier.get_class_identity(&event);
+             let is_start = dfg.start_activities.contains(&activity);
+             let is_end = dfg.end_activities.contains(&activity);
+
+             if previous_was_end && is_start {
+                 // an end activity is directly followed by a start activity:
+                 // close the current segment and continue with a fresh one
+                 let next_segment = segment.clone_without_events();
+                 split_log
+                     .traces
+                     .push(std::mem::replace(&mut segment, next_segment));
+             }
+
+             segment.events.push(event);
+             previous_was_end = is_end;
+         }
+
+         // the last segment has not been pushed yet -- this includes empty traces
+         split_log.traces.push(segment);
+     }
+
+     split_log
+ }
+
+ /// Attempts to detect and apply the **strict tau loop fallthrough**.
+ ///
+ /// The log is transformed by using [split_log_according_to_strict_tau].
+ /// If the operation increased the number of traces in the log, it indicates that the traces
+ /// contained implicit restarts of the process.
+ ///
+ /// In that case, a loop operator is constructed where:
+ ///
+ /// - the **do part** is a silent leaf used as temporary placeholder at index 0
+ /// - the **redo part** is a silent leaf
+ ///
+ /// The resulting loop node and transformed event log are returned.
+ ///
+ /// # Returns
+ /// - [StrictTauLoop] if strict loop behavior is detected
+ /// - [Return] with the (unsplit) log otherwise
+ ///
+ /// # Panics
+ /// Panics if the split log contains fewer traces than the original log.
+ fn strict_tau_loop(log: EventLog, classifier: &EventLogClassifier) -> Fallthrough {
+     let traces_before = log.traces.len();
+     let log = split_log_according_to_strict_tau(log, classifier);
+
+     match log.traces.len().cmp(&traces_before) {
+         std::cmp::Ordering::Greater => {
+             // at least one trace has been split: build the loop node
+             let mut loop_node = Node::new_operator(Loop);
+             loop_node.add_child(Node::new_leaf(None)); // temporary at index 0
+             loop_node.add_child(Node::new_leaf(None)); // redo part is silent
+             StrictTauLoop(loop_node, log)
+         }
+         std::cmp::Ordering::Less => {
+             panic!("Original log contains more traces, than the log split according to strict tau.")
+         }
+         // nothing has been split - default return
+         std::cmp::Ordering::Equal => Return(log),
+     }
+ }
+
+ /// Public wrapper for [`strict_tau_loop`].
+ ///
+ /// Forwards `log` and `classifier` to `strict_tau_loop`. The [`Parameters`] argument is
+ /// ignored; it is only accepted so that this function has the same signature as the
+ /// other fall-through detection wrappers.
+ pub fn strict_tau_loop_wrapper(log: EventLog, classifier: &EventLogClassifier, _:&Parameters) -> Fallthrough {
+ strict_tau_loop(log, classifier)
+ }
+
+
+#[cfg(test)]
+mod test_strict_tau_loop{
+ use crate::core::event_data::case_centric::EventLogClassifier;
+ use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough;
+ use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::{Return, StrictTauLoop};
+ use crate::{event_log, EventLog};
+ use crate::discovery::case_centric::inductive_miner_app::fallthrough::strict_tau_loop::strict_tau_loop;
+
+
+ /// Asserts that `log` is a [`StrictTauLoop`] whose event log equals `expected` trace by
+ /// trace and event by event with respect to the default classifier.
+ fn cmp_logs(log: Fallthrough, expected: EventLog) {
+     let classifier = EventLogClassifier::default();
+     let logs_match = match log {
+         StrictTauLoop(_, actual) => {
+             actual.traces.len() == expected.traces.len()
+                 && actual.traces.iter().zip(expected.traces.iter()).all(|(t0, t1)| {
+                     t0.events.len() == t1.events.len()
+                         && t0.events.iter().zip(t1.events.iter()).all(|(e0, e1)| {
+                             classifier.get_class_identity(e0) == classifier.get_class_identity(e1)
+                         })
+                 })
+         }
+         // any other variant means the fallthrough did not apply
+         _ => false,
+     };
+     assert!(logs_match)
+ }
+ #[test]
+ fn test_split() {
+     let input = event_log!(
+         ["a", "b", "c", "d"],
+         ["d", "a", "b"],
+         ["a", "d", "c"],
+         ["b", "c", "d"],
+     );
+
+     // the first, second and fourth trace are split once, the third one stays untouched
+     let expected_log = event_log!(
+         ["a", "b", "c"],
+         ["d"],
+         ["d"],
+         ["a", "b"],
+         ["a", "d", "c"],
+         ["b", "c"],
+         ["d"]
+     );
+
+     let result = strict_tau_loop(input, &EventLogClassifier::default());
+     cmp_logs(result, expected_log);
+ }
+
+ #[test]
+ fn strict_tau_loop_simple_split() {
+ let log = event_log!(
+ ["a", "b", "c", "a", "c"], // contains c (end) followed by a (start) -> split
+ );
+
+ // Splitting at c|a -> two traces: "a b c" and "a c"
+ // L.len() = 1, L1.len() = 2 => strict tau-loop discovered
+ let expected_log = event_log!(
+ ["a", "b", "c"],
+ ["a", "c"],
+ );
+
+ cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log);
+
+ }
+
+
+ #[test]
+ fn strict_tau_multiple_splits_in_trace() {
+     // start set will contain "a" (first event of the trace),
+     // end set will contain "c" (last event of the trace),
+     // here we have "... c a ... c a ..." -> two splits -> three traces after split
+     let input = event_log!(
+         ["a", "b", "c", "a", "b", "c", "a", "b", "c"],
+     );
+
+     // Splits at each c|a produce three identical traces "a b c"
+     // L.len() = 1, L1.len() = 3 => tau-loop discovered
+     let expected_log = event_log!(
+         ["a", "b", "c"],
+         ["a", "b", "c"],
+         ["a", "b", "c"],
+     );
+
+     let result = strict_tau_loop(input, &EventLogClassifier::default());
+     cmp_logs(result, expected_log);
+ }
+
+ #[test]
+ fn strict_tau_no_split() {
+     let log = event_log!(
+         ["a", "b", "c"], // starts with a, ends with c
+         ["d", "e"], // starts with d, ends with e
+         ["f", "g", "h"] // starts with f, ends with h
+     );
+
+     // start set = {a, d, f}, end set = {c, e, h}
+     // There is no occurrence inside any trace of (c|e|h) followed immediately by (a|d|f)
+     // => L1.len() == L.len() -> no tau-loop found
+     let expected_log = event_log!(
+         ["a", "b", "c"],
+         ["d", "e"],
+         ["f", "g", "h"]
+     );
+
+     // fail loudly if the fallthrough unexpectedly applies, instead of silently passing
+     let Return(log) = strict_tau_loop(log, &EventLogClassifier::default()) else {
+         panic!("expected `Return`, as no end activity is directly followed by a start activity");
+     };
+     assert_eq!(log, expected_log);
+ }
+
+ #[test]
+ fn strict_tau_start_end_overlap() {
+ let log = event_log!(
+ ["a", "b", "a", "c", "a"], // start set contains "a", end set contains "a"
+ ["c", "d"] // second trace, starting with c and ending with d
+ );
+ let expected_log = event_log!(
+ ["a", "b", "a"], // prefix up to first split
+ ["c", "a"], // remainder after that split
+ ["c", "d"], // original second trace unchanged
+ );
+
+ cmp_logs(strict_tau_loop(log, &EventLogClassifier::default()), expected_log);
+
+ }
+
+ #[test]
+ fn strict_tau_single_trace_to_many() {
+     // suppose start set includes x and end set includes a
+     let input = event_log!(
+         ["x", "a", "b", "a", "x", "y", "a"],
+     );
+     // the only a|x adjacency splits the trace into two
+     let expected_log = event_log!(
+         ["x", "a", "b", "a"],
+         ["x", "y", "a"],
+     );
+
+     let result = strict_tau_loop(input, &EventLogClassifier::default());
+     cmp_logs(result, expected_log);
+ }
+
+
+ // Edge case: traces of length 1 where start == end; adjacent repetitions inside a longer
+ // trace cause multiple tiny splits
+ #[test]
+ fn strict_tau_length_one_traces_and_adjacent_repeats() {
+     let input = event_log!(
+         ["a"], // start/end = a
+         ["a", "a", "b", "a", "a"], // two a|a adjacencies
+     );
+
+     // start set = {a}, end set = {a}
+     // split at every a|a adjacency inside the second trace
+     let expected_log = event_log!(
+         ["a"], // first trace unchanged
+         ["a"], // fragment from leading 'a' in second trace
+         ["a", "b", "a"], // middle fragment
+         ["a"], // trailing fragment
+     );
+
+     let result = strict_tau_loop(input, &EventLogClassifier::default());
+     cmp_logs(result, expected_log);
+ }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs
new file mode 100644
index 0000000..1cc1bec
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/fallthrough/tau_loop.rs
@@ -0,0 +1,149 @@
+//! Tau loop fallthrough detection utilities.
+//!
+//! This module implements the **tau loop fallthrough** used by the Inductive Miner.
+//! A tau loop is assumed when a trace appears to restart without an explicit visible transition
+//! between the end of one iteration and the beginning of the next.
+
+
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::process_tree::Node;
+use crate::core::process_models::process_tree::OperatorType::Loop;
+use crate::discovery::case_centric::dfg::discover_dfg_with_classifier;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::Return;
+use crate::discovery::case_centric::inductive_miner_app::structures::parameter::Parameters;
+use crate::EventLog;
+
+/// Cuts every trace of `log` into pieces that each begin at a start activity.
+///
+/// The start activities are taken from the directly-follows graph discovered for
+/// `log` with `classifier`. Events are appended to a running sub-trace; whenever
+/// the activity of an event is a start activity and the running sub-trace
+/// already holds events, that sub-trace is emitted and a new one (created with
+/// `clone_without_events`) is opened for the current event.
+///
+/// Sub-traces without events are never emitted, so empty input traces do not
+/// appear in the result.
+///
+/// # Returns
+/// An [`EventLog`] derived from `log.clone_without_traces()` that holds the
+/// emitted sub-traces in their original order. Every cut adds a trace and
+/// every empty input trace removes one, so the trace count can differ from
+/// that of `log` in either direction.
+fn split_log_according_to_tau_loop(log: EventLog, classifier: &EventLogClassifier) -> EventLog {
+    let dfg = discover_dfg_with_classifier(&log, classifier);
+    let mut split_log = log.clone_without_traces();
+
+    for trace in log.traces {
+        // Running sub-trace; starts out as an event-free copy of the source trace.
+        let mut current = trace.clone_without_events();
+
+        for event in trace.events {
+            let activity = classifier.get_class_identity(&event);
+            let is_restart =
+                dfg.start_activities.contains(&activity) && !current.events.is_empty();
+
+            if is_restart {
+                // Emit the finished sub-trace and carry on with an empty sibling.
+                let next = current.clone_without_events();
+                split_log.traces.push(std::mem::replace(&mut current, next));
+            }
+
+            current.events.push(event);
+        }
+
+        // Flush the remainder of this trace unless it holds no events at all.
+        if !current.events.is_empty() {
+            split_log.traces.push(current);
+        }
+    }
+
+    split_log
+}
+
+/// Attempts to apply the `tau_loop` fallthrough.
+///
+/// The log is split with [`split_log_according_to_tau_loop`]. More traces after
+/// the split than before means that at least one trace was cut at a start
+/// activity, which is taken as an implicit restart. A loop node is then built:
+///
+/// - child 0 (**do part**): placeholder leaf, replaced in `fallthrough_finder`
+/// - child 1 (**redo part**): silent (tau) leaf
+///
+/// # Returns
+/// - [`Fallthrough::TauLoop`] (loop node, split log) if the trace count grew
+/// - [`Fallthrough::Return`] (split log) if the trace count stayed the same
+///
+/// # Panics
+/// If the split log has fewer traces than the input. NOTE(review): the split
+/// drops event-free traces, so empty input traces can trigger this - confirm
+/// that callers remove them beforehand.
+fn tau_loop(log: EventLog, classifier: &EventLogClassifier) -> Fallthrough {
+    let original_len = log.traces.len();
+    let log = split_log_according_to_tau_loop(log, classifier);
+
+    match original_len.cmp(&log.traces.len()) {
+        std::cmp::Ordering::Less => {
+            let mut node = Node::new_operator(Loop);
+            node.add_child(Node::new_leaf(None)); // placeholder for the do part
+            node.add_child(Node::new_leaf(None)); // silent transition as redo part
+            Fallthrough::TauLoop(node, log)
+        }
+        std::cmp::Ordering::Greater => {
+            panic!("Original log contains more traces than the log split according to tau loop.")
+        }
+        std::cmp::Ordering::Equal => Return(log),
+    }
+}
+
+/// Public entry point for the tau loop fallthrough.
+///
+/// Delegates to [`tau_loop`]; the [`Parameters`] argument is accepted but unused.
+pub fn tau_loop_wrapper(
+    log: EventLog, classifier: &EventLogClassifier, _: &Parameters,
+) -> Fallthrough {
+    tau_loop(log, classifier)
+}
+
+
+#[cfg(test)]
+mod test_tau_loop {
+    use crate::core::event_data::case_centric::EventLogClassifier;
+    use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough::TauLoop;
+    use crate::discovery::case_centric::inductive_miner_app::fallthrough::tau_loop::tau_loop;
+    use crate::{event_log, EventLog};
+
+    /// Returns `true` if both logs hold the same number of traces and every pair of
+    /// traces agrees in length and in the activity identity of each event.
+    fn equal_events(log: &EventLog, o_log: &EventLog, classifier: &EventLogClassifier) -> bool {
+        log.traces.len() == o_log.traces.len()
+            && log.traces.iter().zip(o_log.traces.iter()).all(|(t, o)| {
+                t.events.len() == o.events.len()
+                    && t.events.iter().zip(o.events.iter()).all(|(e0, e1)| {
+                        classifier.get_class_identity(e0) == classifier.get_class_identity(e1)
+                    })
+            })
+    }
+
+    #[test]
+    fn test_split() {
+        let classifier = EventLogClassifier::default();
+        let input = event_log!(
+            ["a", "b", "c", "d"],
+            ["d", "a", "b"],
+            ["a", "d", "c"],
+            ["b", "c", "d"],
+        );
+
+        // Traces start with "a", "d" or "b", so a cut is expected before each of them.
+        let expected = event_log!(
+            ["a"], ["b", "c"], ["d"], // from ["a", "b", "c", "d"]
+            ["d"], ["a"], ["b"],      // from ["d", "a", "b"]
+            ["a"], ["d", "c"],        // from ["a", "d", "c"]
+            ["b", "c"], ["d"]         // from ["b", "c", "d"]
+        );
+
+        let TauLoop(_node, split_log) = tau_loop(input, &classifier) else { return assert!(false); };
+        assert!(equal_events(&split_log, &expected, &classifier));
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs
new file mode 100644
index 0000000..76aee79
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/mod.rs
@@ -0,0 +1,264 @@
+//! inductive miner discovery algorithm
+
+use base_cases::{find_base_case, BaseCases};
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::process_tree::{Node, ProcessTree};
+use crate::discovery::case_centric::dfg::discover_dfg_with_classifier;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::find_cut;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::apply_fallthrough;
+use crate::discovery::case_centric::inductive_miner_app::fallthrough::fallthrough::Fallthrough;
+use crate::discovery::case_centric::inductive_miner_app::splits::perform_split;
+use crate::discovery::case_centric::inductive_miner_app::structures::parameter::{Parameter, Parameters};
+use crate::EventLog;
+
+mod cut_finder;
+mod structures;
+mod splits;
+mod fallthrough;
+mod base_cases;
+
+
+/// Mines a process tree from the given event log using the Inductive Miner
+/// with default parameter settings.
+///
+/// The default parameters come from [`Parameter::generate_default_parameters`].
+/// The tree is built recursively with [`build_tree`], wrapped in a
+/// [`ProcessTree`] and then folded via `ProcessTree::fold`; the fold is applied
+/// unconditionally.
+///
+/// # Parameters
+/// - `log`: The event log to mine.
+/// - `event_log_classifier`: Classifier used to determine activity identities.
+///
+/// # Returns
+/// The folded [`ProcessTree`] discovered for `log`.
+pub fn inductive_miner_default_parameters(log: EventLog, event_log_classifier: &EventLogClassifier) -> ProcessTree {
+    let parameters = Parameter::generate_default_parameters();
+    let root = build_tree(log, event_log_classifier, &parameters, 0);
+    ProcessTree::new(root).fold()
+}
+
+
+/// Turns a detected cut into an operator node whose children are mined recursively.
+///
+/// The log is partitioned with [`perform_split`] according to `cut`. Every
+/// resulting sub-log is handed to [`build_tree`] with `depth + 1`, and the
+/// returned subtrees are appended, in sub-log order, as children of a new
+/// operator node that carries the operator of the cut.
+///
+/// # Parameters
+/// - `cut`: The detected cut.
+/// - `event_log_classifier`: Activity classifier.
+/// - `log`: The event log to split.
+/// - `parameters`: Mining parameters, passed on to the recursion.
+/// - `depth`: Recursion depth of the current call.
+///
+/// # Returns
+/// The operator [`Node`] for the cut together with its mined children.
+fn convert_cut_to_process_node<'a>(cut: Cut<'a>, event_log_classifier: &EventLogClassifier, log: EventLog, parameters: &Parameters, depth: usize) -> Node {
+    // The operator has to be read before `cut` is moved into the split.
+    let operator = cut.get_operator();
+
+    // Partition the log and take ownership of the resulting sub-logs.
+    let split = perform_split(&log, event_log_classifier, cut);
+    let sub_logs = split.get_own();
+
+    let mut cut_node = Node::new_operator(operator);
+
+    // The sub-logs are independent of each other; they are mined one after
+    // another and attached in the order in which the split delivered them.
+    for sub_log in sub_logs {
+        let subtree = build_tree(sub_log, event_log_classifier, parameters, depth + 1);
+        cut_node.add_child(subtree);
+    }
+
+    cut_node
+}
+
+/// Builds the process tree node for a log for which no cut was found.
+///
+/// The log is passed to [`apply_fallthrough`]; the node delivered with the
+/// returned [`Fallthrough`] variant is completed with sub-trees mined
+/// recursively via [`build_tree`] (using `depth + 1`) and then returned.
+///
+/// # Parameters
+/// - `log`: The event log.
+/// - `event_log_classifier`: Activity classifier.
+/// - `parameters`: Mining parameters.
+/// - `depth`: Current recursion depth.
+///
+/// # Panics
+/// Panics on [`Fallthrough::Return`] and if a tau loop node is not an operator;
+/// indexing `op.children[0]` would also panic if that operator had no children.
+fn fallthrough_finder(log: EventLog, event_log_classifier: &EventLogClassifier, parameters: &Parameters, depth: usize) -> Node {
+ // Dispatch on the outcome of the fallthrough detection.
+ // Per the original author a fallthrough is guaranteed, the flower model being the default.
+ match apply_fallthrough(log, event_log_classifier, parameters){
+ #[allow(unused_mut)]
+ Fallthrough::EmptyTraces(mut node, log) |
+ Fallthrough::ActivityOncePerTrace(mut node, log) => {
+ node.add_child(build_tree(log, &event_log_classifier, parameters,depth+1));
+ node
+ }
+ #[allow(unused_mut)]
+ Fallthrough::StrictTauLoop(mut node, log) |
+ Fallthrough::TauLoop(mut node, log) => {
+ if let Node::Operator(op) = &mut node{
+ // swap the placeholder at index 0 for the tree mined from the split log
+ op.children[0] = build_tree(log, event_log_classifier, parameters,depth+1);
+ } else {
+ panic!("TauLoop node is not an operator node.")
+ }
+ node
+ }
+ Fallthrough::ActivityConcurrent(mut node, filtered_out_log, split) => {
+ // NOTE(review): `node` presumably arrives as the AND-node - confirm in `apply_fallthrough`.
+ // The sub-tree mined from `filtered_out_log` is appended to it as a child.
+ node.add_child(build_tree(filtered_out_log, event_log_classifier, parameters, depth + 1));
+
+ // The split arrives already computed; every sub-log is mined and attached to a
+ // new operator node, which in turn becomes a further child of `node`.
+ let operator_type = split.get_operator().clone();
+ let split = split.get_own();
+ let mut child = Node::new_operator(operator_type);
+ for log in split {
+ child.add_child(build_tree(log, event_log_classifier, parameters, depth + 1));
+ }
+ node.add_child(child);
+ node
+ }
+ Fallthrough::FlowerModel(node) => { node} // returned as delivered; this is the default
+ Fallthrough::Return(_) => { // This point should not be reached, as the flower model is the default
+ panic!("Fallthrough::Return in build tree function - must not happen");
+ }
+ }
+
+}
+
+/// Core recursive function of the Inductive Miner.
+///
+/// The algorithm proceeds as follows:
+/// 1. Check for base cases (empty log or single activity): [`find_base_case`]
+/// 2. If none applies, discover the directly-follows graph (DFG):
+///    [`discover_dfg_with_classifier`]
+/// 3. Attempt to find a valid cut: [`find_cut`]
+/// 4. If a cut is found, split the log and recurse on each sub-log:
+///    [`convert_cut_to_process_node`]
+/// 5. Otherwise, apply a fallthrough strategy: [`fallthrough_finder`]
+///
+/// # Parameters
+/// - `log`: The event log to mine.
+/// - `event_log_classifier`: Activity classifier.
+/// - `parameters`: Mining parameters.
+/// - `depth`: Current recursion depth.
+///
+/// # Returns
+/// The root [`Node`] of the mined (sub)tree.
+pub fn build_tree(log: EventLog, event_log_classifier: &EventLogClassifier, parameters: &Parameters, depth: usize) -> Node {
+    match find_base_case(&log, event_log_classifier) {
+        // Base cases map directly to leaves; a leaf without label is the silent one.
+        BaseCases::Empty => Node::new_leaf(None),
+        BaseCases::SingleActivity(activity) => Node::new_leaf(Some(activity)),
+        BaseCases::None => {
+            let dfg = discover_dfg_with_classifier(&log, event_log_classifier);
+            let cut = find_cut(&dfg, &log, event_log_classifier, parameters);
+            match cut {
+                Some(cut) => {
+                    convert_cut_to_process_node(cut, event_log_classifier, log, parameters, depth)
+                }
+                // No cut applies: fall back to the fallthrough strategies.
+                None => fallthrough_finder(log, event_log_classifier, parameters, depth),
+            }
+        }
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use crate::core::event_data::case_centric::EventLogClassifier;
+    use crate::core::process_models::process_tree::Node;
+    use crate::core::process_models::process_tree::OperatorType::{ExclusiveChoice, Loop};
+    use crate::discovery::case_centric::inductive_miner_app::inductive_miner_default_parameters;
+    use crate::event_log;
+
+    /// Builds `Loop(label, tau)`: a loop whose first child is the leaf `label`
+    /// and whose second child is a leaf without label.
+    fn tau_loop_over(label: &str) -> Node {
+        let mut loop_node = Node::new_operator(Loop);
+        loop_node.add_child(Node::new_leaf(Some(String::from(label))));
+        loop_node.add_child(Node::new_leaf(None));
+        loop_node
+    }
+
+    /// Builds `Xor(tau, other)`: a choice between an unlabeled leaf and `other`.
+    fn optional(other: Node) -> Node {
+        let mut choice = Node::new_operator(ExclusiveChoice);
+        choice.add_child(Node::new_leaf(None));
+        choice.add_child(other);
+        choice
+    }
+
+    /// Mining a log with a repeated sub-sequence has to yield a valid tree.
+    #[test]
+    fn test_works_without_panic() {
+        let log = event_log!(
+            ["a", "b", "c", "d"],
+            ["a", "b", "c", "d", "e", "a", "b", "c", "d"],
+        );
+        let event_log_classifier = EventLogClassifier::default();
+
+        let tree = inductive_miner_default_parameters(log, &event_log_classifier);
+        assert!(tree.is_valid());
+    }
+
+    /// The single trace `<a, a>` is expected to be mined as `Loop(a, tau)`.
+    #[test]
+    fn test_loop_over_same_activity() {
+        let log = event_log!(["a", "a"]);
+        let tree = inductive_miner_default_parameters(log, &EventLogClassifier::default());
+
+        assert!(tree.is_valid());
+        assert_eq!(tree.root, tau_loop_over("a"));
+    }
+
+    /// A larger log only has to produce a valid tree; its shape is not checked.
+    #[test]
+    fn test_complex_log() {
+        let log = event_log![
+            ["a", "b", "d"],
+            ["a", "d", "b"],
+            ["a", "b", "c", "a", "b"],
+            ["a", "d", "c", "a", "d"],
+            ["a", "b", "d", "c", "a", "d", "b"],
+            ["a", "d", "b", "c", "a", "b", "d"],
+        ];
+        let tree = inductive_miner_default_parameters(log, &EventLogClassifier::default());
+
+        assert!(tree.is_valid())
+    }
+
+    /// `<a, a>` plus an empty trace is expected to be mined as `Xor(tau, Loop(a, tau))`.
+    #[test]
+    fn test_loop_over_same_activity_with_empty_trace() {
+        let log = event_log!(
+            [],
+            ["a", "a"],
+        );
+        let tree = inductive_miner_default_parameters(log, &EventLogClassifier::default());
+
+        assert!(tree.is_valid());
+        assert_eq!(tree.root, optional(tau_loop_over("a")));
+    }
+
+    /// `<a>` plus an empty trace is expected to be mined as `Xor(tau, a)`.
+    #[test]
+    fn test_empty_trace_plus_base_case() {
+        let log = event_log!(["a"], []);
+        let tree = inductive_miner_default_parameters(log, &EventLogClassifier::default());
+
+        assert!(tree.is_valid());
+        assert_eq!(tree.root, optional(Node::new_leaf(Some(String::from("a")))));
+    }
+}
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs
new file mode 100644
index 0000000..4e94cdb
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/concurrency.rs
@@ -0,0 +1,127 @@
+//! Utility for splitting an event log according to a concurrency split.
+use std::borrow::Cow;
+use std::collections::HashSet;
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::process_tree::OperatorType::Concurrency;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+use crate::discovery::case_centric::inductive_miner_app::splits::split::Split;
+use crate::EventLog;
+
+/// Splits an event log according to the partitions of an AND-cut (concurrency cut).
+///
+/// For every partition of the cut one sub-log is created. Each sub-log holds one
+/// trace per trace of `log`; that trace keeps, in their original order, only the
+/// events whose activity (as determined by `activity_classifier`) belongs to the
+/// partition. `log` itself is not modified; kept events are cloned.
+///
+/// # Returns
+/// - `Some(split)` with one sub-log per partition, in partition order, if `cut`
+///   is a concurrency cut
+/// - `None` if the operator of `cut` is not [`Concurrency`]
+///
+/// # Notes
+/// - event order within traces is preserved
+/// - empty traces may occur if a trace contains no events from a partition
+pub fn and_split<'a>(log: &EventLog, activity_classifier: &EventLogClassifier, cut: Cut<'a>) -> Option<Split> {
+    // Only a concurrency cut can be resolved by this split.
+    if cut.get_operator() != Concurrency {
+        return None;
+    }
+
+    let partitions: Vec<HashSet<Cow<'a, str>>> = cut.get_own();
+    let mut sub_logs: Vec<EventLog> = Vec::with_capacity(partitions.len());
+
+    for partition in partitions {
+        let mut sub_log = log.clone_without_traces();
+
+        for trace in &log.traces {
+            // Project the trace onto the activities of this partition.
+            let mut projected = trace.clone_without_events();
+            for event in &trace.events {
+                let activity = activity_classifier.get_class_identity(event);
+                if partition.contains(activity.as_str()) {
+                    projected.events.push(event.clone());
+                }
+            }
+
+            // Pushed even when no event was kept, so that every sub-log holds
+            // one trace per input trace.
+            sub_log.traces.push(projected);
+        }
+
+        sub_logs.push(sub_log);
+    }
+
+    Some(Split::new(Concurrency, sub_logs))
+}
+
+
+#[cfg(test)]
+mod test_and_split {
+    use crate::core::chrono::Utc;
+    use crate::core::event_data::case_centric::EventLogClassifier;
+    use crate::core::process_models::dfg::DirectlyFollowsGraph;
+    use crate::discovery::case_centric::inductive_miner_app::cut_finder::concurrent::concurrent_cut_wrapper;
+    use crate::discovery::case_centric::inductive_miner_app::splits::concurrency::and_split;
+    use crate::event_log;
+
+    /// The split has to produce exactly the sub-logs `<A, B>` x3 and `<C>` x3.
+    #[test]
+    fn test_simple_and_cut_and_split() {
+        let time = Utc::now(); // one shared timestamp, so that logs can be compared with `==`
+        let test_log = event_log!(
+            ["A";{"time:timestamp" => time.clone()}, "B";{"time:timestamp" => time.clone()}, "C"; {"time:timestamp" => time.clone()}],
+            ["A"; {"time:timestamp" => time.clone()}, "C"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}],
+            ["C"; {"time:timestamp" => time.clone()}, "A"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}],
+        );
+
+        let dfg = DirectlyFollowsGraph::discover(&test_log);
+        let cut = concurrent_cut_wrapper(&dfg, None);
+        assert!(cut.is_some());
+        println!("{:?}", cut);
+        let split = and_split(&test_log, &EventLogClassifier::default(), cut.unwrap());
+        assert!(split.is_some());
+        let sub_logs = split.unwrap().get_own();
+        println!("{}", sub_logs.len());
+
+        let log1 = event_log!(["A"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}], ["A"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}], ["A"; {"time:timestamp" => time.clone()}, "B"; {"time:timestamp" => time.clone()}]);
+        let log2 = event_log!(["C"; {"time:timestamp" => time.clone()}], ["C"; {"time:timestamp" => time.clone()}], ["C"; {"time:timestamp" => time.clone()}]);
+
+        // Each expected sub-log has to show up exactly once; the order is free.
+        let (mut seen_ab, mut seen_c) = (false, false);
+        for sub_log in sub_logs {
+            if sub_log == log1 && !seen_ab {
+                seen_ab = true;
+            } else if sub_log == log2 && !seen_c {
+                seen_c = true;
+            } else {
+                assert!(false);
+            }
+        }
+        // Without this check a split with fewer than two sub-logs would pass unnoticed.
+        assert!(seen_ab && seen_c);
+    }
+
+    /// Runs the concurrency split on a log that contains an empty trace and prints
+    /// the indices of the resulting sub-logs, traces and events.
+    #[test]
+    fn test() {
+        let test_log = event_log!([], ["A", "B"], ["B", "A"]);
+        let dfg = DirectlyFollowsGraph::discover(&test_log);
+        let cut = concurrent_cut_wrapper(&dfg, None);
+        assert!(cut.is_some());
+        let split = and_split(&test_log, &EventLogClassifier::default(), cut.unwrap());
+        assert!(split.is_some());
+        let sub_logs = split.unwrap().get_own();
+
+        for (log_idx, sub_log) in sub_logs.into_iter().enumerate() {
+            println!("Log: {}", log_idx);
+            for (trace_idx, trace) in sub_log.traces.into_iter().enumerate() {
+                println!("trace{}", trace_idx);
+                for (event_idx, _event) in trace.events.into_iter().enumerate() {
+                    println!(" {}", event_idx);
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs
new file mode 100644
index 0000000..bcc823c
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/exclusive_choice.rs
@@ -0,0 +1,110 @@
+//! Utility for splitting an event log according to an exclusive choice cut.
+
+use std::collections::HashMap;
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::process_tree::OperatorType::ExclusiveChoice;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+use crate::discovery::case_centric::inductive_miner_app::splits::split::Split;
+use crate::EventLog;
+
+/// Splits an event log according to a provided exclusive choice (xor) cut.
+///
+/// Returns `None` if `cut` is no exclusive choice cut or `cut.is_empty()` holds; the split itself is still `todo!()`.
+pub fn xor_split<'a>(log: &EventLog, activity_classifier: &EventLogClassifier, cut: Cut<'a>) -> Option<Split> {
+    if cut.get_operator() != ExclusiveChoice || cut.is_empty() {
+        return None;
+    }
+
+    let k = cut.len();
+    let partition = cut.get_own();
+    let mut result: Vec<EventLog> = vec![log.clone_without_traces(); k];
+    todo!();
+    Some(Split::new(ExclusiveChoice, result))
+}
+
+#[cfg(test)]
+mod tests_xor_split {
+    use std::collections::HashSet;
+    use crate::core::chrono::Utc;
+    use crate::core::event_data::case_centric::EventLogClassifier;
+    use crate::core::process_models::dfg::DirectlyFollowsGraph;
+    use crate::core::process_models::process_tree::OperatorType::ExclusiveChoice;
+    use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+    use crate::discovery::case_centric::inductive_miner_app::cut_finder::exclusive_choice::exclusive_choice_cut_wrapper;
+    use crate::discovery::case_centric::inductive_miner_app::splits::exclusive_choice::xor_split;
+    use crate::event_log;
+
+    /// Two traces over disjoint activities have to be split into two sub-logs.
+    #[test]
+    fn test_basic() {
+        let log = event_log!(
+            ["A", "A", "B", "e"],
+            ["C", "D"]
+        );
+
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        let cut = exclusive_choice_cut_wrapper(&dfg);
+        assert!(cut.is_some());
+
+        let split = xor_split(&log, &EventLogClassifier::default(), cut.unwrap());
+        assert!(split.is_some());
+        assert_eq!(split.unwrap().sub_logs.len(), 2);
+    }
+
+    /// A hand-built cut with three empty partitions on a log of two empty traces:
+    /// three sub-logs are expected, each holding two empty traces.
+    #[test]
+    fn test_only_empty_traces_and_cut() {
+        let log = event_log!(
+            [],
+            []
+        );
+
+        let partitions = vec![HashSet::new(), HashSet::new(), HashSet::new()];
+        let cut = Cut::new(ExclusiveChoice, partitions);
+        let split = xor_split(&log, &EventLogClassifier::default(), cut);
+        assert!(split.is_some());
+
+        let sub_logs = split.unwrap().get_own();
+        assert_eq!(sub_logs.len(), 3); // exactly 3 sub-logs
+        for sub_log in sub_logs {
+            // every sub-log holds exactly 2 traces, all of them empty
+            assert_eq!(sub_log.traces.len(), 2);
+            for trace in sub_log.traces {
+                assert!(trace.events.is_empty())
+            }
+        }
+    }
+
+    /// The traces `<A, B>` and `<C, C, C>` have to end up in separate sub-logs.
+    #[test]
+    fn test_leeman_example() {
+        let time = Utc::now();
+        let log = event_log!(
+            ["A";{"time:timestamp" => time.clone()}, "B";{"time:timestamp" => time.clone()}],
+            ["C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}]
+        );
+
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        let cut = exclusive_choice_cut_wrapper(&dfg);
+        assert!(cut.is_some());
+        let split = xor_split(&log, &EventLogClassifier::default(), cut.unwrap());
+        assert!(split.is_some());
+        let sub_logs = split.unwrap().get_own();
+        assert_eq!(sub_logs.len(), 2);
+
+        for sub_log in sub_logs {
+            // if there is not exactly one trace per sub-log, something is really wrong
+            if sub_log.traces.len() != 1 {
+                assert!(false);
+            }
+            // The sub-logs are told apart by the length of their only trace.
+            let expected = if sub_log.traces[0].events.len() == 2 {
+                event_log!(["A";{"time:timestamp" => time.clone()}, "B";{"time:timestamp" => time.clone()}] {"concept:name" => 0},)
+            } else {
+                event_log!(["C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}, "C";{"time:timestamp" => time.clone()}]{"concept:name" => 1})
+            };
+            assert_eq!(sub_log, expected);
+        }
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs
new file mode 100644
index 0000000..867ef8e
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/mod.rs
@@ -0,0 +1,83 @@
+//! This module contains utilities for splitting an event log according to either exclusive choice,
+//! sequence, loop or concurrency cut.
+
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::process_tree::OperatorType;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+use crate::discovery::case_centric::inductive_miner_app::splits::concurrency::and_split;
+use crate::discovery::case_centric::inductive_miner_app::splits::exclusive_choice::xor_split;
+use crate::discovery::case_centric::inductive_miner_app::splits::redo_loop::loop_split;
+use crate::discovery::case_centric::inductive_miner_app::splits::sequence::sequence_split;
+use crate::discovery::case_centric::inductive_miner_app::splits::split::Split;
+use crate::EventLog;
+
+mod concurrency;
+mod sequence;
+mod exclusive_choice;
+mod redo_loop;
+pub mod split;
+
+
+/// Splits `log` according to `cut` and returns the resulting [`Split`].
+///
+/// All arguments are forwarded to [`splitting`], which selects the split
+/// function that belongs to the operator of the cut.
+///
+/// # Panics
+/// Panics if [`splitting`] returns `None`, i.e. if the selected split function
+/// did not produce a split for the given cut.
+pub fn perform_split<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut<'a>) -> Split {
+    match splitting(log, classifier, cut) {
+        Some(split) => split,
+        // Without a split the cut cannot be resolved, so mining cannot continue.
+        None => panic!("No split function found for the cut operator."),
+    }
+}
+
+
+/// Dispatches a cut to the split function that belongs to its operator.
+///
+/// - [`OperatorType::ExclusiveChoice`]: [`xor_split`]
+/// - [`OperatorType::Sequence`]: [`sequence_split`]
+/// - [`OperatorType::Concurrency`]: [`and_split`]
+/// - [`OperatorType::Loop`]: [`loop_split`]
+///
+/// Each of these functions checks the operator of the cut again and returns
+/// `None` if it does not match.
+///
+/// # Parameters
+/// - `log`: The event log to split.
+/// - `classifier`: Classifier used to determine activity identities.
+/// - `cut`: The cut to resolve; it is moved into the selected split function.
+///
+/// # Returns
+/// Whatever the selected split function returns: `Some(split)` on success,
+/// `None` if that function rejects the cut.
+fn splitting<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut<'a>) -> Option<Split> {
+    // The match has no catch-all arm, so a new operator forces an update here.
+    match cut.get_operator() {
+        OperatorType::ExclusiveChoice => xor_split(log, classifier, cut),
+        OperatorType::Sequence => sequence_split(log, classifier, cut),
+        OperatorType::Concurrency => and_split(log, classifier, cut),
+        OperatorType::Loop => loop_split(log, classifier, cut),
+    }
+}
+
+#[cfg(test)]
+mod test_splits {
+    use std::collections::HashSet;
+    use crate::core::event_data::case_centric::EventLogClassifier;
+    use crate::core::process_models::dfg::DirectlyFollowsGraph;
+    use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::sequence_cut_wrapper;
+    use crate::discovery::case_centric::inductive_miner_app::splits::sequence::sequence_split;
+    use crate::event_log;
+
+    #[test]
+    fn test_sequence_split() {
+        let log = event_log!(["a", "b", "c", "d"]);
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        // A sequence cut has to exist for the single trace, and it has to be splittable.
+        let cut = sequence_cut_wrapper(&dfg, &HashSet::new());
+        assert!(cut.is_some());
+        assert!(sequence_split(&log, &EventLogClassifier::default(), cut.unwrap()).is_some());
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs
new file mode 100644
index 0000000..40928bc
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/redo_loop.rs
@@ -0,0 +1,135 @@
+//! Utility for splitting a log according to a loop cut
+
+use std::collections::HashMap;
+use crate::EventLog;
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::process_tree::OperatorType::Loop;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+use crate::discovery::case_centric::inductive_miner_app::splits::split::Split;
+
+/// Splits an event log according to the partitions of a loop cut.
+///
+/// Returns `None` if `cut` is not a loop cut; the split itself is still `todo!()`.
+pub fn loop_split<'a>(log: &EventLog, classifier: &EventLogClassifier, cut: Cut<'a>) -> Option<Split> {
+    if cut.get_operator() != Loop {
+        return None;
+    }
+
+    let k = cut.len();
+    let mut result: Vec<EventLog> = vec![log.clone_without_traces(); k];
+    let partitions = cut.get_own();
+    todo!();
+    Some(Split::new(Loop, result))
+}
+
+#[cfg(test)]
+mod test_loop_split {
+    use crate::core::event_data::case_centric::EventLogClassifier;
+    use crate::core::process_models::dfg::DirectlyFollowsGraph;
+    use crate::discovery::case_centric::inductive_miner_app::cut_finder::loop_cut::redo_loop_cut_wrapper;
+    use crate::discovery::case_centric::inductive_miner_app::splits::redo_loop::loop_split;
+    use crate::event_log;
+    use crate::EventLog;
+
+    /// Returns `true` if both logs hold the same number of traces and all traces
+    /// agree pairwise in length and in the activity identity of every event.
+    fn events_equal(log: &EventLog, o_log: &EventLog, event_log_classifier: EventLogClassifier) -> bool {
+        if log.traces.len() != o_log.traces.len() {
+            return false;
+        }
+        log.traces.iter().zip(o_log.traces.iter()).all(|(t0, t1)| {
+            // A length mismatch has to fail the comparison; skipping such a pair
+            // would report logs with different traces as equal.
+            t0.events.len() == t1.events.len()
+                && t0.events.iter().zip(t1.events.iter()).all(|(e0, e1)| {
+                    event_log_classifier.get_class_identity(e0)
+                        == event_log_classifier.get_class_identity(e1)
+                })
+        })
+    }
+
+    #[test]
+    fn test_loop_split_leemans_example() {
+        let log = event_log!(
+            ["a", "b"],
+            ["a", "b", "c", "a", "b"],
+            ["a", "b", "c", "a", "b", "c", "a", "b"]
+        );
+
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        let cut = redo_loop_cut_wrapper(&dfg);
+        assert!(cut.is_some());
+        let split = loop_split(&log, &EventLogClassifier::default(), cut.unwrap());
+        assert!(split.is_some());
+        let split = split.unwrap();
+        assert_eq!(split.sub_logs.len(), 2);
+
+        // Expected do part: one <a, b> per loop iteration (1 + 2 + 3 = 6 traces).
+        let do_log = event_log!(
+            ["a", "b"],
+            ["a", "b"],
+            ["a", "b"],
+            ["a", "b"],
+            ["a", "b"],
+            ["a", "b"]
+        );
+        // Expected redo part: one <c> per repetition (0 + 1 + 2 = 3 traces).
+        let redo_log = event_log!(["c"], ["c"], ["c"]);
+
+        for log in split.get_own() {
+            if log.traces.len() == 6 {
+                // six traces: has to be the do part
+                assert!(events_equal(&log, &do_log, EventLogClassifier::default()));
+            } else if log.traces.len() == 3 {
+                // three traces: has to be the redo part
+                assert!(events_equal(&log, &redo_log, EventLogClassifier::default()));
+            } else {
+                assert!(false);
+            }
+        }
+    }
+
+    #[test]
+    fn test_more_complex_loop() {
+        let log = event_log!(
+            ["a", "b"],
+            ["a", "b", "c", "a", "b"],
+            ["a", "d", "b"],
+            ["a", "d", "b", "c", "a", "d", "b"],
+            ["a", "d", "b", "c", "a", "b"]
+        );
+
+        let do_log = event_log!(
+            ["a", "b"],
+            ["a", "b"],
+            ["a", "b"],
+            ["a", "d", "b"],
+            ["a", "d", "b"],
+            ["a", "d", "b"],
+            ["a", "d", "b"],
+            ["a", "b"]
+        );
+
+        let redo_log = event_log!(["c"], ["c"], ["c"]);
+
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        let cut = redo_loop_cut_wrapper(&dfg);
+        assert!(cut.is_some());
+        let split = loop_split(&log, &EventLogClassifier::default(), cut.unwrap());
+        assert!(split.is_some());
+        let split = split.unwrap();
+        assert_eq!(split.sub_logs.len(), 2);
+
+        for log in split.get_own() {
+            if log.traces.len() == do_log.traces.len() {
+                // same trace count as the expected do part (8 traces)
+                assert!(events_equal(&log, &do_log, EventLogClassifier::default()));
+            } else if log.traces.len() == redo_log.traces.len() {
+                // same trace count as the expected redo part (3 traces)
+                assert!(events_equal(&log, &redo_log, EventLogClassifier::default()));
+            } else {
+                assert!(false);
+            }
+        }
+    }
+}
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs
new file mode 100644
index 0000000..184cea1
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/sequence.rs
@@ -0,0 +1,140 @@
+//! Utility for resolving sequence cuts into sequence splits.
+//!
+//!
+//! # Reference:
+//! - Leemans, S.J.J., Fahland, D., van der Aalst, W.M.P.:
+//! "Discovering Block-Structured Process Models from Event Logs – A Constructive Approach."
+//! Application of Concurrency to System Design (ACSD), 2013.
+//! - Leemans S.J.J., "Robust process mining with guarantees", Ph.D. Thesis, Eindhoven
+//! University of Technology, 09.05.2017
+
+use std::borrow::Cow;
+use crate::core::event_data::case_centric::EventLogClassifier;
+use crate::core::process_models::process_tree::OperatorType::Sequence;
+use crate::discovery::case_centric::inductive_miner_app::cut_finder::cut::Cut;
+use crate::discovery::case_centric::inductive_miner_app::splits::split::Split;
+use crate::EventLog;
+
+/// Splits an event log into one sub-log per partition of a sequence cut by projecting every
+/// trace onto each partition; traces left without events by the projection are kept.
+///
+/// Returns `None` if `cut` is not a sequence cut, otherwise `Some(Split)` with `cut.len()` logs.
+pub fn sequence_split<'a>(
+    log: &EventLog,
+    activity_classifier: &EventLogClassifier,
+    cut: Cut<'a>,
+) -> Option<Split> {
+    if cut.get_operator() != Sequence {
+        return None;
+    }
+    let k = cut.len();
+    let partitions = cut.get_own(); // NOTE(review): assumed to be one set of activity labels each
+    let mut result: Vec<EventLog> = vec![log.clone_without_traces(); k];
+    for trace in &log.traces {
+        for (sub_log, part) in result.iter_mut().zip(partitions.iter()) {
+            let mut kept = trace.clone();
+            kept.events.retain(|e| part.contains(activity_classifier.get_class_identity(e).as_str()));
+            sub_log.traces.push(kept);
+        }
+    }
+    Some(Split::new(Sequence, result))
+}
+
+#[cfg(test)]
+mod test_sequence_split {
+    use std::collections::HashSet;
+    use crate::core::chrono::Utc;
+    use crate::core::event_data::case_centric::EventLogClassifier;
+    use crate::core::process_models::dfg::DirectlyFollowsGraph;
+    use crate::discovery::case_centric::inductive_miner_app::cut_finder::sequence_cut::sequence_cut_wrapper;
+    use crate::discovery::case_centric::inductive_miner_app::splits::sequence::sequence_split;
+    use crate::event_log;
+
+    #[test]
+    fn test_sequence_split() {
+        let time = Utc::now();
+        let log = event_log!(
+            ["a"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}],
+            ["b"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}]
+        );
+
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        let cut = sequence_cut_wrapper(&dfg, &HashSet::new());
+        assert!(cut.is_some());
+        let cut = cut.unwrap();
+
+        let split = sequence_split(&log, &EventLogClassifier::default(), cut);
+        assert!(split.is_some());
+
+        let split = split.unwrap().get_own();
+        // the sub-logs expected from the split
+        let log1 = event_log!(["a"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}], ["b"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}]);
+        let log2 = event_log!(["c"; {"time:timestamp" => time.clone()}], ["c"; {"time:timestamp" => time.clone()}]);
+
+        let mut b1 = false;
+        let mut b2 = false;
+        for log in split {
+            // every expected log may be matched only once, as the order of the sub-logs is unknown
+            if log == log1 && !b1 {
+                b1 = true;
+            } else if log == log2 && !b2 {
+                b2 = true;
+            } else {
+                panic!("sub-log matches no expected log or matches one a second time");
+            }
+        }
+        assert!(b1 && b2, "both expected sub-logs must be produced by the split");
+    }
+
+    #[test]
+    fn test_sequence_split2() {
+        // this log contains a sequence cut, as b or c never reach an "a"
+        let time = Utc::now();
+        let log = event_log!(
+            ["a"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}],
+            ["a"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}]
+        );
+        // we cut this log and separate the "a"s from the "b"s and "c"s;
+        // by definition, each resulting sub-log contains only the events whose activity is in
+        // the corresponding partition -- create the expected logs
+        let log0 = event_log!(["b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}, "b"; {"time:timestamp" => time.clone()}, "c"; {"time:timestamp" => time.clone()}], ["c"; {"time:timestamp" => time.clone()}]);
+        let log1 = event_log!(["a"; {"time:timestamp" => time.clone()}], ["a"; {"time:timestamp" => time.clone()}, "a"; {"time:timestamp" => time.clone()}]);
+
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        let cut = sequence_cut_wrapper(&dfg, &HashSet::new());
+        assert!(cut.is_some());
+        let cut = cut.unwrap();
+        let split = sequence_split(&log, &EventLogClassifier::default(), cut);
+        assert!(split.is_some());
+        let split = split.unwrap().get_own();
+
+        assert_eq!(split.len(), 2);
+
+        // check that both resulting logs match the expected sequence of activities
+        let mut b0 = false;
+        let mut b1 = false;
+        for log in split {
+            if log == log0 && !b0 {
+                b0 = true;
+            } else if log == log1 && !b1 {
+                b1 = true;
+            }
+        }
+        assert!(b1);
+        assert!(b0);
+    }
+
+
+    #[test]
+    fn test_sequence_split3() {
+        let log = event_log!(["a", "b", "c", "d"]);
+        let dfg = DirectlyFollowsGraph::discover(&log);
+        let cut = sequence_cut_wrapper(&dfg, &HashSet::new());
+        assert!(cut.is_some());
+        let split = sequence_split(&log, &EventLogClassifier::default(), cut.unwrap());
+        assert!(split.is_some());
+        // smoke test: only checks that a split is produced, the sub-logs are not inspected
+    }
+
+
+}
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs
new file mode 100644
index 0000000..4921425
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/splits/split.rs
@@ -0,0 +1,29 @@
+//! This module contains the basic split used for representing found splits in the Inductive Miner Algorithm.
+use crate::core::process_models::process_tree::OperatorType;
+use crate::EventLog;
+
+
+/// Helper struct aggregating the result of a splitting algorithm of the Inductive Miner.
+///
+/// # Fields
+/// - `operator`: [`OperatorType`] defining the split type
+/// - `sub_logs`: a vector containing all new logs
+pub struct Split {
+    pub operator: OperatorType,
+    pub sub_logs: Vec<EventLog>,
+}
+
+impl Split {
+    /// Creates a split from its operator and the sub-logs produced by the split.
+    pub fn new(operator: OperatorType, sub_logs: Vec<EventLog>) -> Split {
+        Self { operator, sub_logs }
+    }
+    /// Consumes the split and returns its sub-logs.
+    pub fn get_own(self) -> Vec<EventLog> {
+        self.sub_logs
+    }
+    /// Returns the operator of the split.
+    pub fn get_operator(&self) -> OperatorType {
+        self.operator
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs
new file mode 100644
index 0000000..8521cb8
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/minimum_self_distance.rs
@@ -0,0 +1,285 @@
+//! Utilities for determining the minimum self distance of a given activity in a given trace
+//! or activity sequence.
+
+use crate::EventLog;
+use std::collections::{HashMap, HashSet};
+use crate::core::event_data::case_centric::{EventLogClassifier, Trace};
+use crate::core::process_models::dfg::Activity;
+
+type Index = usize; // position of an event within a trace
+type MinDist = usize; // number of events between two executions of the same activity
+type InterveningSet = HashSet<Activity>; // activities observed between two such executions
+
+/// Stores for every activity, its minimum self-distance and the set of activities occurring
+/// between two minium-distance instances of that activity.
+pub struct MinimumSelfDistance {
+ minimum_distance_relation: HashMap)>,
+}
+
+impl MinimumSelfDistance {
+    /// Constructs the minimum self-distance relation for `log`.
+    ///
+    /// Events are mapped to activities with `event_log_classifier`; the per-trace results are
+    /// merged over all traces of the log.
+    pub fn new(log: &EventLog, event_log_classifier: &EventLogClassifier) -> MinimumSelfDistance {
+        Self {
+            minimum_distance_relation: Self::minimum_distances_interleave(
+                log,
+                event_log_classifier,
+            ),
+        }
+    }
+
+    /// Returns the minimum self-distance of `activity` together with the set of activities
+    /// occurring between two minimum-distance instances of that activity.
+    ///
+    /// Returns `None` if `activity` never occurs twice within a single trace of the log.
+    pub fn get_minimum_distance(&self, activity: &str) -> Option<&(MinDist, HashSet<Activity>)> {
+        self.minimum_distance_relation.get(activity)
+    }
+
+    /// Collects the activities of the events strictly between positions `start` and `end`.
+    ///
+    /// Positions beyond the end of `trace` are ignored, so the result is empty for an empty
+    /// trace or when no position lies strictly between `start` and `end`.
+    fn extract_interleaving_activities(
+        start: Index,
+        end: Index,
+        trace: &Trace,
+        event_log_classifier: &EventLogClassifier,
+    ) -> HashSet<Activity> {
+        trace
+            .events
+            .iter()
+            .take(end)
+            .skip(start + 1)
+            .map(|event| event_log_classifier.get_class_identity(event))
+            .collect()
+    }
+
+    /// Computes the minimum self-distances of all activities within a single trace.
+    ///
+    /// The self-distance of two consecutive executions of an activity is the number of events
+    /// between them. For every activity occurring at least twice in `trace`, the result holds
+    /// the smallest such distance and the activities observed between executions at exactly
+    /// that distance (merged if several pairs of executions share the minimum).
+    ///
+    /// For the trace `a b a`, the result maps `a` to the distance 1 and the set `{b}`.
+    fn minimum_distances_trace(
+        trace: &Trace,
+        event_log_classifier: &EventLogClassifier,
+    ) -> HashMap<Activity, (MinDist, InterveningSet)> {
+        let mut last_seen: HashMap<Activity, Index> = HashMap::new();
+        let mut results: HashMap<Activity, (MinDist, InterveningSet)> = HashMap::new();
+
+        for (index, event) in trace.events.iter().enumerate() {
+            let activity = event_log_classifier.get_class_identity(event);
+            if let Some(&last_index) = last_seen.get(&activity) {
+                // Number of events strictly between the previous and the current execution.
+                let dist = index - last_index - 1;
+                // Evaluated lazily: only needed if `dist` is a new or tied minimum.
+                let between = || {
+                    Self::extract_interleaving_activities(
+                        last_index,
+                        index,
+                        trace,
+                        event_log_classifier,
+                    )
+                };
+                if let Some((prev_dist, acts)) = results.get_mut(&activity) {
+                    if dist < *prev_dist {
+                        // Strictly smaller distance: the stored intervening set is obsolete.
+                        *prev_dist = dist;
+                        *acts = between();
+                    } else if dist == *prev_dist {
+                        // The minimum is reached again: merge the intervening activities.
+                        acts.extend(between());
+                    }
+                    // A larger distance cannot be the minimum and is ignored.
+                } else {
+                    // First repetition of this activity within the trace.
+                    results.insert(activity.clone(), (dist, between()));
+                }
+            }
+            // Remember the most recent position of this activity.
+            last_seen.insert(activity, index);
+        }
+
+        results
+    }
+
+    /// Aggregates the per-trace minimum self-distances over all traces of `log`.
+    ///
+    /// For each activity the globally smallest self-distance is kept; the intervening sets of
+    /// all traces that reach this minimum are merged.
+    ///
+    /// Activities that never repeat within a single trace do not appear in the result.
+    fn minimum_distances_interleave(
+        log: &EventLog,
+        event_log_classifier: &EventLogClassifier,
+    ) -> HashMap<Activity, (MinDist, InterveningSet)> {
+        let mut results: HashMap<Activity, (MinDist, InterveningSet)> = HashMap::new();
+
+        for trace in &log.traces {
+            let per_trace = Self::minimum_distances_trace(trace, event_log_classifier);
+            for (activity, (dist, interleaving_acts)) in per_trace {
+                if let Some((min_dist, interleaving_set)) = results.get_mut(&activity) {
+                    if dist < *min_dist {
+                        // This trace improves the minimum: replace the stored relation.
+                        *min_dist = dist;
+                        *interleaving_set = interleaving_acts;
+                    } else if dist == *min_dist {
+                        // Tie with the stored minimum: merge the intervening activities.
+                        interleaving_set.extend(interleaving_acts);
+                    }
+                    // A larger distance is ignored.
+                } else {
+                    results.insert(activity, (dist, interleaving_acts));
+                }
+            }
+        }
+
+        results
+    }
+}
+
+
+#[cfg(test)]
+mod test_min_dist {
+    use std::collections::HashSet;
+    use crate::core::event_data::case_centric::EventLogClassifier;
+    use crate::discovery::case_centric::inductive_miner_app::structures::minimum_self_distance::MinimumSelfDistance;
+    use crate::{event_log, trace};
+
+    #[test]
+    fn test_extract_interleaving_activities() {
+        let classifier = EventLogClassifier::default();
+        let between = MinimumSelfDistance::extract_interleaving_activities(0, 6, &trace!("a", "b", "c", "d", "e", "f"), &classifier);
+        assert_eq!(between, HashSet::from(["b".into(), "c".into(), "d".into(), "e".into(), "f".into()]));
+    }
+    #[test]
+    fn test_extract_from_empty_trace() {
+        let between = MinimumSelfDistance::extract_interleaving_activities(0, 6, &trace!(), &EventLogClassifier::default());
+        assert!(between.is_empty());
+    }
+
+    // ------------ Traces over at most two distinct activities
+    #[test]
+    fn test_one_loop_distance() {
+        let classifier = EventLogClassifier::default();
+        let distances = MinimumSelfDistance::minimum_distances_trace(&trace!("a", "b", "a"), &classifier);
+        assert!(distances.contains_key("a"));
+        let (dist, between) = &distances["a"];
+        assert_eq!(*dist, 1);
+        assert!(between.contains("b"));
+    }
+
+    #[test]
+    fn test_loop_zero_distance() {
+        let classifier = EventLogClassifier::default();
+        let distances = MinimumSelfDistance::minimum_distances_trace(&trace!("a", "a"), &classifier);
+        assert!(distances.contains_key("a"));
+        let (dist, between) = &distances["a"];
+        assert_eq!(*dist, 0);
+        assert!(between.is_empty());
+    }
+
+    #[test]
+    fn test_retrieve_smaller_later_loop() {
+        let sequence = trace!("a", "b", "b", "a", "b", "b", "b", "a", "b", "a");
+        let distances = MinimumSelfDistance::minimum_distances_trace(&sequence, &EventLogClassifier::default());
+        assert!(distances.contains_key("a"));
+        let (dist_a, between_a) = &distances["a"];
+        assert_eq!(*dist_a, 1);
+        assert!(between_a.contains("b"));
+        // `b` directly follows itself, so its minimum self-distance is 0
+        assert!(distances.contains_key("b"));
+        let (dist_b, between_b) = &distances["b"];
+        assert_eq!(*dist_b, 0);
+        assert!(between_b.is_empty());
+    }
+
+
+    // -------------------------------- Traces over more than two distinct activities
+
+    #[test]
+    fn test_complex_trace() {
+        let sequence = trace!("a", "b", "d", "e", "a", "d", "g", "g", "d","b", "f", "a", "c");
+        let distances = MinimumSelfDistance::minimum_distances_trace(&sequence, &EventLogClassifier::default());
+
+        // `a` occurs at positions 0, 4 and 11; the closer pair encloses `b d e`
+        assert!(distances.contains_key("a"));
+        let (dist_a, between_a) = &distances["a"];
+        assert_eq!(*dist_a, 3);
+        assert_eq!(*between_a, HashSet::from(["b".into(), "d".into(), "e".into()]));
+
+        assert!(distances.contains_key("b"));
+        let (dist_b, between_b) = &distances["b"];
+        assert_eq!(*dist_b, 7);
+        assert_eq!(*between_b, HashSet::from(["a".into(), "e".into(), "d".into(), "g".into()]));
+
+        assert!(!distances.contains_key("c"));
+        // `d` reaches its minimum distance 2 twice, so both intervening sets are merged
+        assert!(distances.contains_key("d"));
+        let (dist_d, between_d) = &distances["d"];
+        assert_eq!(*dist_d, 2);
+        assert_eq!(*between_d, HashSet::from(["e".into(), "a".into(), "g".into()]));
+
+        // `e` and `f` do not occur twice
+        assert!(!distances.contains_key("e"));
+        assert!(!distances.contains_key("f"));
+
+        // the only repetition of `g` is `g g`
+        assert!(distances.contains_key("g"));
+        let (dist_g, between_g) = &distances["g"];
+        assert_eq!(*dist_g, 0);
+        assert!(between_g.is_empty());
+    }
+
+
+    #[test]
+    fn test_empty_log() {
+        let classifier = EventLogClassifier::default();
+        let distances = MinimumSelfDistance::minimum_distances_interleave(&event_log!(), &classifier);
+        // an empty log must yield an empty relation
+        assert!(distances.is_empty());
+    }
+
+    #[test]
+    fn test_zero_loops_log() {
+        let log = event_log!(["a", "a"], ["b", "b"]);
+        let distances = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default());
+
+        // each activity directly follows itself in its own trace
+        for activity in ["a", "b"] {
+            assert!(distances.contains_key(activity));
+            assert_eq!(distances[activity].0, 0);
+        }
+    }
+
+    #[test]
+    fn test_find_smaller_loop() {
+        let log = event_log!(["a", "a"], ["a", "b", "a"]);
+        let classifier = EventLogClassifier::default();
+        let distances = MinimumSelfDistance::minimum_distances_interleave(&log, &classifier);
+        // the distance 0 of the first trace beats the distance 1 of the second trace
+        assert!(distances.contains_key("a"));
+        assert_eq!(distances["a"].0, 0);
+        assert!(!distances.contains_key("b"));
+    }
+
+    #[test]
+    fn test_merge_relations() {
+        let log = event_log!(["a", "c", "a"], ["a", "b", "a"]);
+        let distances = MinimumSelfDistance::minimum_distances_interleave(&log, &EventLogClassifier::default());
+        // both traces reach the same minimum, so their intervening activities are merged
+        assert!(distances.contains_key("a"));
+        assert_eq!(distances["a"].0, 1);
+        assert_eq!(distances["a"].1, HashSet::from(["b".into(), "c".into()]));
+    }
+
+
+}
+
+
+
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs
new file mode 100644
index 0000000..7a8aa5d
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/mod.rs
@@ -0,0 +1,3 @@
+//! This module contains additional structures needed for the implementation of the Inductive Miner.
+pub mod parameter;
+pub mod minimum_self_distance;
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs
new file mode 100644
index 0000000..9746001
--- /dev/null
+++ b/process_mining/src/discovery/case_centric/inductive_miner_app/structures/parameter.rs
@@ -0,0 +1,33 @@
+//! Parameter settings for controlling the inductive miner implementation
+use std::collections::HashSet;
+
+/// A helper type aggregating the parameters a user may want the inductive miner to adhere to.
+/// A `HashSet` is used so that every parameter is contained at most once.
+pub type Parameters = HashSet<Parameter>;
+
+
+
+/// Helper enum expressing which options shall be activated in the inductive miner.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum Parameter{
+ MinimumSelfDistance, // consider the minimum self-distance while looking for a concurrent cut
+ ApplyFallthrough, // apply fall-throughs (per the original note, the flower model is always applied)
+ //------- Ideas for additional parameters:
+ // Multiprocessing
+ // StrictSequenceCut, // apply the strict sequence cut in addition to the 'ordinary' sequence cut
+ // FoldTree, // automatically fold the resulting tree
+}
+
+
+
+impl Parameter {
+    /// Builds the set of default parameters.
+    ///
+    /// The set contains exactly:
+    /// - [`Parameter::MinimumSelfDistance`]: use the minimum self-distance to find concurrent cuts
+    /// - [`Parameter::ApplyFallthrough`]: apply fall-throughs
+    pub fn generate_default_parameters() -> Parameters {
+        let defaults = [Parameter::MinimumSelfDistance, Parameter::ApplyFallthrough];
+        Parameters::from(defaults)
+    }
+}
\ No newline at end of file
diff --git a/process_mining/src/discovery/case_centric/mod.rs b/process_mining/src/discovery/case_centric/mod.rs
index 8c44111..72193bd 100644
--- a/process_mining/src/discovery/case_centric/mod.rs
+++ b/process_mining/src/discovery/case_centric/mod.rs
@@ -2,4 +2,6 @@
pub mod alphappp;
+pub mod inductive_miner_app;
+
pub mod dfg;