|
| 1 | +version: 5 |
| 2 | + |
| 3 | +########## |
| 4 | +# Common # |
| 5 | +########## |
| 6 | +common: |
| 7 | + # Path to the event log in CSV format |
| 8 | + train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz |
| 9 | + # Specify the name for each of the columns in the CSV file (XES standard by default) |
| 10 | + log_ids: |
| 11 | + case: "case_id" |
| 12 | + activity: "activity" |
| 13 | + resource: "resource" |
| 14 | + enabled_time: "enabled_time" # If not present in the log, automatically estimated (see preprocessing) |
| 15 | + start_time: "start_time" # Should be present, but if not, can be estimated (see preprocessing) |
| 16 | + end_time: "end_time" |
| 17 | + # Use this process model and skip its discovery |
| 18 | + process_model_path: ../models/LoanApp_simplified.bpmn |
| 19 | + # Event log to evaluate the discovered BPS model with |
| 20 | + test_log_path: ../event_logs/LoanApp_simplified_test.csv.gz |
| 21 | + # Flag to perform the final evaluation with a test partition of the input log (only applies if 'test_log_path' is not provided) |
| 22 | + perform_final_evaluation: true |
| 23 | + # Number of evaluations of the discovered BPS model |
| 24 | + num_final_evaluations: 10 |
| 25 | + # Metrics to evaluate the discovered BPS model (reported in an output file) |
| 26 | + evaluation_metrics: |
| 27 | + - 3_gram_distance |
| 28 | + - 2_gram_distance |
| 29 | + - absolute_event_distribution |
| 30 | + - relative_event_distribution |
| 31 | + - circadian_event_distribution |
| 32 | + - arrival_event_distribution |
| 33 | + - cycle_time_distribution |
| 34 | + # Whether to simulate the arrival times using the distribution of inter-arrival times observed in the training log, |
| 35 | + # or by fitting a parameterized probability distribution (e.g., norm, expon) to these observed values. |
| 36 | + use_observed_arrival_distribution: false |
| 37 | + # Whether to delete all files created during the optimization phases or not |
| 38 | + clean_intermediate_files: true |
| 39 | + # Whether to discover global/case/event attributes and their update rules or not |
| 40 | + discover_data_attributes: false |
| 41 | + |
| 42 | +################# |
| 43 | +# Preprocessing # |
| 44 | +################# |
| 45 | +preprocessing: |
| 46 | + # If the log has start times, threshold to consider two activities as concurrent when computing the enabled time |
| 47 | + # (if necessary). Two activities are considered concurrent if the number of their occurrences that overlap in time, |
| 48 | + # divided by their total number of occurrences, is higher than this threshold. |
| 49 | + enable_time_concurrency_threshold: 0.75 |
| 50 | + # If true, preprocess multitasking (i.e., one resource performing more than one activity at the same time) by |
| 51 | + # adjusting the timestamps (start/end) of those activities being executed at the same time by the same resource. |
| 52 | + multitasking: false |
| 53 | + # Thresholds for the heuristics' concurrency oracle (only used to estimate start times if missing). |
| 54 | + concurrency_df: 0.9 # Directly-Follows threshold |
| 55 | + concurrency_l2l: 0.9 # Length 2 loops threshold |
| 56 | + concurrency_l1l: 0.9 # Length 1 loops threshold |
| 57 | + |
| 58 | +################ |
| 59 | +# Control-flow # |
| 60 | +################ |
| 61 | +control_flow: |
| 62 | + # Metric to guide the optimization process (loss function to minimize) |
| 63 | + optimization_metric: n_gram_distance |
| 64 | + # Number of optimization iterations over the search space |
| 65 | + num_iterations: 20 |
| 66 | + # Number of times to evaluate each iteration (using the mean of all of them) |
| 67 | + num_evaluations_per_iteration: 3 |
| 68 | + # Methods for discovering gateway probabilities |
| 69 | + gateway_probabilities: |
| 70 | + - equiprobable |
| 71 | + - discovery |
| 72 | + # Algorithm to discover the process model (options: sm1 for Split Miner v1, or sm2 for Split Miner v2) |
| 73 | + mining_algorithm: sm1 |
| 74 | + # For Split Miner v1 and v2: Number of concurrent relations between events to be captured (between 0.0 and 1.0) |
| 75 | + epsilon: |
| 76 | + - 0.05 |
| 77 | + - 0.4 |
| 78 | + # Only for Split Miner v1: Threshold for filtering the incoming and outgoing edges (between 0.0 and 1.0) |
| 79 | + eta: |
| 80 | + - 0.2 |
| 81 | + - 0.7 |
| 82 | + # Only for Split Miner v1: Whether to replace non-trivial OR joins or not (true or false) |
| 83 | + replace_or_joins: |
| 84 | + - true |
| 85 | + - false |
| 86 | + # Only for Split Miner v1: Whether to prioritize parallelism over loops or not (true or false) |
| 87 | + prioritize_parallelism: |
| 88 | + - true |
| 89 | + - false |
| 90 | + # Discover data-aware branching rules, i.e., BPMN decision points based on value of data attributes |
| 91 | + discover_branch_rules: true |
| 92 | + # Minimum f-score value to consider the discovered data-aware branching rules |
| 93 | + f_score: |
| 94 | + - 0.3 |
| 95 | + - 0.9 |
| 96 | + |
| 97 | +################## |
| 98 | +# Resource model # |
| 99 | +################## |
| 100 | +resource_model: |
| 101 | + # Metric to guide the optimization process (loss function to minimize) |
| 102 | + optimization_metric: circadian_emd |
| 103 | + # Number of optimization iterations over the search space |
| 104 | + num_iterations: 20 |
| 105 | + # Number of times to evaluate each iteration (using the mean of all of them) |
| 106 | + num_evaluations_per_iteration: 3 |
| 107 | + # Whether to discover prioritization or batching behavior |
| 108 | + discover_prioritization_rules: false |
| 109 | + discover_batching_rules: false |
| 110 | + # Resource profiles configuration |
| 111 | + resource_profiles: |
| 112 | + # Resource profile discovery type (fuzzy, differentiated, pool, undifferentiated) |
| 113 | + discovery_type: differentiated |
| 114 | + # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) |
| 115 | + granularity: |
| 116 | + - 15 |
| 117 | + - 60 |
| 118 | + # Minimum confidence of the intervals in the discovered calendar of a resource or set of resources (between 0.0 and 1.0) |
| 119 | + confidence: |
| 120 | + - 0.5 |
| 121 | + - 0.85 |
| 122 | + # Minimum support of the intervals in the discovered calendar of a resource or set of resources (between 0.0 and 1.0) |
| 123 | + support: |
| 124 | + - 0.05 |
| 125 | + - 0.5 |
| 126 | + # Minimum participation of a resource in the process to discover a calendar of its own; resources below this value are gathered together (between 0.0 and 1.0) |
| 127 | + participation: |
| 128 | + - 0.2 |
| 129 | + - 0.5 |
| 130 | + # Angle of the fuzzy trapezoid when computing the availability probability for an activity (angle from start to end) |
| 131 | + fuzzy_angle: |
| 132 | + - 0.1 |
| 133 | + - 0.9 |
| 134 | + |
| 135 | +##################### |
| 136 | +# Extraneous delays # |
| 137 | +##################### |
| 138 | +extraneous_activity_delays: |
| 139 | + # Metric to guide the optimization process (loss function to minimize) |
| 140 | + optimization_metric: relative_emd |
| 141 | + # Method to compute the extraneous delay (naive or eclipse-aware) |
| 142 | + discovery_method: eclipse-aware |
| 143 | + # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) |
| 144 | + num_iterations: 1 |
| 145 | + # Number of times to evaluate each iteration (using the mean of all of them) |
| 146 | + num_evaluations_per_iteration: 3 |
0 commit comments