Skip to content

Commit cfddc95

Browse files
Merge pull request #163 from AutomatedProcessImprovement/documentation
Documentation + runtime fix
2 parents 8c2f7ac + e0a232b commit cfddc95

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+2320
-74
lines changed

.readthedocs.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Read the Docs configuration file
2+
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3+
4+
# Required
5+
version: 2
6+
7+
# Set the OS, Python version, and other tools you might need
8+
build:
9+
os: ubuntu-24.04
10+
tools:
11+
python: "3.9"
12+
13+
# Build documentation in the "docs/" directory with Sphinx
14+
sphinx:
15+
configuration: docs/source/conf.py
16+
17+
# Optionally, but recommended,
18+
# declare the Python requirements required to build your documentation
19+
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
20+
python:
21+
install:
22+
- requirements: docs/requirements.txt

docs/Makefile

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Minimal makefile for Sphinx documentation
2+
#
3+
4+
# You can set these variables from the command line, and also
# from the environment for the first two.
5+
# from the environment for the first two.
6+
# ?= only assigns when the variable is not already set, which is what lets
# an environment value for SPHINXOPTS / SPHINXBUILD take effect.
SPHINXOPTS ?=
7+
SPHINXBUILD ?= sphinx-build
8+
# Plain = assignments: a command-line "make SOURCEDIR=..." still overrides
# them, an environment variable does not. Both paths are relative, so they
# resolve against the directory make is run from.
SOURCEDIR = source
9+
BUILDDIR = build
10+
11+
# Put it first so that "make" without argument is like "make help".
12+
# Runs $(SPHINXBUILD) in make mode (-M) with the "help" target; the leading
# @ keeps make from echoing the command line itself.
help:
13+
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14+
15+
# Neither name is a build product. Declaring Makefile phony also stops make
# from using the catch-all rule below to try to rebuild the Makefile itself.
.PHONY: help Makefile
16+
17+
# Catch-all target: route all unknown targets to Sphinx using the new
18+
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19+
# $@ is the target name that was requested (e.g. "make html" runs -M html).
# $(O) is not assigned anywhere in this file, so it expands to nothing
# unless the caller supplies it.
%: Makefile
20+
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

docs/make.bat

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
@ECHO OFF
2+
3+
REM Switch to the directory that contains this script so the relative
REM SOURCEDIR and BUILDDIR below resolve the same way wherever the caller
REM started from. Every exit path has to undo this with popd.
pushd %~dp0
4+
5+
REM Command file for Sphinx documentation
6+
7+
REM Keep a SPHINXBUILD supplied by the caller; otherwise fall back to the
REM sphinx-build found on PATH.
if "%SPHINXBUILD%" == "" (
8+
set SPHINXBUILD=sphinx-build
9+
)
10+
set SOURCEDIR=source
11+
set BUILDDIR=build
12+
13+
REM Probe run with all output discarded: only the resulting errorlevel is used.
%SPHINXBUILD% >NUL 2>NUL
14+
REM An errorlevel of 9009 or above is handled as sphinx-build not being found.
if errorlevel 9009 (
15+
echo.
16+
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17+
echo.installed, then set the SPHINXBUILD environment variable to point
18+
echo.to the full path of the 'sphinx-build' executable. Alternatively you
19+
echo.may add the Sphinx directory to PATH.
20+
echo.
21+
echo.If you don't have Sphinx installed, grab it from
22+
echo.https://www.sphinx-doc.org/
23+
REM Restore the caller's directory before leaving early, because this exit
REM never reaches the popd under the end label.
popd
exit /b 1
24+
)
25+
26+
REM No target given on the command line: show the Sphinx help instead.
if "%1" == "" goto help
27+
28+
REM Forward the requested target to sphinx-build in make mode.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29+
goto end
30+
31+
:help
32+
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33+
34+
:end
35+
popd

docs/requirements.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
click==8.1.3
2+
hyperopt==0.2.7
3+
lxml==5.3.0
4+
matplotlib==3.6.0
5+
networkx==3.2.1
6+
numpy==1.24.3
7+
pandas==2.1.0
8+
pendulum==3.0.0
9+
pydantic==2.3.0
10+
python-dotenv==1.0.0
11+
python-multipart==0.0.12
12+
pytz==2024.2
13+
PyYAML==6.0
14+
requests==2.28.2
15+
scipy==1.13.0
16+
statistics==1.0.3.5
17+
tqdm==4.64.1
18+
xmltodict==0.13.0
19+
prosimos==2.0.6
20+
extraneous-activity-delays==2.1.21
21+
openxes-cli-py==0.1.15
22+
pix-framework==0.13.17
23+
log-distance-measures==2.0.0
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
version: 5
2+
3+
##########
4+
# Common #
5+
##########
6+
common:
7+
# Path to the event log in CSV format
8+
train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz
9+
# Specify the name for each of the columns in the CSV file (XES standard by default)
10+
log_ids:
11+
case: "case_id"
12+
activity: "activity"
13+
resource: "resource"
14+
enabled_time: "enabled_time" # If not present in the log, automatically estimated (see preprocessing)
15+
start_time: "start_time" # Should be present, but if not, can be estimated (see preprocessing)
16+
end_time: "end_time"
17+
# Use this process model and skip its discovery
18+
process_model_path: ../models/LoanApp_simplified.bpmn
19+
# Event log to evaluate the discovered BPS model with
20+
test_log_path: ../event_logs/LoanApp_simplified_test.csv.gz
21+
# Flag to perform evaluation (if 'test_log_path' not provided) with a test partition of the input log
22+
perform_final_evaluation: true
23+
# Number of evaluations of the discovered BPS model
24+
num_final_evaluations: 10
25+
# Metrics to evaluate the discovered BPS model (reported in an output file)
26+
evaluation_metrics:
27+
- 3_gram_distance
28+
- 2_gram_distance
29+
- absolute_event_distribution
30+
- relative_event_distribution
31+
- circadian_event_distribution
32+
- arrival_event_distribution
33+
- cycle_time_distribution
34+
# Whether to simulate the arrival times using the distribution of inter-arrival times observed in the training log,
35+
# or fitting a parameterized probabilistic distribution (e.g., norm, expon) with these observed values.
36+
use_observed_arrival_distribution: false
37+
# Whether to delete all files created during the optimization phases or not
38+
clean_intermediate_files: true
39+
# Whether to discover global/case/event attributes and their update rules or not
40+
discover_data_attributes: false
41+
42+
#################
43+
# Preprocessing #
44+
#################
45+
preprocessing:
46+
# If the log has start times, threshold to consider two activities as concurrent when computing the enabled time
47+
# (if necessary). Two activities are considered concurrent if the number of times they occur concurrently, divided
48+
# by their total number of occurrences, is higher than this threshold.
49+
enable_time_concurrency_threshold: 0.75
50+
# If true, preprocess multitasking (i.e., one resource performing more than one activity at the same time) by
51+
# adjusting the timestamps (start/end) of those activities being executed at the same time by the same resource.
52+
multitasking: false
53+
# Thresholds for the heuristics' concurrency oracle (only used to estimate start times if missing).
54+
concurrency_df: 0.9 # Directly-Follows threshold
55+
concurrency_l2l: 0.9 # Length 2 loops threshold
56+
concurrency_l1l: 0.9 # Length 1 loops threshold
57+
58+
################
59+
# Control-flow #
60+
################
61+
control_flow:
62+
# Metric to guide the optimization process (loss function to minimize)
63+
optimization_metric: n_gram_distance
64+
# Number of optimization iterations over the search space
65+
num_iterations: 20
66+
# Number of times to evaluate each iteration (using the mean of all of them)
67+
num_evaluations_per_iteration: 3
68+
# Methods for discovering gateway probabilities
69+
gateway_probabilities:
70+
- equiprobable
71+
- discovery
72+
# Discover process model with SplitMiner v1 (options: sm1 or sm2)
73+
mining_algorithm: sm1
74+
# For Split Miner v1 and v2: Number of concurrent relations between events to be captured (between 0.0 and 1.0)
75+
epsilon:
76+
- 0.05
77+
- 0.4
78+
# Only for Split Miner v1: Threshold for filtering the incoming and outgoing edges (between 0.0 and 1.0)
79+
eta:
80+
- 0.2
81+
- 0.7
82+
# Only for Split Miner v1: Whether to replace non-trivial OR joins or not (true or false)
83+
replace_or_joins:
84+
- true
85+
- false
86+
# Only for Split Miner v1: Whether to prioritize parallelism over loops or not (true or false)
87+
prioritize_parallelism:
88+
- true
89+
- false
90+
# Discover data-aware branching rules, i.e., BPMN decision points based on value of data attributes
91+
discover_branch_rules: true
92+
# Minimum f-score value to consider the discovered data-aware branching rules
93+
f_score:
94+
- 0.3
95+
- 0.9
96+
97+
##################
98+
# Resource model #
99+
##################
100+
resource_model:
101+
# Metric to guide the optimization process (loss function to minimize)
102+
optimization_metric: circadian_emd
103+
# Number of optimization iterations over the search space
104+
num_iterations: 20
105+
# Number of times to evaluate each iteration (using the mean of all of them)
106+
num_evaluations_per_iteration: 3
107+
# Whether to discover prioritization or batching behavior
108+
discover_prioritization_rules: false
109+
discover_batching_rules: false
110+
# Resource profiles configuration
111+
resource_profiles:
112+
# Resource profile discovery type (fuzzy, differentiated, pool, undifferentiated)
113+
discovery_type: differentiated
114+
# Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
115+
granularity:
116+
- 15
117+
- 60
118+
# Minimum confidence of the intervals in the discovered calendar of a resource or set of resources (between 0.0 and 1.0)
119+
confidence:
120+
- 0.5
121+
- 0.85
122+
# Minimum support of the intervals in the discovered calendar of a resource or set of resources (between 0.0 and 1.0)
123+
support:
124+
- 0.05
125+
- 0.5
126+
# Participation of a resource in the process to discover a calendar for them, gathered together otherwise (between 0.0 and 1.0)
127+
participation:
128+
- 0.2
129+
- 0.5
130+
# Angle of the fuzzy trapezoid when computing the availability probability for an activity (angle from start to end)
131+
fuzzy_angle:
132+
- 0.1
133+
- 0.9
134+
135+
#####################
136+
# Extraneous delays #
137+
#####################
138+
extraneous_activity_delays:
139+
# Metric to guide the optimization process (loss function to minimize)
140+
optimization_metric: relative_emd
141+
# Method to compute the extraneous delay (naive or eclipse-aware)
142+
discovery_method: eclipse-aware
143+
# Number of optimization iterations over the search space (1 = direct discovery, no optimization stage)
144+
num_iterations: 1
145+
# Number of times to evaluate each iteration (using the mean of all of them)
146+
num_evaluations_per_iteration: 3
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#################################################################################################################
2+
# Simple configuration example with i) no evaluation of the final BPS model, ii) 20 iterations of control-flow #
3+
# discovery, iii) 20 iterations of resource model (differentiated) discovery, and iv) direct discovery of #
4+
# extraneous delays. #
5+
#################################################################################################################
6+
# - Increase the num_iterations to (potentially) improve the quality of that discovered model #
7+
# - Visit 'complete_configuration.yml' example for a description of all configurable parameters #
8+
#################################################################################################################
9+
version: 5
10+
##########
11+
# Common #
12+
##########
13+
common:
14+
# Path to the event log in CSV format
15+
train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz
16+
# Specify the name for each of the columns in the CSV file (XES standard by default)
17+
log_ids:
18+
case: "case_id"
19+
activity: "activity"
20+
resource: "resource"
21+
enabled_time: "enabled_time" # If not present in the log, automatically computed
22+
start_time: "start_time"
23+
end_time: "end_time"
24+
# Whether to discover case attributes or not
25+
discover_data_attributes: false
26+
#################
27+
# Preprocessing #
28+
#################
29+
preprocessing:
30+
# Threshold to consider two activities as concurrent when computing the enabled time (if necessary)
31+
enable_time_concurrency_threshold: 0.75
32+
################
33+
# Control-flow #
34+
################
35+
control_flow:
36+
# Metric to guide the optimization process (loss function to minimize)
37+
optimization_metric: two_gram_distance
38+
# Number of optimization iterations over the search space
39+
num_iterations: 20
40+
# Number of times to evaluate each iteration (using the mean of all of them)
41+
num_evaluations_per_iteration: 3
42+
# Method for discovering gateway probabilities
43+
gateway_probabilities: discovery
44+
# Discover process model with SplitMiner v1
45+
mining_algorithm: sm1
46+
# Number of concurrent relations between events to be captured
47+
epsilon:
48+
- 0.05
49+
- 0.4
50+
# Threshold for filtering the incoming and outgoing edges
51+
eta:
52+
- 0.2
53+
- 0.7
54+
# Whether to replace non-trivial OR joins or not
55+
replace_or_joins:
56+
- true
57+
- false
58+
# Whether to prioritize parallelism over loops or not
59+
prioritize_parallelism:
60+
- true
61+
- false
62+
##################
63+
# Resource model #
64+
##################
65+
resource_model:
66+
# Metric to guide the optimization process (loss function to minimize)
67+
optimization_metric: circadian_emd
68+
# Number of optimization iterations over the search space
69+
num_iterations: 20
70+
# Number of times to evaluate each iteration (using the mean of all of them)
71+
num_evaluations_per_iteration: 3
72+
# Whether to discover prioritization or batching behavior
73+
discover_prioritization_rules: false
74+
discover_batching_rules: false
75+
# Resource profiles configuration
76+
resource_profiles:
77+
# Resource profile discovery type
78+
discovery_type: differentiated
79+
# Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
80+
granularity: 60
81+
# Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources)
82+
confidence:
83+
- 0.5
84+
- 0.85
85+
# Minimum support of the intervals in the discovered calendar (of a resource or set of resources)
86+
support:
87+
- 0.05
88+
- 0.5
89+
# Participation of a resource in the process to discover a calendar for them (gathered together otherwise)
90+
participation: 0.4
91+
#####################
92+
# Extraneous delays #
93+
#####################
94+
extraneous_activity_delays:
95+
# Method to compute the extraneous delay
96+
discovery_method: eclipse-aware
97+
# Number of optimization iterations over the search space (1 = direct discovery, no optimization stage)
98+
num_iterations: 1

0 commit comments

Comments
 (0)