Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/simod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
run: poetry run pylint -j 0 --exit-zero src/simod > pylint.txt

- name: Upload PyLint output
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: pylint.txt
path: ./pylint.txt
Expand Down Expand Up @@ -145,7 +145,7 @@ jobs:
poetry run pip-licenses --with-system --with-urls --format=markdown --output-file=licenses.md

- name: Upload licenses.md
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: licenses.md
path: licenses.md
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "simod"
version = "5.0.1"
version = "5.0.2"
authors = [
"Ihar Suvorau <ihar.suvorau@gmail.com>",
"David Chapela <david.chapela@ut.ee>",
Expand Down
22 changes: 22 additions & 0 deletions src/simod/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from log_distance_measures.circadian_event_distribution import (
circadian_event_distribution_distance,
)
from log_distance_measures.circadian_workforce_distribution import circadian_workforce_distribution_distance
from log_distance_measures.config import AbsoluteTimestampType
from log_distance_measures.control_flow_log_distance import control_flow_log_distance
from log_distance_measures.cycle_time_distribution import (
Expand Down Expand Up @@ -47,6 +48,8 @@ def compute_metric(
result = get_n_grams_distribution_distance(original_log, original_log_ids, simulated_log, simulated_log_ids, 3)
elif metric is Metric.CIRCADIAN_EMD:
result = get_circadian_emd(original_log, original_log_ids, simulated_log, simulated_log_ids)
elif metric is Metric.CIRCADIAN_WORKFORCE_EMD:
result = get_circadian_workforce_emd(original_log, original_log_ids, simulated_log, simulated_log_ids)
elif metric is Metric.ARRIVAL_EMD:
result = get_arrival_emd(original_log, original_log_ids, simulated_log, simulated_log_ids)
elif metric is Metric.RELATIVE_EMD:
Expand Down Expand Up @@ -122,6 +125,25 @@ def get_circadian_emd(
return emd


def get_circadian_workforce_emd(
    original_log: pd.DataFrame,
    original_log_ids: EventLogIDs,
    simulated_log: pd.DataFrame,
    simulated_log_ids: EventLogIDs,
) -> float:
    """
    Compute the circadian workforce EMD between two event logs: the distance between
    their active-resource histograms, comparing the average number of active resources
    recorded for each weekday at each hour (e.g., Monday 10am).
    """
    # Delegate directly to the log-distance-measures implementation.
    return circadian_workforce_distribution_distance(
        original_log,
        original_log_ids,
        simulated_log,
        simulated_log_ids,
    )


def get_arrival_emd(
original_log: pd.DataFrame,
original_log_ids: EventLogIDs,
Expand Down
33 changes: 33 additions & 0 deletions src/simod/runtime_meter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import json
import timeit


class RuntimeMeter:
    """Stopwatch that records wall-clock runtimes of named pipeline stages.

    Call :meth:`start` and :meth:`stop` with the same stage name to measure a
    stage; the elapsed seconds are accumulated in :attr:`runtimes`.
    """

    # Timestamps captured at start/stop of each stage (stage name -> timer value).
    runtime_start: dict
    runtime_stop: dict
    # Elapsed seconds per finished stage (stage name -> duration).
    runtimes: dict

    # Canonical stage names used across the SIMOD pipeline.
    TOTAL: str = "SIMOD_TOTAL_RUNTIME"
    INITIAL_MODEL: str = "discover-initial-BPS-model"
    CONTROL_FLOW_MODEL: str = "optimize-control-flow-model"
    RESOURCE_MODEL: str = "optimize-resource-model"
    DATA_ATTRIBUTES_MODEL: str = "discover-data-attributes"
    EXTRANEOUS_DELAYS: str = "discover-extraneous-delays"
    FINAL_MODEL: str = "discover-final-BPS-model"
    EVALUATION: str = "evaluate-final-BPS-model"

    def __init__(self):
        self.runtime_start = {}
        self.runtime_stop = {}
        self.runtimes = {}

    def start(self, stage_name: str):
        """Record the starting timestamp for *stage_name*."""
        self.runtime_start[stage_name] = timeit.default_timer()

    def stop(self, stage_name: str):
        """Record the stopping timestamp for *stage_name* and store its elapsed time."""
        end_time = timeit.default_timer()
        self.runtime_stop[stage_name] = end_time
        self.runtimes[stage_name] = end_time - self.runtime_start[stage_name]

    def to_json(self) -> str:
        """Return the recorded stage runtimes serialized as a JSON object string."""
        return json.dumps(self.runtimes)
6 changes: 6 additions & 0 deletions src/simod/settings/common_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class Metric(str, Enum):
TWO_GRAM_DISTANCE = "two_gram_distance"
THREE_GRAM_DISTANCE = "three_gram_distance"
CIRCADIAN_EMD = "circadian_event_distribution"
CIRCADIAN_WORKFORCE_EMD = "circadian_workforce_distribution"
ARRIVAL_EMD = "arrival_event_distribution"
RELATIVE_EMD = "relative_event_distribution"
ABSOLUTE_EMD = "absolute_event_distribution"
Expand All @@ -40,6 +41,8 @@ def _from_str(cls, value: str) -> "Metric":
return cls.THREE_GRAM_DISTANCE
elif value.lower() in ["circadian_event_distribution", "circadian_emd"]:
return cls.CIRCADIAN_EMD
elif value.lower() in ["circadian_workforce_distribution", "workforce_emd", "workforce_distribution"]:
return cls.CIRCADIAN_WORKFORCE_EMD
elif value.lower() in ["arrival_event_distribution", "arrival_emd"]:
return cls.ARRIVAL_EMD
elif value.lower() in ["relative_event_distribution", "relative_emd"]:
Expand All @@ -66,6 +69,8 @@ def __str__(self):
return "THREE_GRAM_DISTANCE"
elif self == Metric.CIRCADIAN_EMD:
return "CIRCADIAN_EVENT_DISTRIBUTION"
elif self == Metric.CIRCADIAN_WORKFORCE_EMD:
return "CIRCADIAN_WORKFORCE_DISTRIBUTION"
elif self == Metric.ARRIVAL_EMD:
return "ARRIVAL_EVENT_DISTRIBUTION"
elif self == Metric.RELATIVE_EMD:
Expand Down Expand Up @@ -140,6 +145,7 @@ def from_dict(config: dict, config_dir: Optional[Path] = None) -> "CommonSetting
Metric.TWO_GRAM_DISTANCE,
Metric.THREE_GRAM_DISTANCE,
Metric.CIRCADIAN_EMD,
Metric.CIRCADIAN_WORKFORCE_EMD,
Metric.ARRIVAL_EMD,
Metric.RELATIVE_EMD,
Metric.ABSOLUTE_EMD,
Expand Down
58 changes: 41 additions & 17 deletions src/simod/simod.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@
from pix_framework.io.bpmn import get_activities_names_from_bpmn

from simod.batching.discovery import discover_batching_rules
from simod.data_attributes.discovery import discover_data_attributes
from simod.branch_rules.discovery import discover_branch_rules, map_branch_rules_to_flows
from simod.cli_formatter import print_section, print_subsection
from simod.control_flow.discovery import discover_process_model, add_bpmn_diagram_to_model
from simod.control_flow.optimizer import ControlFlowOptimizer
from simod.control_flow.settings import HyperoptIterationParams as ControlFlowHyperoptIterationParams
from simod.data_attributes.discovery import discover_data_attributes
from simod.event_log.event_log import EventLog
from simod.extraneous_delays.optimizer import ExtraneousDelaysOptimizer
from simod.extraneous_delays.types import ExtraneousDelay
Expand All @@ -28,11 +29,11 @@
from simod.resource_model.optimizer import ResourceModelOptimizer
from simod.resource_model.repair import repair_with_missing_activities
from simod.resource_model.settings import HyperoptIterationParams as ResourceModelHyperoptIterationParams
from simod.runtime_meter import RuntimeMeter
from simod.settings.simod_settings import SimodSettings
from simod.simulation.parameters.BPS_model import BPSModel
from simod.simulation.prosimos import simulate_and_evaluate
from simod.utilities import get_process_model_path, get_simulation_parameters_path
from simod.branch_rules.discovery import discover_branch_rules, map_branch_rules_to_flows


class Simod:
Expand Down Expand Up @@ -87,6 +88,10 @@ def run(self):
Optimizes the BPS model with the given event log and settings.
"""

# Runtime object
runtimes = RuntimeMeter()
runtimes.start(RuntimeMeter.TOTAL)

# Model activities might be different from event log activities if the model has been provided,
# because we split the event log into train, test, and validation partitions.
# We use model_activities to repair resource_model later after its discovery from a reduced event log.
Expand All @@ -96,6 +101,7 @@ def run(self):

# --- Discover Default Case Arrival and Resource Allocation models --- #
print_section("Discovering initial BPS Model")
runtimes.start(RuntimeMeter.INITIAL_MODEL)
self._best_bps_model.case_arrival_model = discover_case_arrival_model(
self._event_log.train_validation_partition, # No optimization process here, use train + validation
self._event_log.log_ids,
Expand All @@ -115,43 +121,53 @@ def run(self):
event_log=self._event_log.train_validation_partition,
log_ids=self._event_log.log_ids,
)
runtimes.stop(RuntimeMeter.INITIAL_MODEL)

# --- Control-Flow Optimization --- #
print_section("Optimizing control-flow parameters")
runtimes.start(RuntimeMeter.CONTROL_FLOW_MODEL)
best_control_flow_params = self._optimize_control_flow()
self._best_bps_model.process_model = self._control_flow_optimizer.best_bps_model.process_model
self._best_bps_model.gateway_probabilities = self._control_flow_optimizer.best_bps_model.gateway_probabilities
self._best_bps_model.branch_rules = self._control_flow_optimizer.best_bps_model.branch_rules
runtimes.stop(RuntimeMeter.CONTROL_FLOW_MODEL)

# --- Data Attributes --- #
if (self._settings.common.discover_data_attributes or
self._settings.resource_model.discover_prioritization_rules):
print_section("Discovering data attributes")
runtimes.start(RuntimeMeter.DATA_ATTRIBUTES_MODEL)
global_attributes, case_attributes, event_attributes = discover_data_attributes(
self._event_log.train_validation_partition,
self._event_log.log_ids,
)
self._best_bps_model.global_attributes = global_attributes
self._best_bps_model.case_attributes = case_attributes
self._best_bps_model.event_attributes = event_attributes
runtimes.stop(RuntimeMeter.DATA_ATTRIBUTES_MODEL)

# --- Resource Model Discovery --- #
print_section("Optimizing resource model parameters")
runtimes.start(RuntimeMeter.RESOURCE_MODEL)
best_resource_model_params = self._optimize_resource_model(model_activities)
self._best_bps_model.resource_model = self._resource_model_optimizer.best_bps_model.resource_model
self._best_bps_model.calendar_granularity = self._resource_model_optimizer.best_bps_model.calendar_granularity
self._best_bps_model.prioritization_rules = self._resource_model_optimizer.best_bps_model.prioritization_rules
self._best_bps_model.batching_rules = self._resource_model_optimizer.best_bps_model.batching_rules
runtimes.stop(RuntimeMeter.RESOURCE_MODEL)

# --- Extraneous Delays Discovery --- #
if self._settings.extraneous_activity_delays is not None:
print_section("Discovering extraneous delays")
runtimes.start(RuntimeMeter.EXTRANEOUS_DELAYS)
timers = self._optimize_extraneous_activity_delays()
self._best_bps_model.extraneous_delays = timers
add_timers_to_bpmn_model(self._best_bps_model.process_model, timers) # Update BPMN model on disk
runtimes.stop(RuntimeMeter.EXTRANEOUS_DELAYS)

# --- Discover final BPS model --- #
print_section("Discovering final BPS model")
runtimes.start(RuntimeMeter.FINAL_MODEL)
self.final_bps_model = BPSModel( # Bypass all models already discovered with train+validation
process_model=get_process_model_path(self._best_result_dir, self._event_log.process_name),
case_arrival_model=self._best_bps_model.case_arrival_model,
Expand Down Expand Up @@ -187,19 +203,17 @@ def run(self):
bpmn_graph=best_bpmn_graph,
discovery_method=best_control_flow_params.gateway_probabilities_method,
)

# Branch Rules
if self._settings.control_flow.discover_branch_rules:
print_section("Discovering branch conditions")
self.final_bps_model.branch_rules = discover_branch_rules(
best_bpmn_graph,
self._event_log.train_validation_partition,
self._event_log.log_ids,
f_score=best_control_flow_params.f_score
)
best_bpmn_graph,
self._event_log.train_validation_partition,
self._event_log.log_ids,
f_score=best_control_flow_params.f_score
)
self.final_bps_model.gateway_probabilities = \
map_branch_rules_to_flows(self.final_bps_model.gateway_probabilities, self.final_bps_model.branch_rules)

# Resource model
print_subsection("Discovering best resource model")
self.final_bps_model.resource_model = discover_resource_model(
Expand Down Expand Up @@ -235,6 +249,9 @@ def run(self):
self.final_bps_model.extraneous_delays = self._best_bps_model.extraneous_delays
add_timers_to_bpmn_model(self.final_bps_model.process_model, self._best_bps_model.extraneous_delays)
self.final_bps_model.replace_activity_names_with_ids()
runtimes.stop(RuntimeMeter.FINAL_MODEL)
runtimes.stop(RuntimeMeter.TOTAL)

# Write JSON parameters to file
json_parameters_path = get_simulation_parameters_path(self._best_result_dir, self._event_log.process_name)
with json_parameters_path.open("w") as f:
Expand All @@ -243,14 +260,18 @@ def run(self):
# --- Evaluate final BPS model --- #
if self._settings.common.perform_final_evaluation:
print_subsection("Evaluate")
runtimes.start(RuntimeMeter.EVALUATION)
simulation_dir = self._best_result_dir / "evaluation"
simulation_dir.mkdir(parents=True, exist_ok=True)
self._evaluate_model(self.final_bps_model.process_model, json_parameters_path, simulation_dir)
runtimes.stop(RuntimeMeter.EVALUATION)

# --- Export settings and clean temporal files --- #
print_section(f"Exporting canonical model, runtimes, settings and cleaning up intermediate files")
canonical_model_path = self._best_result_dir / "canonical_model.json"
print_section(f"Exporting canonical model to {canonical_model_path}")
_export_canonical_model(canonical_model_path, best_control_flow_params, best_resource_model_params)
runtimes_model_path = self._best_result_dir / "runtimes.json"
_export_runtimes(runtimes_model_path, runtimes)
if self._settings.common.clean_intermediate_files:
self._clean_up()
self._settings.to_yaml(self._best_result_dir)
Expand Down Expand Up @@ -342,14 +363,17 @@ def _export_canonical_model(
control_flow_settings: ControlFlowHyperoptIterationParams,
calendar_settings: ResourceModelHyperoptIterationParams,
):
structure = control_flow_settings.to_dict()

calendars = calendar_settings.to_dict()

canon = {
"control_flow": structure,
"calendars": calendars,
"control_flow": control_flow_settings.to_dict(),
"calendars": calendar_settings.to_dict(),
}

with open(file_path, "w") as f:
json.dump(canon, f)


def _export_runtimes(
file_path: Path,
runtimes: RuntimeMeter
):
with open(file_path, "w") as f:
json.dump(runtimes.runtimes, f)
Loading