From 172b5dabc773c5cbdb38534e833dddeab0e007ce Mon Sep 17 00:00:00 2001 From: David Chapela de la Campa Date: Thu, 23 Jan 2025 19:15:01 +0200 Subject: [PATCH 1/4] #157 - Add runtime(s) report to a file --- src/simod/runtime_meter.py | 33 ++++++++++++++++++++++ src/simod/simod.py | 58 +++++++++++++++++++++++++++----------- 2 files changed, 74 insertions(+), 17 deletions(-) create mode 100644 src/simod/runtime_meter.py diff --git a/src/simod/runtime_meter.py b/src/simod/runtime_meter.py new file mode 100644 index 00000000..dc108cd0 --- /dev/null +++ b/src/simod/runtime_meter.py @@ -0,0 +1,33 @@ +import json +import timeit + + +class RuntimeMeter: + + runtime_start: dict + runtime_stop: dict + runtimes: dict + + TOTAL: str = "SIMOD_TOTAL_RUNTIME" + INITIAL_MODEL: str = "discover-initial-BPS-model" + CONTROL_FLOW_MODEL: str = "optimize-control-flow-model" + RESOURCE_MODEL: str = "optimize-resource-model" + DATA_ATTRIBUTES_MODEL: str = "discover-data-attributes" + EXTRANEOUS_DELAYS: str = "discover-extraneous-delays" + FINAL_MODEL: str = "discover-final-BPS-model" + EVALUATION: str = "evaluate-final-BPS-model" + + def __init__(self): + self.runtime_start = dict() + self.runtime_stop = dict() + self.runtimes = dict() + + def start(self, stage_name: str): + self.runtime_start[stage_name] = timeit.default_timer() + + def stop(self, stage_name: str): + self.runtime_stop[stage_name] = timeit.default_timer() + self.runtimes[stage_name] = self.runtime_stop[stage_name] - self.runtime_start[stage_name] + + def to_json(self) -> str: + return json.dumps(self.runtimes) diff --git a/src/simod/simod.py b/src/simod/simod.py index 4d16c683..d2036d47 100644 --- a/src/simod/simod.py +++ b/src/simod/simod.py @@ -15,11 +15,12 @@ from pix_framework.io.bpmn import get_activities_names_from_bpmn from simod.batching.discovery import discover_batching_rules -from simod.data_attributes.discovery import discover_data_attributes +from simod.branch_rules.discovery import discover_branch_rules, map_branch_rules_to_flows from simod.cli_formatter import print_section, print_subsection from simod.control_flow.discovery import discover_process_model, add_bpmn_diagram_to_model from simod.control_flow.optimizer import ControlFlowOptimizer from simod.control_flow.settings import HyperoptIterationParams as ControlFlowHyperoptIterationParams +from simod.data_attributes.discovery import discover_data_attributes from simod.event_log.event_log import EventLog from simod.extraneous_delays.optimizer import ExtraneousDelaysOptimizer from simod.extraneous_delays.types import ExtraneousDelay @@ -28,11 +29,11 @@ from simod.resource_model.optimizer import ResourceModelOptimizer from simod.resource_model.repair import repair_with_missing_activities from simod.resource_model.settings import HyperoptIterationParams as ResourceModelHyperoptIterationParams +from simod.runtime_meter import RuntimeMeter from simod.settings.simod_settings import SimodSettings from simod.simulation.parameters.BPS_model import BPSModel from simod.simulation.prosimos import simulate_and_evaluate from simod.utilities import get_process_model_path, get_simulation_parameters_path -from simod.branch_rules.discovery import discover_branch_rules, map_branch_rules_to_flows class Simod: @@ -87,6 +88,10 @@ def run(self): Optimizes the BPS model with the given event log and settings. """ + # Runtime object + runtimes = RuntimeMeter() + runtimes.start(RuntimeMeter.TOTAL) + # Model activities might be different from event log activities if the model has been provided, # because we split the event log into train, test, and validation partitions. # We use model_activities to repair resource_model later after its discovery from a reduced event log. @@ -96,6 +101,7 @@ def run(self): # --- Discover Default Case Arrival and Resource Allocation models --- # print_section("Discovering initial BPS Model") + runtimes.start(RuntimeMeter.INITIAL_MODEL) self._best_bps_model.case_arrival_model = discover_case_arrival_model( self._event_log.train_validation_partition, # No optimization process here, use train + validation self._event_log.log_ids, @@ -115,18 +121,22 @@ def run(self): event_log=self._event_log.train_validation_partition, log_ids=self._event_log.log_ids, ) + runtimes.stop(RuntimeMeter.INITIAL_MODEL) # --- Control-Flow Optimization --- # print_section("Optimizing control-flow parameters") + runtimes.start(RuntimeMeter.CONTROL_FLOW_MODEL) best_control_flow_params = self._optimize_control_flow() self._best_bps_model.process_model = self._control_flow_optimizer.best_bps_model.process_model self._best_bps_model.gateway_probabilities = self._control_flow_optimizer.best_bps_model.gateway_probabilities self._best_bps_model.branch_rules = self._control_flow_optimizer.best_bps_model.branch_rules + runtimes.stop(RuntimeMeter.CONTROL_FLOW_MODEL) # --- Data Attributes --- # if (self._settings.common.discover_data_attributes or self._settings.resource_model.discover_prioritization_rules): print_section("Discovering data attributes") + runtimes.start(RuntimeMeter.DATA_ATTRIBUTES_MODEL) global_attributes, case_attributes, event_attributes = discover_data_attributes( self._event_log.train_validation_partition, self._event_log.log_ids, @@ -134,24 +144,30 @@ def run(self): self._best_bps_model.global_attributes = global_attributes self._best_bps_model.case_attributes = case_attributes self._best_bps_model.event_attributes = event_attributes + runtimes.stop(RuntimeMeter.DATA_ATTRIBUTES_MODEL) # --- Resource Model Discovery --- # print_section("Optimizing resource model parameters") + runtimes.start(RuntimeMeter.RESOURCE_MODEL) best_resource_model_params = self._optimize_resource_model(model_activities) self._best_bps_model.resource_model = self._resource_model_optimizer.best_bps_model.resource_model self._best_bps_model.calendar_granularity = self._resource_model_optimizer.best_bps_model.calendar_granularity self._best_bps_model.prioritization_rules = self._resource_model_optimizer.best_bps_model.prioritization_rules self._best_bps_model.batching_rules = self._resource_model_optimizer.best_bps_model.batching_rules + runtimes.stop(RuntimeMeter.RESOURCE_MODEL) # --- Extraneous Delays Discovery --- # if self._settings.extraneous_activity_delays is not None: print_section("Discovering extraneous delays") + runtimes.start(RuntimeMeter.EXTRANEOUS_DELAYS) timers = self._optimize_extraneous_activity_delays() self._best_bps_model.extraneous_delays = timers add_timers_to_bpmn_model(self._best_bps_model.process_model, timers) # Update BPMN model on disk + runtimes.stop(RuntimeMeter.EXTRANEOUS_DELAYS) # --- Discover final BPS model --- # print_section("Discovering final BPS model") + runtimes.start(RuntimeMeter.FINAL_MODEL) self.final_bps_model = BPSModel( # Bypass all models already discovered with train+validation process_model=get_process_model_path(self._best_result_dir, self._event_log.process_name), case_arrival_model=self._best_bps_model.case_arrival_model, @@ -187,19 +203,17 @@ def run(self): bpmn_graph=best_bpmn_graph, discovery_method=best_control_flow_params.gateway_probabilities_method, ) - # Branch Rules if self._settings.control_flow.discover_branch_rules: print_section("Discovering branch conditions") self.final_bps_model.branch_rules = discover_branch_rules( - best_bpmn_graph, - self._event_log.train_validation_partition, - self._event_log.log_ids, - f_score=best_control_flow_params.f_score - ) + best_bpmn_graph, + self._event_log.train_validation_partition, + self._event_log.log_ids, + f_score=best_control_flow_params.f_score + ) self.final_bps_model.gateway_probabilities = \ map_branch_rules_to_flows(self.final_bps_model.gateway_probabilities, self.final_bps_model.branch_rules) - # Resource model print_subsection("Discovering best resource model") self.final_bps_model.resource_model = discover_resource_model( @@ -235,6 +249,9 @@ def run(self): self.final_bps_model.extraneous_delays = self._best_bps_model.extraneous_delays add_timers_to_bpmn_model(self.final_bps_model.process_model, self._best_bps_model.extraneous_delays) self.final_bps_model.replace_activity_names_with_ids() + runtimes.stop(RuntimeMeter.FINAL_MODEL) + runtimes.stop(RuntimeMeter.TOTAL) + # Write JSON parameters to file json_parameters_path = get_simulation_parameters_path(self._best_result_dir, self._event_log.process_name) with json_parameters_path.open("w") as f: @@ -243,14 +260,18 @@ def run(self): # --- Evaluate final BPS model --- # if self._settings.common.perform_final_evaluation: print_subsection("Evaluate") + runtimes.start(RuntimeMeter.EVALUATION) simulation_dir = self._best_result_dir / "evaluation" simulation_dir.mkdir(parents=True, exist_ok=True) self._evaluate_model(self.final_bps_model.process_model, json_parameters_path, simulation_dir) + runtimes.stop(RuntimeMeter.EVALUATION) # --- Export settings and clean temporal files --- # + print_section(f"Exporting canonical model, runtimes, settings and cleaning up intermediate files") canonical_model_path = self._best_result_dir / "canonical_model.json" - print_section(f"Exporting canonical model to {canonical_model_path}") _export_canonical_model(canonical_model_path, best_control_flow_params, best_resource_model_params) + runtimes_model_path = self._best_result_dir / "runtimes.json" + _export_runtimes(runtimes_model_path, runtimes) if self._settings.common.clean_intermediate_files: self._clean_up() self._settings.to_yaml(self._best_result_dir) @@ -342,14 +363,17 @@ def _export_canonical_model( control_flow_settings: ControlFlowHyperoptIterationParams, calendar_settings: ResourceModelHyperoptIterationParams, ): - structure = control_flow_settings.to_dict() - - calendars = calendar_settings.to_dict() - canon = { - "control_flow": structure, - "calendars": calendars, + "control_flow": control_flow_settings.to_dict(), + "calendars": calendar_settings.to_dict(), } - with open(file_path, "w") as f: json.dump(canon, f) + + +def _export_runtimes( + file_path: Path, + runtimes: RuntimeMeter +): + with open(file_path, "w") as f: + json.dump(runtimes.runtimes, f) From a74d20865d343c85917ac77e16fc50ec5c84fc2a Mon Sep 17 00:00:00 2001 From: David Chapela de la Campa Date: Thu, 23 Jan 2025 19:15:39 +0200 Subject: [PATCH 2/4] #157 - Add support to use Circadian Workforce Distance --- src/simod/metrics.py | 22 ++++++++++++++++++++++ src/simod/settings/common_settings.py | 6 ++++++ 2 files changed, 28 insertions(+) diff --git a/src/simod/metrics.py b/src/simod/metrics.py index 9aeacfd4..f66d7bc2 100644 --- a/src/simod/metrics.py +++ b/src/simod/metrics.py @@ -9,6 +9,7 @@ from log_distance_measures.circadian_event_distribution import ( circadian_event_distribution_distance, ) +from log_distance_measures.circadian_workforce_distribution import circadian_workforce_distribution_distance from log_distance_measures.config import AbsoluteTimestampType from log_distance_measures.control_flow_log_distance import control_flow_log_distance from log_distance_measures.cycle_time_distribution import ( @@ -47,6 +48,8 @@ def compute_metric( result = get_n_grams_distribution_distance(original_log, original_log_ids, simulated_log, simulated_log_ids, 3) elif metric is Metric.CIRCADIAN_EMD: result = get_circadian_emd(original_log, original_log_ids, simulated_log, simulated_log_ids) + elif metric is Metric.CIRCADIAN_WORKFORCE_EMD: + result = get_circadian_workforce_emd(original_log, original_log_ids, simulated_log, simulated_log_ids) elif metric is Metric.ARRIVAL_EMD: result = get_arrival_emd(original_log, original_log_ids, simulated_log, simulated_log_ids) elif metric is Metric.RELATIVE_EMD: @@ -122,6 +125,25 @@ def get_circadian_emd( return emd +def get_circadian_workforce_emd( + original_log: pd.DataFrame, + original_log_ids: EventLogIDs, + simulated_log: pd.DataFrame, + simulated_log_ids: EventLogIDs, +) -> float: + """ + Distance measure computing how different the histograms of the active resources of two event logs are, comparing the + average number of active resources recorded each weekday at each hour (e.g., Monday 10am). + """ + emd = circadian_workforce_distribution_distance( + original_log, + original_log_ids, + simulated_log, + simulated_log_ids, + ) + return emd + + def get_arrival_emd( original_log: pd.DataFrame, original_log_ids: EventLogIDs, diff --git a/src/simod/settings/common_settings.py b/src/simod/settings/common_settings.py index daf23d8d..d1a5a2c9 100644 --- a/src/simod/settings/common_settings.py +++ b/src/simod/settings/common_settings.py @@ -18,6 +18,7 @@ class Metric(str, Enum): TWO_GRAM_DISTANCE = "two_gram_distance" THREE_GRAM_DISTANCE = "three_gram_distance" CIRCADIAN_EMD = "circadian_event_distribution" + CIRCADIAN_WORKFORCE_EMD = "circadian_workforce_distribution" ARRIVAL_EMD = "arrival_event_distribution" RELATIVE_EMD = "relative_event_distribution" ABSOLUTE_EMD = "absolute_event_distribution" @@ -40,6 +41,8 @@ def _from_str(cls, value: str) -> "Metric": return cls.THREE_GRAM_DISTANCE elif value.lower() in ["circadian_event_distribution", "circadian_emd"]: return cls.CIRCADIAN_EMD + elif value.lower() in ["circadian_workforce_distribution", "workforce_emd", "workforce_distribution"]: + return cls.CIRCADIAN_WORKFORCE_EMD elif value.lower() in ["arrival_event_distribution", "arrival_emd"]: return cls.ARRIVAL_EMD elif value.lower() in ["relative_event_distribution", "relative_emd"]: @@ -66,6 +69,8 @@ def __str__(self): return "THREE_GRAM_DISTANCE" elif self == Metric.CIRCADIAN_EMD: return "CIRCADIAN_EVENT_DISTRIBUTION" + elif self == Metric.CIRCADIAN_WORKFORCE_EMD: + return "CIRCADIAN_WORKFORCE_DISTRIBUTION" elif self == Metric.ARRIVAL_EMD: return "ARRIVAL_EVENT_DISTRIBUTION" elif self == Metric.RELATIVE_EMD: @@ -140,6 +145,7 @@ def from_dict(config: dict, config_dir: Optional[Path] = None) -> "CommonSetting Metric.TWO_GRAM_DISTANCE, Metric.THREE_GRAM_DISTANCE, Metric.CIRCADIAN_EMD, + Metric.CIRCADIAN_WORKFORCE_EMD, Metric.ARRIVAL_EMD, Metric.RELATIVE_EMD, Metric.ABSOLUTE_EMD, From dd57c8416841b48161e76a4fd92c8a8ee2a03450 Mon Sep 17 00:00:00 2001 From: David Chapela de la Campa Date: Thu, 23 Jan 2025 19:16:35 +0200 Subject: [PATCH 3/4] #157 - Update version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8a44af17..7b52b757 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "simod" -version = "5.0.1" +version = "5.0.2" authors = [ "Ihar Suvorau ", "David Chapela ", From 678364de4a7b0bbdd70dec16239ef2afd0ffa51e Mon Sep 17 00:00:00 2001 From: David Chapela de la Campa Date: Thu, 23 Jan 2025 19:20:33 +0200 Subject: [PATCH 4/4] Update GitHub action workflow (upload artifact v3 deprecated) --- .github/workflows/simod.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/simod.yml b/.github/workflows/simod.yml index 9e2b3a1b..c71d4e33 100644 --- a/.github/workflows/simod.yml +++ b/.github/workflows/simod.yml @@ -58,7 +58,7 @@ jobs: run: poetry run pylint -j 0 --exit-zero src/simod > pylint.txt - name: Upload PyLint output - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: pylint.txt path: ./pylint.txt @@ -145,7 +145,7 @@ jobs: poetry run pip-licenses --with-system --with-urls --format=markdown --output-file=licenses.md - name: Upload licenses.md - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: licenses.md path: licenses.md