0.14.45

bartzbeielstein · bartzbeielstein · commit edb813997562 · 2024-07-14T10:55:31.000+02:00
diff --git a/notebooks/00_spotPython_tests.ipynb b/notebooks/00_spotPython_tests.ipynb
@@ -4288,7 +4288,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 12,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -4339,17 +4339,9 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 13,
+      "execution_count": null,
       "metadata": {},
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Model Name: HoeffdingTreeRegressor, Model Instance: <class 'river.tree.hoeffding_tree_regressor.HoeffdingTreeRegressor'>\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "\n",
         "# Example of usage\n",
@@ -4359,186 +4351,19 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 14,
+      "execution_count": null,
       "metadata": {},
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "module_name: light\n",
-            "submodule_name: regression\n",
-            "model_name: NNLinearRegressor\n",
-            "Model Name: NNLinearRegressor, Model Instance: <class 'spotPython.light.regression.nn_linear_regressor.NNLinearRegressor'>\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "model_name, model_instance = get_core_model_from_name(\"light.regression.NNLinearRegressor\")\n",
         "print(f\"Model Name: {model_name}, Model Instance: {model_instance}\")"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 15,
+      "execution_count": null,
       "metadata": {},
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/Users/bartz/miniforge3/envs/spotCondaEnv/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'act_fn' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['act_fn'])`.\n",
-            "GPU available: True (mps), used: True\n",
-            "TPU available: False, using: 0 TPU cores\n",
-            "IPU available: False, using: 0 IPUs\n"
-          ]
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "HPU available: False, using: 0 HPUs\n",
-            "/Users/bartz/miniforge3/envs/spotCondaEnv/lib/python3.11/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.\n",
-            "\n",
-            "  | Name   | Type       | Params | In sizes | Out sizes\n",
-            "-------------------------------------------------------------\n",
-            "0 | layers | Sequential | 15.9 K | [8, 10]  | [8, 1]   \n",
-            "-------------------------------------------------------------\n",
-            "15.9 K    Trainable params\n",
-            "0         Non-trainable params\n",
-            "15.9 K    Total params\n",
-            "0.064     Total estimated model params size (MB)\n"
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "torch.Size([8, 10])\n",
-            "torch.Size([8])\n"
-          ]
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/Users/bartz/miniforge3/envs/spotCondaEnv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "f2089c81a3034f8181ae924de01692ca",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Training: |          | 0/? [00:00<?, ?it/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "`Trainer.fit` stopped: `max_epochs=2` reached.\n",
-            "/Users/bartz/miniforge3/envs/spotCondaEnv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "459a96c4bed440dfafbc3d40c0e7a8d0",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Validation: |          | 0/? [00:00<?, ?it/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
-              "┃<span style=\"font-weight: bold\">      Validate metric      </span>┃<span style=\"font-weight: bold\">       DataLoader 0        </span>┃\n",
-              "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
-              "│<span style=\"color: #008080; text-decoration-color: #008080\">         hp_metric         </span>│<span style=\"color: #800080; text-decoration-color: #800080\">       29042.5703125       </span>│\n",
-              "│<span style=\"color: #008080; text-decoration-color: #008080\">         val_loss          </span>│<span style=\"color: #800080; text-decoration-color: #800080\">       29042.5703125       </span>│\n",
-              "└───────────────────────────┴───────────────────────────┘\n",
-              "</pre>\n"
-            ],
-            "text/plain": [
-              "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
-              "┃\u001b[1m \u001b[0m\u001b[1m     Validate metric     \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m      DataLoader 0       \u001b[0m\u001b[1m \u001b[0m┃\n",
-              "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
-              "│\u001b[36m \u001b[0m\u001b[36m        hp_metric        \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m      29042.5703125      \u001b[0m\u001b[35m \u001b[0m│\n",
-              "│\u001b[36m \u001b[0m\u001b[36m        val_loss         \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m      29042.5703125      \u001b[0m\u001b[35m \u001b[0m│\n",
-              "└───────────────────────────┴───────────────────────────┘\n"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/Users/bartz/miniforge3/envs/spotCondaEnv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "9a7e5a3ceb724b9e87b9f23341122ce4",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Testing: |          | 0/? [00:00<?, ?it/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
-              "┃<span style=\"font-weight: bold\">        Test metric        </span>┃<span style=\"font-weight: bold\">       DataLoader 0        </span>┃\n",
-              "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
-              "│<span style=\"color: #008080; text-decoration-color: #008080\">         hp_metric         </span>│<span style=\"color: #800080; text-decoration-color: #800080\">       29042.5703125       </span>│\n",
-              "│<span style=\"color: #008080; text-decoration-color: #008080\">         val_loss          </span>│<span style=\"color: #800080; text-decoration-color: #800080\">       29042.5703125       </span>│\n",
-              "└───────────────────────────┴───────────────────────────┘\n",
-              "</pre>\n"
-            ],
-            "text/plain": [
-              "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
-              "┃\u001b[1m \u001b[0m\u001b[1m       Test metric       \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m      DataLoader 0       \u001b[0m\u001b[1m \u001b[0m┃\n",
-              "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
-              "│\u001b[36m \u001b[0m\u001b[36m        hp_metric        \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m      29042.5703125      \u001b[0m\u001b[35m \u001b[0m│\n",
-              "│\u001b[36m \u001b[0m\u001b[36m        val_loss         \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m      29042.5703125      \u001b[0m\u001b[35m \u001b[0m│\n",
-              "└───────────────────────────┴───────────────────────────┘\n"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/plain": [
-              "[{'val_loss': 29042.5703125, 'hp_metric': 29042.5703125}]"
-            ]
-          },
-          "execution_count": 15,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
+      "outputs": [],
       "source": [
         "from torch.utils.data import DataLoader\n",
         "from spotPython.data.diabetes import Diabetes\n",
@@ -4572,6 +4397,29 @@
         "trainer.test(net_light_base, test_loader)"
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {},
+      "outputs": [
+        {
+          "ename": "NameError",
+          "evalue": "name 'MockDataSet' is not defined",
+          "output_type": "error",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+            "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mspotPython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minit\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_feature_names\n\u001b[0;32m----> 2\u001b[0m fun_control \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_set\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[43mMockDataSet\u001b[49m(names\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfeature1\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfeature2\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfeature3\u001b[39m\u001b[38;5;124m\"\u001b[39m])}\n\u001b[1;32m      3\u001b[0m get_feature_names(fun_control)\n",
+            "\u001b[0;31mNameError\u001b[0m: name 'MockDataSet' is not defined"
+          ]
+        }
+      ],
+      "source": [
+        "from spotPython.utils.init import get_feature_names\n",
+        "fun_control = {\"data_set\": MockDataSet(names=[\"feature1\", \"feature2\", \"feature3\"])}\n",
+        "get_feature_names(fun_control)\n"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": null,
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "spotpython"
-version = "0.14.44"
+version = "0.14.45"
 authors = [
   { name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
 ]
diff --git a/src/spotPython/utils/file.py b/src/spotPython/utils/file.py
@@ -181,24 +181,38 @@ def load_core_model_from_file(coremodel, dirname="userModel"):
     return core_model
 
 
-def get_experiment_from_PREFIX(PREFIX) -> tuple:
+def get_experiment_from_PREFIX(PREFIX, return_dict=True) -> dict:
     """
     Setup the experiment based on the PREFIX provided and return the relevant configuration
     and control objects.
 
     Args:
-        PREFIX (str): The prefix for the experiment filename.
+        PREFIX (str):
+            The prefix for the experiment filename.
+        return_dict (bool, optional):
+            Whether to return the configuration and control objects as a dictionary.
+            If False, a tuple is returned:
+            (config, fun_control, design_control, surrogate_control, optimizer_control).
+            Defaults to True.
 
     Returns:
-        tuple:
-            A tuple containing config, spot_tuner, fun_control, design_control, surrogate_control,
-            and optimizer_control.
+        dict: Dictionary containing the configuration and control objects (a tuple if return_dict is False).
 
     Example:
-        >>> config, _, _, _, _, _ = get_experiment_from_PREFIX("100")
+        >>> from spotPython.utils.file import get_experiment_from_PREFIX
+        >>> config = get_experiment_from_PREFIX("100")["config"]
 
     """
     experiment_name = get_experiment_filename(PREFIX)
     spot_tuner, fun_control, design_control, surrogate_control, optimizer_control = load_experiment(experiment_name)
     config = get_tuned_architecture(spot_tuner, fun_control)
-    return config, spot_tuner, fun_control, design_control, surrogate_control, optimizer_control
+    if return_dict:
+        return {
+            "config": config,
+            "fun_control": fun_control,
+            "design_control": design_control,
+            "surrogate_control": surrogate_control,
+            "optimizer_control": optimizer_control,
+        }
+    else:
+        return config, fun_control, design_control, surrogate_control, optimizer_control
diff --git a/src/spotPython/utils/init.py b/src/spotPython/utils/init.py
@@ -1,4 +1,5 @@
 import os
+from typing import List, Dict, Any
 import lightning as L
 from scipy.optimize import differential_evolution
 import numpy as np
@@ -683,3 +684,29 @@ def get_tensorboard_path(fun_control):
         tensorboard_path (str): The path to the folder where the tensorboard files are saved.
     """
     return fun_control["TENSORBOARD_PATH"]
+
+
+def get_feature_names(fun_control: Dict[str, Any]) -> List[str]:
+    """
+    Get the feature names from the fun_control dictionary.
+
+    Args:
+        fun_control (dict): The function control dictionary. Must contain a "data_set" key.
+
+    Returns:
+        List[str]: List of feature names.
+
+    Raises:
+        ValueError: If "data_set" is not in fun_control.
+        ValueError: If "data_set" is None.
+
+    Examples:
+        >>> from spotPython.utils.init import get_feature_names
+        >>> get_feature_names(fun_control)
+    """
+    data_set = fun_control.get("data_set")
+
+    if data_set is None:
+        raise ValueError("'data_set' key not found or is None in 'fun_control'")
+
+    return data_set.names
diff --git a/test/test_get_feature_names.py b/test/test_get_feature_names.py
@@ -0,0 +1,31 @@
+import pytest
+from spotPython.utils.init import get_feature_names
+
+
+class MockDataSet:
+    def __init__(self, names):
+        self.names = names
+
+
+def test_get_feature_names_success():
+    fun_control = {"data_set": MockDataSet(names=["feature1", "feature2", "feature3"])}
+    feature_names = get_feature_names(fun_control)
+    assert feature_names == ["feature1", "feature2", "feature3"]
+
+
+def test_get_feature_names_missing_data_set_key():
+    fun_control = {}
+    with pytest.raises(ValueError, match="'data_set' key not found or is None in 'fun_control'"):
+        get_feature_names(fun_control)
+
+
+def test_get_feature_names_data_set_none():
+    fun_control = {"data_set": None}
+    with pytest.raises(ValueError, match="'data_set' key not found or is None in 'fun_control'"):
+        get_feature_names(fun_control)
+
+
+def test_get_feature_names_empty_names():
+    fun_control = {"data_set": MockDataSet(names=[])}
+    feature_names = get_feature_names(fun_control)
+    assert feature_names == []

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"`
`7`	`7`
`8`	`8`	`[project]`
`9`	`9`	`name = "spotpython"`
`10`		`-version = "0.14.44"`
	`10`	`+version = "0.14.45"`
`11`	`11`	`authors = [`
`12`	`12`	`{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }`
`13`	`13`	`]`