sequential-parameter-optimization
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎notebooks/00_spotPython_tests.ipynb‎
Lines changed: 368 additions & 3875 deletions b/‎notebooks/00_spotPython_tests.ipynb‎
Lines changed: 368 additions & 3875 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/spotPython/data/diabetes.py‎
Lines changed: 125 additions & 0 deletions b/‎src/spotPython/data/diabetes.py‎
Lines changed: 125 additions & 0 deletions
diff --git a/‎src/spotPython/fun/hyperlight.py‎
Lines changed: 63 additions & 44 deletions b/‎src/spotPython/fun/hyperlight.py‎
Lines changed: 63 additions & 44 deletions
diff --git a/‎src/spotPython/hyperparameters/values.py‎
Lines changed: 38 additions & 28 deletions b/‎src/spotPython/hyperparameters/values.py‎
Lines changed: 38 additions & 28 deletions
diff --git a/‎src/spotPython/utils/init.py‎
Lines changed: 2 additions & 0 deletions b/‎src/spotPython/utils/init.py‎
Lines changed: 2 additions & 0 deletions
@@ -299,3 +299,4 @@ notebooks/data/spotPython/daten_sensitive.csv
 notebooks/data/spotPython/data_sensitive.csv
 notebooks/data/spotPython/data_sensitive_rmNA.csv
 notebooks/runs_OLD/*
+runs/lightning_logs/*
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "spotPython"
-version = "0.6.52"
+version = "0.6.53"
 authors = [
   { name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
 ]
 
@@ -0,0 +1,125 @@
+import torch
+from torch.utils.data import Dataset
+from sklearn.datasets import load_diabetes
+
+
+class Diabetes(Dataset):
+    """
+    A PyTorch Dataset for regression. A toy data set from scikit-learn.
+    Ten baseline variables, age, sex, body mass index, average blood pressure,
+    and six blood serum measurements were obtained for each of n = 442 diabetes patients,
+    as well as the response of interest,
+    a quantitative measure of disease progression one year after baseline.
+
+    Args:
+        feature_type (torch.dtype): The data type of the features. Defaults to torch.float.
+        target_type (torch.dtype): The data type of the targets. Defaults to torch.float.
+        train (bool): Split flag; only read by extra_repr to label the split. Defaults to True.
+
+    Attributes:
+        data (Tensor): The data features.
+        targets (Tensor): The data targets.
+
+    Examples:
+        >>> from torch.utils.data import DataLoader
+            from spotPython.data.diabetes import Diabetes
+            import torch
+            dataset = Diabetes(feature_type=torch.float32, target_type=torch.float32)
+            # Set batch size for DataLoader
+            batch_size = 5
+            # Create DataLoader
+            dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
+
+            # Iterate over the data in the DataLoader
+            for batch in dataloader:
+                inputs, targets = batch
+                print(f"Batch Size: {inputs.size(0)}")
+                print("---------------")
+                print(f"Inputs: {inputs}")
+                print(f"Targets: {targets}")
+    """
+
+    def __init__(
+        self, feature_type: torch.dtype = torch.float, target_type: torch.dtype = torch.float, train: bool = True
+    ) -> None:
+        super().__init__()
+        self.feature_type = feature_type
+        self.target_type = target_type
+        self.train = train
+        self.data, self.targets = self._load_data()
+
+    def _load_data(self) -> tuple:
+        """Loads the diabetes data via scikit-learn's load_diabetes and converts it to tensors.
+
+        Returns:
+            tuple: (features, targets) tensors cast to feature_type and target_type.
+
+        Examples:
+            >>> from spotPython.data.diabetes import Diabetes
+                dataset = Diabetes()
+                print(dataset.data.shape)
+                print(dataset.targets.shape)
+                torch.Size([442, 10])
+                torch.Size([442])
+        """
+        feature_df, target_df = load_diabetes(return_X_y=True, as_frame=True)
+        # Convert the pandas objects to PyTorch tensors of the requested dtypes
+        feature_tensor = torch.tensor(feature_df.values, dtype=self.feature_type)
+        target_tensor = torch.tensor(target_df.values, dtype=self.target_type)
+
+        return feature_tensor, target_tensor
+
+    def __getitem__(self, idx: int) -> tuple:
+        """
+        Returns the feature and target at the given index.
+
+        Args:
+            idx (int): The index.
+
+        Returns:
+            tuple: A tuple containing the feature and target at the given index.
+
+        Examples:
+            >>> from spotPython.data.diabetes import Diabetes
+                dataset = Diabetes()
+                # index 0 selects the first sample
+                feature, target = dataset[0]
+                print(feature.shape, target.shape)
+                torch.Size([10]) torch.Size([])
+        """
+        feature = self.data[idx]
+        target = self.targets[idx]
+        return feature, target
+
+    def __len__(self) -> int:
+        """
+        Returns the length of the dataset.
+
+        Returns:
+            int: The length of the dataset.
+
+        Examples:
+            >>> from spotPython.data.diabetes import Diabetes
+            >>> dataset = Diabetes()
+            >>> print(len(dataset))
+            442
+
+        """
+        return len(self.data)
+
+    def extra_repr(self) -> str:
+        """
+        Returns the split label of the dataset, derived from self.train.
+
+        Returns:
+            str: "Split: Train" if self.train is truthy, otherwise "Split: Test".
+
+        Examples:
+            >>> from spotPython.data.diabetes import Diabetes
+            >>> dataset = Diabetes()
+            >>> print(dataset.extra_repr())
+            Split: Train
+
+        """
+        split = "Train" if self.train else "Test"
+        return f"Split: {split}"
@@ -1,12 +1,8 @@
 import logging
 import numpy as np
 from numpy.random import default_rng
-from numpy import array
 from spotPython.light.traintest import train_model
-from spotPython.hyperparameters.values import (
-    assign_values,
-    generate_one_config_from_var_dict,
-)
+from spotPython.hyperparameters.values import assign_values, generate_one_config_from_var_dict, get_var_name
 
 logger = logging.getLogger(__name__)
 py_handler = logging.FileHandler(f"{__name__}.log", mode="w")
@@ -32,37 +28,25 @@ class HyperLight:
     Examples:
         >>> hyper_light = HyperLight(seed=126, log_level=50)
         >>> print(hyper_light.seed)
-        126
+            126
     """
 
     def __init__(self, seed: int = 126, log_level: int = 50) -> None:
         self.seed = seed
         self.rng = default_rng(seed=self.seed)
-        self.fun_control = {
-            "seed": None,
-            "data": None,
-            "step": 10_000,
-            "horizon": None,
-            "grace_period": None,
-            "metric_river": None,
-            "metric_sklearn": None,
-            "weights": array([1, 0, 0]),
-            "weight_coeff": 0.0,
-            "log_level": log_level,
-            "var_name": [],
-            "var_type": [],
-        }
-        self.log_level = self.fun_control["log_level"]
-        logger.setLevel(self.log_level)
-        logger.info(f"Starting the logger at level {self.log_level} for module {__name__}:")
-
-    def check_X_shape(self, X: np.ndarray) -> np.ndarray:
+        self.log_level = log_level
+        logger.setLevel(log_level)
+        logger.info(f"Starting the logger at level {log_level} for module {__name__}:")
+
+    def check_X_shape(self, X: np.ndarray, fun_control: dict) -> np.ndarray:
         """
         Checks the shape of the input array X and raises an exception if it is not valid.
 
         Args:
             X (np.ndarray):
                 input array.
+            fun_control (dict):
+                dictionary containing control parameters for the hyperparameter tuning.
 
         Returns:
             np.ndarray:
@@ -73,17 +57,31 @@ def check_X_shape(self, X: np.ndarray) -> np.ndarray:
                 if the shape of the input array is not valid.
 
         Examples:
-            >>> hyper_light = HyperLight(seed=126, log_level=50)
-            >>> X = np.array([[1, 2], [3, 4]])
-            >>> hyper_light.check_X_shape(X)
-            array([[1, 2],
-                   [3, 4]])
+            >>> import numpy as np
+                from spotPython.utils.init import fun_control_init
+                from spotPython.light.netlightregression import NetLightRegression
+                from spotPython.hyperdict.light_hyper_dict import LightHyperDict
+                from spotPython.hyperparameters.values import add_core_model_to_fun_control
+                from spotPython.fun.hyperlight import HyperLight
+                from spotPython.hyperparameters.values import get_var_name
+                fun_control = fun_control_init()
+                add_core_model_to_fun_control(core_model=NetLightRegression,
+                                            fun_control=fun_control,
+                                            hyper_dict=LightHyperDict)
+                hyper_light = HyperLight(seed=126, log_level=50)
+                n_hyperparams = len(get_var_name(fun_control))
+                # generate a random np.array X with shape (2, n_hyperparams)
+                X = np.random.rand(2, n_hyperparams)
+                X == hyper_light.check_X_shape(X, fun_control)
+                array([[ True,  True,  True,  True,  True,  True,  True,  True,  True],
+                [ True,  True,  True,  True,  True,  True,  True,  True,  True]])
+
         """
         try:
             X.shape[1]
         except ValueError:
             X = np.array([X])
-        if X.shape[1] != len(self.fun_control["var_name"]):
+        if X.shape[1] != len(get_var_name(fun_control)):
             raise Exception("Invalid shape of input array X.")
         return X
 
@@ -102,30 +100,51 @@ def fun(self, X: np.ndarray, fun_control: dict = None) -> np.ndarray:
                 array containing the evaluation results.
 
         Examples:
-            >>> hyper_light = HyperLight(seed=126, log_level=50)
-                X = np.array([[1, 2], [3, 4]])
-                fun_control = {"weights": np.array([1, 0, 0])}
+            >>> from spotPython.utils.init import fun_control_init
+                from spotPython.light.netlightregression import NetLightRegression
+                from spotPython.hyperdict.light_hyper_dict import LightHyperDict
+                from spotPython.hyperparameters.values import (
+                    add_core_model_to_fun_control,
+                    get_default_hyperparameters_as_array)
+                from spotPython.fun.hyperlight import HyperLight
+                from spotPython.data.diabetes import Diabetes
+                from spotPython.hyperparameters.values import set_data_set
+                import numpy as np
+                fun_control = fun_control_init(
+                    _L_in=10,
+                    _L_out=1,)
+
+                dataset = Diabetes()
+                set_data_set(fun_control=fun_control,
+                                data_set=dataset)
+
+                add_core_model_to_fun_control(core_model=NetLightRegression,
+                                            fun_control=fun_control,
+                                            hyper_dict=LightHyperDict)
+                hyper_light = HyperLight(seed=126, log_level=50)
+                X = get_default_hyperparameters_as_array(fun_control)
+                # combine X and X to a np.array with shape (2, n_hyperparams)
+                # so that two values are returned
+                X = np.vstack((X, X))
                 hyper_light.fun(X, fun_control)
-                array([nan, nan])
+                array([27462.84179688, 20990.08007812])
         """
         z_res = np.array([], dtype=float)
-        if fun_control is not None:
-            self.fun_control.update(fun_control)
-        self.check_X_shape(X)
-        var_dict = assign_values(X, self.fun_control["var_name"])
+        self.check_X_shape(X=X, fun_control=fun_control)
+        var_dict = assign_values(X, get_var_name(fun_control))
         # type information and transformations are considered in generate_one_config_from_var_dict:
-        for config in generate_one_config_from_var_dict(var_dict, self.fun_control):
+        for config in generate_one_config_from_var_dict(var_dict, fun_control):
             logger.debug(f"\nconfig: {config}")
             # extract parameters like epochs, batch_size, lr, etc. from config
             # config_id = generate_config_id(config)
             try:
-                print("fun: Calling train_model")
-                df_eval = train_model(config, self.fun_control)
-                print("fun: train_model returned")
+                logger.debug("fun: Calling train_model")
+                df_eval = train_model(config, fun_control)
+                logger.debug("fun: train_model returned")
             except Exception as err:
                 logger.error(f"Error in fun(). Call to train_model failed. {err=}, {type(err)=}")
                 logger.error("Setting df_eval to np.nan")
                 df_eval = np.nan
-            z_val = self.fun_control["weights"] * df_eval
+            z_val = fun_control["weights"] * df_eval
             z_res = np.append(z_res, z_val)
         return z_res
@@ -483,34 +483,41 @@ def get_var_name(fun_control) -> list:
         (list):
             ist with names
     Examples:
-        >>> d = {"core_model_hyper_dict":{
-            "leaf_prediction": {
-                "levels": ["mean", "model", "adaptive"],
-                "type": "factor",
-                "default": "mean",
-                "core_model_parameter_type": "str"},
-            "leaf_model": {
-                "levels": ["linear_model.LinearRegression", "linear_model.PARegressor", "linear_model.Perceptron"],
-                "type": "factor",
-                "default": "LinearRegression",
-                "core_model_parameter_type": "instance"},
-            "splitter": {
-                "levels": ["EBSTSplitter", "TEBSTSplitter", "QOSplitter"],
-                "type": "factor",
-                "default": "EBSTSplitter",
-                "core_model_parameter_type": "instance()"},
-            "binary_split": {
-                "levels": [0, 1],
-                "type": "factor",
-                "default": 0,
-                "core_model_parameter_type": "bool"},
-            "stop_mem_management": {                                                         "levels": [0, 1],
-                "type": "factor",
-                "default": 0,
-                "core_model_parameter_type": "bool"}}}
-
-        get_var_name(d)
-        ['leaf_prediction', 'leaf_model', 'splitter', 'binary_split', 'stop_mem_management']
+        >>> from spotPython.hyperparameters.values import get_var_name
+            fun_control = {"core_model_hyper_dict":{
+                        "leaf_prediction": {
+                            "levels": ["mean", "model", "adaptive"],
+                            "type": "factor",
+                            "default": "mean",
+                            "core_model_parameter_type": "str"},
+                        "leaf_model": {
+                            "levels": ["linear_model.LinearRegression",
+                                        "linear_model.PARegressor",
+                                        "linear_model.Perceptron"],
+                            "type": "factor",
+                            "default": "LinearRegression",
+                            "core_model_parameter_type": "instance"},
+                        "splitter": {
+                            "levels": ["EBSTSplitter", "TEBSTSplitter", "QOSplitter"],
+                            "type": "factor",
+                            "default": "EBSTSplitter",
+                            "core_model_parameter_type": "instance()"},
+                        "binary_split": {
+                            "levels": [0, 1],
+                            "type": "factor",
+                            "default": 0,
+                            "core_model_parameter_type": "bool"},
+                        "stop_mem_management": {
+                            "levels": [0, 1],
+                            "type": "factor",
+                            "default": 0,
+                            "core_model_parameter_type": "bool"}}}
+            get_var_name(fun_control)
+            ['leaf_prediction',
+                'leaf_model',
+                'splitter',
+                'binary_split',
+                'stop_mem_management']
     """
     return list(fun_control["core_model_hyper_dict"].keys())
 
@@ -831,6 +838,9 @@ def get_default_hyperparameters_as_array(fun_control) -> np.array:
     Examples:
         >>> from river.tree import HoeffdingAdaptiveTreeRegressor
             from spotRiver.data.river_hyper_dict import RiverHyperDict
+            from spotPython.hyperparameters.values import (
+                get_default_hyperparameters_as_array,
+                add_core_model_to_fun_control)
             fun_control = {}
             add_core_model_to_fun_control(core_model=HoeffdingAdaptiveTreeRegressor,
                 fun_control=func_control,
 
@@ -146,6 +146,8 @@ def fun_control_init(
         "test": None,
         "task": task,
         "spot_tensorboard_path": spot_tensorboard_path,
+        "var_name": [],
+        "var_type": [],
         "weights": 1.0,
         "spot_writer": spot_writer,
     }
Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"`
`7`	`7`
`8`	`8`	`[project]`
`9`	`9`	`name = "spotPython"`
`10`		`-version = "0.6.52"`
	`10`	`+version = "0.6.53"`
`11`	`11`	`authors = [`
`12`	`12`	`{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }`
`13`	`13`	`]`
Original file line number	Diff line number	Diff line change
`@@ -146,6 +146,8 @@ def fun_control_init(`
`146`	`146`	`"test": None,`
`147`	`147`	`"task": task,`
`148`	`148`	`"spot_tensorboard_path": spot_tensorboard_path,`
	`149`	`+ "var_name": [],`
	`150`	`+ "var_type": [],`
`149`	`151`	`"weights": 1.0,`
`150`	`152`	`"spot_writer": spot_writer,`
`151`	`153`	`}`