Skip to content

Commit cc0e092

Browse files
Diabetes data set
1 parent 0f67364 commit cc0e092

10 files changed

Lines changed: 694 additions & 3948 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,3 +299,4 @@ notebooks/data/spotPython/daten_sensitive.csv
299299
notebooks/data/spotPython/data_sensitive.csv
300300
notebooks/data/spotPython/data_sensitive_rmNA.csv
301301
notebooks/runs_OLD/*
302+
runs/lightning_logs/*

notebooks/00_spotPython_tests.ipynb

Lines changed: 368 additions & 3875 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotPython"
10-
version = "0.6.52"
10+
version = "0.6.53"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotPython/data/diabetes.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import torch
2+
from torch.utils.data import Dataset
3+
from sklearn.datasets import load_diabetes
4+
5+
6+
class Diabetes(Dataset):
    """
    A PyTorch Dataset for regression that wraps the scikit-learn diabetes toy data set.

    Ten baseline variables (age, sex, body mass index, average blood pressure,
    and six blood serum measurements) were obtained for each of n = 442 diabetes patients,
    as well as the response of interest,
    a quantitative measure of disease progression one year after baseline.

    Args:
        feature_type (torch.dtype): The data type of the features. Defaults to torch.float.
        target_type (torch.dtype): The data type of the targets. Defaults to torch.float.
        train (bool): Flag marking the dataset as training (True) or test (False) data.
            The flag is only stored and reported by `extra_repr`; the same data is
            loaded for both values. Defaults to True.

    Attributes:
        feature_type (torch.dtype): The data type used for the feature tensor.
        target_type (torch.dtype): The data type used for the target tensor.
        train (bool): The value of the `train` argument.
        data (Tensor): The data features.
        targets (Tensor): The data targets.

    Examples:
        >>> from torch.utils.data import DataLoader
        >>> from spotPython.data.diabetes import Diabetes
        >>> import torch
        >>> dataset = Diabetes(feature_type=torch.float32, target_type=torch.float32)
        >>> # Set batch size for DataLoader
        >>> batch_size = 5
        >>> # Create DataLoader
        >>> dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
        >>> # Iterate over the data in the DataLoader
        >>> for batch in dataloader:
        ...     inputs, targets = batch
        ...     print(f"Batch Size: {inputs.size(0)}")
        ...     print("---------------")
        ...     print(f"Inputs: {inputs}")
        ...     print(f"Targets: {targets}")
    """

    def __init__(
        self, feature_type: torch.dtype = torch.float, target_type: torch.dtype = torch.float, train: bool = True
    ) -> None:
        super().__init__()
        self.feature_type = feature_type
        self.target_type = target_type
        self.train = train
        # The dtypes must be set before loading, because _load_data reads them.
        self.data, self.targets = self._load_data()

    def _load_data(self) -> tuple:
        """Loads the data from scikit-learn and returns the features and targets.

        The features are converted to `self.feature_type` and the targets
        to `self.target_type`.

        Returns:
            tuple: A tuple (features, targets) of two tensors.

        Examples:
            >>> from spotPython.data.diabetes import Diabetes
            >>> dataset = Diabetes()
            >>> print(dataset.data.shape)
            torch.Size([442, 10])
            >>> print(dataset.targets.shape)
            torch.Size([442])
        """
        feature_df, target_df = load_diabetes(return_X_y=True, as_frame=True)
        # Convert the pandas objects to PyTorch tensors
        feature_tensor = torch.tensor(feature_df.values, dtype=self.feature_type)
        target_tensor = torch.tensor(target_df.values, dtype=self.target_type)

        return feature_tensor, target_tensor

    def __getitem__(self, idx: int) -> tuple:
        """
        Returns the feature and target at the given index.

        Args:
            idx (int): The index.

        Returns:
            tuple: A tuple containing the feature and target at the given index.

        Examples:
            >>> from spotPython.data.diabetes import Diabetes
            >>> dataset = Diabetes()
            >>> feature, target = dataset[0]
            >>> print(feature.shape)
            torch.Size([10])
            >>> print(target.shape)
            torch.Size([])
        """
        feature = self.data[idx]
        target = self.targets[idx]
        return feature, target

    def __len__(self) -> int:
        """
        Returns the length of the dataset.

        Returns:
            int: The length of the dataset, i.e., the number of rows in `self.data`.

        Examples:
            >>> from spotPython.data.diabetes import Diabetes
            >>> dataset = Diabetes()
            >>> print(len(dataset))
            442
        """
        return len(self.data)

    def extra_repr(self) -> str:
        """
        Returns a string describing the split ("Train" or "Test") of the dataset.

        Note:
            This class does not hook this method into `__repr__`,
            so it has to be called explicitly.

        Returns:
            str: "Split: Train" if `self.train` is truthy, otherwise "Split: Test".

        Examples:
            >>> from spotPython.data.diabetes import Diabetes
            >>> dataset = Diabetes()
            >>> print(dataset.extra_repr())
            Split: Train
        """
        split = "Train" if self.train else "Test"
        return f"Split: {split}"

src/spotPython/fun/hyperlight.py

Lines changed: 63 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
11
import logging
22
import numpy as np
33
from numpy.random import default_rng
4-
from numpy import array
54
from spotPython.light.traintest import train_model
6-
from spotPython.hyperparameters.values import (
7-
assign_values,
8-
generate_one_config_from_var_dict,
9-
)
5+
from spotPython.hyperparameters.values import assign_values, generate_one_config_from_var_dict, get_var_name
106

117
logger = logging.getLogger(__name__)
128
py_handler = logging.FileHandler(f"{__name__}.log", mode="w")
@@ -32,37 +28,25 @@ class HyperLight:
3228
Examples:
3329
>>> hyper_light = HyperLight(seed=126, log_level=50)
3430
>>> print(hyper_light.seed)
35-
126
31+
126
3632
"""
3733

3834
def __init__(self, seed: int = 126, log_level: int = 50) -> None:
3935
self.seed = seed
4036
self.rng = default_rng(seed=self.seed)
41-
self.fun_control = {
42-
"seed": None,
43-
"data": None,
44-
"step": 10_000,
45-
"horizon": None,
46-
"grace_period": None,
47-
"metric_river": None,
48-
"metric_sklearn": None,
49-
"weights": array([1, 0, 0]),
50-
"weight_coeff": 0.0,
51-
"log_level": log_level,
52-
"var_name": [],
53-
"var_type": [],
54-
}
55-
self.log_level = self.fun_control["log_level"]
56-
logger.setLevel(self.log_level)
57-
logger.info(f"Starting the logger at level {self.log_level} for module {__name__}:")
58-
59-
def check_X_shape(self, X: np.ndarray) -> np.ndarray:
37+
self.log_level = log_level
38+
logger.setLevel(log_level)
39+
logger.info(f"Starting the logger at level {log_level} for module {__name__}:")
40+
41+
def check_X_shape(self, X: np.ndarray, fun_control: dict) -> np.ndarray:
6042
"""
6143
Checks the shape of the input array X and raises an exception if it is not valid.
6244
6345
Args:
6446
X (np.ndarray):
6547
input array.
48+
fun_control (dict):
49+
dictionary containing control parameters for the hyperparameter tuning.
6650
6751
Returns:
6852
np.ndarray:
@@ -73,17 +57,31 @@ def check_X_shape(self, X: np.ndarray) -> np.ndarray:
7357
if the shape of the input array is not valid.
7458
7559
Examples:
76-
>>> hyper_light = HyperLight(seed=126, log_level=50)
77-
>>> X = np.array([[1, 2], [3, 4]])
78-
>>> hyper_light.check_X_shape(X)
79-
array([[1, 2],
80-
[3, 4]])
60+
>>> import numpy as np
61+
from spotPython.utils.init import fun_control_init
62+
from spotPython.light.netlightregression import NetLightRegression
63+
from spotPython.hyperdict.light_hyper_dict import LightHyperDict
64+
from spotPython.hyperparameters.values import add_core_model_to_fun_control
65+
from spotPython.fun.hyperlight import HyperLight
66+
from spotPython.hyperparameters.values import get_var_name
67+
fun_control = fun_control_init()
68+
add_core_model_to_fun_control(core_model=NetLightRegression,
69+
fun_control=fun_control,
70+
hyper_dict=LightHyperDict)
71+
hyper_light = HyperLight(seed=126, log_level=50)
72+
n_hyperparams = len(get_var_name(fun_control))
73+
# generate a random np.array X with shape (2, n_hyperparams)
74+
X = np.random.rand(2, n_hyperparams)
75+
X == hyper_light.check_X_shape(X, fun_control)
76+
array([[ True, True, True, True, True, True, True, True, True],
77+
[ True, True, True, True, True, True, True, True, True]])
78+
8179
"""
8280
try:
8381
X.shape[1]
8482
except ValueError:
8583
X = np.array([X])
86-
if X.shape[1] != len(self.fun_control["var_name"]):
84+
if X.shape[1] != len(get_var_name(fun_control)):
8785
raise Exception("Invalid shape of input array X.")
8886
return X
8987

@@ -102,30 +100,51 @@ def fun(self, X: np.ndarray, fun_control: dict = None) -> np.ndarray:
102100
array containing the evaluation results.
103101
104102
Examples:
105-
>>> hyper_light = HyperLight(seed=126, log_level=50)
106-
X = np.array([[1, 2], [3, 4]])
107-
fun_control = {"weights": np.array([1, 0, 0])}
103+
>>> from spotPython.utils.init import fun_control_init
104+
from spotPython.light.netlightregression import NetLightRegression
105+
from spotPython.hyperdict.light_hyper_dict import LightHyperDict
106+
from spotPython.hyperparameters.values import (
107+
add_core_model_to_fun_control,
108+
get_default_hyperparameters_as_array)
109+
from spotPython.fun.hyperlight import HyperLight
110+
from spotPython.data.diabetes import Diabetes
111+
from spotPython.hyperparameters.values import set_data_set
112+
import numpy as np
113+
fun_control = fun_control_init(
114+
_L_in=10,
115+
_L_out=1,)
116+
117+
dataset = Diabetes()
118+
set_data_set(fun_control=fun_control,
119+
data_set=dataset)
120+
121+
add_core_model_to_fun_control(core_model=NetLightRegression,
122+
fun_control=fun_control,
123+
hyper_dict=LightHyperDict)
124+
hyper_light = HyperLight(seed=126, log_level=50)
125+
X = get_default_hyperparameters_as_array(fun_control)
126+
# combine X and X to a np.array with shape (2, n_hyperparams)
127+
# so that two values are returned
128+
X = np.vstack((X, X))
108129
hyper_light.fun(X, fun_control)
109-
array([nan, nan])
130+
array([27462.84179688, 20990.08007812])
110131
"""
111132
z_res = np.array([], dtype=float)
112-
if fun_control is not None:
113-
self.fun_control.update(fun_control)
114-
self.check_X_shape(X)
115-
var_dict = assign_values(X, self.fun_control["var_name"])
133+
self.check_X_shape(X=X, fun_control=fun_control)
134+
var_dict = assign_values(X, get_var_name(fun_control))
116135
# type information and transformations are considered in generate_one_config_from_var_dict:
117-
for config in generate_one_config_from_var_dict(var_dict, self.fun_control):
136+
for config in generate_one_config_from_var_dict(var_dict, fun_control):
118137
logger.debug(f"\nconfig: {config}")
119138
# extract parameters like epochs, batch_size, lr, etc. from config
120139
# config_id = generate_config_id(config)
121140
try:
122-
print("fun: Calling train_model")
123-
df_eval = train_model(config, self.fun_control)
124-
print("fun: train_model returned")
141+
logger.debug("fun: Calling train_model")
142+
df_eval = train_model(config, fun_control)
143+
logger.debug("fun: train_model returned")
125144
except Exception as err:
126145
logger.error(f"Error in fun(). Call to train_model failed. {err=}, {type(err)=}")
127146
logger.error("Setting df_eval to np.nan")
128147
df_eval = np.nan
129-
z_val = self.fun_control["weights"] * df_eval
148+
z_val = fun_control["weights"] * df_eval
130149
z_res = np.append(z_res, z_val)
131150
return z_res

src/spotPython/hyperparameters/values.py

Lines changed: 38 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -483,34 +483,41 @@ def get_var_name(fun_control) -> list:
483483
(list):
484484
list with names
485485
Examples:
486-
>>> d = {"core_model_hyper_dict":{
487-
"leaf_prediction": {
488-
"levels": ["mean", "model", "adaptive"],
489-
"type": "factor",
490-
"default": "mean",
491-
"core_model_parameter_type": "str"},
492-
"leaf_model": {
493-
"levels": ["linear_model.LinearRegression", "linear_model.PARegressor", "linear_model.Perceptron"],
494-
"type": "factor",
495-
"default": "LinearRegression",
496-
"core_model_parameter_type": "instance"},
497-
"splitter": {
498-
"levels": ["EBSTSplitter", "TEBSTSplitter", "QOSplitter"],
499-
"type": "factor",
500-
"default": "EBSTSplitter",
501-
"core_model_parameter_type": "instance()"},
502-
"binary_split": {
503-
"levels": [0, 1],
504-
"type": "factor",
505-
"default": 0,
506-
"core_model_parameter_type": "bool"},
507-
"stop_mem_management": { "levels": [0, 1],
508-
"type": "factor",
509-
"default": 0,
510-
"core_model_parameter_type": "bool"}}}
511-
512-
get_var_name(d)
513-
['leaf_prediction', 'leaf_model', 'splitter', 'binary_split', 'stop_mem_management']
486+
>>> from spotPython.hyperparameters.values import get_var_name
487+
fun_control = {"core_model_hyper_dict":{
488+
"leaf_prediction": {
489+
"levels": ["mean", "model", "adaptive"],
490+
"type": "factor",
491+
"default": "mean",
492+
"core_model_parameter_type": "str"},
493+
"leaf_model": {
494+
"levels": ["linear_model.LinearRegression",
495+
"linear_model.PARegressor",
496+
"linear_model.Perceptron"],
497+
"type": "factor",
498+
"default": "LinearRegression",
499+
"core_model_parameter_type": "instance"},
500+
"splitter": {
501+
"levels": ["EBSTSplitter", "TEBSTSplitter", "QOSplitter"],
502+
"type": "factor",
503+
"default": "EBSTSplitter",
504+
"core_model_parameter_type": "instance()"},
505+
"binary_split": {
506+
"levels": [0, 1],
507+
"type": "factor",
508+
"default": 0,
509+
"core_model_parameter_type": "bool"},
510+
"stop_mem_management": {
511+
"levels": [0, 1],
512+
"type": "factor",
513+
"default": 0,
514+
"core_model_parameter_type": "bool"}}}
515+
get_var_name(fun_control)
516+
['leaf_prediction',
517+
'leaf_model',
518+
'splitter',
519+
'binary_split',
520+
'stop_mem_management']
514521
"""
515522
return list(fun_control["core_model_hyper_dict"].keys())
516523

@@ -831,6 +838,9 @@ def get_default_hyperparameters_as_array(fun_control) -> np.array:
831838
Examples:
832839
>>> from river.tree import HoeffdingAdaptiveTreeRegressor
833840
from spotRiver.data.river_hyper_dict import RiverHyperDict
841+
from spotPython.hyperparameters.values import (
842+
get_default_hyperparameters_as_array,
843+
add_core_model_to_fun_control)
834844
fun_control = {}
835845
add_core_model_to_fun_control(core_model=HoeffdingAdaptiveTreeRegressor,
836846
fun_control=fun_control,

src/spotPython/utils/init.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,8 @@ def fun_control_init(
146146
"test": None,
147147
"task": task,
148148
"spot_tensorboard_path": spot_tensorboard_path,
149+
"var_name": [],
150+
"var_type": [],
149151
"weights": 1.0,
150152
"spot_writer": spot_writer,
151153
}

0 commit comments

Comments
 (0)