Skip to content

Commit 7e72f7d

Browse files
Update 00_spotPython_tests.ipynb
1 parent 379554b commit 7e72f7d

1 file changed

Lines changed: 189 additions & 144 deletions

File tree

notebooks/00_spotPython_tests.ipynb

Lines changed: 189 additions & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -4055,39 +4055,9 @@
40554055
},
40564056
{
40574057
"cell_type": "code",
4058-
"execution_count": 2,
4058+
"execution_count": null,
40594059
"metadata": {},
4060-
"outputs": [
4061-
{
4062-
"name": "stderr",
4063-
"output_type": "stream",
4064-
"text": [
4065-
"Seed set to 123\n"
4066-
]
4067-
},
4068-
{
4069-
"name": "stdout",
4070-
"output_type": "stream",
4071-
"text": [
4072-
"Before modification:\n",
4073-
"| name | type | default | lower | upper | transform |\n",
4074-
"|-----------------|--------|-----------|---------|---------|-------------|\n",
4075-
"| n_estimators | int | 10 | 2 | 1000 | None |\n",
4076-
"| step | float | 1 | 0.1 | 10 | None |\n",
4077-
"| use_aggregation | factor | 1 | 0 | 1 | None |\n",
4078-
"Setting hyperparameter n_estimators to value [2, 5].\n",
4079-
"Variable type is int.\n",
4080-
"Core type is None.\n",
4081-
"Calling modify_hyper_parameter_bounds().\n",
4082-
"After modification:\n",
4083-
"| name | type | default | lower | upper | transform |\n",
4084-
"|-----------------|--------|-----------|---------|---------|-------------|\n",
4085-
"| n_estimators | int | 10 | 2 | 5 | None |\n",
4086-
"| step | float | 1 | 0.1 | 10 | None |\n",
4087-
"| use_aggregation | factor | 1 | 0 | 1 | None |\n"
4088-
]
4089-
}
4090-
],
4060+
"outputs": [],
40914061
"source": [
40924062
"from spotRiver.hyperdict.river_hyper_dict import RiverHyperDict\n",
40934063
"from spotPython.utils.init import fun_control_init\n",
@@ -4106,59 +4076,9 @@
41064076
},
41074077
{
41084078
"cell_type": "code",
4109-
"execution_count": 6,
4079+
"execution_count": null,
41104080
"metadata": {},
4111-
"outputs": [
4112-
{
4113-
"name": "stderr",
4114-
"output_type": "stream",
4115-
"text": [
4116-
"Seed set to 123\n"
4117-
]
4118-
},
4119-
{
4120-
"name": "stdout",
4121-
"output_type": "stream",
4122-
"text": [
4123-
"Before modification:\n",
4124-
"| name | type | default | lower | upper | transform |\n",
4125-
"|------------------------|--------|------------------|---------|----------|------------------------|\n",
4126-
"| grace_period | int | 200 | 10 | 1000 | None |\n",
4127-
"| max_depth | int | 20 | 2 | 20 | transform_power_2_int |\n",
4128-
"| delta | float | 1e-07 | 1e-08 | 1e-06 | None |\n",
4129-
"| tau | float | 0.05 | 0.01 | 0.1 | None |\n",
4130-
"| leaf_prediction | factor | mean | 0 | 2 | None |\n",
4131-
"| leaf_model | factor | LinearRegression | 0 | 2 | None |\n",
4132-
"| model_selector_decay | float | 0.95 | 0.9 | 0.99 | None |\n",
4133-
"| splitter | factor | EBSTSplitter | 0 | 2 | None |\n",
4134-
"| min_samples_split | int | 5 | 2 | 10 | None |\n",
4135-
"| binary_split | factor | 0 | 0 | 1 | None |\n",
4136-
"| max_size | float | 500.0 | 100 | 1000 | None |\n",
4137-
"| memory_estimate_period | int | 6 | 3 | 8 | transform_power_10_int |\n",
4138-
"| stop_mem_management | factor | 0 | 0 | 1 | None |\n",
4139-
"| remove_poor_attrs | factor | 0 | 0 | 1 | None |\n",
4140-
"| merit_preprune | factor | 1 | 0 | 1 | None |\n",
4141-
"After modification:\n",
4142-
"| name | type | default | lower | upper | transform |\n",
4143-
"|------------------------|--------|------------------|---------|----------|------------------------|\n",
4144-
"| grace_period | int | 200 | 10 | 1000 | None |\n",
4145-
"| max_depth | int | 20 | 2 | 20 | transform_power_2_int |\n",
4146-
"| delta | float | 1e-07 | 1e-08 | 1e-06 | None |\n",
4147-
"| tau | float | 0.05 | 0.01 | 0.1 | None |\n",
4148-
"| leaf_prediction | factor | mean | 0 | 2 | None |\n",
4149-
"| leaf_model | factor | LinearRegression | 0 | 1 | None |\n",
4150-
"| model_selector_decay | float | 0.95 | 0.9 | 0.99 | None |\n",
4151-
"| splitter | factor | EBSTSplitter | 0 | 2 | None |\n",
4152-
"| min_samples_split | int | 5 | 2 | 10 | None |\n",
4153-
"| binary_split | factor | 0 | 0 | 1 | None |\n",
4154-
"| max_size | float | 500.0 | 100 | 1000 | None |\n",
4155-
"| memory_estimate_period | int | 6 | 3 | 8 | transform_power_10_int |\n",
4156-
"| stop_mem_management | factor | 0 | 0 | 1 | None |\n",
4157-
"| remove_poor_attrs | factor | 0 | 0 | 1 | None |\n",
4158-
"| merit_preprune | factor | 1 | 0 | 1 | None |\n"
4159-
]
4160-
}
4161-
],
4081+
"outputs": [],
41624082
"source": [
41634083
"import pprint\n",
41644084
"from spotRiver.hyperdict.river_hyper_dict import RiverHyperDict\n",
@@ -4179,24 +4099,9 @@
41794099
},
41804100
{
41814101
"cell_type": "code",
4182-
"execution_count": 7,
4102+
"execution_count": null,
41834103
"metadata": {},
4184-
"outputs": [
4185-
{
4186-
"name": "stderr",
4187-
"output_type": "stream",
4188-
"text": [
4189-
"Seed set to 123\n"
4190-
]
4191-
},
4192-
{
4193-
"name": "stdout",
4194-
"output_type": "stream",
4195-
"text": [
4196-
"{'grace_period': {'type': 'int', 'default': 200, 'transform': 'None', 'lower': 10, 'upper': 1000}, 'max_depth': {'type': 'int', 'default': 20, 'transform': 'transform_power_2_int', 'lower': 2, 'upper': 20}, 'delta': {'type': 'float', 'default': 1e-07, 'transform': 'None', 'lower': 1e-08, 'upper': 1e-06}, 'tau': {'type': 'float', 'default': 0.05, 'transform': 'None', 'lower': 0.01, 'upper': 0.1}, 'leaf_prediction': {'levels': ['mean', 'model', 'adaptive'], 'type': 'factor', 'default': 'mean', 'transform': 'None', 'core_model_parameter_type': 'str', 'lower': 0, 'upper': 2}, 'leaf_model': {'levels': ['LinearRegression', 'Perceptron'], 'type': 'factor', 'default': 'LinearRegression', 'transform': 'None', 'class_name': 'river.linear_model', 'core_model_parameter_type': 'instance()', 'lower': 0, 'upper': 1}, 'model_selector_decay': {'type': 'float', 'default': 0.95, 'transform': 'None', 'lower': 0.9, 'upper': 0.99}, 'splitter': {'levels': ['EBSTSplitter', 'TEBSTSplitter', 'QOSplitter'], 'type': 'factor', 'default': 'EBSTSplitter', 'transform': 'None', 'class_name': 'river.tree.splitter', 'core_model_parameter_type': 'instance()', 'lower': 0, 'upper': 2}, 'min_samples_split': {'type': 'int', 'default': 5, 'transform': 'None', 'lower': 2, 'upper': 10}, 'binary_split': {'levels': [0, 1], 'type': 'factor', 'default': 0, 'transform': 'None', 'core_model_parameter_type': 'bool', 'lower': 0, 'upper': 1}, 'max_size': {'type': 'float', 'default': 500.0, 'transform': 'None', 'lower': 100.0, 'upper': 1000.0}, 'memory_estimate_period': {'type': 'int', 'default': 6, 'transform': 'transform_power_10_int', 'lower': 3, 'upper': 8}, 'stop_mem_management': {'levels': [0, 1], 'type': 'factor', 'default': 0, 'transform': 'None', 'core_model_parameter_type': 'bool', 'lower': 0, 'upper': 1}, 'remove_poor_attrs': {'levels': [0, 1], 'type': 'factor', 'default': 0, 'transform': 'None', 'core_model_parameter_type': 'bool', 'lower': 0, 'upper': 1}, 'merit_preprune': {'levels': [0, 1], 'type': 
'factor', 'default': 1, 'transform': 'None', 'core_model_parameter_type': 'bool', 'lower': 0, 'upper': 1}}\n"
4197-
]
4198-
}
4199-
],
4104+
"outputs": [],
42004105
"source": [
42014106
"fun_control = fun_control_init(\n",
42024107
" core_model_name=\"tree.HoeffdingTreeRegressor\",\n",
@@ -4213,50 +4118,9 @@
42134118
},
42144119
{
42154120
"cell_type": "code",
4216-
"execution_count": 9,
4121+
"execution_count": null,
42174122
"metadata": {},
4218-
"outputs": [
4219-
{
4220-
"name": "stderr",
4221-
"output_type": "stream",
4222-
"text": [
4223-
"Seed set to 123\n"
4224-
]
4225-
},
4226-
{
4227-
"name": "stdout",
4228-
"output_type": "stream",
4229-
"text": [
4230-
"Before modification:\n",
4231-
"| name | type | default | lower | upper | transform |\n",
4232-
"|-----------------|--------|-----------|---------|---------|-------------|\n",
4233-
"| n_estimators | int | 10 | 2 | 1000 | None |\n",
4234-
"| step | float | 1 | 0.1 | 10 | None |\n",
4235-
"| use_aggregation | factor | 1 | 0 | 1 | None |\n",
4236-
"Setting hyperparameter use_aggregation to value [0, 0].\n",
4237-
"Variable type is factor.\n",
4238-
"Core type is bool.\n",
4239-
"Calling modify_boolean_hyper_parameter_levels().\n",
4240-
"After modification:\n",
4241-
"| name | type | default | lower | upper | transform |\n",
4242-
"|-----------------|--------|-----------|---------|---------|-------------|\n",
4243-
"| n_estimators | int | 10 | 2 | 1000 | None |\n",
4244-
"| step | float | 1 | 0.1 | 10 | None |\n",
4245-
"| use_aggregation | factor | 1 | 0 | 0 | None |\n"
4246-
]
4247-
},
4248-
{
4249-
"ename": "",
4250-
"evalue": "",
4251-
"output_type": "error",
4252-
"traceback": [
4253-
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
4254-
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
4255-
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
4256-
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
4257-
]
4258-
}
4259-
],
4123+
"outputs": [],
42604124
"source": [
42614125
"from spotRiver.hyperdict.river_hyper_dict import RiverHyperDict\n",
42624126
"from spotPython.utils.init import fun_control_init\n",
@@ -4273,6 +4137,187 @@
42734137
"print(gen_design_table(fun_control))"
42744138
]
42754139
},
4140+
{
4141+
"cell_type": "markdown",
4142+
"metadata": {},
4143+
"source": [
4144+
"# Scaler"
4145+
]
4146+
},
4147+
{
4148+
"cell_type": "markdown",
4149+
"metadata": {},
4150+
"source": [
4151+
"## Dataset"
4152+
]
4153+
},
4154+
{
4155+
"cell_type": "code",
4156+
"execution_count": 19,
4157+
"metadata": {},
4158+
"outputs": [],
4159+
"source": [
4160+
"import torch\n",
4161+
"from torch.utils.data import Dataset\n",
4162+
"\n",
4163+
"class MyDataset(Dataset):\n",
4164+
" def __init__(self, data, labels):\n",
4165+
" self.data = data\n",
4166+
" self.labels = labels\n",
4167+
"\n",
4168+
" def __len__(self):\n",
4169+
" return len(self.data)\n",
4170+
"\n",
4171+
" def __getitem__(self, idx):\n",
4172+
" return self.data[idx], self.labels[idx]"
4173+
]
4174+
},
4175+
{
4176+
"cell_type": "markdown",
4177+
"metadata": {},
4178+
"source": [
4179+
"## DataModule"
4180+
]
4181+
},
4182+
{
4183+
"cell_type": "code",
4184+
"execution_count": 20,
4185+
"metadata": {},
4186+
"outputs": [],
4187+
"source": [
4188+
"import pytorch_lightning as pl\n",
4189+
"from sklearn.preprocessing import StandardScaler\n",
4190+
"from torch.utils.data import DataLoader, random_split\n",
4191+
"\n",
4192+
"class MyDataModule(pl.LightningDataModule):\n",
4193+
" def __init__(self, full_dataset, train_size=0.8, batch_size=32, num_workers=4):\n",
4194+
" super().__init__()\n",
4195+
" self.dataset = full_dataset\n",
4196+
" self.train_size = train_size\n",
4197+
" self.batch_size = batch_size\n",
4198+
" self.num_workers = num_workers\n",
4199+
" self.scaler = StandardScaler()\n",
4200+
"\n",
4201+
" def setup(self, stage=None):\n",
4202+
" # Split the dataset\n",
4203+
" train_len = int(len(self.dataset) * self.train_size)\n",
4204+
" val_len = len(self.dataset) - train_len\n",
4205+
" self.train_set, self.val_set = random_split(self.dataset, [train_len, val_len])\n",
4206+
" \n",
4207+
" # Fit scaler on training data\n",
4208+
" train_data = torch.stack([item[0] for item in self.train_set])\n",
4209+
" print(f\"train_data before scaling\\n: {train_data}\") \n",
4210+
" self.scaler.fit(train_data)\n",
4211+
" \n",
4212+
" # Transform training data\n",
4213+
" scaled_train_data = self.scaler.transform(train_data)\n",
4214+
" self.train_set = self._update_dataset(self.train_set, scaled_train_data)\n",
4215+
" print(f\"train_data after scaling\\n: {self.train_set}\") \n",
4216+
" \n",
4217+
" # Transform validation data\n",
4218+
" val_data = torch.stack([item[0] for item in self.val_set])\n",
4219+
" scaled_val_data = self.scaler.transform(val_data)\n",
4220+
" self.val_set = self._update_dataset(self.val_set, scaled_val_data)\n",
4221+
"\n",
4222+
" def _update_dataset(self, original_dataset, scaled_data):\n",
4223+
" updated_dataset = []\n",
4224+
" for i, (data, label) in enumerate(original_dataset):\n",
4225+
" updated_dataset.append((torch.tensor(scaled_data[i]), label))\n",
4226+
" return updated_dataset\n",
4227+
"\n",
4228+
" def train_dataloader(self):\n",
4229+
" return DataLoader(self.train_set, batch_size=self.batch_size, num_workers=self.num_workers, shuffle=True)\n",
4230+
"\n",
4231+
" def val_dataloader(self):\n",
4232+
" return DataLoader(self.val_set, batch_size=self.batch_size, num_workers=self.num_workers)\n",
4233+
"\n",
4234+
" def test_dataloader(self):\n",
4235+
" test_data = torch.stack([item[0] for item in self.test_set])\n",
4236+
" scaled_test_data = self.scaler.transform(test_data)\n",
4237+
" self.test_set = self._update_dataset(self.test_set, scaled_test_data)\n",
4238+
" return DataLoader(self.test_set, batch_size=self.batch_size, num_workers=self.num_workers)\n",
4239+
"\n",
4240+
" def prepare_data(self):\n",
4241+
" # Here you can download datasets if needed\n",
4242+
" pass"
4243+
]
4244+
},
4245+
{
4246+
"cell_type": "markdown",
4247+
"metadata": {},
4248+
"source": [
4249+
"## Example"
4250+
]
4251+
},
4252+
{
4253+
"cell_type": "code",
4254+
"execution_count": 21,
4255+
"metadata": {},
4256+
"outputs": [
4257+
{
4258+
"name": "stdout",
4259+
"output_type": "stream",
4260+
"text": [
4261+
"data: tensor([[0.1279, 0.1770, 0.1569],\n",
4262+
" [0.6378, 0.3699, 0.0971],\n",
4263+
" [0.1516, 0.7931, 0.8748],\n",
4264+
" [0.8640, 0.3450, 0.7994],\n",
4265+
" [0.1711, 0.5990, 0.5109],\n",
4266+
" [0.2568, 0.1260, 0.3945],\n",
4267+
" [0.9566, 0.3997, 0.4479],\n",
4268+
" [0.5616, 0.4342, 0.3842],\n",
4269+
" [0.9247, 0.1204, 0.4356],\n",
4270+
" [0.2621, 0.9219, 0.2392]])\n",
4271+
"labels: tensor([0., 1., 0., 1., 0., 1., 0., 1., 0., 1.])\n",
4272+
"train_data before scaling\n",
4273+
": tensor([[0.1279, 0.1770, 0.1569],\n",
4274+
" [0.1516, 0.7931, 0.8748],\n",
4275+
" [0.6378, 0.3699, 0.0971],\n",
4276+
" [0.5616, 0.4342, 0.3842],\n",
4277+
" [0.2621, 0.9219, 0.2392],\n",
4278+
" [0.1711, 0.5990, 0.5109],\n",
4279+
" [0.9566, 0.3997, 0.4479],\n",
4280+
" [0.2568, 0.1260, 0.3945]])\n",
4281+
"train_data after scaling\n",
4282+
": [(tensor([-0.9444, -1.1516, -1.0144], dtype=torch.float64), tensor(0.)), (tensor([-0.8594, 1.2085, 2.1341], dtype=torch.float64), tensor(0.)), (tensor([ 0.8881, -0.4127, -1.2768], dtype=torch.float64), tensor(1.)), (tensor([ 0.6144, -0.1661, -0.0176], dtype=torch.float64), tensor(1.)), (tensor([-0.4621, 1.7019, -0.6533], dtype=torch.float64), tensor(1.)), (tensor([-0.7891, 0.4651, 0.5384], dtype=torch.float64), tensor(0.)), (tensor([ 2.0336, -0.2985, 0.2620], dtype=torch.float64), tensor(0.)), (tensor([-0.4812, -1.3467, 0.0277], dtype=torch.float64), tensor(1.))]\n",
4283+
"Batch data shape: torch.Size([8, 3])\n",
4284+
"tensor([[-0.7891, 0.4651, 0.5384],\n",
4285+
" [ 0.8881, -0.4127, -1.2768],\n",
4286+
" [ 2.0336, -0.2985, 0.2620],\n",
4287+
" [-0.8594, 1.2085, 2.1341],\n",
4288+
" [-0.9444, -1.1516, -1.0144],\n",
4289+
" [-0.4812, -1.3467, 0.0277],\n",
4290+
" [ 0.6144, -0.1661, -0.0176],\n",
4291+
" [-0.4621, 1.7019, -0.6533]], dtype=torch.float64)\n",
4292+
"tensor([0., 1., 0., 0., 0., 1., 1., 1.])\n"
4293+
]
4294+
}
4295+
],
4296+
"source": [
4297+
"# generate n random samples with 3 features each, i.e. a tensor of shape (n, 3)\n",
4298+
"n = 10\n",
4299+
"data = torch.rand((n, 3))\n",
4300+
"print(f\"data: {data}\")\n",
4301+
"labels = torch.tensor([i % 2 for i in range(n)], dtype=torch.float32)\n",
4302+
"print(f\"labels: {labels}\")\n",
4303+
"full_dataset = MyDataset(data, labels)\n",
4304+
"\n",
4305+
"# Creating DataModule instance\n",
4306+
"data_module = MyDataModule(full_dataset)\n",
4307+
"\n",
4308+
"# Set up the data module\n",
4309+
"data_module.setup()\n",
4310+
"\n",
4311+
"# Example of fetching a single batch\n",
4312+
"train_loader = data_module.train_dataloader()\n",
4313+
"for batch in train_loader:\n",
4314+
" print(f\"Batch data shape: {batch[0].shape}\")\n",
4315+
" x, y = batch\n",
4316+
" print(x)\n",
4317+
" print(y)\n",
4318+
" break"
4319+
]
4320+
},
42764321
{
42774322
"cell_type": "code",
42784323
"execution_count": null,

0 commit comments

Comments
 (0)