val or best_val

bartzbeielstein · bartzbeielstein · commit ff895819554c · 2023-05-07T18:07:13.000+02:00
diff --git a/notebooks/14_spot_ray_hpt_torch_cifar10.ipynb b/notebooks/14_spot_ray_hpt_torch_cifar10.ipynb
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -22,9 +22,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'12-torch_p040025_1min_5init_2023-05-07_18-06-33'"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "import pickle\n",
     "import socket\n",
@@ -57,16 +68,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "spotPython                                0.0.60\n",
+      "spotRiver                                 0.0.92\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
    "source": [
     "pip list | grep  \"spot[RiverPython]\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -77,7 +98,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -154,9 +175,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.0.0\n",
+      "MPS device:  mps\n"
+     ]
+    }
+   ],
    "source": [
     "print(torch.__version__)\n",
     "# Check that MPS is available\n",
@@ -183,7 +213,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -208,7 +238,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -229,17 +259,36 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Files already downloaded and verified\n",
+      "Files already downloaded and verified\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "((50000, 32, 32, 3), (10000, 32, 32, 3))"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "train, test = load_data()\n",
     "train.data.shape, test.data.shape"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -264,7 +313,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -307,7 +356,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -436,9 +485,49 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'l1': {'type': 'int',\n",
+       "  'default': 5,\n",
+       "  'transform': 'transform_power_2_int',\n",
+       "  'lower': 2,\n",
+       "  'upper': 9},\n",
+       " 'l2': {'type': 'int',\n",
+       "  'default': 5,\n",
+       "  'transform': 'transform_power_2_int',\n",
+       "  'lower': 2,\n",
+       "  'upper': 9},\n",
+       " 'lr': {'type': 'float',\n",
+       "  'default': 0.001,\n",
+       "  'transform': 'None',\n",
+       "  'lower': 1e-05,\n",
+       "  'upper': 0.01},\n",
+       " 'batch_size': {'type': 'int',\n",
+       "  'default': 4,\n",
+       "  'transform': 'transform_power_2_int',\n",
+       "  'lower': 1,\n",
+       "  'upper': 4},\n",
+       " 'epochs': {'type': 'int',\n",
+       "  'default': 3,\n",
+       "  'transform': 'transform_power_2_int',\n",
+       "  'lower': 1,\n",
+       "  'upper': 4},\n",
+       " 'k_folds': {'type': 'int',\n",
+       "  'default': 2,\n",
+       "  'transform': 'None',\n",
+       "  'lower': 0,\n",
+       "  'upper': 0}}"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "fun_control = modify_hyper_parameter_bounds(fun_control, \"k_folds\", bounds=[0, 0])\n",
     "fun_control[\"core_model_hyper_dict\"]"
@@ -462,7 +551,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -501,7 +590,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -552,7 +641,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -567,9 +656,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "| name       | type   |   default |   lower |   upper | transform             |\n",
+      "|------------|--------|-----------|---------|---------|-----------------------|\n",
+      "| l1         | int    |     5     |   2     |    9    | transform_power_2_int |\n",
+      "| l2         | int    |     5     |   2     |    9    | transform_power_2_int |\n",
+      "| lr         | float  |     0.001 |   1e-05 |    0.01 | None                  |\n",
+      "| batch_size | int    |     4     |   1     |    4    | transform_power_2_int |\n",
+      "| epochs     | int    |     3     |   1     |    4    | transform_power_2_int |\n",
+      "| k_folds    | int    |     2     |   0     |    0    | None                  |\n"
+     ]
+    }
+   ],
    "source": [
     "print(gen_design_table(fun_control))"
    ]
@@ -587,9 +691,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[5.e+00, 5.e+00, 1.e-03, 4.e+00, 3.e+00, 2.e+00]])"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "from spotPython.hyperparameters.values import get_default_hyperparameters_as_array\n",
     "hyper_dict=TorchHyperDict().load()\n",
@@ -599,9 +714,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch: 1, Batch:  1000. Batch Size: 8. Training Loss: 2.061\n",
+      "Epoch: 1, Batch:  2000. Batch Size: 8. Training Loss: 0.885\n",
+      "Epoch: 1, Batch:  3000. Batch Size: 8. Training Loss: 0.549\n"
+     ]
+    }
+   ],
    "source": [
     "spot_torch = spot.Spot(fun=fun,\n",
     "                   lower = lower,\n",
diff --git a/src/spotPython/torch/netcore.py b/src/spotPython/torch/netcore.py
@@ -88,19 +88,20 @@ def evaluate_hold_out(self, dataset, shuffle, test_dataset=None):
             device = getDevice()
             self.to(device)
             criterion = nn.CrossEntropyLoss()
-            optimizer = optim.Adam(self.parameters(), lr=lr)
+            # TODO: optimizer = optim.Adam(self.parameters(), lr=lr)
+            optimizer = optim.SGD(self.parameters(), lr=lr, momentum=0.9)
             if test_dataset is None:
                 trainloader, valloader = self.create_train_val_data_loaders(dataset, shuffle)
             else:
                 trainloader, valloader = self.create_train_test_data_loaders(dataset, shuffle, test_dataset)
-            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
+            # TODO: scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
             # Early stopping parameters
             patience = 5
             best_val_loss = float("inf")
             counter = 0
             for epoch in range(epochs):
                 self.train_hold_out(trainloader, criterion, optimizer, device=device, epoch=epoch)
-                scheduler.step()
+                # TODO: scheduler.step()
                 # Early stopping check
                 val_accuracy, val_loss = self.validate_hold_out(valloader=valloader, criterion=criterion, device=device)
                 if val_loss < best_val_loss:
@@ -111,13 +112,13 @@ def evaluate_hold_out(self, dataset, shuffle, test_dataset=None):
                     if counter >= patience:
                         print(f"Early stopping at epoch {epoch}")
                         break
-            df_eval = best_val_loss
+            df_eval = val_loss
             df_preds = np.nan
         except Exception as err:
             print(f"Error in Net_Core. Call to evaluate_hold_out() failed. {err=}, {type(err)=}")
             df_eval = np.nan
             df_preds = np.nan
-        print(f"Returned to Spot: Best validation loss: {df_eval}")
+        print(f"Returned to Spot: Validation loss: {df_eval}")
         return df_eval, df_preds
 
     def create_train_val_data_loaders(self, dataset, shuffle):