Skip to content

Commit 00e5db0

Browse files
v0.0.62
1 parent ff89581 commit 00e5db0

9 files changed

Lines changed: 126 additions & 2414 deletions

File tree

notebooks/12_spot_hpt_torch_cifar10.ipynb

Lines changed: 8 additions & 2197 deletions
Large diffs are not rendered by default.

notebooks/14_spot_ray_hpt_torch_cifar10.ipynb

Lines changed: 67 additions & 187 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotPython"
10-
version = "0.0.60"
10+
version = "0.0.62"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotPython/data/torch_hyper_dict.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
"type": "int",
3030
"default": 3,
3131
"transform": "transform_power_2_int",
32-
"lower": 1,
32+
"lower": 3,
3333
"upper": 4},
3434
"k_folds": {
3535
"type": "int",
@@ -68,7 +68,7 @@
6868
"type": "int",
6969
"default": 3,
7070
"transform": "transform_power_2_int",
71-
"lower": 1,
71+
"lower": 3,
7272
"upper": 4},
7373
"k_folds": {
7474
"type": "int",

src/spotPython/fun/hypertorch.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,16 +73,23 @@ def fun_torch(self, X, fun_control=None):
7373
model = self.fun_control["core_model"](**config)
7474
try:
7575
if self.fun_control["eval"] == "train_cv":
76-
df_eval, _ = model.evaluate_cv(dataset=fun_control["train"], shuffle=self.fun_control["shuffle"])
76+
df_eval, _ = model.evaluate_cv(
77+
dataset=fun_control["train"],
78+
shuffle=self.fun_control["shuffle"],
79+
device=self.fun_control["device"],
80+
)
7781
elif self.fun_control["eval"] == "test_hold_out":
7882
df_eval, _ = model.evaluate_hold_out(
7983
dataset=fun_control["train"],
8084
shuffle=self.fun_control["shuffle"],
8185
test_dataset=fun_control["test"],
86+
device=self.fun_control["device"],
8287
)
8388
else: # eval == "train_hold_out"
8489
df_eval, _ = model.evaluate_hold_out(
85-
dataset=fun_control["train"], shuffle=self.fun_control["shuffle"]
90+
dataset=fun_control["train"],
91+
shuffle=self.fun_control["shuffle"],
92+
device=self.fun_control["device"],
8693
)
8794
except Exception as err:
8895
print(f"Error in fun_torch(). Call to evaluate_model failed. {err=}, {type(err)=}")

src/spotPython/spot/spot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ def plot_progress(
536536
ax = fig.add_subplot(211)
537537
ax.plot(
538538
range(1, n_init + 1),
539-
s_c[:n_init],
539+
s_y[:n_init],
540540
style[0],
541541
range(1, n_init + 1),
542542
[s_c[:n_init].min()] * n_init,

src/spotPython/torch/netcore.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ def validate_fold(self, valloader, criterion, device):
5252
val_steps += 1
5353
return 100.0 * (correct / total)
5454

55-
def evaluate_cv(self, dataset, shuffle=False):
55+
def evaluate_cv(self, dataset, shuffle=False, num_workers=0, device=None):
5656
try:
57-
device = getDevice()
57+
device = getDevice(device=device)
5858
# if torch.cuda.device_count() > 1:
5959
# self = nn.DataParallel(self)
6060
self.to(device)
@@ -65,13 +65,15 @@ def evaluate_cv(self, dataset, shuffle=False):
6565
train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
6666
val_subsampler = torch.utils.data.SubsetRandomSampler(val_ids)
6767
trainloader = torch.utils.data.DataLoader(
68-
dataset, batch_size=self.batch_size, sampler=train_subsampler, num_workers=4
68+
dataset, batch_size=self.batch_size, sampler=train_subsampler, num_workers=num_workers
6969
)
7070
valloader = torch.utils.data.DataLoader(
71-
dataset, batch_size=self.batch_size, sampler=val_subsampler, num_workers=4
71+
dataset, batch_size=self.batch_size, sampler=val_subsampler, num_workers=num_workers
7272
)
7373
self.reset_weights()
74+
# Train fold for several epochs:
7475
self.train_fold(trainloader, criterion, optimizer, device)
76+
# Validate fold:
7577
self.results[fold] = self.validate_fold(valloader, criterion, device)
7678
df_eval = sum(self.results.values()) / len(self.results.values())
7779
df_preds = np.nan
@@ -81,11 +83,11 @@ def evaluate_cv(self, dataset, shuffle=False):
8183
df_preds = np.nan
8284
return df_eval, df_preds
8385

84-
def evaluate_hold_out(self, dataset, shuffle, test_dataset=None):
86+
def evaluate_hold_out(self, dataset, shuffle, test_dataset=None, device=None):
8587
lr = self.lr
8688
epochs = self.epochs
8789
try:
88-
device = getDevice()
90+
device = getDevice(device=device)
8991
self.to(device)
9092
criterion = nn.CrossEntropyLoss()
9193
# TODO: optimizer = optim.Adam(self.parameters(), lr=lr)
@@ -99,10 +101,14 @@ def evaluate_hold_out(self, dataset, shuffle, test_dataset=None):
99101
patience = 5
100102
best_val_loss = float("inf")
101103
counter = 0
104+
# We only have "one fold" which is trained for several epochs
105+
# (we do not have to reset the weights for each fold):
102106
for epoch in range(epochs):
103-
self.train_hold_out(trainloader, criterion, optimizer, device=device, epoch=epoch)
107+
print(f"Epoch: {epoch + 1}")
108+
# training loss from one epoch:
109+
_ = self.train_hold_out(trainloader, criterion, optimizer, device=device)
104110
# TODO: scheduler.step()
105-
# Early stopping check
111+
# Early stopping check. Calculate validation loss from one epoch:
106112
val_accuracy, val_loss = self.validate_hold_out(valloader=valloader, criterion=criterion, device=device)
107113
if val_loss < best_val_loss:
108114
best_val_loss = val_loss
@@ -119,29 +125,30 @@ def evaluate_hold_out(self, dataset, shuffle, test_dataset=None):
119125
df_eval = np.nan
120126
df_preds = np.nan
121127
print(f"Returned to Spot: Validation loss: {df_eval}")
128+
print("----------------------------------------------")
122129
return df_eval, df_preds
123130

124-
def create_train_val_data_loaders(self, dataset, shuffle):
131+
def create_train_val_data_loaders(self, dataset, shuffle, num_workers=0):
125132
test_abs = int(len(dataset) * 0.6)
126133
train_subset, val_subset = random_split(dataset, [test_abs, len(dataset) - test_abs])
127134
trainloader = torch.utils.data.DataLoader(
128-
train_subset, batch_size=int(self.batch_size), shuffle=shuffle, num_workers=8, pin_memory=True
135+
train_subset, batch_size=int(self.batch_size), shuffle=shuffle, num_workers=num_workers
129136
)
130137
valloader = torch.utils.data.DataLoader(
131-
val_subset, batch_size=int(self.batch_size), shuffle=shuffle, num_workers=8, pin_memory=True
138+
val_subset, batch_size=int(self.batch_size), shuffle=shuffle, num_workers=num_workers
132139
)
133140
return trainloader, valloader
134141

135-
def create_train_test_data_loaders(self, dataset, shuffle, test_dataset):
142+
def create_train_test_data_loaders(self, dataset, shuffle, test_dataset, num_workers=0):
136143
trainloader = torch.utils.data.DataLoader(
137-
dataset, batch_size=int(self.batch_size), shuffle=shuffle, num_workers=8, pin_memory=True
144+
dataset, batch_size=int(self.batch_size), shuffle=shuffle, num_workers=num_workers
138145
)
139146
testloader = torch.utils.data.DataLoader(
140-
test_dataset, batch_size=int(self.batch_size), shuffle=shuffle, num_workers=8, pin_memory=True
147+
test_dataset, batch_size=int(self.batch_size), shuffle=shuffle, num_workers=num_workers
141148
)
142149
return trainloader, testloader
143150

144-
def train_hold_out(self, trainloader, criterion, optimizer, device, epoch):
151+
def train_hold_out(self, trainloader, criterion, optimizer, device):
145152
running_loss = 0.0
146153
epoch_steps = 0
147154
for i, data in enumerate(trainloader, 0):
@@ -158,10 +165,11 @@ def train_hold_out(self, trainloader, criterion, optimizer, device, epoch):
158165
epoch_steps += 1
159166
if i % 1000 == 999: # print every 1000 mini-batches
160167
print(
161-
"Epoch: %d, Batch: %5d. Batch Size: %d. Training Loss: %.3f"
162-
% (epoch + 1, i + 1, int(self.batch_size), running_loss / epoch_steps)
168+
"Batch: %5d. Batch Size: %d. Training Loss (running): %.3f"
169+
% (i + 1, int(self.batch_size), running_loss / epoch_steps)
163170
)
164171
running_loss = 0.0
172+
return loss.item()
165173

166174
def validate_hold_out(self, valloader, criterion, device):
167175
val_loss = 0.0

src/spotPython/utils/device.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,22 @@
11
import torch
22

33

4-
def getDevice():
4+
def getDevice(device=None):
55
"""Get cpu, gpu or mps device for training.
6+
Args:
7+
device (str): Device for training. If None,
8+
the device is selected automatically.
69
Returns:
710
device (str): Device for training.
811
Example:
912
>>> from spotPython.utils.device import getDevice
1013
>>> getDevice()
1114
'cuda:0'
1215
"""
13-
device = "cpu"
14-
if torch.cuda.is_available():
15-
device = "cuda:0"
16-
elif torch.backends.mps.is_available():
17-
device = "mps"
16+
if device is None:
17+
device = "cpu"
18+
if torch.cuda.is_available():
19+
device = "cuda:0"
20+
elif torch.backends.mps.is_available():
21+
device = "mps"
1822
return device

src/spotPython/utils/init.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ def fun_control_init():
1212
'n_samples': None,
1313
'target_column': None,
1414
'shuffle': None,
15-
'k_folds': None,}
15+
'k_folds': None,
16+
'device': None}
1617
"""
1718
fun_control = {
1819
"data": None,
@@ -23,5 +24,6 @@ def fun_control_init():
2324
"shuffle": None,
2425
"eval": None,
2526
"k_folds": None,
27+
"device": None,
2628
}
2729
return fun_control

0 commit comments

Comments
 (0)