-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy path5-train_layer_3.py
More file actions
57 lines (49 loc) · 2.1 KB
/
5-train_layer_3.py
File metadata and controls
57 lines (49 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Number of consecutive rows taken from the training tensor for each index
# listed in a group (used as the slice length in the training loop).
NOTES_TRAIN = 150
# Number of consecutive rows taken from the validation tensor for each index
# listed in a group (used as the slice length in the training loop).
NOTES_VALIDATE = 5
# Passed to LGBMClassifier as n_estimators.
NUM_TREES = 200
# NOTE(review): not referenced anywhere in the visible script; presumably the
# total number of users in the dataset -- confirm before relying on it.
NUM_USERS = 55540
print("Importing Libraries...")
import time
import torch
import numpy as np
from lightgbm import LGBMClassifier, log_evaluation
from joblib import dump
print("Importing Data...")
# The loaded objects are later sliced by row and concatenated with torch.cat.
# NOTE: torch.load unpickles its input -- only point it at trusted files.
trainData = torch.load('./data/train.pt')
validateData = torch.load('./data/validate.pt')

# groups.txt holds one group per line, each a comma-separated list of integer
# indices. Read it through a context manager so the handle is always closed,
# and skip blank lines so a trailing newline does not end up as int('').
with open("./data/groups.txt") as groupsFile:
    groups = [
        [int(idx) for idx in line.split(",")]
        for line in groupsFile.read().split("\n")
        if line.strip()
    ]
def getClassifyData(data):
    """Split a 2-D array-like into a feature matrix and a label vector.

    Column 0 of ``data`` is treated as the labels; every remaining column
    is treated as a feature.

    Returns:
        A ``(features, labels)`` tuple, where ``features`` is ``data[:, 1:]``
        and ``labels`` is ``data[:, 0]``.
    """
    labels, features = data[:, 0], data[:, 1:]
    return features, labels
import os

# Create the output directories up front so a long training run cannot fail
# at the very end when the model or the timing file is written.
os.makedirs('./models/layer3', exist_ok=True)
os.makedirs('./stats/training/layer3', exist_ok=True)

# Train one classifier per group. Progress messages are 1-based, while the
# saved model and stats files are numbered from 0.
numGroups = len(groups)
for groupIdx, group in enumerate(groups):
    progress = f"{groupIdx + 1}/{numGroups}"
    print(f"Starting Round {progress}...")

    print("Selecting Data...")
    # Each index in the group selects a contiguous run of NOTES_TRAIN rows in
    # trainData and NOTES_VALIDATE rows in validateData. The stride uses the
    # same constants as the slice length so the two can never drift apart.
    trainFrame = [
        trainData[NOTES_TRAIN * idx:NOTES_TRAIN * (idx + 1)] for idx in group
    ]
    validateFrame = [
        validateData[NOTES_VALIDATE * idx:NOTES_VALIDATE * (idx + 1)]
        for idx in group
    ]

    print("Processing Data...")
    trainX, trainY = getClassifyData(torch.cat(trainFrame))
    validateX, validateY = getClassifyData(torch.cat(validateFrame))

    print(f"Training Model {progress}...")
    # A fresh classifier is built for every group; hyper-parameters are fixed.
    clf = LGBMClassifier(
        boosting_type='goss',
        colsample_bytree=0.6933333333333332,
        learning_rate=0.1,
        max_bin=63,
        max_depth=-1,
        min_child_weight=7,
        min_data_in_leaf=20,
        min_split_gain=0.9473684210526315,
        n_estimators=NUM_TREES,
        num_leaves=33,
        reg_alpha=0.7894736842105263,
        reg_lambda=0.894736842105263,
        subsample=1,
        n_jobs=16,
        objective='multiclassova',
        device_type='gpu',
    )

    # perf_counter is monotonic, so the measured duration is not affected by
    # system clock adjustments during a long fit.
    startTime = time.perf_counter()
    # NOTE(review): validateY is passed without the .long() cast that is
    # applied to trainY -- confirm the differing label dtypes are intended.
    clf.fit(
        trainX,
        trainY.long(),
        eval_set=[(validateX, validateY)],
        eval_metric='multi_error',
        callbacks=[log_evaluation()],
    )
    elapsedSeconds = time.perf_counter() - startTime
    print(f"Training Finished in {elapsedSeconds / 60} Minutes")

    print(f"Saving Model {progress}...")
    dump(clf, './models/layer3/model' + str(groupIdx) + '.pkl')
    # Record the raw training time in seconds; the context manager closes the
    # file even if the write fails.
    with open("./stats/training/layer3/" + str(groupIdx) + ".txt", "w") as statsFile:
        statsFile.write(str(elapsedSeconds))