fitting.py
from dataprocessing import generate_tasks
from meta import Meta
from tqdm import tqdm
import optuna as opt
import numpy as np
import argparse
import wandb
import torch
import os

def get_layer(in_features, out_features, dropout):
    """ Returns the configuration of one hidden block of the model:
    batch normalization, linear layer, dropout and LeakyReLU activation.

    Args:
        in_features (int): Number of input features
        out_features (int): Number of output features
        dropout (float): Dropout value for the layer

    Returns:
        list: Configuration of the block as (op_name, params) tuples
    """
    return [
        ('bn', [in_features]),
        ('linear', [out_features, in_features]),
        ('dropout', [dropout]),
        ('leakyrelu', [1e-2, False])
    ]
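
# For illustration (hypothetical arguments), get_layer(69, 32, 0.1) returns
#   [('bn', [69]), ('linear', [32, 69]), ('dropout', [0.1]), ('leakyrelu', [0.01, False])]
# i.e. the (op_name, params) list format that Meta presumably consumes when
# building the network.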

def get_model(hidden_layers, dropout):
    """ Generates the configuration of a model.

    Args:
        hidden_layers ([int]): Number of features per hidden layer
        dropout (float): Dropout value for the model

    Returns:
        list: Configuration to build the model
    """
    # Initialize model; the input size is fixed to the 69 features
    # of the processed datasets
    config = []
    in_features = 69
    # Populate model according to hidden layers
    for out_features in hidden_layers:
        config += get_layer(in_features, out_features, dropout)
        in_features = out_features
    # Add final layer to the model: a single sigmoid output for
    # binary signal/background classification
    config += [
        ('bn', [in_features]),
        ('linear', [1, in_features]),
        ('sigmoid', [])
    ]
    return config
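
# As a sketch, for a single hidden layer (hypothetical call) get_model([32], 0.1)
# is equivalent to:
#   get_layer(69, 32, 0.1) + [('bn', [32]), ('linear', [1, 32]), ('sigmoid', [])]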

def get_model_name(k_sup, k_que, dropout, hidden_layers):
    """ Computes the model name from its main parameters.

    Args:
        k_sup (int): Number of support samples per batch
        k_que (int): Number of query samples per batch
        dropout (float): Dropout value for the model
        hidden_layers ([int]): Number of features per hidden layer

    Returns:
        str: The name of the model
    """
    name = f"K{k_sup}Q{k_que}-HL{len(hidden_layers)}-D{dropout:.2f}"
    for features in hidden_layers:
        name += f"F{features}"
    return name
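
# For example (hypothetical parameters), get_model_name(100, 200, 0.1, [64, 32])
# returns "K100Q200-HL2-D0.10F64F32".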

def evaluate(model, tasks, val_samples, desc="Eval Test"):
    """ Evaluates a meta-model on a set of tasks.

    Args:
        model (Meta): The meta-model to evaluate.
        tasks (dict): The dictionary of tasks to use.
        val_samples (int): Number of samples to use per task for the evaluation.
        desc (str, optional): Description for the tqdm bar. Defaults to "Eval Test".

    Returns:
        (float, float, float): Accuracy, loss and ROC AUC of the model
    """
    accs, losses, rocs = [], [], []
    eval_bar = tqdm(tasks, desc=desc, total=len(tasks), leave=False)
    for task in eval_bar:
        # Create task bar and metric placeholders
        val_steps = val_samples // model.k_sup
        steps_bar = tqdm(range(val_steps), desc=f"Evaluating task {task}", total=val_steps, leave=False)
        task_accs, task_losses, task_rocs = [], [], []
        # Finetune the model on the task and collect loss, accuracy and ROC AUC
        for _ in steps_bar:
            loss, acc, roc = model.evaluate(tasks[task])
            task_accs.append(acc)
            task_losses.append(loss)
            task_rocs.append(roc)
        # Calculate the average metrics of the task
        loss = sum(task_losses) / len(task_losses)
        acc = sum(task_accs) / len(task_accs)
        roc = sum(task_rocs) / len(task_rocs)
        # Append the task averages to the global lists
        accs.append(acc)
        losses.append(loss)
        rocs.append(roc)
    # Calculate average metrics across tasks
    acc = np.array(accs).astype(np.float32).mean(axis=0)
    loss = np.array(losses).astype(np.float32).mean(axis=0)
    roc = np.array(rocs).astype(np.float32).mean(axis=0)
    return acc, loss, roc
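
# Illustrative use on the held-out test split (a sketch: test_tasks is
# hypothetical and would be built like train_tasks and val_tasks in the
# __main__ block below):
#   test_tasks = generate_tasks(test_signals, bkg_file, args.k_sup, args.k_que)
#   test_acc, test_loss, test_roc = evaluate(model, test_tasks, args.val_samples)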

def fit(model, train_tasks, val_tasks, args):
    """
    Fits the input model to the training tasks and evaluates it on the
    validation tasks, using few-shot meta-learning with the Meta-SGD algorithm.

    Args:
        model: The model to train
        train_tasks (dict): A dictionary of tasks to train on
        val_tasks (dict): A dictionary of tasks to validate on
        args (argparse.Namespace): Arguments that dictate the training process

    Returns:
        float: The best validation ROC AUC achieved during the training process
    """
    global current_best_model
    # Start the training
    metrics = {}
    best_val_roc = 0
    best_val_loss = float("inf")
    patience = args.patience
    epoch_bar = tqdm(range(args.epochs), desc=f"Training {model.name}", total=args.epochs, leave=False)
    for epoch in epoch_bar:
        # Create steps bar
        steps = args.epoch_samples // args.k_sup
        steps_bar = tqdm(range(steps), desc=f"Epoch {epoch}", total=steps, leave=False)
        # Perform a meta-training step on each batch of tasks
        tr_accs, tr_losses, tr_rocs = [], [], []
        for _ in steps_bar:
            tr_loss, tr_acc, tr_roc = model(train_tasks)
            tr_accs.append(tr_acc)
            tr_losses.append(tr_loss)
            tr_rocs.append(tr_roc)
        # Get mean training metrics
        tr_acc = np.array(tr_accs).astype(np.float32).mean(axis=0)
        tr_loss = np.array(tr_losses).astype(np.float32).mean(axis=0)
        tr_roc = np.array(tr_rocs).astype(np.float32).mean(axis=0)
        # Get validation metrics
        val_acc, val_loss, val_roc = evaluate(model, val_tasks, args.val_samples, "Eval Val")
        # Early stopping: reset patience whenever the validation loss improves
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience = args.patience
            # Save best model weights
            if args.save_models:
                model.save_params("models/" + model.name + ".pt")
        else:
            patience -= 1
        # Keep track of the model with the best validation ROC AUC
        if val_roc > best_val_roc:
            best_val_roc = val_roc
            current_best_model = model
        # Collect the metrics of the epoch
        metrics['tr_acc'] = tr_acc
        metrics['val_acc'] = val_acc
        metrics['tr_roc'] = tr_roc
        metrics['val_roc'] = val_roc
        metrics['tr_loss'] = tr_loss
        metrics['val_loss'] = val_loss
        metrics['patience'] = patience
        if args.log:
            wandb.log(metrics)
        # Update the tqdm bar and stop early once patience runs out
        epoch_bar.set_postfix(metrics)
        if patience == 0:
            break
    return best_val_roc

def objective(trial, train_tasks, val_tasks, args):
    """
    Optuna objective function; returns the trial's best validation ROC AUC,
    which the study maximizes.
    """
    global best_model, current_best_model
    # Manually seed torch and numpy for reproducible results
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    # Define the trial parameters
    num_layers = trial.suggest_int("num_hidden_layers", 1, 4)
    dropout = trial.suggest_float("dropout", 0.05, 0.2)
    hidden_layers = []
    for i in range(num_layers):
        num_features = trial.suggest_int(f"num_features_layer_{i}", 20, 150)
        hidden_layers.append(num_features)
    # Define model parameters
    name = get_model_name(args.k_sup, args.k_que, dropout, hidden_layers)
    config = get_model(hidden_layers, dropout)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # Create PyTorch model
    model = Meta(name, config, args.k_sup, args.k_que, device, args.meta_lr, args.lr_type, args.inner_lr).to(device)
    # Setup Weights and Biases logger, config hyperparams and watch model
    if args.log:
        wandb.init(name=name, project="Meta-HEP", config=args, reinit=True)
        wandb.watch(model)
    # Fit the model and keep its best validation ROC AUC
    roc = fit(model, train_tasks, val_tasks, args)
    # Update the best model if this trial improved on the ROC AUC
    if roc > best_model["roc"]:
        best_model["roc"] = roc
        best_model["model"] = current_best_model
    return roc
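
# As an illustration (hypothetical sampled values), a trial suggesting
# num_hidden_layers=2, dropout=0.12 and features [80, 45] would build
# get_model([80, 45], 0.12) and train a model named "K100Q200-HL2-D0.12F80F45"
# under the default k_sup=100 and k_que=200.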

if __name__ == "__main__":
    # Define parser parameters
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_trials", type=int, help="number of configurations to try before stopping the optuna search", default=50)
    parser.add_argument("--k_sup", type=int, help="number of data samples per support batch", default=100)
    parser.add_argument("--k_que", type=int, help="number of data samples per query batch", default=200)
    parser.add_argument("--epochs", type=int, help="maximum number of epochs", default=1000)
    parser.add_argument("--epoch_samples", type=int, help="number of training samples per epoch", default=5000)
    parser.add_argument("--val_samples", type=int, help="number of samples per validation", default=2000)
    parser.add_argument("--patience", type=int, help="number of epochs without validation improvement before stopping early", default=8)
    parser.add_argument("--meta_lr", type=float, help="outer-loop starting learning rate", default=1e-3)
    parser.add_argument("--inner_lr", type=float, help="inner-loop starting learning rate", default=1e-2)
    parser.add_argument("--lr_type", type=str, help="type of inner learning rate: \"scalar\", \"vector\" or \"matrix\"", default="vector")
    parser.add_argument("--seed", type=int, help="seed for reproducible results", default=42)
    parser.add_argument("--log", type=int, help="flag for enabling or disabling wandb logging", default=1)
    parser.add_argument("--save_models", type=int, help="flag for saving best models", default=0)
    args = parser.parse_args()
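
    # Example invocation (a sketch; the flag values are arbitrary):
    #   python fitting.py --num_trials 20 --epochs 500 --log 0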

    # Datapath and background file for the data files
    datapath = "processed-data/"
    bkg_file = datapath + "bkg.h5"
    # Signal files for each task split
    train_signals = ["hg3000_hq1000", "hg3000_hq1400", "wohg_hq1200"]
    val_signals = ["hg3000_hq1200", "wohg_hq1000"]
    test_signals = ["wohg_hq1400", "fcnc"]
    # Add datapath and extension to the files of each split
    train_signals = [datapath + p + ".h5" for p in train_signals]
    val_signals = [datapath + p + ".h5" for p in val_signals]
    test_signals = [datapath + p + ".h5" for p in test_signals]
    # Generate tasks from the signals
    train_tasks = generate_tasks(train_signals, bkg_file, args.k_sup, args.k_que)
    val_tasks = generate_tasks(val_signals, bkg_file, args.k_sup, args.k_que)
    # Make weights and biases silent
    if args.log:
        os.environ["WANDB_SILENT"] = "true"
    # Variables to hold the best model across trials
    best_model = {"roc": 0, "model": None}
    current_best_model = None
    # Make sure the output directory for model weights exists
    os.makedirs("models", exist_ok=True)
    # Define and perform the optuna study
    study_name = f"K{args.k_sup}Q{args.k_que} optimization"
    study = opt.create_study(study_name=study_name, storage='sqlite:///meta-model.db', load_if_exists=True, direction="maximize")
    optimize = lambda trial: objective(trial, train_tasks, val_tasks, args)
    study.optimize(optimize, n_trials=args.num_trials)
    # Save the weights of the model from the best trial
    if best_model["model"] is not None:
        filename = "models/" + best_model["model"].name + ".pt"
        best_model["model"].save_params(filename)