-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
128 lines (106 loc) · 4.46 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import torch
import pandas as pd
import numpy as np
import config
from tqdm import tqdm
import warnings
import torch.nn.functional as F
def make_prediction(model, loader, output_csv="submission.csv"):
    """Run inference over ``loader`` and write a submission CSV.

    The model is assumed to emit one continuous regression score per image
    (MSE-trained); scores are bucketed into integer severity levels 0-4 at
    the half-way thresholds 0.5 / 1.5 / 2.5 / 3.5.

    Args:
        model: trained torch module. Switched to ``eval()`` for the pass
            and back to ``train()`` afterwards.
        loader: iterable yielding ``(images, labels, filenames)`` batches.
        output_csv: path of the CSV written with columns "image", "level".
    """
    preds = []
    filenames = []
    model.eval()
    for x, y, files in tqdm(loader):
        x = x.to(config.DEVICE)
        with torch.no_grad():
            scores = model(x)
            # torch.bucketize with right=True maps score s to the number of
            # thresholds <= s, i.e. exactly the 0..4 classes intended here.
            # This also correctly sends ANY score >= 3.5 to class 4 — the
            # old hand-written masks left scores >= 1e7 unbucketed and
            # .long() then truncated them to huge bogus labels.
            thresholds = scores.new_tensor([0.5, 1.5, 2.5, 3.5])
            predictions = torch.bucketize(scores, thresholds, right=True)
            predictions = predictions.squeeze(1)  # (B, 1) -> (B,), already int64
        preds.append(predictions.cpu().numpy())
        filenames += files
    df = pd.DataFrame({"image": filenames, "level": np.concatenate(preds, axis=0)})
    df.to_csv(output_csv, index=False)
    model.train()
    print("Done with predictions")
def check_accuracy(loader, model, device="cuda"):
    """Evaluate ``model`` on ``loader`` and print exact-match accuracy.

    Continuous regression outputs are bucketed into integer classes 0-4
    (thresholds 0.5 / 1.5 / 2.5 / 3.5) before comparison with the labels.

    Args:
        loader: iterable yielding ``(images, labels, filenames)`` batches.
        model: torch module; put in ``eval()`` for the pass, ``train()`` after.
        device: device string the batches are moved to.

    Returns:
        ``(all_preds, all_labels)``: two int64 numpy arrays, one entry per
        evaluated sample, in loader order.
    """
    model.eval()
    all_preds, all_labels = [], []
    num_correct = 0
    num_samples = 0
    for x, y, filename in tqdm(loader):
        x = x.to(device=device)
        y = y.to(device=device)
        with torch.no_grad():
            scores = model(x)
            # Same bucketing as make_prediction.  right=True counts
            # thresholds <= score, so any score >= 3.5 maps to class 4 —
            # the old masks left scores >= 100 unbucketed, silently
            # corrupting both the accuracy count and the returned preds.
            thresholds = scores.new_tensor([0.5, 1.5, 2.5, 3.5])
            predictions = torch.bucketize(scores, thresholds, right=True).view(-1)
        y = y.view(-1)
        num_correct += (predictions == y).sum()
        num_samples += predictions.shape[0]
        # accumulate per-batch results for the caller (e.g. confusion matrix)
        all_preds.append(predictions.detach().cpu().numpy())
        all_labels.append(y.detach().cpu().numpy())
    print(
        f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
    )
    model.train()
    return np.concatenate(all_preds, axis=0, dtype=np.int64), np.concatenate(
        all_labels, axis=0, dtype=np.int64
    )
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    """Persist a training ``state`` dict (weights, optimizer, etc.) to disk.

    Thin wrapper around ``torch.save`` that announces the save on stdout.
    """
    print("=> Saving checkpoint")
    torch.save(state, filename)
def load_checkpoint(checkpoint, model, optimizer, lr):
    """Restore model weights from ``checkpoint`` and force the LR to ``lr``.

    Only ``checkpoint["state_dict"]`` is applied; restoring the optimizer
    state is deliberately disabled (kept commented below).  Every param
    group's learning rate is then overwritten with ``lr``.
    """
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    # optimizer.load_state_dict(checkpoint["optimizer"])
    # Overwrite the LR explicitly: otherwise the optimizer silently keeps
    # the checkpoint-era learning rate (a past source of long debugging).
    for group in optimizer.param_groups:
        group["lr"] = lr
def get_csv_for_blend(loader, model, output_csv_file):
    """Dump pooled CNN features plus raw predictions for consecutive image
    pairs into a CSV, as input for a second-stage blending model.

    Each batch is split into (even-index, odd-index) sample pairs —
    presumably the two images of the same subject; TODO(review): confirm
    loader ordering.  Requires ``shuffle=False`` and an even batch size so
    the pairs line up, hence the warning below.
    """
    warnings.warn("Important to have shuffle=False (and to ensure batch size is even size) when running get_csv_for_blend also set val_transforms to train_loader!")
    model.eval()
    files_even = []
    files_odd = []
    labels_even = []
    labels_odd = []
    feature_rows = []
    for images, y, image_files in tqdm(loader):
        images = images.to(config.DEVICE)
        with torch.no_grad():
            # Global-average-pool the backbone feature map to (B, C, 1, 1).
            pooled = F.adaptive_avg_pool2d(
                model.extract_features(images), output_size=1
            )
            # Group samples into pairs: (B, C, 1, 1) -> (B/2, 2, C).
            paired_feats = pooled.reshape(pooled.shape[0] // 2, 2, pooled.shape[1])
            # Pair the scalar predictions the same way: (B/2, 2, 1).
            paired_preds = model(images).reshape(images.shape[0] // 2, 2, 1)
            # One flat row per pair: both feature vectors + both predictions.
            combined = torch.cat([paired_feats, paired_preds], dim=2)
            feature_rows.append(
                combined.view(paired_preds.shape[0], -1).cpu().numpy()
            )
            files_even += image_files[::2]
            files_odd += image_files[1::2]
            labels_even.append(y[::2].cpu().numpy())
            labels_odd.append(y[1::2].cpu().numpy())
    all_features = np.concatenate(feature_rows, axis=0)
    df = pd.DataFrame(
        data=all_features,
        columns=[f"f_{idx}" for idx in range(all_features.shape[1])],
    )
    df["label_first"] = np.concatenate(labels_even, axis=0)
    df["label_second"] = np.concatenate(labels_odd, axis=0)
    df["file_first"] = files_even
    df["file_second"] = files_odd
    df.to_csv(output_csv_file, index=False)
    model.train()