-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathprepare_training_data.py
62 lines (48 loc) · 1.75 KB
/
prepare_training_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import clip
import torch
import pathlib
import numpy as np
from PIL import Image
from tqdm import tqdm
import pandas as pd
def normalized(a, axis=-1, order=2):
    """Scale ``a`` so that its vectors along ``axis`` have unit ``order``-norm.

    Vectors whose norm is exactly zero are divided by 1 instead, so they are
    returned unchanged rather than producing a division by zero.

    Args:
        a: Array-like input.
        axis: Axis along which the norm is computed (default: last axis).
        order: Norm order forwarded to ``np.linalg.norm`` (default: 2).

    Returns:
        ``a`` divided by its norms, broadcast back along ``axis``.
    """
    norms = np.atleast_1d(np.linalg.norm(a, ord=order, axis=axis))
    # Swap zero norms for 1 so all-zero vectors pass through untouched.
    zero_mask = norms == 0
    norms[zero_mask] = 1
    divisor = np.expand_dims(norms, axis)
    return a / divisor
def prepare_training_data(root_folder, database_file, train_from, clip_model="ViT-L/14"):
    """Encode the rated images listed in a CSV database and save x/y arrays.

    The CSV at ``root_folder / database_file`` is read with pandas. Rows whose
    ``train_from`` column is 0, or whose rating is below 1, are ignored. Every
    remaining image is opened from the row's ``path`` column (used as-is, not
    resolved against ``root_folder``), encoded with the model returned by
    ``clip.load`` and normalised with ``normalized``. Images that cannot be
    opened or preprocessed are reported and skipped.

    Two files are written into ``root_folder``:

    * ``{prefix}_x_{model}_ebeddings.npy``: the stacked embeddings, one row
      per kept image.
    * ``{prefix}_y_{train_from}.npy``: the matching ratings, one per row.

    ``prefix`` is the part of ``database_file`` before its first ``"."`` and
    ``model`` is ``clip_model`` lower-cased with ``"/"`` removed.

    Args:
        root_folder: Directory holding the CSV; also the output directory.
        database_file: File name of the CSV database inside ``root_folder``.
        train_from: Rating column to train from, ``"label"`` or ``"score"``.
        clip_model: Model name passed to ``clip.load``.

    Returns:
        None. The results are the two ``.npy`` files.

    Raises:
        ValueError: If ``train_from`` is not ``"label"`` or ``"score"``, or if
            no image could be embedded.
        KeyError: If the CSV lacks the ``train_from`` or ``path`` column.
    """
    # Fail fast: an unknown mode previously left `df` unbound and crashed
    # later with an unrelated-looking error.
    if train_from not in ("label", "score"):
        raise ValueError(
            f"train_from must be 'label' or 'score', got {train_from!r}"
        )

    prefix = database_file.split(".")[0]
    root = pathlib.Path(root_folder)
    database = pd.read_csv(root / database_file)
    # A rating of 0 is treated as "not rated" and dropped before encoding.
    df = database[database[train_from] != 0].reset_index(drop=True)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = clip.load(clip_model, device=device)

    x = []
    y = []
    for _, row in tqdm(df.iterrows(), total=df.shape[0]):
        average_rating = float(row[train_from])
        if average_rating < 1:
            continue

        image_path = row["path"]
        try:
            # The context manager closes the file handle once the image has
            # been turned into a tensor.
            with Image.open(image_path) as img:
                image = preprocess(img).unsqueeze(0).to(device)
        except Exception as exc:
            # Best effort: one unreadable image must not abort the whole run,
            # but say so instead of dropping it silently.
            tqdm.write(f"Skipping {image_path}: {exc}")
            continue

        with torch.no_grad():
            image_features = model.encode_image(image)
        im_emb_arr = image_features.cpu().detach().numpy()
        x.append(normalized(im_emb_arr))
        y.append([average_rating])

    if not x:
        raise ValueError(
            f"no usable images found in {root / database_file} "
            f"for train_from={train_from!r}"
        )

    x = np.vstack(x)
    y = np.vstack(y)

    # NOTE: "ebeddings" (sic) is kept on purpose; other scripts may load the
    # file by this exact name.
    x_out = f"{prefix}_x_{clip_model.replace('/', '').lower()}_ebeddings.npy"
    y_out = f"{prefix}_y_{train_from}.npy"
    np.save(root / x_out, x)
    np.save(root / y_out, y)