-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.py
76 lines (61 loc) · 2.67 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import sys
import torch
import metrics
import haversine
from loguru import logger
from collections import defaultdict
from torch.utils.tensorboard import SummaryWriter
def print_progressbar(current, total, width=80):
    """Render an in-place download progress line on stdout.

    Parameters
    ----------
    current : int
        Bytes downloaded so far.
    total : int
        Total bytes expected. A value of 0 is reported as 0 % instead of
        raising ZeroDivisionError (the original crashed on total == 0).
    width : int
        Unused; kept for backward compatibility with existing callers.
    """
    # Guard: total == 0 (empty/unknown download size) must not divide by zero.
    # int() truncates toward zero exactly like the %d conversion did.
    percent = int(current * 100 / total) if total else 0
    progress_message = "Downloading: %d%% [%d / %d] bytes" % (percent, current, total)
    # \r rewrites the same terminal line instead of printing a new one.
    sys.stdout.write("\r" + progress_message)
    sys.stdout.flush()
class TensorboardWriter(SummaryWriter):
    """SummaryWriter that auto-increments a per-tag global step.

    When ``add_scalar`` is called without an explicit ``global_step``, the
    writer uses (and advances) its own counter for that tag, so callers can
    log scalars without tracking step numbers themselves.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-tag auto-increment step counters, starting at 0.
        self.n_iter = defaultdict(int)

    def add_scalar(self, tag, scalar_value, global_step=None, walltime=None):
        # BUG FIX: the original tested `if not global_step`, which also fired
        # for an explicit global_step of 0 and silently replaced it with the
        # internal counter. Substitute the counter only when the caller
        # passed nothing at all.
        if global_step is None:
            global_step = self.n_iter[tag]
            self.n_iter[tag] += 1
        super().add_scalar(tag, scalar_value, global_step, walltime)
def eval_als_model(model, user_item_data, gowalla_test):
    """Build a per-iteration evaluation callback for an ALS model.

    Returns a callable ``inner(iteration, elapsed)`` suitable as a fit
    callback: it recommends items for every user in ``gowalla_test``,
    compares them against that user's held-out interactions, and logs every
    metric in ``metrics.metric_dict`` at each cutoff listed in
    ``config['METRICS_REPORT']``.
    """
    from config import config

    def inner(iteration, elapsed):
        cutoffs = config['METRICS_REPORT']
        top_n = max(cutoffs)
        # Ground-truth interactions per user, taken from the test split.
        truth_by_user = gowalla_test.groupby('userId')['loc_id'].apply(list).to_dict()
        recommended, actual = [], []
        for user_id in gowalla_test['userId'].unique():
            # model.recommend yields (item, score) pairs; keep the items only.
            items = [pair[0] for pair in model.recommend(user_id, user_item_data, top_n)]
            recommended.append(items)
            actual.append(truth_by_user[user_id])
        logger.info(f'{iteration} iteration:')
        # Pad the "<name>@<k>" labels so the logged "= value" columns align.
        width = max(len(name) for name in metrics.metric_dict) + max(
            len(str(k)) for k in cutoffs)
        for name, func in metrics.metric_dict.items():
            for k in cutoffs:
                label = f'{name}@{k}'
                value = func(recommended, actual, k).mean()
                logger.info(f'{label: >{width + 1}} = {value}')
    return inner
def calc_nearest(df):
    """Build a k-nearest-locations lookup from a locations dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'loc_id', 'lat' and 'long' columns.

    Returns
    -------
    inner(item_id, k=20) -> list of the k loc_ids closest to ``item_id`` by
    great-circle distance. The list includes ``item_id`` itself (distance 0),
    matching the original behavior.
    """
    df = df.set_index('loc_id')
    item_lat = df['lat'].to_dict()
    item_long = df['long'].to_dict()
    # BUG FIX: haversine.haversine expects (latitude, longitude) tuples, but
    # the original built (long, lat) pairs. The haversine formula is not
    # symmetric under that swap, so the computed distances — and thus the
    # nearest-neighbor ranking — were wrong.
    locations = {item: (item_lat[item], item_long[item]) for item in item_lat}

    def inner(item_id, k=20):
        loc = locations[item_id]
        distances = [
            (item, haversine.haversine(loc, location))
            for item, location in locations.items()]
        distances.sort(key=lambda pair: pair[1])
        return [item for item, _ in distances[:k]]
    return inner
def collate_function(batch):
    """Collate (user, positive items, negative items) triples into tensors.

    Each user id is repeated once per positive item; positive and negative
    item lists are flattened across the batch. Returns a list of three
    1-D LongTensors: [users, pos_items, neg_items].
    """
    users, pos_items, neg_items = [], [], []
    for user, pos, neg in batch:
        # One copy of the user id for every positive interaction.
        users += [user] * len(pos)
        pos_items += pos
        neg_items += neg
    return [torch.tensor(seq) for seq in (users, pos_items, neg_items)]