Skip to content

Commit

Permalink
Fix .rank() method for multiple models (#615)
Browse files Browse the repository at this point in the history
The new Recommender.rank() function adds k as a required argument, which breaks some models whose own .rank() does not accept k in ranking evaluation (e.g., ComparER, EFM, LRPPM).

This commit updates .rank() for the mentioned models to support the top-k option.
  • Loading branch information
hieuddo committed Apr 24, 2024
1 parent cbdc8f2 commit ae7ba86
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 50 deletions.
50 changes: 31 additions & 19 deletions cornac/models/comparer/recom_comparer_obj.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -663,39 +663,51 @@ class ComparERObj(Recommender):
item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :])
return item_score

def rank(self, user_id, item_ids=None):
def rank(self, user_idx, item_indices=None, k=-1):
"""Rank all test items for a given user.
Parameters
----------
user_id: int, required
user_idx: int, required
The index of the user for whom to perform item ranking.
item_ids: 1d array, optional, default: None
item_indices: 1d array, optional, default: None
A list of candidate item indices to be ranked by the user.
If `None`, list of ranked known item indices and their scores will be returned
k: int, optional, default: -1
Cut-off length for recommendations, k=-1 will return ranked list of all items.
This is more important for ANN to know the limit to avoid exhaustive ranking.
Returns
-------
Tuple of `item_rank`, and `item_scores`. The order of values
in item_scores are corresponding to the order of their ids in item_ids
(ranked_items, item_scores): tuple
`ranked_items` contains item indices being ranked by their scores.
`item_scores` contains scores of items corresponding to index in `item_indices` input.
"""
X_ = self.U1[user_id, :].dot(self.V.T)
X_ = self.U1[user_idx, :].dot(self.V.T)
most_cared_aspects_indices = (-X_).argsort()[:self.num_most_cared_aspects]
most_cared_X_ = X_[most_cared_aspects_indices]
most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T)
explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale)
item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_id)

if item_ids is None:
item_scores = item_scores
item_rank = item_scores.argsort()[::-1]
else:
num_items = max(self.num_items, max(item_ids) + 1)
item_scores = np.ones(num_items) * np.min(item_scores)
item_scores[:self.num_items] = item_scores
item_rank = item_scores.argsort()[::-1]
item_rank = intersects(item_rank, item_ids, assume_unique=True)
item_scores = item_scores[item_ids]
return item_rank, item_scores
all_item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx)

# rank items based on their scores
item_indices = (
np.arange(self.num_items)
if item_indices is None
else np.asarray(item_indices)
)
item_scores = all_item_scores[item_indices]

if k != -1: # O(n + k log k), faster for small k which is usually the case
partitioned_idx = np.argpartition(item_scores, -k)
top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]

return ranked_items, item_scores
29 changes: 19 additions & 10 deletions cornac/models/comparer/recom_comparer_sub.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ class ComparERSub(MTER):

return correct, skipped, loss, bpr_loss

def rank(self, user_idx, item_indices=None):
def rank(self, user_idx, item_indices=None, k=-1):
if self.alpha > 0 and self.n_top_aspects > 0:
n_top_aspects = min(self.n_top_aspects, self.num_aspects)
ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx])
Expand All @@ -786,12 +786,21 @@ class ComparERSub(MTER):
all_item_scores[: self.num_items] = known_item_scores

# rank items based on their scores
if item_indices is None:
item_scores = all_item_scores[: self.num_items]
item_rank = item_scores.argsort()[::-1]
else:
item_scores = all_item_scores[item_indices]
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]

return item_rank, item_scores
return super().rank(user_idx, item_indices)
item_indices = (
np.arange(self.num_items)
if item_indices is None
else np.asarray(item_indices)
)
item_scores = all_item_scores[item_indices]

if k != -1: # O(n + k log k), faster for small k which is usually the case
partitioned_idx = np.argpartition(item_scores, -k)
top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]

return ranked_items, item_scores
return super().rank(user_idx, item_indices, k)
36 changes: 25 additions & 11 deletions cornac/models/efm/recom_efm.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ class EFM(Recommender):
item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :])
return item_score

def rank(self, user_idx, item_indices=None):
def rank(self, user_idx, item_indices=None, k=-1):
"""Rank all test items for a given user.
Parameters
Expand All @@ -480,10 +480,15 @@ class EFM(Recommender):
A list of candidate item indices to be ranked by the user.
If `None`, list of ranked known item indices and their scores will be returned
k: int, optional, default: -1
Cut-off length for recommendations, k=-1 will return ranked list of all items.
This is more important for ANN to know the limit to avoid exhaustive ranking.
Returns
-------
Tuple of `item_rank`, and `item_scores`. The order of values
in item_scores are corresponding to the order of their ids in item_ids
(ranked_items, item_scores): tuple
`ranked_items` contains item indices being ranked by their scores.
`item_scores` contains scores of items corresponding to index in `item_indices` input.
"""
X_ = self.U1[user_idx, :].dot(self.V.T)
Expand All @@ -504,11 +509,20 @@ class EFM(Recommender):
all_item_scores[: self.num_items] = known_item_scores

# rank items based on their scores
if item_indices is None:
item_scores = all_item_scores[: self.num_items]
item_rank = item_scores.argsort()[::-1]
else:
item_scores = all_item_scores[item_indices]
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]

return item_rank, item_scores
item_indices = (
np.arange(self.num_items)
if item_indices is None
else np.asarray(item_indices)
)
item_scores = all_item_scores[item_indices]

if k != -1: # O(n + k log k), faster for small k which is usually the case
partitioned_idx = np.argpartition(item_scores, -k)
top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]

return ranked_items, item_scores
29 changes: 19 additions & 10 deletions cornac/models/lrppm/recom_lrppm.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ class LRPPM(Recommender):
item_score = self.I[i_idx].dot(self.U[u_idx])
return item_score

def rank(self, user_idx, item_indices=None):
def rank(self, user_idx, item_indices=None, k=-1):
if self.alpha > 0 and self.num_top_aspects > 0:
n_items = self.num_items
num_top_aspects = min(self.num_top_aspects, self.num_aspects)
Expand All @@ -540,12 +540,21 @@ class LRPPM(Recommender):
all_item_scores[: self.num_items] = known_item_scores

# rank items based on their scores
if item_indices is None:
item_scores = all_item_scores[: self.num_items]
item_rank = item_scores.argsort()[::-1]
else:
item_scores = all_item_scores[item_indices]
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]

return item_rank, item_scores
return super().rank(user_idx, item_indices)
item_indices = (
np.arange(self.num_items)
if item_indices is None
else np.asarray(item_indices)
)
item_scores = all_item_scores[item_indices]

if k != -1: # O(n + k log k), faster for small k which is usually the case
partitioned_idx = np.argpartition(item_scores, -k)
top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]

return ranked_items, item_scores
return super().rank(user_idx, item_indices, k)

0 comments on commit ae7ba86

Please sign in to comment.