Fix .rank() method for multiple models (#615)

The new Recommender.rank() function adds k as a required value, which breaks some models that do not use k in ranking evaluation (e.g., ComparER, EFM, LRPPM). This commit updates .rank() for the mentioned models with a top-K option.
PreferredAI · Apr 24, 2024 · ae7ba86 · ae7ba86
1 parent cbdc8f2
commit ae7ba86
Show file tree

Hide file tree

Showing 4 changed files with 94 additions and 50 deletions.
diff --git a/cornac/models/comparer/recom_comparer_obj.pyx b/cornac/models/comparer/recom_comparer_obj.pyx
@@ -663,39 +663,51 @@ class ComparERObj(Recommender):
  item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :])
  return item_score
 
- def rank(self, user_id, item_ids=None):
+ def rank(self, user_idx, item_indices=None, k=-1):
  """Rank all test items for a given user.
 
  Parameters
  ----------
- user_id: int, required
+ user_idx: int, required
  The index of the user for whom to perform item ranking.
 
- item_ids: 1d array, optional, default: None
+ item_indices: 1d array, optional, default: None
  A list of candidate item indices to be ranked by the user.
  If `None`, list of ranked known item indices and their scores will be returned
 
+ k: int, optional, default: -1
+ Cut-off length for recommendations, k=-1 will return ranked list of all items.
+ This is more important for ANN to know the limit to avoid exhaustive ranking.
+
  Returns
  -------
- Tuple of `item_rank`, and `item_scores`. The order of values
- in item_scores are corresponding to the order of their ids in item_ids
+ (ranked_items, item_scores): tuple
+ `ranked_items` contains item indices being ranked by their scores.
+ `item_scores` contains scores of items corresponding to the indices in the `item_indices` input.
 
  """
- X_ = self.U1[user_id, :].dot(self.V.T)
+ X_ = self.U1[user_idx, :].dot(self.V.T)
  most_cared_aspects_indices = (-X_).argsort()[:self.num_most_cared_aspects]
  most_cared_X_ = X_[most_cared_aspects_indices]
  most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T)
  explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale)
- item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_id)
-
- if item_ids is None:
- item_scores = item_scores
- item_rank = item_scores.argsort()[::-1]
- else:
- num_items = max(self.num_items, max(item_ids) + 1)
- item_scores = np.ones(num_items) * np.min(item_scores)
- item_scores[:self.num_items] = item_scores
- item_rank = item_scores.argsort()[::-1]
- item_rank = intersects(item_rank, item_ids, assume_unique=True)
- item_scores = item_scores[item_ids]
- return item_rank, item_scores
+ all_item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx)
+
+ # rank items based on their scores
+ item_indices = (
+ np.arange(self.num_items)
+ if item_indices is None
+ else np.asarray(item_indices)
+ )
+ item_scores = all_item_scores[item_indices]
+
+ if k != -1: # O(n + k log k), faster for small k which is usually the case
+ partitioned_idx = np.argpartition(item_scores, -k)
+ top_k_idx = partitioned_idx[-k:]
+ sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
+ partitioned_idx[-k:] = sorted_top_k_idx
+ ranked_items = item_indices[partitioned_idx[::-1]]
+ else: # O(n log n)
+ ranked_items = item_indices[item_scores.argsort()[::-1]]
+
+ return ranked_items, item_scores
diff --git a/cornac/models/comparer/recom_comparer_sub.pyx b/cornac/models/comparer/recom_comparer_sub.pyx
@@ -759,7 +759,7 @@ class ComparERSub(MTER):
 
  return correct, skipped, loss, bpr_loss
 
- def rank(self, user_idx, item_indices=None):
+ def rank(self, user_idx, item_indices=None, k=-1):
  if self.alpha > 0 and self.n_top_aspects > 0:
  n_top_aspects = min(self.n_top_aspects, self.num_aspects)
  ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx])
@@ -786,12 +786,21 @@ class ComparERSub(MTER):
  all_item_scores[: self.num_items] = known_item_scores
 
  # rank items based on their scores
- if item_indices is None:
- item_scores = all_item_scores[: self.num_items]
- item_rank = item_scores.argsort()[::-1]
- else:
- item_scores = all_item_scores[item_indices]
- item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]
-
- return item_rank, item_scores
- return super().rank(user_idx, item_indices)
+ item_indices = (
+ np.arange(self.num_items)
+ if item_indices is None
+ else np.asarray(item_indices)
+ )
+ item_scores = all_item_scores[item_indices]
+
+ if k != -1: # O(n + k log k), faster for small k which is usually the case
+ partitioned_idx = np.argpartition(item_scores, -k)
+ top_k_idx = partitioned_idx[-k:]
+ sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
+ partitioned_idx[-k:] = sorted_top_k_idx
+ ranked_items = item_indices[partitioned_idx[::-1]]
+ else: # O(n log n)
+ ranked_items = item_indices[item_scores.argsort()[::-1]]
+
+ return ranked_items, item_scores
+ return super().rank(user_idx, item_indices, k)
diff --git a/cornac/models/efm/recom_efm.pyx b/cornac/models/efm/recom_efm.pyx
@@ -468,7 +468,7 @@ class EFM(Recommender):
  item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :])
  return item_score
 
- def rank(self, user_idx, item_indices=None):
+ def rank(self, user_idx, item_indices=None, k=-1):
  """Rank all test items for a given user.
 
  Parameters
@@ -480,10 +480,15 @@ class EFM(Recommender):
  A list of candidate item indices to be ranked by the user.
  If `None`, list of ranked known item indices and their scores will be returned
 
+ k: int, optional, default: -1
+ Cut-off length for recommendations, k=-1 will return ranked list of all items.
+ This is more important for ANN to know the limit to avoid exhaustive ranking.
+
  Returns
  -------
- Tuple of `item_rank`, and `item_scores`. The order of values
- in item_scores are corresponding to the order of their ids in item_ids
+ (ranked_items, item_scores): tuple
+ `ranked_items` contains item indices being ranked by their scores.
+ `item_scores` contains scores of items corresponding to the indices in the `item_indices` input.
 
  """
  X_ = self.U1[user_idx, :].dot(self.V.T)
@@ -504,11 +509,20 @@ class EFM(Recommender):
  all_item_scores[: self.num_items] = known_item_scores
 
  # rank items based on their scores
- if item_indices is None:
- item_scores = all_item_scores[: self.num_items]
- item_rank = item_scores.argsort()[::-1]
- else:
- item_scores = all_item_scores[item_indices]
- item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]
-
- return item_rank, item_scores
+ item_indices = (
+ np.arange(self.num_items)
+ if item_indices is None
+ else np.asarray(item_indices)
+ )
+ item_scores = all_item_scores[item_indices]
+
+ if k != -1: # O(n + k log k), faster for small k which is usually the case
+ partitioned_idx = np.argpartition(item_scores, -k)
+ top_k_idx = partitioned_idx[-k:]
+ sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
+ partitioned_idx[-k:] = sorted_top_k_idx
+ ranked_items = item_indices[partitioned_idx[::-1]]
+ else: # O(n log n)
+ ranked_items = item_indices[item_scores.argsort()[::-1]]
+
+ return ranked_items, item_scores
diff --git a/cornac/models/lrppm/recom_lrppm.pyx b/cornac/models/lrppm/recom_lrppm.pyx
@@ -516,7 +516,7 @@ class LRPPM(Recommender):
  item_score = self.I[i_idx].dot(self.U[u_idx])
  return item_score
 
- def rank(self, user_idx, item_indices=None):
+ def rank(self, user_idx, item_indices=None, k=-1):
  if self.alpha > 0 and self.num_top_aspects > 0:
  n_items = self.num_items
  num_top_aspects = min(self.num_top_aspects, self.num_aspects)
@@ -540,12 +540,21 @@ class LRPPM(Recommender):
  all_item_scores[: self.num_items] = known_item_scores
 
  # rank items based on their scores
- if item_indices is None:
- item_scores = all_item_scores[: self.num_items]
- item_rank = item_scores.argsort()[::-1]
- else:
- item_scores = all_item_scores[item_indices]
- item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]
-
- return item_rank, item_scores
- return super().rank(user_idx, item_indices)
+ item_indices = (
+ np.arange(self.num_items)
+ if item_indices is None
+ else np.asarray(item_indices)
+ )
+ item_scores = all_item_scores[item_indices]
+
+ if k != -1: # O(n + k log k), faster for small k which is usually the case
+ partitioned_idx = np.argpartition(item_scores, -k)
+ top_k_idx = partitioned_idx[-k:]
+ sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
+ partitioned_idx[-k:] = sorted_top_k_idx
+ ranked_items = item_indices[partitioned_idx[::-1]]
+ else: # O(n log n)
+ ranked_items = item_indices[item_scores.argsort()[::-1]]
+
+ return ranked_items, item_scores
+ return super().rank(user_idx, item_indices, k)