Add RecVAE model (#568)

* Added RecVAE * add recvae example * update readme * update README * fix conflict * remove local ndcg func * removed unnecessary things from the source code. * Add None Seed * Remove unsued code. Fix bottleneck * Added docstring for main class * sort the imports alphabetically * fix typo * reorder import s * CSR: Compressed Sparse Row, CSC: Compressed Sparse Colum * reorder imports * Update models.rst * Update README.md --------- Co-authored-by: SEUNGHEE.oh <seunghee.oh@hp.com> Co-authored-by: Quoc-Tuan Truong <tqtg@users.noreply.github.com>
PreferredAI · Dec 26, 2023 · 477405a · 477405a
1 parent b39f415
commit 477405a
Show file tree

Hide file tree

Showing 9 changed files with 481 additions and 21 deletions.
diff --git a/README.md b/README.md
@@ -152,6 +152,7 @@ The recommender models supported by Cornac are listed below. Why don't you join
 | 2020 | [Adversarial Training Towards Robust Multimedia Recommender System (AMR)](cornac/models/amr), [paper](https://ieeexplore.ieee.org/document/8618394) | [requirements.txt](cornac/models/amr/requirements.txt) | [amr_clothing.py](examples/amr_clothing.py)
 |      | [Hybrid neural recommendation with joint deep representation learning of ratings and reviews (HRDR)](cornac/models/hrdr), [paper](https://www.sciencedirect.com/science/article/abs/pii/S0925231219313207) | [requirements.txt](cornac/models/hrdr/requirements.txt) | [hrdr_example.py](examples/hrdr_example.py)
 |      | [LightGCN: Simplifying and Powering Graph Convolution Network for Recommendation](cornac/models/lightgcn), [paper](https://arxiv.org/pdf/2002.02126.pdf) | [requirements.txt](cornac/models/lightgcn/requirements.txt) | [lightgcn_example.py](examples/lightgcn_example.py)
+|      | [New Variational Autoencoder for Top-N Recommendations with Implicit Feedback (RecVAE)](cornac/models/recvae), [paper](https://doi.org/10.1145/3336191.3371831) | [requirements.txt](cornac/models/recvae/requirements.txt) | [recvae_example.py](examples/recvae_example.py)
 |      | [Temporal-Item-Frequency-based User-KNN (TIFUKNN)](cornac/models/tifuknn), [paper](https://arxiv.org/pdf/2006.00556.pdf) | N/A | [tifuknn_tafeng.py](examples/tifuknn_tafeng.py)
 | 2019 | [Embarrassingly Shallow Autoencoders for Sparse Data (EASEᴿ)](cornac/models/ease), [paper](https://arxiv.org/pdf/1905.03375.pdf) | N/A | [ease_movielens.py](examples/ease_movielens.py)
 |      | [Neural Graph Collaborative Filtering (NGCF)](cornac/models/ngcf), [paper](https://arxiv.org/pdf/1905.08108.pdf) | [requirements.txt](cornac/models/ngcf/requirements.txt) | [ngcf_example.py](examples/ngcf_example.py)

diff --git a/cornac/models/__init__.py b/cornac/models/__init__.py
@@ -64,6 +64,7 @@
 from .online_ibpr import OnlineIBPR
 from .pcrl import PCRL
 from .pmf import PMF
+from .recvae import RecVAE
 from .sbpr import SBPR
 from .skm import SKMeans
 from .sorec import SoRec

diff --git a/cornac/models/recvae/__init__.py b/cornac/models/recvae/__init__.py
@@ -0,0 +1 @@
+from .recom_recvae import RecVAE
diff --git a/cornac/models/recvae/recom_recvae.py b/cornac/models/recvae/recom_recvae.py
@@ -0,0 +1,279 @@
+# Copyright 2018 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import numpy as np
+from tqdm.auto import trange
+
+from ..recommender import Recommender
+from ...exception import ScoreException
+
+
+class RecVAE(Recommender):
+    """
+    RecVAE, a recommender system based on a Variational Autoencoder.
+
+    Parameters
+    ----------
+    name : str, optional, default: 'RecVae'
+        Name of the recommender model.
+
+    hidden_dim : int, optional, default: 600
+        Dimension of the hidden layer in the VAE architecture.
+
+    latent_dim : int, optional, default: 200
+        Dimension of the latent layer in the VAE architecture.
+
+    batch_size : int, optional, default: 500
+        Size of the batches used during training.
+
+    beta : float, optional
+        Weighting factor for the KL divergence term in the VAE loss function.
+
+    gamma : float, optional, default: 0.005
+        Weighting factor for the regularization term in the loss function.
+
+    lr : float, optional, default: 5e-4
+        Learning rate for the optimizer.
+
+    n_epochs : int, optional, default: 50
+        Number of epochs to train the model.
+
+    n_enc_epochs : int, optional, default: 3
+        Number of epochs to train the encoder part of VAE.
+
+    n_dec_epochs : int, optional, default: 1
+        Number of epochs to train the decoder part of VAE.
+
+    not_alternating : boolean, optional, default: False
+        If True, the model training will not alternate between encoder and decoder.
+
+    trainable : boolean, optional, default: True
+        When False, the model will not be re-trained, and input of pre-trained parameters are required.
+
+    verbose : boolean, optional, default: False
+        When True, running logs are displayed.
+
+    seed : int, optional
+        Random seed for weight initialization and training reproducibility.
+
+    use_gpu : boolean, optional, default: True
+        When True, training utilizes GPU if available.
+
+    References
+    ----------
+    * RecVAE GitHub Repository: https://github.com/ilya-shenbin/RecVAE
+    * Paper Link: https://arxiv.org/abs/1912.11160
+    
+
+    """
+
+    def __init__(
+        self,
+        name="RecVae",
+
+        hidden_dim = 600,
+        latent_dim = 200,
+        batch_size = 500,
+        beta = None,
+        gamma = 0.005,
+        lr = 5e-4,
+        n_epochs = 50,
+        n_enc_epochs = 3,
+        n_dec_epochs = 1,
+        not_alternating = False,
+
+        trainable=True,
+        verbose=False,
+        seed=None,
+        use_gpu=True,
+    ):
+
+
+
+        Recommender.__init__(self,name=name, trainable=trainable, verbose=verbose)
+
+        self.hidden_dim = hidden_dim
+        self.latent_dim = latent_dim
+        self.batch_size = batch_size
+        self.beta = beta
+        self.gamma = gamma
+        self.lr = lr
+        self.n_epochs = n_epochs
+        self.n_enc_epochs = n_enc_epochs
+        self.n_dec_epochs = n_dec_epochs
+        self.not_alternating = not_alternating
+        self.seed = seed
+
+
+        import torch
+        if use_gpu and torch.cuda.is_available():
+            self.device = torch.device("cuda:0")
+        else:
+            self.device = torch.device("cpu") 
+
+
+    def run(self,model, opts, train_set, my_batch_size, n_epochs, beta, gamma, dropout_rate):
+        import torch
+        train_data = train_set.csr_matrix 
+        model.train()
+        for _ in range(n_epochs):
+            for i, batch_ids in enumerate(
+                train_set.user_iter(my_batch_size, shuffle=True)
+            ):
+
+                ratings = torch.Tensor((train_data[batch_ids,:]).toarray()).to(self.device)                
+
+                for optimizer in opts:
+                    optimizer.zero_grad()
+
+                _, loss = model(ratings, beta=beta, gamma=gamma, dropout_rate=dropout_rate)
+                loss.backward()
+
+                for optimizer in opts:
+                    optimizer.step()
+
+
+    def fit(self, train_set, val_set=None):
+        """Fit the model to observations.
+
+        Parameters
+        ----------
+        train_set: :obj:`cornac.data.Dataset`, required
+            User-Item preference data as well as additional modalities.
+
+        val_set: :obj:`cornac.data.Dataset`, optional, default: None
+            User-Item preference data for model selection purposes (e.g., early stopping).
+
+        Returns
+        -------
+        self : object
+        """
+        Recommender.fit(self, train_set, val_set)
+
+        from .recvae import VAE
+        import torch
+        from torch import optim
+
+        from ...metrics import NDCG
+        from ...eval_methods import ranking_eval
+
+        if self.trainable:
+
+            if self.verbose:
+                print("Learning...")
+            if self.seed is not None:
+                np.random.seed(self.seed)
+                torch.manual_seed(self.seed)
+                torch.cuda.manual_seed(self.seed)
+                torch.cuda.manual_seed_all(self.seed)
+                torch.backends.cudnn.deterministic = True
+                torch.backends.cudnn.benchmark = False
+                torch.backends.cudnn.enabled = False
+
+
+            model_kwargs = {
+                'hidden_dim': self.hidden_dim,
+                'latent_dim': self.latent_dim,
+                'input_dim': train_set.num_items,
+            }
+
+            self.recvae_model = VAE(**model_kwargs).to(self.device)
+
+
+
+            learning_kwargs = {
+                'model': self.recvae_model,
+                'train_set': train_set,
+                'my_batch_size': self.batch_size,
+                'beta': self.beta,
+                'gamma': self.gamma
+            }
+
+            self.mydata = train_set
+            decoder_params = set(self.recvae_model.decoder.parameters())
+            encoder_params = set(self.recvae_model.encoder.parameters())
+
+            optimizer_encoder = optim.Adam(encoder_params, lr=self.lr)
+            optimizer_decoder = optim.Adam(decoder_params, lr=self.lr)
+
+            progress_bar = trange(1, self.n_epochs + 1, desc="RecVAE", disable=not self.verbose)
+
+            for _ in progress_bar:
+                if self.not_alternating:
+                    self.run(opts=[optimizer_encoder, optimizer_decoder], n_epochs=1, dropout_rate=0.5, **learning_kwargs)
+                else:
+                    self.run(opts=[optimizer_encoder], n_epochs=self.n_enc_epochs, dropout_rate=0.5, **learning_kwargs)
+                    self.recvae_model.update_prior()
+                    self.run(opts=[optimizer_decoder], n_epochs=self.n_dec_epochs, dropout_rate=0, **learning_kwargs)
+
+
+                ndcg_100 = ranking_eval(
+                model=self,
+                metrics=[NDCG(k=100)],
+                train_set=train_set,
+                test_set=train_set,
+                )[0][0]
+
+
+                progress_bar.set_postfix(ndcg100 = ndcg_100)
+
+            if self.verbose:
+                print(f"Learning completed : [{ndcg_100}]")
+
+        elif self.verbose:
+            print("%s is trained already (trainable = False)" % (self.name))
+
+
+        return self
+
+    def score(self, user_idx, item_idx=None):
+        """Predict the scores/ratings of a user for an item.
+
+        Parameters
+        ----------
+        user_id: int, required
+            The index of the user for whom to perform score prediction.
+
+        item_id: int, optional, default: None
+            The index of the item for that to perform score prediction.
+            If None, scores for all known items will be returned.
+
+        Returns
+        -------
+        res : A scalar or a Numpy array
+            Relative scores that the user gives to the item or to all known items
+        """
+        import torch
+
+        ratings_in = self.mydata.matrix[user_idx,:]
+        ratings_pred = self.recvae_model(torch.Tensor(ratings_in.toarray()).to(self.device), calculate_loss=False).cpu().detach().numpy().flatten()
+
+
+        if item_idx is None:
+            if not self.knows_user(user_idx):
+                raise ScoreException(
+                    "Can't make score prediction for (user_id=%d)" % user_idx
+                )
+
+            return ratings_pred
+        else:
+            if not (self.knows_user(user_idx) and self.knows_item(item_idx)):
+                raise ScoreException(
+                    "Can't make score prediction for (user_id=%d, item_id=%d)"
+                    % (user_idx, item_idx)
+                )
+
+            return ratings_pred[item_idx]
+