generated from fastai/fastpages
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #18 from recohut/stage
commit
- Loading branch information
Showing
32 changed files
with
32 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"2022-01-24-cdl.ipynb","provenance":[{"file_id":"https://github.com/recohut/nbs/blob/main/raw/T249728%20%7C%20CDL%20on%20ML-1m.ipynb","timestamp":1644665590116},{"file_id":"16z8K9gL8SxhneDP4Mi3L4Opbck-aYxO8","timestamp":1638115875999}],"collapsed_sections":[],"toc_visible":true,"mount_file_id":"16z8K9gL8SxhneDP4Mi3L4Opbck-aYxO8","authorship_tag":"ABX9TyPNtUryDQj38/+BfqMfah1L"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# CDL on ML-100k"],"metadata":{"id":"kFnMXHQetSFs"}},{"cell_type":"code","metadata":{"id":"RuiURj-F17mq"},"source":["import numpy as np\n","import pandas as pd\n","import os, sys\n","from collections import defaultdict\n","\n","import torch\n","import torch.nn as nn\n","from torch.utils.data import Dataset, DataLoader\n","import torch.optim as optim\n","import torch.nn.functional as F"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"aB3rWtCGOm0U","executionInfo":{"status":"ok","timestamp":1638115805771,"user_tz":-330,"elapsed":1255,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"}},"outputId":"c00a910d-f4e2-4f29-d461-e3e7bed154a7"},"source":["!wget -q --show-progress https://github.com/sparsh-ai/stanza/raw/S629908/rec/CDL/data/ml_100k_train.npy\n","!wget -q --show-progress https://github.com/sparsh-ai/stanza/raw/S629908/rec/CDL/data/ml_100k_test.npy\n","!wget -q --show-progress https://github.com/sparsh-ai/stanza/raw/S629908/rec/CDL/data/movies.csv"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["ml_100k_train.npy 100%[===================>] 12.10M --.-KB/s in 0.1s \n","ml_100k_test.npy 100%[===================>] 12.10M --.-KB/s in 0.1s \n","movies.csv 100%[===================>] 232.34K --.-KB/s in 0.03s 
\n"]}]},{"cell_type":"code","metadata":{"id":"1nkyosaO7P7h"},"source":["def add_noise(x, corrupt_ratio):\n"," noise = np.random.binomial(1, corrupt_ratio, size=x.shape)\n"," return x + noise\n","\n","class CDLDataset(Dataset):\n"," def __init__(self, xc, x0):\n"," super(CDLDataset, self).__init__()\n"," self.xc = xc\n"," self.x0 = x0\n"," \n"," def __len__(self):\n"," return self.xc.shape[0]\n","\n"," def __getitem__(self, idx):\n"," return {'clean':torch.FloatTensor(self.xc[idx, :]),\n"," 'corrupt':torch.FloatTensor(self.x0[idx, :]),\n"," 'idx':idx}"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"P3OBm1mg7sZM"},"source":["class SDAE(nn.Module):\n"," def __init__(self, input_dim, hidden_dim, embed_dim):\n"," super(SDAE, self).__init__()\n"," self.input_dim = input_dim\n"," self.hidden_dim = hidden_dim\n"," self.embed_dim = embed_dim\n"," self.enc1 = nn.Linear(input_dim, hidden_dim)\n"," self.enc2 = nn.Linear(hidden_dim, hidden_dim)\n"," self.enc3 = nn.Linear(hidden_dim, embed_dim)\n"," \n"," self.dec1 = nn.Linear(embed_dim, hidden_dim)\n"," self.dec2 = nn.Linear(hidden_dim, hidden_dim)\n"," self.dec3 = nn.Linear(hidden_dim, input_dim)\n","\n"," def forward(self, x):\n"," x = F.relu(self.enc1(x))\n"," x = F.relu(self.enc2(x))\n"," latent = F.relu(self.enc3(x))\n"," x = F.relu(self.dec1(latent))\n"," x = F.relu(self.dec2(x))\n"," x = self.dec3(x)\n"," return latent, x"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"4PCM4TvX7dgY"},"source":["class CDL:\n"," def __init__(self, train_imp, test_imp, input_dim, hidden_dim, dim_f, dataloader, seed, device, config):\n"," self.dim_f = dim_f\n"," self.user_num = train_imp.shape[0]\n"," self.item_num = train_imp.shape[1]\n"," self.input_dim = input_dim\n","\n"," self.R_tr = train_imp\n"," self.R_tst = test_imp\n"," self.C = np.where(self.R_tr > 0, 1, 0)\n"," self.C_u = np.zeros((self.item_num, self.item_num))\n"," self.C_i = np.zeros((self.user_num, 
self.user_num))\n","\n"," np.random.seed(seed)\n"," self.X = np.random.standard_normal((self.user_num, dim_f))\n"," self.Y = np.random.standard_normal((self.item_num, dim_f))\n"," \n"," self.loss_tr = defaultdict(float)\n"," self.loss_ae = defaultdict(float)\n"," self.loss_tst = defaultdict(float)\n","\n"," self.ae = SDAE(input_dim=input_dim, hidden_dim=hidden_dim, embed_dim=dim_f).to(device)\n"," self.optimizer = optim.Adam(self.ae.parameters(), lr=config.learning_rate, weight_decay=config.lambda_w)\n"," self.dataloader = dataloader\n","\n"," self.lambda_u = config.lambda_u\n"," self.lambda_w = config.lambda_w\n"," self.lambda_v = config.lambda_v\n"," self.lambda_n = config.lambda_n\n","\n"," self.device = device\n"," self.config = config\n"," \n"," def ae_train(self):\n"," latent_np = np.zeros((self.item_num, self.dim_f))\n"," loss_ae = []\n"," for batch in self.dataloader:\n"," y = batch['clean'].to(self.device)\n"," x = batch['corrupt'].to(self.device)\n"," idx = batch['idx']\n"," latent, pred = self.ae(x)\n"," latent_ = latent.detach().cpu().numpy()\n"," latent_np[idx.numpy()] = latent_\n","\n"," loss = self.loss_fn(pred, y, idx.to(self.device), latent_)\n"," loss.backward()\n"," self.optimizer.step()\n"," loss_ae.append(loss.item())\n","\n"," return latent_np, np.mean(loss_ae)\n","\n"," def fit(self):\n"," start = datetime.now()\n"," for epoch in range(self.config.epochs):\n"," start_epoch = datetime.now()\n"," self.ae.train()\n"," self.latent_feat, self.loss_ae[epoch] = self.ae_train()\n"," n = 0\n"," for u in range(self.user_num):\n"," yty = np.dot(self.Y.T, self.Y)\n"," self.X[u, :] = self.update_user_vector(u, yty)\n","\n"," for i in range(self.item_num):\n"," xtx = np.dot(self.X.T, self.X)\n"," self.Y[i, :] = self.update_item_vector(i, xtx)\n"," \n"," phat = self.scoring()\n"," \n"," train_loss = self.evaluate(train_eval=True)\n"," test_loss = self.evaluate(train_eval=False)\n","\n"," self.loss_tr[epoch] = train_loss\n"," self.loss_tst[epoch] = 
test_loss\n","\n","\n"," print(f'EPOCH {epoch+1} : TRAINING RANK {self.loss_tr[epoch]:.5f}, VALID RANK {self.loss_tst[epoch]:.5f}')\n","\n"," print(f'Time per one epoch {datetime.now() - start_epoch}')\n"," end = datetime.now()\n"," print(f'Training takes time {end-start}')\n"," \n"," def scoring(self):\n"," \n"," return np.dot(self.X, self.Y.T)\n"," \n"," def update_user_vector(self, u, yty):\n"," np.fill_diagonal(self.C_u, (self.C[u, :] - 1))\n"," comp1 = yty\n"," comp2 = np.dot(self.Y.T, self.C_u).dot(self.Y)\n"," comp3 = np.identity(self.config.dim_f) * self.config.lambda_u\n"," comp = np.linalg.inv(comp1 + comp2 + comp3)\n"," self.C_u = self.C_u + np.identity(self.C_u.shape[0])\n"," comp = np.dot(comp, self.Y.T).dot(self.C_u)\n"," \n"," return np.dot(comp, self.R_tr[u, :])\n","\n"," def update_item_vector(self, i, xtx): \n"," np.fill_diagonal(self.C_i, (self.C[:, i] - 1))\n"," comp1 = xtx\n"," comp2 = np.dot(self.X.T, self.C_i).dot(self.X)\n"," comp3 = np.identity(self.config.dim_f) * self.config.lambda_v\n"," comp = np.linalg.inv(comp1 + comp2 + comp3)\n"," self.C_i = self.C_i + np.identity(self.C_i.shape[0])\n"," comp4 = self.X.T.dot(self.C_i).dot(self.R_tr[:, i])\n"," comp5 = self.lambda_v * self.latent_feat[i, :]\n"," \n"," return np.dot(comp, comp4+comp5)\n"," \n"," def loss_fn(self, pred, xc, idx, latent_feat):\n"," X = torch.tensor(self.X).to(self.device)\n"," Y = torch.tensor(self.Y).to(self.device)[idx, :]\n"," R = torch.tensor(self.R_tr).float().to(self.device)[:, idx]\n"," C = torch.tensor(self.C).float().to(self.device)[:, idx]\n"," latent = torch.tensor(latent_feat).to(self.device)\n","\n"," comp1 = (X**2).sum(axis=1).sum() * self.lambda_u/2\n"," comp2 = ((Y - latent)**2).sum(axis=1).sum() * self.lambda_v/2\n"," comp3 = ((pred - xc)**2).sum(axis=1).sum() * self.lambda_n/2\n"," comp4 = torch.sum((torch.mm(X, Y.T) - R)**2 * C/2)\n","\n"," return comp1+comp2+comp3+comp4\n","\n"," def evaluate(self, train_eval):\n"," if train_eval:\n"," R = 
self.R_tr\n"," else:\n"," R = self.R_tst\n","\n"," phat = self.scoring()\n"," rank_mat = np.zeros(phat.shape)\n"," for u in range(self.user_num):\n"," pred_u = phat[u, :] * -1\n"," rank = pred_u.argsort().argsort()\n"," rank = rank / self.item_num\n"," rank_mat[u, :] = rank\n","\n"," return np.sum(R * rank_mat) / np.sum(R)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"xHtxulpK8yts"},"source":["class Config:\n"," learning_rate = 0.001\n"," early_stopping_round = 0\n"," epochs = 15\n"," seed = 1995\n"," dim_f = 10\n"," batch_size = 16\n"," lambda_u = 1\n"," lambda_w = 5e-4\n"," lambda_v = 1\n"," lambda_n = 1\n"," device = 'cuda' if torch.cuda.is_available() else 'cpu'\n"," rankM = 50\n","\n","config = Config()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"oGgWfwh7T8oe"},"source":["train = np.load('ml_100k_train.npy')\n","test = np.load('ml_100k_test.npy')\n","xc = pd.read_csv('movies.csv').iloc[:,5:].values\n","\n","train = np.where(train > 0, 1, 0)\n","test = np.where(test > 0, 1, 0)\n","x0 = add_noise(xc, corrupt_ratio=0.1)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"MWpUNtyJ5eiV","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1630644729721,"user_tz":-330,"elapsed":724462,"user":{"displayName":"Sparsh Agarwal","photoUrl":"","userId":"13037694610922482904"}},"outputId":"c04371e6-3de2-4704-e8b8-776f6a1bcea2"},"source":["config.n_item = train.shape[1]\n","config.n_user = train.shape[0]\n","idx = np.arange(config.n_item)\n","\n","config.neg_item_tr = {i :idx[~train[i, :].astype(bool)] for i in range(config.n_user)}\n","config.neg_item_tst = {i :idx[~test[i, :].astype(bool)] for i in range(config.n_user)}\n","\n","config.pos_item_tr_bool = {i :train[i, :].astype(bool) for i in range(config.n_user)}\n","config.pos_item_tst_bool = {i :test[i, :].astype(bool) for i in range(config.n_user)}\n","\n","dataset = CDLDataset(xc, 
x0)\n","trainloader = DataLoader(dataset, config.batch_size, drop_last=False, shuffle=False)\n","\n","model = CDL(\n"," train_imp=train, \n"," test_imp=test, \n"," input_dim=xc.shape[1], \n"," hidden_dim=config.dim_f, \n"," dim_f=config.dim_f,\n"," dataloader=trainloader,\n"," seed=1995,\n"," device=config.device,\n"," config=config\n",")\n","\n","model.fit()"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["EPOCH 1 : TRAINING RANK 0.20875, VALID RANK 0.20875\n","Time per one epoch 0:00:50.467119\n","EPOCH 2 : TRAINING RANK 0.14438, VALID RANK 0.14438\n","Time per one epoch 0:00:48.940005\n","EPOCH 3 : TRAINING RANK 0.16976, VALID RANK 0.16976\n","Time per one epoch 0:00:47.577796\n","EPOCH 4 : TRAINING RANK 0.18687, VALID RANK 0.18687\n","Time per one epoch 0:00:48.211371\n","EPOCH 5 : TRAINING RANK 0.20023, VALID RANK 0.20023\n","Time per one epoch 0:00:49.610613\n","EPOCH 6 : TRAINING RANK 0.20773, VALID RANK 0.20773\n","Time per one epoch 0:00:48.596721\n","EPOCH 7 : TRAINING RANK 0.21310, VALID RANK 0.21310\n","Time per one epoch 0:00:49.022005\n","EPOCH 8 : TRAINING RANK 0.21953, VALID RANK 0.21953\n","Time per one epoch 0:00:48.815897\n","EPOCH 9 : TRAINING RANK 0.23348, VALID RANK 0.23348\n","Time per one epoch 0:00:48.488231\n","EPOCH 10 : TRAINING RANK 0.36056, VALID RANK 0.36056\n","Time per one epoch 0:00:48.563701\n","EPOCH 11 : TRAINING RANK 0.54516, VALID RANK 0.54516\n","Time per one epoch 0:00:48.318132\n","EPOCH 12 : TRAINING RANK 0.39444, VALID RANK 0.39444\n","Time per one epoch 0:00:46.785170\n","EPOCH 13 : TRAINING RANK 0.74885, VALID RANK 0.74885\n","Time per one epoch 0:00:46.347805\n","EPOCH 14 : TRAINING RANK 0.79147, VALID RANK 0.79147\n","Time per one epoch 0:00:46.180976\n","EPOCH 15 : TRAINING RANK 0.80281, VALID RANK 0.80281\n","Time per one epoch 0:00:47.471120\n","Training takes time 
0:12:03.398521\n"]}]},{"cell_type":"markdown","metadata":{"id":"vhB5APuTQNQI"},"source":["---"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"jpKEP2FGQSKE","executionInfo":{"status":"ok","timestamp":1638115654441,"user_tz":-330,"elapsed":3216,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"}},"outputId":"c36674e7-df1a-4143-f355-9786cdfdf417"},"source":["!pip install -q watermark\n","%reload_ext watermark\n","%watermark -a \"Sparsh A.\" -m -iv -u -t -d"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Author: Sparsh A.\n","\n","Last updated: 2021-11-28 16:07:35\n","\n","Compiler : GCC 7.5.0\n","OS : Linux\n","Release : 5.4.104+\n","Machine : x86_64\n","Processor : x86_64\n","CPU cores : 2\n","Architecture: 64bit\n","\n","torch : 1.10.0+cu111\n","IPython : 5.5.0\n","sys : 3.7.12 (default, Sep 10 2021, 00:21:48) \n","[GCC 7.5.0]\n","numpy : 1.19.5\n","pandas : 1.1.5\n","matplotlib: 3.2.2\n","\n"]}]},{"cell_type":"markdown","metadata":{"id":"-k-HLCIjQNQR"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"Mxkw17lSQNQS"},"source":["**END**"]}]} |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.