
Commit

Merge pull request #18 from recohut/stage
sparsh-ai authored Feb 12, 2022
2 parents 5580982 + 86d6bb3 commit 3b4b08e
Showing 32 changed files with 32 additions and 0 deletions.
1 change: 1 addition & 0 deletions _notebooks/2022-01-24-advertising.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions _notebooks/2022-01-24-agnn.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions _notebooks/2022-01-24-amazon-crossdomain.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions _notebooks/2022-01-24-apprentice-mountaincar.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions _notebooks/2022-01-24-cartpole-torch.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions _notebooks/2022-01-24-cdl.ipynb
@@ -0,0 +1 @@

# CDL on ML-1m

```python
import numpy as np
import pandas as pd
import os, sys
from collections import defaultdict
from datetime import datetime  # used to time training epochs in CDL.fit

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F
```

```python
!wget -q --show-progress https://github.com/sparsh-ai/stanza/raw/S629908/rec/CDL/data/ml_100k_train.npy
!wget -q --show-progress https://github.com/sparsh-ai/stanza/raw/S629908/rec/CDL/data/ml_100k_test.npy
!wget -q --show-progress https://github.com/sparsh-ai/stanza/raw/S629908/rec/CDL/data/movies.csv
```

```
ml_100k_train.npy 100%[===================>]  12.10M  --.-KB/s  in 0.1s
ml_100k_test.npy  100%[===================>]  12.10M  --.-KB/s  in 0.1s
movies.csv        100%[===================>] 232.34K  --.-KB/s  in 0.03s
```

```python
def add_noise(x, corrupt_ratio):
    # Corrupt the item-feature matrix with Bernoulli noise; the denoising
    # autoencoder is trained to reconstruct the clean input from this copy.
    noise = np.random.binomial(1, corrupt_ratio, size=x.shape)
    return x + noise


class CDLDataset(Dataset):
    """Pairs each item's clean feature vector with its corrupted version."""

    def __init__(self, xc, x0):
        super(CDLDataset, self).__init__()
        self.xc = xc  # clean item features
        self.x0 = x0  # corrupted item features

    def __len__(self):
        return self.xc.shape[0]

    def __getitem__(self, idx):
        return {'clean': torch.FloatTensor(self.xc[idx, :]),
                'corrupt': torch.FloatTensor(self.x0[idx, :]),
                'idx': idx}
```

```python
class SDAE(nn.Module):
    """Stacked denoising autoencoder; the bottleneck supplies item latent features."""

    def __init__(self, input_dim, hidden_dim, embed_dim):
        super(SDAE, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.embed_dim = embed_dim
        self.enc1 = nn.Linear(input_dim, hidden_dim)
        self.enc2 = nn.Linear(hidden_dim, hidden_dim)
        self.enc3 = nn.Linear(hidden_dim, embed_dim)

        self.dec1 = nn.Linear(embed_dim, hidden_dim)
        self.dec2 = nn.Linear(hidden_dim, hidden_dim)
        self.dec3 = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        x = F.relu(self.enc1(x))
        x = F.relu(self.enc2(x))
        latent = F.relu(self.enc3(x))  # bottleneck embedding
        x = F.relu(self.dec1(latent))
        x = F.relu(self.dec2(x))
        x = self.dec3(x)
        return latent, x
```
```python
class CDL:
    def __init__(self, train_imp, test_imp, input_dim, hidden_dim, dim_f,
                 dataloader, seed, device, config):
        self.dim_f = dim_f
        self.user_num = train_imp.shape[0]
        self.item_num = train_imp.shape[1]
        self.input_dim = input_dim

        self.R_tr = train_imp
        self.R_tst = test_imp
        self.C = np.where(self.R_tr > 0, 1, 0)  # binary confidence matrix
        self.C_u = np.zeros((self.item_num, self.item_num))
        self.C_i = np.zeros((self.user_num, self.user_num))

        np.random.seed(seed)
        self.X = np.random.standard_normal((self.user_num, dim_f))
        self.Y = np.random.standard_normal((self.item_num, dim_f))

        self.loss_tr = defaultdict(float)
        self.loss_ae = defaultdict(float)
        self.loss_tst = defaultdict(float)

        self.ae = SDAE(input_dim=input_dim, hidden_dim=hidden_dim, embed_dim=dim_f).to(device)
        self.optimizer = optim.Adam(self.ae.parameters(), lr=config.learning_rate,
                                    weight_decay=config.lambda_w)
        self.dataloader = dataloader

        self.lambda_u = config.lambda_u
        self.lambda_w = config.lambda_w
        self.lambda_v = config.lambda_v
        self.lambda_n = config.lambda_n

        self.device = device
        self.config = config

    def ae_train(self):
        # One pass over the item features: trains the SDAE and collects the
        # bottleneck encodings used as priors for the item factors.
        latent_np = np.zeros((self.item_num, self.dim_f))
        loss_ae = []
        for batch in self.dataloader:
            y = batch['clean'].to(self.device)
            x = batch['corrupt'].to(self.device)
            idx = batch['idx']
            latent, pred = self.ae(x)
            latent_ = latent.detach().cpu().numpy()
            latent_np[idx.numpy()] = latent_

            loss = self.loss_fn(pred, y, idx.to(self.device), latent_)
            self.optimizer.zero_grad()  # reset gradients; otherwise they accumulate across batches
            loss.backward()
            self.optimizer.step()
            loss_ae.append(loss.item())

        return latent_np, np.mean(loss_ae)

    def fit(self):
        start = datetime.now()
        for epoch in range(self.config.epochs):
            start_epoch = datetime.now()
            self.ae.train()
            self.latent_feat, self.loss_ae[epoch] = self.ae_train()

            yty = np.dot(self.Y.T, self.Y)  # constant while the user factors are updated
            for u in range(self.user_num):
                self.X[u, :] = self.update_user_vector(u, yty)

            xtx = np.dot(self.X.T, self.X)  # constant while the item factors are updated
            for i in range(self.item_num):
                self.Y[i, :] = self.update_item_vector(i, xtx)

            train_loss = self.evaluate(train_eval=True)
            test_loss = self.evaluate(train_eval=False)

            self.loss_tr[epoch] = train_loss
            self.loss_tst[epoch] = test_loss

            print(f'EPOCH {epoch+1} : TRAINING RANK {self.loss_tr[epoch]:.5f}, VALID RANK {self.loss_tst[epoch]:.5f}')
            print(f'Time per one epoch {datetime.now() - start_epoch}')
        end = datetime.now()
        print(f'Training takes time {end-start}')

    def scoring(self):
        # Predicted affinity matrix R_hat = X Y^T
        return np.dot(self.X, self.Y.T)

    def update_user_vector(self, u, yty):
        # Closed-form weighted-ALS update for user u (see the equations above).
        np.fill_diagonal(self.C_u, (self.C[u, :] - 1))
        comp1 = yty
        comp2 = np.dot(self.Y.T, self.C_u).dot(self.Y)
        comp3 = np.identity(self.config.dim_f) * self.config.lambda_u
        comp = np.linalg.inv(comp1 + comp2 + comp3)
        self.C_u = self.C_u + np.identity(self.C_u.shape[0])
        comp = np.dot(comp, self.Y.T).dot(self.C_u)
        return np.dot(comp, self.R_tr[u, :])

    def update_item_vector(self, i, xtx):
        # Closed-form update for item i, regularized toward the SDAE encoding.
        np.fill_diagonal(self.C_i, (self.C[:, i] - 1))
        comp1 = xtx
        comp2 = np.dot(self.X.T, self.C_i).dot(self.X)
        comp3 = np.identity(self.config.dim_f) * self.config.lambda_v
        comp = np.linalg.inv(comp1 + comp2 + comp3)
        self.C_i = self.C_i + np.identity(self.C_i.shape[0])
        comp4 = self.X.T.dot(self.C_i).dot(self.R_tr[:, i])
        comp5 = self.lambda_v * self.latent_feat[i, :]
        return np.dot(comp, comp4 + comp5)

    def loss_fn(self, pred, xc, idx, latent_feat):
        X = torch.tensor(self.X).to(self.device)
        Y = torch.tensor(self.Y).to(self.device)[idx, :]
        R = torch.tensor(self.R_tr).float().to(self.device)[:, idx]
        C = torch.tensor(self.C).float().to(self.device)[:, idx]
        latent = torch.tensor(latent_feat).to(self.device)

        comp1 = (X**2).sum(axis=1).sum() * self.lambda_u/2             # user-factor regularizer
        comp2 = ((Y - latent)**2).sum(axis=1).sum() * self.lambda_v/2  # tie item factors to SDAE encodings
        comp3 = ((pred - xc)**2).sum(axis=1).sum() * self.lambda_n/2   # reconstruction error
        comp4 = torch.sum((torch.mm(X, Y.T) - R)**2 * C/2)             # rating-prediction error

        return comp1 + comp2 + comp3 + comp4

    def evaluate(self, train_eval):
        # Weighted average percentile rank of the observed positives (lower is better).
        if train_eval:
            R = self.R_tr
        else:
            R = self.R_tst

        phat = self.scoring()
        rank_mat = np.zeros(phat.shape)
        for u in range(self.user_num):
            pred_u = phat[u, :] * -1
            rank = pred_u.argsort().argsort()
            rank = rank / self.item_num
            rank_mat[u, :] = rank

        return np.sum(R * rank_mat) / np.sum(R)
```
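The notebook never turns the score matrix into actual recommendations; as a minimal sketch, a helper along these lines could do it (the name `recommend_top_n` and its signature are assumptions, not part of the original):

```python
# Hypothetical helper (not in the original notebook): top-N items for one user
# from a fitted CDL model, excluding items the user already interacted with.
def recommend_top_n(model, user_id, n=10):
    scores = model.scoring()[user_id, :].copy()   # predicted affinity for every item
    scores[model.R_tr[user_id, :] > 0] = -np.inf  # mask training interactions
    return np.argsort(-scores)[:n]                # indices of the N best remaining items
```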
```python
class Config:
    learning_rate = 0.001
    early_stopping_round = 0
    epochs = 15
    seed = 1995
    dim_f = 10
    batch_size = 16
    lambda_u = 1
    lambda_w = 5e-4
    lambda_v = 1
    lambda_n = 1
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    rankM = 50

config = Config()
```

```python
train = np.load('ml_100k_train.npy')
test = np.load('ml_100k_test.npy')
xc = pd.read_csv('movies.csv').iloc[:, 5:].values  # item side-information columns

# Binarize the interaction matrices for implicit feedback
train = np.where(train > 0, 1, 0)
test = np.where(test > 0, 1, 0)
x0 = add_noise(xc, corrupt_ratio=0.1)  # corrupted copy for the denoising autoencoder
```
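The RANK values printed below are the metric computed in `CDL.evaluate`: items are ranked per user by predicted score, ranks are normalized by the catalog size, and the normalized ranks of the observed positives are averaged, so lower is better. A toy illustration (not a cell of the original notebook):

```python
phat = np.array([[0.9, 0.1, 0.5, 0.3]])  # predicted scores for one user, four items
R = np.array([[1, 0, 1, 0]])             # observed interactions
rank = (-phat[0]).argsort().argsort() / phat.shape[1]  # [0.0, 0.75, 0.25, 0.5]
print(np.sum(R * rank) / np.sum(R))      # (0.0 + 0.25) / 2 = 0.125
```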
```python
config.n_item = train.shape[1]
config.n_user = train.shape[0]
idx = np.arange(config.n_item)

config.neg_item_tr = {i: idx[~train[i, :].astype(bool)] for i in range(config.n_user)}
config.neg_item_tst = {i: idx[~test[i, :].astype(bool)] for i in range(config.n_user)}

config.pos_item_tr_bool = {i: train[i, :].astype(bool) for i in range(config.n_user)}
config.pos_item_tst_bool = {i: test[i, :].astype(bool) for i in range(config.n_user)}

dataset = CDLDataset(xc, x0)
trainloader = DataLoader(dataset, config.batch_size, drop_last=False, shuffle=False)

model = CDL(
    train_imp=train,
    test_imp=test,
    input_dim=xc.shape[1],
    hidden_dim=config.dim_f,
    dim_f=config.dim_f,
    dataloader=trainloader,
    seed=1995,
    device=config.device,
    config=config
)

model.fit()
```

```
EPOCH 1 : TRAINING RANK 0.20875, VALID RANK 0.20875
Time per one epoch 0:00:50.467119
EPOCH 2 : TRAINING RANK 0.14438, VALID RANK 0.14438
Time per one epoch 0:00:48.940005
EPOCH 3 : TRAINING RANK 0.16976, VALID RANK 0.16976
Time per one epoch 0:00:47.577796
EPOCH 4 : TRAINING RANK 0.18687, VALID RANK 0.18687
Time per one epoch 0:00:48.211371
EPOCH 5 : TRAINING RANK 0.20023, VALID RANK 0.20023
Time per one epoch 0:00:49.610613
EPOCH 6 : TRAINING RANK 0.20773, VALID RANK 0.20773
Time per one epoch 0:00:48.596721
EPOCH 7 : TRAINING RANK 0.21310, VALID RANK 0.21310
Time per one epoch 0:00:49.022005
EPOCH 8 : TRAINING RANK 0.21953, VALID RANK 0.21953
Time per one epoch 0:00:48.815897
EPOCH 9 : TRAINING RANK 0.23348, VALID RANK 0.23348
Time per one epoch 0:00:48.488231
EPOCH 10 : TRAINING RANK 0.36056, VALID RANK 0.36056
Time per one epoch 0:00:48.563701
EPOCH 11 : TRAINING RANK 0.54516, VALID RANK 0.54516
Time per one epoch 0:00:48.318132
EPOCH 12 : TRAINING RANK 0.39444, VALID RANK 0.39444
Time per one epoch 0:00:46.785170
EPOCH 13 : TRAINING RANK 0.74885, VALID RANK 0.74885
Time per one epoch 0:00:46.347805
EPOCH 14 : TRAINING RANK 0.79147, VALID RANK 0.79147
Time per one epoch 0:00:46.180976
EPOCH 15 : TRAINING RANK 0.80281, VALID RANK 0.80281
Time per one epoch 0:00:47.471120
Training takes time 0:12:03.398521
```

---

```python
!pip install -q watermark
%reload_ext watermark
%watermark -a "Sparsh A." -m -iv -u -t -d
```

```
Author: Sparsh A.

Last updated: 2021-11-28 16:07:35

Compiler    : GCC 7.5.0
OS          : Linux
Release     : 5.4.104+
Machine     : x86_64
Processor   : x86_64
CPU cores   : 2
Architecture: 64bit

torch     : 1.10.0+cu111
IPython   : 5.5.0
sys       : 3.7.12 (default, Sep 10 2021, 00:21:48) [GCC 7.5.0]
numpy     : 1.19.5
pandas    : 1.1.5
matplotlib: 3.2.2
```

---

**END**
1 change: 1 addition & 0 deletions _notebooks/2022-01-24-contextual-mab.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions _notebooks/2022-01-24-fairness.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions _notebooks/2022-01-24-gd.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions _notebooks/2022-01-24-group-data.ipynb

Large diffs are not rendered by default.

