diff --git a/PCA-RNN-Xiaotong Sun/PCA-RNN.ipynb b/PCA-RNN-Xiaotong Sun/PCA-RNN.ipynb new file mode 100644 index 0000000..c5a0249 --- /dev/null +++ b/PCA-RNN-Xiaotong Sun/PCA-RNN.ipynb @@ -0,0 +1,523 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "6135a635", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "import pickle\n", + "import random\n", + "from glob import glob\n", + "\n", + "from skimage.io import imread, imsave\n", + "from skimage.transform import resize\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.model_selection import train_test_split\n", + "import matplotlib.pyplot as plt\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "from tensorflow.keras import layers\n", + "from tensorflow.keras.callbacks import EarlyStopping\n", + "import math\n", + "from sklearn.metrics import mean_squared_error\n", + "from tensorflow.python.util.nest import flatten\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.decomposition import PCA\n", + "from tensorflow.keras.models import Sequential\n", + "\n", + "\n", + "np.random.seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b1fdba59", + "metadata": {}, + "outputs": [], + "source": [ + "data_path_base = \"/Users/xiaotongsun/Documents/ML for ME/HW7/Image Sequences\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3f172ed7", + "metadata": {}, + "outputs": [], + "source": [ + "image_dirs = glob(f\"{data_path_base}/*.jpg\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b0b66dd7", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(image_dirs)\n", + "df.to_csv(\"dataset.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "376806c0", + "metadata": {}, + "outputs": [], + "source": [ + "print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a799cbdc", + "metadata": {}, + "outputs": [], + "source": [ + "image_size = (128, 128)\n", + "\n", + "dataset = pd.read_csv(\"dataset.csv\")\n", + "n_samples = len(dataset)\n", + "data = np.empty((n_samples, image_size[0]*image_size[1]))\n", + "for i in range(n_samples):\n", + " path = dataset.iloc[i, 0]\n", + " img = np.float32(imread(path)) / 255.\n", + " image_resized = resize(img, image_size, anti_aliasing=True)\n", + " data[i] = image_resized.flatten()\n", + " \n", + "with open(\"raw.npy\", \"wb\") as f:\n", + " np.save(f, data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26fa1153", + "metadata": {}, + "outputs": [], + "source": [ + "data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "9573a43e", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"raw.npy\", \"rb\") as f:\n", + " data = np.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c4a15dd4", + "metadata": {}, + "outputs": [], + "source": [ + "n_components = 100\n", + "# sc = StandardScaler()\n", + "# sc.fit(data)\n", + "# data_sc = sc.transform(data)\n", + "pca = PCA(n_components=n_components)\n", + "principalComponents = pca.fit_transform(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4a6a5945", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"pca.pickle\", \"wb\") as f:\n", + " pickle.dump(pca, f)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "fee1831d", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"pcs.npy\", \"wb\") as f:\n", + " np.save(f, principalComponents)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "3f08bab2", + "metadata": {}, + "outputs": [], + "source": [ + "def sample_image_seq(seq_len, data, stride=1):\n", + " total_len = data.shape[0]\n", + " num_seq = (total_len - seq_len) // stride + 1\n", + " indices = (np.arange(seq_len) + np.arange(0, num_seq * stride, stride).reshape(-1,1))\n", + " return data[indices, ...]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7ba69c6c", + "metadata": {}, + "outputs": [], + "source": [ + "def getXY_pcs(seq_len, data, stride=1):\n", + " total_len = data.shape[0]\n", + " X_pcs = sample_image_seq(seq_len, data[:total_len-seq_len, ...])\n", + " Y_pcs = sample_image_seq(seq_len, data[seq_len:, ...])\n", + " return X_pcs, Y_pcs" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "4cbc6c02", + "metadata": {}, + "outputs": [], + "source": [ + "seq_len = 10\n", + "X_pcs, y_pcs = getXY_pcs(seq_len, principalComponents)\n", + "y_true = sample_image_seq(seq_len, data[seq_len:, ...])" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "e14f6853", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((9982, 10, 100), (9982, 10, 100), (9982, 10, 16384))" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_pcs.shape, y_pcs.shape, y_true.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "0123d522", + "metadata": {}, + "outputs": [], + "source": [ + "total_num = len(X_pcs)\n", + "split_ratio = 0.9 \n", + "train_num = int(total_num * split_ratio)\n", + "test_num = total_num - train_num\n", + "\n", + "indices = np.arange(total_num)\n", + "np.random.shuffle(indices)\n", + "\n", + "train_indices = indices[:train_num]\n", + "test_indices = indices[train_num:]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "0b09e7a0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8983, 999)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(train_indices), len(test_indices)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "adedf52c", + "metadata": {}, + "outputs": [], + "source": [ + "train_X = X_pcs[train_indices, ...]\n", + "train_y = y_pcs[train_indices, ...]\n", + "\n", + "test_X = X_pcs[test_indices, ...]\n", + "test_y = y_pcs[test_indices, ...]\n", + "\n", + "test_y_true = y_true[test_indices, ...]\n", + "\n", + "# with open(\"input.npy\", \"wb\") as f:\n", + "# np.save(f, train_X)\n", + "# np.save(f, train_y)\n", + "# np.save(f, test_X)\n", + "# np.save(f, test_y)\n", + "# np.save(f, test_y_true)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "d7204747", + "metadata": {}, + "outputs": [], + "source": [ + "def create_RNN(hidden_units=100, time_steps=10):\n", + " model = keras.Sequential()\n", + " model.add(layers.LSTM(hidden_units, activation='relu', input_shape=(time_steps,100)))\n", + " model.add(layers.RepeatVector(time_steps))\n", + " model.add(layers.LSTM(hidden_units, activation='relu', return_sequences=True))\n", + " model.add(layers.TimeDistributed(layers.Dense(100)))\n", + " model.compile(loss='mean_squared_error', optimizer='adam')\n", + " return model" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "55e111c2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/100\n", + "225/225 - 4s - loss: 2.0794 - val_loss: 2.0687\n", + "Epoch 2/100\n", + "225/225 - 2s - loss: 2.0482 - val_loss: 2.0417\n", + "Epoch 3/100\n", + "225/225 - 2s - loss: 2.0202 - val_loss: 2.0227\n", + "Epoch 4/100\n", + "225/225 - 2s - loss: 2.0007 - val_loss: 2.0097\n", + "Epoch 5/100\n", + "225/225 - 2s - loss: 1.9846 - val_loss: 2.0000\n", + "Epoch 6/100\n", + "225/225 - 2s - loss: 1.9694 - val_loss: 1.9914\n", + "Epoch 7/100\n", + "225/225 - 2s - loss: 1.9543 - val_loss: 1.9810\n", + "Epoch 8/100\n", + "225/225 - 2s - loss: 1.9403 - val_loss: 1.9728\n", + "Epoch 9/100\n", + "225/225 - 2s - loss: 1.9292 - val_loss: 1.9674\n", + "Epoch 10/100\n", + "225/225 - 2s - loss: 1.9170 - val_loss: 1.9619\n", + "Epoch 11/100\n", + "225/225 - 2s - loss: 1.9052 - val_loss: 1.9576\n", + "Epoch 12/100\n", + "225/225 - 3s - loss: 1.8952 - val_loss: 1.9515\n", + "Epoch 13/100\n", + "225/225 - 2s - loss: 1.8847 - val_loss: 1.9496\n", + "Epoch 14/100\n", + "225/225 - 2s - loss: 1.8766 - val_loss: 1.9478\n", + "Epoch 15/100\n", + "225/225 - 2s - loss: 1.8658 - val_loss: 1.9443\n", + "Epoch 16/100\n", + "225/225 - 3s - loss: 1.8590 - val_loss: 1.9405\n", + "Epoch 17/100\n", + "225/225 - 3s - loss: 1.8495 - val_loss: 1.9436\n", + "Epoch 18/100\n", + "225/225 - 2s - loss: 1.8413 - val_loss: 1.9360\n", + "Epoch 19/100\n", + "225/225 - 3s - loss: 1.8335 - val_loss: 1.9371\n", + "Epoch 20/100\n", + "225/225 - 2s - loss: 1.8274 - val_loss: 1.9378\n", + "Epoch 21/100\n", + "225/225 - 2s - loss: 1.8203 - val_loss: 1.9381\n", + "Epoch 22/100\n", + "225/225 - 2s - loss: 1.8143 - val_loss: 1.9358\n", + "Epoch 23/100\n", + "225/225 - 3s - loss: 1.8089 - val_loss: 1.9389\n", + "Epoch 24/100\n", + "225/225 - 2s - loss: 1.7992 - val_loss: 1.9345\n", + "Epoch 25/100\n", + "225/225 - 2s - loss: 1.7943 - val_loss: 1.9366\n", + "Epoch 26/100\n", + "225/225 - 3s - loss: 1.7905 - val_loss: 1.9396\n", + "Epoch 27/100\n", + "225/225 - 3s - loss: 1.7842 - val_loss: 1.9367\n", + "Epoch 28/100\n", + "225/225 - 2s - loss: 1.7764 - val_loss: 1.9336\n", + "Epoch 29/100\n", + "225/225 - 2s - loss: 1.7742 - val_loss: 1.9400\n", + "Epoch 30/100\n", + "225/225 - 2s - loss: 1.7686 - val_loss: 1.9411\n", + "Epoch 31/100\n", + "225/225 - 2s - loss: 1.7639 - val_loss: 1.9431\n", + "Epoch 32/100\n", + "225/225 - 2s - loss: 1.7571 - val_loss: 1.9429\n", + "Epoch 33/100\n", + "225/225 - 3s - loss: 1.7498 - val_loss: 1.9409\n", + "Restoring model weights from the end of the best epoch.\n", + "Epoch 00033: early stopping\n", + "Model: \"sequential_4\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "lstm_8 (LSTM) (None, 128) 117248 \n", + "_________________________________________________________________\n", + "repeat_vector_4 (RepeatVecto (None, 10, 128) 0 \n", + "_________________________________________________________________\n", + "lstm_9 (LSTM) (None, 10, 128) 131584 \n", + "_________________________________________________________________\n", + "time_distributed_4 (TimeDist (None, 10, 100) 12900 \n", + "=================================================================\n", + "Total params: 261,732\n", + "Trainable params: 261,732\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n" + ] + } + ], + "source": [ + "checkpoint_filepath = '/Users/xiaotongsun/Documents/ML for ME/HW7/best_model7.hdf5'\n", + "model = create_RNN(hidden_units=128, time_steps=10) \n", + "monitor = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto', restore_best_weights=True)\n", + "model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(\n", + "filepath=checkpoint_filepath,\n", + "save_weights_only=True,\n", + "monitor='val_loss',\n", + "mode='min',\n", + "save_best_only=True)\n", + "history = model.fit(train_X, train_y, batch_size=32, validation_split=0.2, shuffle=True, callbacks=[monitor,model_checkpoint_callback], verbose=2, epochs=100)\n", + "print(model.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "4f178f6d", + "metadata": {}, + "outputs": [], + "source": [ + "y_pred_pcs = model.predict(test_X)\n", + "y_pred_image = pca.inverse_transform(y_pred_pcs)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "61eed50e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(999, 10, 16384)\n" + ] + } + ], + "source": [ + "print(y_pred_image.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "6cece164", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure()\n", + "for i in range(1, 4, 1):\n", + " plt.subplot(2, 3, i)\n", + " plt.imshow(y_pred_image[1, i-1, ...].reshape(128, 128), cmap=\"gray\")\n", + " plt.subplot(2, 3, 3 + i)\n", + " plt.imshow(test_y_true[1, i-1, ...].reshape(128, 128), cmap=\"gray\")\n", + "plt.savefig(\"/Users/xiaotongsun/Documents/ML for ME/HW7/predict_vs_true7.jpg\")\n", + "plt.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "70695950", + "metadata": {}, + "outputs": [], + "source": [ + "y_true_projected = pca.inverse_transform(test_y)\n", + "plt.figure()\n", + "for i in range(1, 4, 1):\n", + " plt.subplot(2, 3, i)\n", + " plt.imshow(y_true_projected[1, i-1, ...].reshape(128, 128), cmap=\"gray\")\n", + " plt.subplot(2, 3, 3 + i)\n", + " plt.imshow(test_y_true[1, i-1, ...].reshape(128, 128), cmap=\"gray\")\n", + "plt.savefig(\"/Users/xiaotongsun/Documents/ML for ME/HW7/true7.jpg\")\n", + "plt.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "610540b9", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure()\n", + "for i in range(1, 4, 1):\n", + " plt.subplot(2, 3, i)\n", + " plt.imshow(y_pred_image[500, i-1, ...].reshape(128, 128), cmap=\"gray\")\n", + " plt.subplot(2, 3, 3 + i)\n", + " plt.imshow(test_y_true[500, i-1, ...].reshape(128, 128), cmap=\"gray\")\n", + "plt.savefig(\"/Users/xiaotongsun/Documents/ML for ME/HW7/predict_vs_true7_2.jpg\")\n", + "plt.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "509d31e2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/PCA-RNN-Xiaotong Sun/Xiaotong Sun-PCA-RNN.pdf b/PCA-RNN-Xiaotong Sun/Xiaotong Sun-PCA-RNN.pdf new file mode 100644 index 0000000..8dbff97 Binary files /dev/null and b/PCA-RNN-Xiaotong Sun/Xiaotong Sun-PCA-RNN.pdf differ diff --git a/PCA-RNN-Xiaotong Sun/best_model7.hdf5 b/PCA-RNN-Xiaotong Sun/best_model7.hdf5 new file mode 100644 index 0000000..7e93de6 Binary files /dev/null and b/PCA-RNN-Xiaotong Sun/best_model7.hdf5 differ diff --git a/PCA-RNN-Xiaotong Sun/pca.pickle b/PCA-RNN-Xiaotong Sun/pca.pickle new file mode 100644 index 0000000..f8dc570 Binary files /dev/null and b/PCA-RNN-Xiaotong Sun/pca.pickle differ diff --git a/PCA-RNN-Xiaotong Sun/pcs.npy b/PCA-RNN-Xiaotong Sun/pcs.npy new file mode 100644 index 0000000..62237ca Binary files /dev/null and b/PCA-RNN-Xiaotong Sun/pcs.npy differ diff --git a/PCA-RNN-Xiaotong Sun/predict_vs_true7.jpg b/PCA-RNN-Xiaotong Sun/predict_vs_true7.jpg new file mode 100644 index 0000000..2518243 Binary files /dev/null and b/PCA-RNN-Xiaotong Sun/predict_vs_true7.jpg differ diff --git a/PCA-RNN-Xiaotong Sun/predict_vs_true7_2.jpg b/PCA-RNN-Xiaotong Sun/predict_vs_true7_2.jpg new file mode 100644 index 0000000..7fe040e Binary files /dev/null and b/PCA-RNN-Xiaotong Sun/predict_vs_true7_2.jpg differ diff --git a/PCA-RNN-Xiaotong Sun/test_2.npy b/PCA-RNN-Xiaotong Sun/test_2.npy new file mode 100644 index 0000000..2d80b15 Binary files /dev/null and b/PCA-RNN-Xiaotong Sun/test_2.npy differ diff --git a/PCA-RNN-Xiaotong Sun/train_pca2.npy b/PCA-RNN-Xiaotong Sun/train_pca2.npy new file mode 100644 index 0000000..9ce81c0 Binary files /dev/null and b/PCA-RNN-Xiaotong Sun/train_pca2.npy differ diff --git a/PCA-RNN-Xiaotong Sun/true7.jpg b/PCA-RNN-Xiaotong Sun/true7.jpg new file mode 100644 index 0000000..a54bf14 Binary files /dev/null and b/PCA-RNN-Xiaotong Sun/true7.jpg differ diff --git a/PCA-RNN-Xiaotong Sun/true7_2.jpg b/PCA-RNN-Xiaotong Sun/true7_2.jpg new file mode 100644 index 0000000..7dd8b31 Binary files /dev/null and b/PCA-RNN-Xiaotong Sun/true7_2.jpg differ