Commit
restore original version of euroscipy2016
Valerio Maggio committed Aug 22, 2017
1 parent 5973123, commit fd5587d
Showing 68 changed files with 23,873 additions and 0 deletions.
Large diffs are not rendered by default.
1.4 (Extra) A Simple Implementation of ANN for MNIST.ipynb (399 additions, 0 deletions)
@@ -0,0 +1,399 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# A simple implementation of ANN for MNIST\n",
    "\n",
    "This code was taken from: https://github.com/mnielsen/neural-networks-and-deep-learning\n",
    "\n",
    "This accompanies the online text http://neuralnetworksanddeeplearning.com/ . The book is highly recommended. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using Theano backend.\n",
      "Using gpu device 0: GeForce GTX 760 (CNMeM is enabled with initial size: 90.0% of memory, cuDNN 4007)\n"
     ]
    }
   ],
   "source": [
    "# Import libraries\n",
    "import random\n",
    "import numpy as np\n",
    "import keras\n",
    "from keras.datasets import mnist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Set the full path to mnist.pkl.gz\n",
    "# Point this to the data folder inside the repository\n",
    "path_to_dataset = \"euroscipy2016_dl-tutorial/data/mnist.pkl.gz\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "!mkdir -p $HOME/.keras/datasets/euroscipy2016_dl-tutorial/data/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading data from https://s3.amazonaws.com/img-datasets/mnist.pkl.gz\n",
      "15286272/15296311 [============================>.] - ETA: 0s"
     ]
    }
   ],
   "source": [
    "# Load the datasets\n",
    "(X_train, y_train), (X_test, y_test) = mnist.load_data(path_to_dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(60000, 28, 28) (60000,)\n",
      "(10000, 28, 28) (10000,)\n"
     ]
    }
   ],
   "source": [
    "print(X_train.shape, y_train.shape)\n",
    "print(X_test.shape, y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "network.py\n",
    "~~~~~~~~~~\n",
    "A module to implement the stochastic gradient descent learning\n",
    "algorithm for a feedforward neural network. Gradients are calculated\n",
    "using backpropagation. Note that I have focused on making the code\n",
    "simple, easily readable, and easily modifiable. It is not optimized,\n",
    "and omits many desirable features.\n",
    "\"\"\"\n",
    "\n",
    "#### Libraries\n",
    "# Standard library\n",
    "import random\n",
    "\n",
    "# Third-party libraries\n",
    "import numpy as np\n",
    "\n",
    "class Network(object):\n",
    "\n",
    "    def __init__(self, sizes):\n",
    "        \"\"\"The list ``sizes`` contains the number of neurons in the\n",
    "        respective layers of the network. For example, if the list\n",
    "        was [2, 3, 1] then it would be a three-layer network, with the\n",
    "        first layer containing 2 neurons, the second layer 3 neurons,\n",
    "        and the third layer 1 neuron. The biases and weights for the\n",
    "        network are initialized randomly, using a Gaussian\n",
    "        distribution with mean 0, and variance 1. Note that the first\n",
    "        layer is assumed to be an input layer, and by convention we\n",
    "        won't set any biases for those neurons, since biases are only\n",
    "        ever used in computing the outputs from later layers.\"\"\"\n",
    "        self.num_layers = len(sizes)\n",
    "        self.sizes = sizes\n",
    "        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]\n",
    "        self.weights = [np.random.randn(y, x)\n",
    "                        for x, y in zip(sizes[:-1], sizes[1:])]\n",
    "\n",
    "    def feedforward(self, a):\n",
    "        \"\"\"Return the output of the network if ``a`` is input.\"\"\"\n",
    "        for b, w in zip(self.biases, self.weights):\n",
    "            a = sigmoid(np.dot(w, a)+b)\n",
    "        return a\n",
    "\n",
    "    def SGD(self, training_data, epochs, mini_batch_size, eta,\n",
    "            test_data=None):\n",
    "        \"\"\"Train the neural network using mini-batch stochastic\n",
    "        gradient descent. The ``training_data`` is a list of tuples\n",
    "        ``(x, y)`` representing the training inputs and the desired\n",
    "        outputs. The other non-optional parameters are\n",
    "        self-explanatory. If ``test_data`` is provided then the\n",
    "        network will be evaluated against the test data after each\n",
    "        epoch, and partial progress printed out. This is useful for\n",
    "        tracking progress, but slows things down substantially.\"\"\"\n",
    "        training_data = list(training_data)\n",
    "        test_data = list(test_data)\n",
    "        if test_data: n_test = len(test_data)\n",
    "        n = len(training_data)\n",
    "        for j in range(epochs):\n",
    "            random.shuffle(training_data)\n",
    "            mini_batches = [\n",
    "                training_data[k:k+mini_batch_size]\n",
    "                for k in range(0, n, mini_batch_size)]\n",
    "            for mini_batch in mini_batches:\n",
    "                self.update_mini_batch(mini_batch, eta)\n",
    "            if test_data:\n",
    "                print( \"Epoch {0}: {1} / {2}\".format(\n",
    "                    j, self.evaluate(test_data), n_test))\n",
    "            else:\n",
    "                print( \"Epoch {0} complete\".format(j))\n",
    "\n",
    "    def update_mini_batch(self, mini_batch, eta):\n",
    "        \"\"\"Update the network's weights and biases by applying\n",
    "        gradient descent using backpropagation to a single mini batch.\n",
    "        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``\n",
    "        is the learning rate.\"\"\"\n",
    "        nabla_b = [np.zeros(b.shape) for b in self.biases]\n",
    "        nabla_w = [np.zeros(w.shape) for w in self.weights]\n",
    "        for x, y in mini_batch:\n",
    "            delta_nabla_b, delta_nabla_w = self.backprop(x, y)\n",
    "            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]\n",
    "            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]\n",
    "        self.weights = [w-(eta/len(mini_batch))*nw\n",
    "                        for w, nw in zip(self.weights, nabla_w)]\n",
    "        self.biases = [b-(eta/len(mini_batch))*nb\n",
    "                       for b, nb in zip(self.biases, nabla_b)]\n",
    "\n",
    "    def backprop(self, x, y):\n",
    "        \"\"\"Return a tuple ``(nabla_b, nabla_w)`` representing the\n",
    "        gradient for the cost function C_x. ``nabla_b`` and\n",
    "        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar\n",
    "        to ``self.biases`` and ``self.weights``.\"\"\"\n",
    "        nabla_b = [np.zeros(b.shape) for b in self.biases]\n",
    "        nabla_w = [np.zeros(w.shape) for w in self.weights]\n",
    "        # feedforward\n",
    "        activation = x\n",
    "        activations = [x] # list to store all the activations, layer by layer\n",
    "        zs = [] # list to store all the z vectors, layer by layer\n",
    "        for b, w in zip(self.biases, self.weights):\n",
    "            z = np.dot(w, activation)+b\n",
    "            zs.append(z)\n",
    "            activation = sigmoid(z)\n",
    "            activations.append(activation)\n",
    "        # backward pass\n",
    "        delta = self.cost_derivative(activations[-1], y) * \\\n",
    "            sigmoid_prime(zs[-1])\n",
    "        nabla_b[-1] = delta\n",
    "        nabla_w[-1] = np.dot(delta, activations[-2].transpose())\n",
    "        # Note that the variable l in the loop below is used a little\n",
    "        # differently to the notation in Chapter 2 of the book. Here,\n",
    "        # l = 1 means the last layer of neurons, l = 2 is the\n",
    "        # second-last layer, and so on. It's a renumbering of the\n",
    "        # scheme in the book, used here to take advantage of the fact\n",
    "        # that Python can use negative indices in lists.\n",
    "        for l in range(2, self.num_layers):\n",
    "            z = zs[-l]\n",
    "            sp = sigmoid_prime(z)\n",
    "            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp\n",
    "            nabla_b[-l] = delta\n",
    "            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())\n",
    "        return (nabla_b, nabla_w)\n",
    "\n",
    "    def evaluate(self, test_data):\n",
    "        \"\"\"Return the number of test inputs for which the neural\n",
    "        network outputs the correct result. Note that the neural\n",
    "        network's output is assumed to be the index of whichever\n",
    "        neuron in the final layer has the highest activation.\"\"\"\n",
    "        test_results = [(np.argmax(self.feedforward(x)), y)\n",
    "                        for (x, y) in test_data]\n",
    "        return sum(int(x == y) for (x, y) in test_results)\n",
    "\n",
    "    def cost_derivative(self, output_activations, y):\n",
    "        \"\"\"Return the vector of partial derivatives \\\\partial C_x /\n",
    "        \\\\partial a for the output activations.\"\"\"\n",
    "        return (output_activations-y)\n",
    "\n",
    "#### Miscellaneous functions\n",
    "def sigmoid(z):\n",
    "    \"\"\"The sigmoid function.\"\"\"\n",
    "    return 1.0/(1.0+np.exp(-z))\n",
    "\n",
    "def sigmoid_prime(z):\n",
    "    \"\"\"Derivative of the sigmoid function.\"\"\"\n",
    "    return sigmoid(z)*(1-sigmoid(z))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def vectorized_result(j):\n",
    "    \"\"\"Return a 10-dimensional unit vector with a 1.0 in the jth\n",
    "    position and zeroes elsewhere. This is used to convert a digit\n",
    "    (0...9) into a corresponding desired output from the neural\n",
    "    network.\"\"\"\n",
    "    e = np.zeros((10, 1))\n",
    "    e[j] = 1.0\n",
    "    return e"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "net = Network([784, 30, 10])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "training_inputs = [np.reshape(x, (784, 1)) for x in X_train.copy()]\n",
    "training_results = [vectorized_result(y) for y in y_train.copy()]\n",
    "training_data = zip(training_inputs, training_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "test_inputs = [np.reshape(x, (784, 1)) for x in X_test.copy()]\n",
    "test_data = zip(test_inputs, y_test.copy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0: 1348 / 10000\n",
      "Epoch 1: 1939 / 10000\n",
      "Epoch 2: 2046 / 10000\n",
      "Epoch 3: 1422 / 10000\n",
      "Epoch 4: 1365 / 10000\n",
      "Epoch 5: 1351 / 10000\n",
      "Epoch 6: 1879 / 10000\n",
      "Epoch 7: 1806 / 10000\n",
      "Epoch 8: 1754 / 10000\n",
      "Epoch 9: 1974 / 10000\n"
     ]
    }
   ],
   "source": [
    "net.SGD(training_data, 10, 10, 3.0, test_data=test_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0: 3526 / 10000\n",
      "Epoch 1: 3062 / 10000\n",
      "Epoch 2: 2946 / 10000\n",
      "Epoch 3: 2462 / 10000\n",
      "Epoch 4: 3617 / 10000\n",
      "Epoch 5: 3773 / 10000\n",
      "Epoch 6: 3568 / 10000\n",
      "Epoch 7: 4459 / 10000\n",
      "Epoch 8: 3009 / 10000\n",
      "Epoch 9: 2660 / 10000\n"
     ]
    }
   ],
   "source": [
    "net = Network([784, 10, 10])\n",
    "\n",
    "training_inputs = [np.reshape(x, (784, 1)) for x in X_train.copy()]\n",
    "training_results = [vectorized_result(y) for y in y_train.copy()]\n",
    "training_data = zip(training_inputs, training_results)\n",
    "\n",
    "test_inputs = [np.reshape(x, (784, 1)) for x in X_test.copy()]\n",
    "test_data = zip(test_inputs, y_test.copy())\n",
    "\n",
    "net.SGD(training_data, 10, 10, 1.0, test_data=test_data)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
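The last two cells of the committed notebook pass the raw keras arrays straight into Network.SGD. As a point of comparison only, here is a minimal usage sketch that is not part of the commit: it assumes the Network class, vectorized_result, and the keras-loaded X_train, y_train, X_test, y_test from the notebook are in scope, and it adds one step the notebook does not perform, scaling the 0-255 integer pixels to [0, 1] before building the (x, y) column-vector pairs that Network.SGD expects.

# Usage sketch (assumption: Network, vectorized_result, X_train, y_train,
# X_test, y_test from the notebook above are already defined).
import numpy as np

def to_columns(X):
    # Flatten each 28x28 image into a (784, 1) float column scaled to [0, 1];
    # the scaling is an addition here, not part of the committed notebook.
    return [np.reshape(x, (784, 1)) / 255.0 for x in X]

training_data = list(zip(to_columns(X_train),
                         [vectorized_result(y) for y in y_train]))
test_data = list(zip(to_columns(X_test), y_test))  # integer labels for evaluate()

net = Network([784, 30, 10])
net.SGD(training_data, epochs=10, mini_batch_size=10, eta=3.0, test_data=test_data)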