Commit 4cab97d: Buggy metrics
csxeba committed Jun 24, 2019
1 parent f12e5ab commit 4cab97d
Showing 29 changed files with 242 additions and 143 deletions.
4 changes: 2 additions & 2 deletions brainforge/atomic/core_op.py
@@ -25,8 +25,8 @@ class ReshapeOp:
     type = "Reshape"

     @staticmethod
-    def forward(X, outshape):
-        return X.reshape(X.shape[0], *outshape)
+    def forward(X: np.ndarray, outshape: tuple):
+        return X.reshape(-1, *outshape)

     @staticmethod
     def backward(E, inshape):
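The forward change swaps the explicit batch size for an inferred leading dimension. A quick sanity check of the equivalence (my own sketch, not repository code):

    import numpy as np

    X = np.arange(24).reshape(4, 6)          # batch of 4 flat samples
    outshape = (2, 3)

    old = X.reshape(X.shape[0], *outshape)   # removed form: explicit batch size
    new = X.reshape(-1, *outshape)           # added form: numpy infers the batch
    assert np.array_equal(old, new)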
25 changes: 16 additions & 9 deletions brainforge/atomic/tensor_op.py
@@ -20,17 +20,15 @@ def valid(A, F):
             err += "input depth: {} != {} :filter depth".format(ic, fc)
             raise ValueError(err)

-        for i, pic in enumerate(A):
-            for sy in range(oy):
-                for sx in range(ox):
-                    rfields[i][sy*ox + sx] = pic[:, sy:sy+fy, sx:sx+fx].ravel()
+        for i, sy, sx in ((idx, shy, shx) for shx in range(ox) for shy in range(oy) for idx in range(im)):
+            rfields[i][sy*ox + sx] = A[i, :, sy:sy+fy, sx:sx+fx].ravel()

-        output = np.zeros((im, oy*ox, nf))
-        for m in range(im):
-            output[m] = np.dot(rfields[m], Frsh.T)
-        # output = np.matmul(rfields, F.reshape(nf, recfield_size).T)
-        output = output.transpose((0, 2, 1)).reshape(im, nf, oy, ox)
+        # output = np.zeros((im, oy*ox, nf))
+        # for m in range(im):
+        #     output[m] = np.dot(rfields[m], Frsh.T)
+        output = np.matmul(rfields, Frsh.T)
+        output = output.transpose((0, 2, 1)).reshape((im, nf, oy, ox))
         return output

     @staticmethod
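The rewrite above replaces the per-sample np.dot loop with one batched np.matmul, which broadcasts the 2-D filter matrix across the leading batch axis of the 3-D receptive-field array. A minimal sketch of that equivalence with stand-in shapes (names mirror the diff's variables, values are made up):

    import numpy as np

    im, n_patches, recfield, nf = 4, 9, 12, 5
    rfields = np.random.randn(im, n_patches, recfield)  # stacked receptive fields
    Frsh = np.random.randn(nf, recfield)                # flattened filter bank

    looped = np.zeros((im, n_patches, nf))
    for m in range(im):                                 # removed formulation
        looped[m] = np.dot(rfields[m], Frsh.T)

    batched = np.matmul(rfields, Frsh.T)                # added formulation
    assert np.allclose(looped, batched)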
@@ -82,6 +80,15 @@ class MaxPoolOp:
     def __str__(self):
         return "MaxPool"

+    @staticmethod
+    def predict(A):
+        return np.max([
+            A[:, :, 0::2, 0::2],
+            A[:, :, 0::2, 1::2],
+            A[:, :, 1::2, 0::2],
+            A[:, :, 1::2, 1::2],
+        ], axis=0)
+
     @staticmethod
     def forward(A, fdim):
         im, ic, iy, ix = A.shape
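The new predict path pools by stacking the four stride-2 shifts of the input and taking their element-wise max, avoiding any Python loop over spatial positions; it implicitly assumes even height and width and a fixed 2x2 window. A cross-check against a naive pooling loop (my sketch, not repository code):

    import numpy as np

    def predict(A):
        return np.max([
            A[:, :, 0::2, 0::2],
            A[:, :, 0::2, 1::2],
            A[:, :, 1::2, 0::2],
            A[:, :, 1::2, 1::2],
        ], axis=0)

    def naive_maxpool2x2(A):
        m, c, y, x = A.shape
        out = np.empty((m, c, y // 2, x // 2))
        for i in range(0, y, 2):
            for j in range(0, x, 2):
                out[:, :, i // 2, j // 2] = A[:, :, i:i + 2, j:j + 2].max(axis=(2, 3))
        return out

    A = np.random.randn(2, 3, 8, 8)
    assert np.allclose(predict(A), naive_maxpool2x2(A))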
5 changes: 0 additions & 5 deletions brainforge/cost/__init__.py

This file was deleted.

29 changes: 18 additions & 11 deletions brainforge/layers/tensor.py
@@ -1,3 +1,4 @@
+from brainforge.util import emptyX
 from .abstract_layer import LayerBase, NoParamMixin
 from ..util import zX, zX_like, white

@@ -25,21 +26,10 @@ def connect(self, brain):
         self.output = zX(ic, iy // self.fdim, ix // self.fdim)

     def feedforward(self, questions):
-        """
-        Implementation of a max pooling layer.
-        :param questions: numpy.ndarray, a batch of outsize from the previous layer
-        :return: numpy.ndarray, max pooled batch
-        """
         self.output, self.filter = self.op.forward(questions, self.fdim)
         return self.output

     def backpropagate(self, delta):
-        """
-        Calculates the error of the previous layer.
-        :param delta:
-        :return: numpy.ndarray, the errors of the previous layer
-        """
         return self.op.backward(delta, self.filter)

     @property
@@ -101,3 +91,20 @@ def outshape(self):

     def __str__(self):
         return "Conv({}x{}x{})-{}".format(self.nfilters, self.fy, self.fx, str(self.activation)[:4])
+
+
+class GlobalAveragePooling(NoParamMixin, LayerBase):
+
+    def __init__(self):
+        super().__init__()
+        self.dynamic_input_shape = None
+
+    def feedforward(self, X):
+        self.dynamic_input_shape = X.shape
+        return X.mean(axis=(2, 3))
+
+    def backpropagate(self, delta):
+        canvas = emptyX(*self.inputs.shape)
+        nxy = self.dynamic_input_shape[-2] * self.dynamic_input_shape[-1]
+        for mm, cc in ((m, c) for c in range(delta.shape[1]) for m in range(delta.shape[0])):
+            canvas.flat[mm, cc] = delta[mm, cc] / nxy
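For a global average pool, the gradient of the mean sends delta / (H*W) to every spatial position of the matching sample and channel, which is what the loop above appears to aim for; note though that indexing canvas.flat with a (sample, channel) pair sets individual flat elements rather than a spatial plane, and the method never returns canvas, loose ends the commit title seems to acknowledge. A broadcast-based sketch of the intended gradient, outside the layer API:

    import numpy as np

    def global_average_pool_backward(delta, input_shape):
        # delta: (batch, channels) -> gradient with shape input_shape
        m, c, h, w = input_shape
        # each pixel contributed 1/(h*w) to its channel mean
        return np.broadcast_to(delta[:, :, None, None] / (h * w), input_shape).copy()

    delta = np.ones((2, 3))
    grad = global_average_pool_backward(delta, (2, 3, 4, 4))
    assert np.allclose(grad.sum(axis=(2, 3)), delta)  # per-channel mass is preserved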
116 changes: 55 additions & 61 deletions brainforge/learner/abstract_learner.py
@@ -1,10 +1,6 @@
 import abc

-import numpy as np
-
 from ..model.layerstack import LayerStack
-from ..cost import costs, CostFunction
-from ..util import batch_stream
+from ..metrics import costs as _costs, metrics as _metrics
+from ..util import batch_stream, logging


 class Learner:
@@ -17,83 +13,81 @@ def __init__(self, layerstack, cost="mse", name="", **kw):
         self.layers = layerstack
         self.name = name
         self.age = 0
-        self.cost = cost if isinstance(cost, CostFunction) else costs[cost]
+        self.cost = _costs.get(cost)

-    def fit_generator(self, generator, lessons_per_epoch, epochs=30, classify=True, validation=(), verbose=1, **kw):
-        epcosts = []
+    def fit_generator(self, generator, lessons_per_epoch, epochs=30, metrics=(), validation=(), verbose=1, **kw):
+        metrics = [_metrics.get(metric) for metric in metrics]
+        history = logging.MetricLogs.from_metric_list(lessons_per_epoch, ("cost",), metrics)
         lstr = len(str(epochs))
         for epoch in range(1, epochs+1):
             if verbose:
                 print("Epoch {:>{w}}/{}".format(epoch, epochs, w=lstr))
-            epcosts += self.epoch(generator, no_lessons=lessons_per_epoch, classify=classify,
-                                  validation=validation, verbose=verbose, **kw)
-        return epcosts
+            epoch_history = self.epoch(generator, updates_per_epoch=lessons_per_epoch, metrics=metrics,
+                                       validation=validation, verbose=verbose, **kw)
+            history.update(epoch_history)
+
+        return history

-    def fit(self, X, Y, batch_size=20, epochs=30, classify=True, validation=(), verbose=1, shuffle=True, **kw):
+    def fit(self, X, Y, batch_size=20, epochs=30, metrics=(), validation=(), verbose=1, shuffle=True, **kw):
+        metrics = [_metrics.get(metric) for metric in metrics]
         datastream = batch_stream(X, Y, m=batch_size, shuffle=shuffle)
-        return self.fit_generator(datastream, len(X), epochs, classify, validation, verbose, **kw)
+        return self.fit_generator(datastream, len(X) // batch_size, epochs, metrics, validation, verbose, **kw)

-    def epoch(self, generator, no_lessons, classify=True, validation=None, verbose=1, **kw):
-        losses = []
+    def epoch(self, generator, updates_per_epoch, metrics=(), validation=None, verbose=1, **kw):
+        metrics = [_metrics.get(metric) for metric in metrics]
+        history = logging.MetricLogs.from_metric_list(updates_per_epoch, ["cost"], metrics)
         done = 0

         self.layers.learning = True
-        while done < no_lessons:
+        batch_size = 0
+        for i in range(updates_per_epoch):
             batch = next(generator)
-            cost = self.learn_batch(*batch, **kw)
-            losses.append(cost)
-
-            done += len(batch[0])
+            batch_size = len(batch[0])
+            epoch_metrics = self.learn_batch(*batch, metrics=metrics, **kw)
+            history.record(epoch_metrics)
             if verbose:
-                print("\rDone: {0:>6.1%} Cost: {1: .5f}\t "
-                      .format(done/no_lessons, np.mean(losses)), end="")
+                history.log(prefix="\r", end="")

         self.layers.learning = False
-        if verbose:
-            print("\rDone: {0:>6.1%} Cost: {1: .5f}\t ".format(1., np.mean(losses)), end="")
-            if validation:
-                self._print_progress(validation, classify)
+        if verbose and validation:
+            history = self.evaluate(*validation, batch_size=batch_size, metrics=metrics)
+            history.log(prefix=" ", suffix="")
             print()

-        self.age += no_lessons
-        return losses
-
-    def _print_progress(self, validation, classify):
-        results = self.evaluate(*validation, classify=classify)
-
-        chain = "Testing cost: {0:.5f}"
-        if classify:
-            tcost, tacc = results
-            accchain = " accuracy: {0:.2%}".format(tacc)
-        else:
-            tcost = results
-            accchain = ""
-        print(chain.format(tcost) + accchain, end="")
+        self.age += updates_per_epoch
+        return history

     def predict(self, X):
         return self.layers.feedforward(X)

-    def evaluate(self, X, Y, batch_size=32, classify=True, shuffle=False, verbose=False):
+    def evaluate_batch(self, x, y, metrics=()):
+        m = len(x)
+        preds = self.predict(x)
+        eval_metrics = {"cost": self.cost(self.output, y) / m}
+        if metrics:
+            for metric in metrics:
+                eval_metrics[str(metric).lower()] = metric(preds, y) / m
+        return eval_metrics
+
+    def evaluate(self, X, Y, batch_size=32, metrics=(), verbose=False):
+        metrics = [_metrics.get(metric) for metric in metrics]
         N = X.shape[0]
-        batches = batch_stream(X, Y, m=batch_size, shuffle=shuffle, infinite=False)
+        batch_size = min(batch_size, N)
+        steps = int(round(N / batch_size))
+        history = logging.MetricLogs.from_metric_list(steps, ["cost"], metrics)

-        cost, acc = [], []
-        for bno, (x, y) in enumerate(batches, start=1):
+        for x, y in batch_stream(X, Y, m=batch_size, shuffle=False, infinite=False):
+            eval_metrics = self.evaluate_batch(x, y, metrics)
+            history.record(eval_metrics)
             if verbose:
-                print("\rEvaluating: {:>7.2%}".format((bno*batch_size) / N), end="")
-            pred = self.predict(x)
-            cost.append(self.cost(pred, y) / len(x))
-            if classify:
-                pred_classes = np.argmax(pred, axis=1)
-                trgt_classes = np.argmax(y, axis=1)
-                eq = np.equal(pred_classes, trgt_classes)
-                acc.append(eq.mean())
-        results = np.mean(cost)
-        if classify:
-            results = (results, np.mean(acc))
-        return results
+                history.log("\r", end="")

-    @abc.abstractmethod
-    def learn_batch(self, X, Y, **kw):
+        if verbose:
+            print()
+        history.reduce_mean()
+        return history
+
+    def learn_batch(self, X, Y, metrics=(), **kw) -> dict:
         raise NotImplementedError

     @property
@@ -106,7 +100,7 @@ def outshape(self):

     @property
     def num_params(self):
-        return self.layers.nparams
+        return self.layers.num_params

     @property
     def trainable_layers(self):
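The reworked Learner leans on a logging.MetricLogs helper whose source is not among the diffs shown on this page. Judging only from the calls made here (from_metric_list, record, update, log, reduce_mean), it accumulates per-update metric values and prints running means. A speculative minimal sketch of that protocol, not the actual brainforge.util.logging implementation:

    import numpy as np

    class MetricLogs:

        def __init__(self, keys):
            self.logs = {key: [] for key in keys}

        @classmethod
        def from_metric_list(cls, updates_per_epoch, keys, metrics):
            # updates_per_epoch is accepted for parity; a real implementation
            # might use it to preallocate storage
            return cls(list(keys) + [str(metric).lower() for metric in metrics])

        def record(self, metric_dict):
            for key, value in metric_dict.items():
                self.logs[key].append(value)

        def update(self, other):
            for key, values in other.logs.items():
                self.logs.setdefault(key, []).extend(values)

        def reduce_mean(self):
            self.logs = {key: [np.mean(values)] for key, values in self.logs.items()}

        def log(self, prefix="", suffix="\t", end=""):
            chain = " ".join("{}: {:.5f}".format(key, np.mean(values))
                             for key, values in self.logs.items() if values)
            print(prefix + chain + suffix, end=end)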
10 changes: 7 additions & 3 deletions brainforge/learner/backpropagation.py
@@ -1,7 +1,7 @@
 import numpy as np

 from .abstract_learner import Learner
-from brainforge.optimization import optimizers, GradientDescent
+from ..optimizers import optimizers, GradientDescent


 class BackpropNetwork(Learner):
@@ -13,15 +13,19 @@ def __init__(self, layerstack, cost="mse", optimizer="sgd", name="", **kw):
         )
         self.optimizer.initialize(nparams=self.layers.num_params)

-    def learn_batch(self, X, Y, w=None):
+    def learn_batch(self, X, Y, w=None, metrics=()):
         m = len(X)
         preds = self.predict(X)
         delta = self.cost.derivative(preds, Y)
         if w is not None:
             delta *= w[:, None]
         self.backpropagate(delta)
         self.update(m)
-        return self.cost(self.output, Y) / m
+        train_metrics = {"cost": self.cost(self.output, Y) / m}
+        if metrics:
+            for metric in metrics:
+                train_metrics[str(metric).lower()] = metric(preds, Y) / m
+        return train_metrics

     def backpropagate(self, error):
         for layer in self.layers[-1:0:-1]:
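With learn_batch now returning a dict keyed by metric name, fit and evaluate hand back metric histories rather than raw loss lists. A hypothetical usage sketch (the LayerStack construction, the data arrays, and the "accuracy" registry string are assumptions; the accuracy metric itself is exported by brainforge.metrics in this commit):

    from brainforge.learner.backpropagation import BackpropNetwork

    # stack: a constructed LayerStack; X, Y, Xval, Yval: one-hot classification data
    net = BackpropNetwork(stack, cost="cxent", optimizer="sgd")
    history = net.fit(X, Y, batch_size=32, epochs=10,
                      metrics=("accuracy",), validation=(Xval, Yval))
    batch_metrics = net.learn_batch(X[:32], Y[:32])  # e.g. {"cost": 2.1731}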
3 changes: 3 additions & 0 deletions brainforge/metrics/__init__.py
@@ -0,0 +1,3 @@
+from .costs import mean_squared_error, categorical_crossentropy, binary_crossentropy, hinge
+from .costs import mse, cxent, bxent
+from .metrics import accuracy
33 changes: 21 additions & 12 deletions brainforge/cost/_costs.py → brainforge/metrics/costs.py
@@ -1,5 +1,3 @@
-import abc
-
 import numpy as np

 from ..util.typing import scalX
@@ -10,7 +8,7 @@
 s2 = scalX(2.)


-class CostFunction(abc.ABC):
+class CostFunction:

     def __call__(self, outputs, targets): pass

@@ -22,7 +20,7 @@ def derivative(outputs, targets):
         return outputs - targets


-class MeanSquaredError(CostFunction):
+class _MeanSquaredError(CostFunction):

     def __call__(self, outputs, targets):
         return s05 * np.linalg.norm(outputs - targets) ** s2
@@ -32,7 +30,7 @@ def true_derivative(outputs, targets):
         return outputs - targets


-class CategoricalCrossEntropy(CostFunction):
+class _CategoricalCrossEntropy(CostFunction):

     def __call__(self, outputs: np.ndarray, targets: np.ndarray):
         return -(targets * np.log(outputs)).sum()
@@ -44,7 +42,7 @@ def true_derivative(outputs, targets):
         return enum / denom


-class BinaryCrossEntropy(CostFunction):
+class _BinaryCrossEntropy(CostFunction):

     def __call__(self, outputs: np.ndarray, targets: np.ndarray):
         return -(targets * np.log(outputs) + (s1 - targets) * np.log(s1 - outputs)).sum()
@@ -54,7 +52,7 @@ def true_derivative(outputs, targets):
         raise NotImplementedError


-class Hinge(CostFunction):
+class _Hinge(CostFunction):

     def __call__(self, outputs, targets):
         return (np.maximum(s0, s1 - targets * outputs)).sum()
@@ -70,11 +68,22 @@ def derivative(outputs, targets):
         return out


-mean_squared_error = MeanSquaredError()
-categorical_crossentropy = CategoricalCrossEntropy()
-binary_crossentropy = BinaryCrossEntropy()
-hinge = Hinge()
+mean_squared_error = _MeanSquaredError()
+categorical_crossentropy = _CategoricalCrossEntropy()
+binary_crossentropy = _BinaryCrossEntropy()
+hinge = _Hinge()

 mse = mean_squared_error
 cxent = categorical_crossentropy
-bxent = binary_crossentropy
\ No newline at end of file
+bxent = binary_crossentropy
+
+_costs = {k: v for k, v in locals().items() if k[0] != "_" and k != "CostFunction"}
+
+
+def get(cost_function):
+    if isinstance(cost_function, CostFunction):
+        return cost_function
+    cost = _costs.get(cost_function)
+    if cost is None:
+        raise ValueError("No such cost function: {}".format(cost))
+    return cost
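Cost lookup now goes through a small registry plus the get resolver above. Two quirks in this version worth noting: the locals()-based comprehension also sweeps up non-cost module names such as np, scalX, and the s* scalars, and the ValueError formats cost, which is None on that branch, rather than the requested name. A short usage sketch of the resolver:

    from brainforge.metrics import costs

    mse_fn = costs.get("mse")                  # string/alias lookup
    assert mse_fn is costs.mean_squared_error  # returns the module-level singleton
    assert costs.get(mse_fn) is mse_fn         # CostFunction instances pass through

    try:
        costs.get("no-such-cost")
    except ValueError as err:
        print(err)  # as written, prints: No such cost function: None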
(Diffs for the remaining 21 changed files are not shown.)