diff --git a/layers/abstract_layer.py b/layers/abstract_layer.py
index f314047..a71a35d 100644
--- a/layers/abstract_layer.py
+++ b/layers/abstract_layer.py
@@ -66,7 +66,7 @@ def gradients(self):
         return self.get_gradients(unfold=True)
 
     @property
-    def nparams(self):
+    def num_params(self):
         return self.weights.size + self.biases.size
 
     @abc.abstractmethod
diff --git a/learner/__init__.py b/learner/__init__.py
index 20f41fd..0e2ed76 100644
--- a/learner/__init__.py
+++ b/learner/__init__.py
@@ -2,3 +2,4 @@
 from .neuroevolution import NeuroEvolution
 from .abstract_learner import Learner
 from .feedback_alignment import DirectFeedbackAlignment
+from .extreme_learning_machine import ExtremeLearningMachine
diff --git a/learner/backpropagation.py b/learner/backpropagation.py
index 0e9b270..8b32ba8 100644
--- a/learner/backpropagation.py
+++ b/learner/backpropagation.py
@@ -11,7 +11,7 @@ def __init__(self, layerstack, cost="mse", optimizer="sgd", name="", **kw):
         self.optimizer = (
             optimizer if isinstance(optimizer, GradientDescent) else optimizers[optimizer]()
         )
-        self.optimizer.initialize(nparams=self.layers.nparams)
+        self.optimizer.initialize(nparams=self.layers.num_params)
 
     def learn_batch(self, X, Y, w=None):
         m = len(X)
@@ -49,4 +49,4 @@ def get_gradients(self, unfold=True):
 
     @property
-    def nparams(self):
-        return self.layers.nparams
+    def num_params(self):
+        return self.layers.num_params
diff --git a/learner/extreme_learning_machine.py b/learner/extreme_learning_machine.py
new file mode 100644
index 0000000..7ec93b8
--- /dev/null
+++ b/learner/extreme_learning_machine.py
@@ -0,0 +1,34 @@
+import numpy as np
+from .abstract_learner import Learner
+
+
+class ExtremeLearningMachine(Learner):
+
+    def __init__(self, layers, cost="mse", name="", solve_mode="pseudoinverse", **kw):
+        super().__init__(layers, cost, name, **kw)
+        self.solve = {
+            "pseudoinverse": self.solve_with_pseudo_inverse,
+            "covariance": self.solve_with_covariance_matrices,
+            "correlation": self.solve_with_covariance_matrices
+        }[solve_mode]
+        for layer in layers[:-1]:
+            layer.trainable = False
+
+    def solve_with_pseudo_inverse(self, Z, Y):
+        # Least-squares fit of the output weights: Wo = pinv(Z) @ Y
+        A = np.linalg.pinv(Z)
+        Wo = A @ Y
+        self.layers[-1].set_weights([Wo, np.array([0] * self.layers[-1].neurons)], fold=False)
+
+    def solve_with_covariance_matrices(self, Z, Y):
+        A = np.cov(Z.T)
+        # np.cov(Z.T, Y.T) returns the joint covariance matrix; slice out the Z-Y cross-covariance block.
+        B = np.cov(Z.T, Y.T)[:Z.shape[1], Z.shape[1]:]
+        W = np.linalg.inv(A) @ B
+        self.layers[-1].set_weights([W, np.array([0] * self.layers[-1].neurons)], fold=False)
+
+    def learn_batch(self, X, Y, **kw):
+        H = X.copy()
+        for layer in self.layers[:-1]:
+            H = layer.feedforward(H)
+        self.solve(H, Y)
diff --git a/learner/feedback_alignment.py b/learner/feedback_alignment.py
index 548a39f..70f8b96 100644
--- a/learner/feedback_alignment.py
+++ b/learner/feedback_alignment.py
@@ -8,12 +8,19 @@ class DirectFeedbackAlignment(BackpropNetwork):
 
     def __init__(self, layerstack, cost, optimizer, name="", **kw):
         super().__init__(layerstack, cost, optimizer, name, **kw)
-        self.backwards_weights = [white(self.outshape[0], np.prod(layer.outshape))
-                                  for layer in self.trainable_layers[:-1]]
+        self.backwards_weights = np.concatenate(
+            [white(self.outshape[0], np.prod(layer.outshape))
+             for layer in self.trainable_layers[:-1]],
+            axis=1)
 
     def backpropagate(self, error):
         m = len(error)
         self.layers[-1].backpropagate(error)
-        for layer, weight in zip(list(self.trainable_layers)[:-1], self.backwards_weights):
-            delta = error @ weight
+        all_deltas = error @ self.backwards_weights  # (m, net_out) @ (net_out, sum(layer_outs)) -> (m, sum(layer_outs))
+        start = 0
+        for layer in self.trainable_layers[:-1]:
+            num_deltas = np.prod(layer.outshape)
+            end = start + num_deltas
+            delta = all_deltas[:, start:end]
             layer.backpropagate(delta.reshape((m,) + layer.outshape))
+            start = end
diff --git a/learner/neuroevolution.py b/learner/neuroevolution.py
index 3742d69..f103e5e 100644
--- a/learner/neuroevolution.py
+++ b/learner/neuroevolution.py
@@ -10,7 +10,7 @@ def __init__(self, layerstack, cost="mse", population_size=100, name="", **kw):
         fw = kw.pop("fitness_weights", [1.])
         oa = kw.pop("on_accuracy", False)
         self.population = Population(
-            loci=self.layers.nparams,
+            loci=self.layers.num_params,
             fitness_function=ff,
             fitness_weights=fw,
             limit=population_size, **kw
diff --git a/model/graph.py b/model/graph.py
index 8c12c33..bb658dc 100644
--- a/model/graph.py
+++ b/model/graph.py
@@ -23,7 +23,7 @@ def outshape(self):
 
     @property
-    def nparams(self):
-        return sum(node.nparams for node in self.nodes)
+    def num_params(self):
+        return sum(node.num_params for node in self.nodes)
 
     def get_weights(self, unfold=True):
         return []
diff --git a/model/layerstack.py b/model/layerstack.py
index ea8bcff..30e75d5 100644
--- a/model/layerstack.py
+++ b/model/layerstack.py
@@ -52,7 +52,7 @@ def set_weights(self, ws, fold=True):
         if fold:
             start = 0
             for layer in trl:
-                end = start + layer.nparams
+                end = start + layer.num_params
                 layer.set_weights(ws[start:end])
                 start = end
         else:
@@ -72,7 +72,7 @@ def outshape(self):
 
     @property
-    def nparams(self):
-        return sum(layer.nparams for layer in self.layers if layer.trainable)
+    def num_params(self):
+        return sum(layer.num_params for layer in self.layers if layer.trainable)
 
     @property
     def output(self):
diff --git a/reinforcement/abstract_agent.py b/reinforcement/abstract_agent.py
index 3bf2556..53d6987 100644
--- a/reinforcement/abstract_agent.py
+++ b/reinforcement/abstract_agent.py
@@ -30,8 +30,8 @@ def sample(self, state, reward):
     def accumulate(self, state, reward):
         raise NotImplementedError
 
-    def learn_batch(self):
-        X, Y = self.xp.replay(self.cfg.bsize)
+    def learn_batch(self, batch_size=None):
+        X, Y = self.xp.replay(batch_size or self.cfg.bsize)
         N = len(X)
         if N < self.cfg.bsize:
             return 0.
diff --git a/reinforcement/policygradient.py b/reinforcement/policygradient.py
index a955a75..dc3b0a6 100644
--- a/reinforcement/policygradient.py
+++ b/reinforcement/policygradient.py
@@ -16,7 +16,7 @@ def __init__(self, network, nactions, agentconfig=None, **kw):
         self.X = []
         self.Y = []
         self.rewards = []
-        self.grad = np.zeros((network.nparams,))
+        self.grad = np.zeros((network.num_params,))
 
     def reset(self):
         self.X = []
diff --git a/reinforcement/qlearning.py b/reinforcement/qlearning.py
index a05b600..0d684a0 100644
--- a/reinforcement/qlearning.py
+++ b/reinforcement/qlearning.py
@@ -40,13 +40,11 @@ def accumulate(self, state, reward):
         Q = np.stack(self.predictions[1:] + [q], axis=0)
         R = np.array(self.rewards[1:] + [reward])
         ix = tuple(self.actions)
-        Y = Q.copy()
+        Y = Q
         Y[range(len(Y)), ix] = -(R + Y.max(axis=1) * self.cfg.gamma)
         Y[-1, ix[-1]] = -reward
         self.xp.remember(X, Y)
         self.reset()
-        cost = self.learn_batch()
-        return cost
 
 
 class DDQN(DQN):
diff --git a/xperiments/xp_elm.py b/xperiments/xp_elm.py
index 8cab467..831e743 100644
--- a/xperiments/xp_elm.py
+++ b/xperiments/xp_elm.py
@@ -1,19 +1,37 @@
 import numpy as np
-from csxdata.utilities.loader import pull_mnist_data
+from keras.datasets import mnist
 
 from brainforge import LayerStack
 from brainforge.layers import DenseLayer
-from brainforge.learner.elm import ExtremeLearningMachine
+from brainforge.learner.extreme_learning_machine import ExtremeLearningMachine
 
-lX, lY, tX, tY = pull_mnist_data()
+
+def pull_mnist(split=0.1):
+    learning, testing = mnist.load_data()
+    X = np.concatenate([learning[0], testing[0]]).astype("float32")
+    Y = np.concatenate([learning[1], testing[1]]).astype("uint8")
+    X -= X.mean()
+    X /= X.std()
+    X = X.reshape(-1, 784)
+    Y = np.eye(10)[Y]
+
+    if split:
+        arg = np.arange(len(X))
+        np.random.shuffle(arg)
+        div = int(len(X) * split)
+        targ, larg = arg[:div], arg[div:]
+        return X[larg], Y[larg], X[targ], Y[targ]
+
+    return X, Y
+
 
 layers = LayerStack(input_shape=(784,), layers=[
     DenseLayer(60, activation="tanh", trainable=False),
     DenseLayer(10, activation="linear", trainable=True)
 ])
 
+lX, lY, tX, tY = pull_mnist(0.1)
+
 elm = ExtremeLearningMachine(layers, cost="mse")
-elm.learn_batch(tX, tY)
+elm.learn_batch(lX, lY)
 
 pred = elm.predict(tX)
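
Reviewer note: a minimal self-contained NumPy sketch of what the new ExtremeLearningMachine does, for context. The hidden layer is a fixed random projection and only the output weights are fit in closed form via the pseudoinverse, as in solve_with_pseudo_inverse. The shapes and the tanh activation mirror the xp_elm experiment; the random data and all names here are illustrative only, not part of the brainforge API.

import numpy as np

rng = np.random.default_rng(0)

m, n_in, n_hidden, n_out = 256, 784, 60, 10   # layer sizes taken from xp_elm
X = rng.normal(size=(m, n_in))                # stand-in for an MNIST batch
Y = np.eye(n_out)[rng.integers(0, n_out, m)]  # one-hot targets

# Fixed random hidden layer: never trained, just projects the input.
W_h = rng.normal(scale=n_in ** -0.5, size=(n_in, n_hidden))
Z = np.tanh(X @ W_h)                          # hidden representation, (m, n_hidden)

# Closed-form output weights: W_o = pinv(Z) @ Y minimizes ||Z @ W_o - Y||^2.
W_o = np.linalg.pinv(Z) @ Y                   # (n_hidden, n_out)

pred = Z @ W_o                                # linear output layer, (m, n_out)
print("train MSE:", np.mean((pred - Y) ** 2))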