From 355b588c9bd186243b3a1c157838892ccfb79d86 Mon Sep 17 00:00:00 2001 From: Csxeba Date: Sat, 29 Sep 2018 10:14:19 +0200 Subject: [PATCH] Fixed epsilon decay --- reinforcement/qlearning.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reinforcement/qlearning.py b/reinforcement/qlearning.py index 96a1a8f..927cee5 100644 --- a/reinforcement/qlearning.py +++ b/reinforcement/qlearning.py @@ -29,7 +29,7 @@ def sample(self, state, reward): self.rewards.append(reward) Q = self.net.predict(state[None, ...])[0] self.predictions.append(Q) - action = (np.argmax(Q) if np.random.uniform() > self.cfg.decaying_epsilon + action = (np.argmax(Q) if np.random.uniform() > self.cfg.epsilon else np.random.randint(0, self.num_actions)) self.actions.append(action) return action @@ -57,6 +57,7 @@ def accumulate(self, state, reward): Y[range(len(Y)), ix] = -(R + Y.max(axis=1) * self.cfg.gamma) Y[-1, ix[-1]] = -reward self.xp.remember(X, Y) + self.cfg.epsilon *= self.cfg.epsilon_decay self.reset() def accumulate_multiple(self, states, rewards):