Skip to content

Commit

Permalink
Fixed epsilon decay
Browse files: browse the repository at this point in the history
  • Loading branch information
csxeba committed Sep 29, 2018
1 parent cb027f0 commit 355b588
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion reinforcement/qlearning.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,7 +29,7 @@ def sample(self, state, reward):
self.rewards.append(reward)
Q = self.net.predict(state[None, ...])[0]
self.predictions.append(Q)
- action = (np.argmax(Q) if np.random.uniform() > self.cfg.decaying_epsilon
+ action = (np.argmax(Q) if np.random.uniform() > self.cfg.epsilon
else np.random.randint(0, self.num_actions))
self.actions.append(action)
return action
Expand Down Expand Up @@ -57,6 +57,7 @@ def accumulate(self, state, reward):
Y[range(len(Y)), ix] = -(R + Y.max(axis=1) * self.cfg.gamma)
Y[-1, ix[-1]] = -reward
self.xp.remember(X, Y)
+ self.cfg.epsilon *= self.cfg.epsilon_decay
self.reset()

def accumulate_multiple(self, states, rewards):
Expand Down

0 comments on commit 355b588

Please sign in to comment.