manage illegal move

DHDev0 · Jan 19, 2023 · d930293 · d930293
1 parent af43deb
commit d930293
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -167,6 +167,7 @@ Core Muzero feature:
 * [x] MCTS with 0 simulation (use of prior) or any number of simulation.
 * [x] Model weights automatically saved at best selfplay average reward.
 * [x] Priority or Uniform for sampling in replay buffer.
+* [X] Manage illegal moves with a negative reward.
 * [X] Scale the loss using the importance sampling ratio.
 * [x] Custom "Loss function" class to apply transformation and loss on label/prediction.
 * [X] Load your pretrained model from tag number.

diff --git a/game.py b/game.py
@@ -117,9 +117,19 @@ def observation(self,observation_shape=None,
         #manage feedback observation
         else:
             state = feedback[0]
-            
+        self.feedback_state = state
         return state
 
+    def step(self,action):
+        try: 
+            next_step = (self.env.step(action))
+        except:
+            obs = self.feedback_state
+            reward = min(-len(self.rewards),-self.limit_of_game_play,-1)
+            done = self.done
+            next_step = (obs,reward,done)
+        return next_step
+
     def close(self):
         return self.env.close()
 
@@ -229,7 +239,7 @@ def policy_step(self, root = None , temperature = 0 , feedback = None, iteration
             # # # apply the move and return the variables of the env
             # # # save the game variables to a list to return them
             #contains [observation, reward, done, info] + [meta_data for some gym env]
-            step_output = (self.env.step(self.action_map[selected_action]))
+            step_output = self.step(self.action_map[selected_action])
 
             #Get the new observation generated by step
             if self.rgb_observation :