Minor changes in examples to correspond with respective train.py scripts.
hallvardnmbu · Mar 13, 2024 · 3fc2006 · 3fc2006
1 parent 8b8f9af
commit 3fc2006
Show file tree

Hide file tree

Showing 6 changed files with 331 additions and 105 deletions.
diff --git a/breakout/results/example.ipynb b/breakout/results/example.ipynb
diff --git a/breakout/train.py b/breakout/train.py
@@ -54,9 +54,9 @@
 # OPTIMIZER : A dictionary defining the optimizer used in training.
 # METRICS : The file path where the metrics are saved.
 
-GAMES = 10000
-SKIP = 6
-CHECKPOINT = 1000
+GAMES = 25000
+SKIP = 4
+CHECKPOINT = 2500
 
 SHAPE = {
     "original": (1, 1, 210, 160),
@@ -66,34 +66,34 @@
 
 DISCOUNT = 0.95
 GAMMA = 0.99
-GRADIENTS = (-10, 10)
+GRADIENTS = (-1, 1)
 
 PUNISHMENT = -1
-INCENTIVE = 1
+INCENTIVE = 3
 
 MINIBATCH = 32
 TRAIN_EVERY = 1
 
 EXPLORATION_RATE = 1.0
-EXPLORATION_MIN = 0.005
-EXPLORATION_STEPS = 8000 // TRAIN_EVERY
+EXPLORATION_MIN = 0.08
+EXPLORATION_STEPS = 5000 // TRAIN_EVERY
 
 MIN_REWARD = lambda game: 1.7 ** (game / 1000) if game <= 4000 else 10
-MEMORY = 250
-RESET_Q_EVERY = TRAIN_EVERY * 5
+MEMORY = 1000
+RESET_Q_EVERY = TRAIN_EVERY * 250
 
 NETWORK = {
     "input_channels": 1, "outputs": 4,
     "channels": [32, 64, 64],
     "kernels": [8, 4, 3],
     "padding": ["valid", "valid", "valid"],
     "strides": [4, 2, 1],
-    "nodes": [512],
+    "nodes": [1024, 512],
 }
 OPTIMIZER = {
-    "optimizer": torch.optim.RMSprop,
+    "optimizer": torch.optim.Adam,
     "lr": 0.0001,
-    "hyperparameters": {}
+    "hyperparameters": {"eps": 1.5e-4}
 }
 
 METRICS = "./output/metrics.csv"

diff --git a/enduro/results/example.ipynb b/enduro/results/example.ipynb
@@ -101,11 +101,7 @@
     "    \"strides\": [4, 2, 1],\n",
     "    \"nodes\": [512],\n",
     "}\n",
-    "optimizer = {\n",
-    "    \"optimizer\": torch.optim.RMSprop,\n",
-    "    \"lr\": 0.0001,\n",
-    "    \"hyperparameters\": {}\n",
-    "}\n",
+    "optimizer = {\"optimizer\": torch.optim.RMSprop, \"lr\": 0.0001}\n",
     "\n",
     "agent = Agent(\n",
     "    network=network,\n",

diff --git a/help/visualisation/preprocessing.ipynb b/help/visualisation/preprocessing.ipynb
diff --git a/tetris/DQN.py b/tetris/DQN.py
@@ -332,7 +332,7 @@ def reward(self, state, reward):
         state = self.preprocess(state)
 
         height = 0
-        for i, row in enumerate(state.__reversed__()):
+        for i, row in enumerate(reversed(state)):
             if all(row == -1):
                 height = state.shape[0] - i
                 break

diff --git a/tetris/results/example.ipynb b/tetris/results/example.ipynb