
Commit

Works, but needs to be optimized. Especially the learn-method (i.e., memory handling etc.)
hallvardnmbu committed Mar 1, 2024
1 parent 23c43e9 commit 1b74d8c
Showing 3 changed files with 67 additions and 57 deletions.
94 changes: 47 additions & 47 deletions reinforcement-learning/orion-hpc/README.md
@@ -7,62 +7,62 @@ code and example, but with included logging and model checkpoints.
Rearrange
---------

Rearrange the file structure in Orion to the following layout:

```
./
├── singularity/
│ ├── singularity.def
│ ├── singularity.sh
│ │
│ └── singularity.sif
├── output/
│ ├── print.out
│ ├── debug.txt
│ │
│ ├── value-vision-tetris.gif
│ ├── value-vision-tetris.png
│ │
│ ├── weights-{game-number}.pth
│ │ ...
│ └── weights-final.pth
├── agent.py
├── train.py
└── train.sh
```

Note that `./singularity/singularity.sif` must be created as mentioned below. Also note that
the files within the `output/` directory are created by the training script.
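
For illustration, the training script can produce these files with the standard `logging` module and `torch.save`; the sketch below only mirrors the layout above, and the helper name is an assumption rather than the actual code in `train.py`:

```python
import logging

import torch

# Debug/info messages are appended to ./output/debug.txt (illustrative configuration).
logging.basicConfig(filename="./output/debug.txt", level=logging.DEBUG,
                    format="%(asctime)s %(levelname)s %(message)s")


def checkpoint(agent, game):
    """Hypothetical helper: store intermediate weights as ./output/weights-{game-number}.pth."""
    torch.save(agent.state_dict(), f"./output/weights-{game}.pth")
    logging.info("Saved checkpoint after game %s.", game)
```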

Execution
---------

Modify the `SIFFILE` path in `./train.sh` to point to the correct singularity file.

If you do not have a singularity file, you can create one by running:

```bash
cd singularity
sbatch singularity.sh singularity.def
```

This will create a `singularity.sif` file in the `./singularity/` directory. You can then
reference it in `./train.sh` by setting `SIFFILE` to `./singularity/singularity.sif`.

The job is defined in `./train.sh` and is submitted by running:

```bash
sbatch train.sh
```

Notes
-----

Checkpointed weights are saved to `./output/weights-{game-number}.pth`, and the final weights
are saved to `./output/weights-final.pth`.

The log messages (i.e., debug and info) are written to `./output/debug.txt`.

Printouts are saved to `./output/print.out`.
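
For reference, resuming from one of these checkpoints follows the usual PyTorch pattern sketched below; it assumes the agent is a `torch.nn.Module`, and the class name and constructor are placeholders, since the actual definition lives in `agent.py` and is not shown in this diff:

```python
import torch

from agent import Agent  # hypothetical name; use the class actually defined in agent.py

agent = Agent()  # construct with the same NETWORK/OPTIMIZER settings used for training

state_dict = torch.load("./output/weights-final.pth", map_location=torch.device("cpu"))
agent.load_state_dict(state_dict)
agent.eval()  # inference mode, e.g. for rendering value-vision-tetris.gif
```
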
28 changes: 20 additions & 8 deletions reinforcement-learning/orion-hpc/scripts/agent.py
@@ -236,6 +236,17 @@ def learn(self, network):
memory = random.sample(self.memory["memory"],
min(self.memory["batch_size"], len(self.memory["memory"])))

# ******************************************************************************************
# ******************************************************************************************
# ******************************************************************************************
#
# CLEAN THIS UP.
# This can be optimized. Look at how the memory is stored and retrieved.
#
# ******************************************************************************************
# ******************************************************************************************
# ******************************************************************************************

states = torch.cat([torch.stack(game.state).squeeze() for game in memory]).unsqueeze(1)
actions = torch.cat([torch.stack(game.action) for game in memory]).to(self.device)
new_states = torch.cat([torch.stack(game.new_state).squeeze()
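
One way to address the TODO above (purely illustrative, not part of this commit) is to keep transitions in preallocated tensors instead of re-stacking per-game Python lists on every call to `learn`, so that sampling a batch becomes a single indexing operation. All names below are hypothetical:

```python
import torch


class ReplayBuffer:
    """Fixed-size transition store backed by preallocated tensors (sketch)."""

    def __init__(self, capacity, state_shape, device="cpu"):
        self.capacity = capacity
        self.device = device
        self.states = torch.zeros((capacity, *state_shape))
        self.actions = torch.zeros(capacity, dtype=torch.long)
        self.rewards = torch.zeros(capacity)
        self.new_states = torch.zeros((capacity, *state_shape))
        self.terminal = torch.zeros(capacity, dtype=torch.bool)
        self.index = 0
        self.size = 0

    def push(self, state, action, reward, new_state, done):
        """Overwrite the oldest slot once the buffer is full."""
        i = self.index
        self.states[i] = state
        self.actions[i] = action
        self.rewards[i] = reward
        self.new_states[i] = new_state
        self.terminal[i] = done
        self.index = (i + 1) % self.capacity
        self.size = min(self.size + 1, self.capacity)

    def sample(self, batch_size):
        """Return a random batch of transitions, moved to the training device."""
        idx = torch.randint(0, self.size, (min(batch_size, self.size),))
        return (self.states[idx].to(self.device), self.actions[idx].to(self.device),
                self.rewards[idx].to(self.device), self.new_states[idx].to(self.device),
                self.terminal[idx].to(self.device))
```
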
@@ -276,20 +287,21 @@ def learn(self, network):
#
# where Q' is a copy of the agent, which is updated every C steps.

with torch.cuda.amp.autocast():
    actual = self(states).gather(1, actions.view(-1, 1))

    with torch.no_grad():
        optimal = (rewards +
                   self.parameter["gamma"] * network(new_states).max(1).values.view(-1, 1))

    # As Google DeepMind suggests, the optimal Q-value is set to r if the game is over.
    for step in steps:
        optimal[step] = rewards[step]

# BACKPROPAGATION
# ------------------------------------------------------------------------------------------

    loss = torch.nn.functional.mse_loss(actual, optimal)

self.optimizer.zero_grad()
loss.backward()
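
The new `torch.cuda.amp.autocast()` context enables mixed precision only for the forward computation of the Q-values and the target `r + gamma * max Q'(s', a')`. Whether the backward pass is also scaled is not visible in this diff; the usual pairing adds a `torch.cuda.amp.GradScaler`, roughly as in this sketch (the function and variable names are placeholders, not the project's API):

```python
import torch

scaler = torch.cuda.amp.GradScaler()


def update(agent, target, optimizer, states, actions, rewards, new_states, terminal, gamma):
    """One mixed-precision Q-learning step (illustrative sketch)."""
    with torch.cuda.amp.autocast():
        actual = agent(states).gather(1, actions.view(-1, 1))
        with torch.no_grad():
            optimal = rewards + gamma * target(new_states).max(1).values.view(-1, 1)
        optimal[terminal] = rewards[terminal]  # r alone for steps that ended the game
        loss = torch.nn.functional.mse_loss(actual, optimal)

    optimizer.zero_grad()
    scaler.scale(loss).backward()  # scale the loss so fp16 gradients do not underflow
    scaler.step(optimizer)         # unscales the gradients, then calls optimizer.step()
    scaler.update()
    return loss.item()
```
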
2 changes: 0 additions & 2 deletions reinforcement-learning/orion-hpc/scripts/train.py
@@ -31,7 +31,6 @@
# Parameters

GAMES = 25000
FRAMESKIP = 4 # Repeat action for n frames

DISCOUNT = 0.99 # Discount rate for rewards
GAMMA = 0.99 # Discount rate for Q-learning
@@ -50,7 +49,6 @@

NETWORK = {
"input_channels": 1, "outputs": 5,
"frames": FRAMESKIP,
"channels": [32, 64, 64],
"kernels": [5, 3, 3],
"strides": [3, 2, 1],
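
For orientation, the remaining `NETWORK` settings (one input channel, five outputs, channels `[32, 64, 64]` with kernels `[5, 3, 3]` and strides `[3, 2, 1]`) correspond to a small convolutional feature extractor roughly like the sketch below; this only illustrates the shape of the architecture, the real definition lives in `agent.py`:

```python
import torch

# Illustrative stack matching the channel/kernel/stride lists above.
convolutions = torch.nn.Sequential(
    torch.nn.Conv2d(1, 32, kernel_size=5, stride=3),
    torch.nn.ReLU(),
    torch.nn.Conv2d(32, 64, kernel_size=3, stride=2),
    torch.nn.ReLU(),
    torch.nn.Conv2d(64, 64, kernel_size=3, stride=1),
    torch.nn.ReLU(),
)

with torch.no_grad():
    # Example input; the actual frame preprocessing is defined in agent.py.
    features = convolutions(torch.zeros(1, 1, 210, 160))
print(features.shape)  # flatten and feed a linear layer with 5 outputs (one per action)
```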
