Fix github actions errors.
hallvardnmbu committed Feb 5, 2024
1 parent 897147e commit e4a25ed
Showing 2 changed files with 23 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pipeline.yml
@@ -17,7 +17,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint flake8
pip install pylint flake8 numpy torch
# Run Pylint and Flake8 simultaneously using background tasks and redirect output to files.
- name: Run Pylint and Flake8
45 changes: 22 additions & 23 deletions reinforcement-learning/agents.py
@@ -1,15 +1,17 @@
"""Reinforcement learning Agents."""

from abc import ABC, abstractmethod
import numpy as np
import torch


class Agent(ABC, torch.nn.Module):
"""Base Agent for reinforcement learning."""
def __init__(self,
inputs=4,
outputs=2,
optimizer=torch.optim.RMSprop,
lr=0.00025,
discount=0.99
):
"""
Base Agent for reinforcement learning.
@@ -24,12 +26,8 @@ def __init__(self,
Optimizer for the Agent to learn.
lr : float, optional
Learning rate for the optimizer.
discount : float, optional
Discount factor for future rewards.
--> 0: only consider immediate rewards
--> 1: consider all future rewards equally
"""
super(Agent, self).__init__()
super().__init__()

# ARCHITECTURE
# --------------------------------------------------
@@ -40,8 +38,12 @@ def __init__(self,

# LEARNING
# --------------------------------------------------
# discount : float
# Discount factor for future rewards.
# --> 0: only consider immediate rewards
# --> 1: consider all future rewards equally

self.discount = discount
self.discount = 0.99
self.optimizer = optimizer(self.parameters(), lr=lr)

self.memory = {}
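
The comments moved into the constructor describe how the discount factor weights future rewards. As a standalone illustration (not part of the committed code), a discounted return under that definition can be computed like this:

# Illustration only: how the discount factor weights future rewards.
rewards = [1.0, 1.0, 1.0]
discount = 0.99  # close to 1: future rewards count almost as much as immediate ones

# Return of the first step: r_0 + discount * r_1 + discount**2 * r_2
discounted_return = sum(discount ** step * reward for step, reward in enumerate(rewards))
print(discounted_return)  # roughly 2.9701
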
@@ -80,7 +82,6 @@ def action(self, state):
action : int
Selected action.
"""
pass

@abstractmethod
def learn(self):
@@ -92,28 +93,28 @@ def learn(self):
float
Either the gradient, loss, Q-value, etc.
"""
pass

@abstractmethod
def memorize(self, *args):
def memorize(self, *args, **kwargs):
"""
Abstract method for memorizing.
Parameters
----------
*args : list
Observation, action, reward, etc.
Positional arguments to memorize.
**kwargs : dict
Keyword arguments to memorize.
"""
pass


class PolicyGradientAgent(Agent):
"""Policy-based Agent for reinforcement learning."""
def __init__(self,
inputs=4,
outputs=2,
optimizer=torch.optim.RMSprop,
lr=0.00025,
discount=0.99
):
"""
Policy-based Agent for reinforcement learning.
@@ -128,12 +129,8 @@ def __init__(self,
Optimizer for the Agent to learn.
lr : float, optional
Learning rate for the optimizer.
discount : float, optional
Discount factor for future rewards.
--> 0: only consider immediate rewards
--> 1: consider all future rewards equally
"""
super().__init__(inputs, outputs, optimizer, lr, discount)
super().__init__(inputs, outputs, optimizer, lr)

self.memory["logarithm"] = []
self.memory["reward"] = []
@@ -219,16 +216,18 @@ def learn(self):

return gradient.item()

def memorize(self, logarithm, reward):
def memorize(self, *args, **kwargs):
"""
Append observation, action and reward to Agent memory.
Parameters
----------
logarithm : torch.Tensor
Logarithm of the selected action probability.
reward : int
Reward from the chosen action.
*args : list
Positional arguments to memorize.
**kwargs : dict
Keyword arguments to memorize.
"""
logarithm, reward = args

self.memory["logarithm"].append(logarithm)
self.memory["reward"].append(reward)
