Fix github actions errors.
hallvardnmbu committed Feb 5, 2024
1 parent 897147e commit e4a25ed
Showing 2 changed files with 23 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pipeline.yml
@@ -17,7 +17,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint flake8
pip install pylint flake8 numpy torch
# Run Pylint and Flake8 simultaneously using background tasks and redirect output to files.
- name: Run Pylint and Flake8
45 changes: 22 additions & 23 deletions reinforcement-learning/agents.py
@@ -1,15 +1,17 @@
"""Reinforcement learning Agents."""

from abc import ABC, abstractmethod
import numpy as np
import torch


class Agent(ABC, torch.nn.Module):
"""Base Agent for reinforcement learning."""
def __init__(self,
inputs=4,
outputs=2,
optimizer=torch.optim.RMSprop,
lr=0.00025,
discount=0.99
):
"""
Base Agent for reinforcement learning.
@@ -24,12 +26,8 @@ def __init__(self,
Optimizer for the Agent to learn.
lr : float, optional
Learning rate for the optimizer.
discount : float, optional
Discount factor for future rewards.
--> 0: only consider immediate rewards
--> 1: consider all future rewards equally
"""
super(Agent, self).__init__()
super().__init__()

# ARCHITECTURE
# --------------------------------------------------
@@ -40,8 +38,12 @@ def __init__(self,

# LEARNING
# --------------------------------------------------
# discount : float
# Discount factor for future rewards.
# --> 0: only consider immediate rewards
# --> 1: consider all future rewards equally

self.discount = discount
self.discount = 0.99
self.optimizer = optimizer(self.parameters(), lr=lr)

self.memory = {}
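
The comments moved into the constructor describe how the discount factor weights future rewards. As a standalone illustration (not part of the committed code), a discounted return under that definition can be computed like this:

# Illustration only: how the discount factor weights future rewards.
rewards = [1.0, 1.0, 1.0]
discount = 0.99  # close to 1: future rewards count almost as much as immediate ones

# Return of the first step: r_0 + discount * r_1 + discount**2 * r_2
discounted_return = sum(discount ** step * reward for step, reward in enumerate(rewards))
print(discounted_return)  # roughly 2.9701
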
@@ -80,7 +82,6 @@ def action(self, state):
action : int
Selected action.
"""
pass

@abstractmethod
def learn(self):
@@ -92,28 +93,28 @@ def learn(self):
float
Either the gradient, loss, Q-value, etc.
"""
pass

@abstractmethod
def memorize(self, *args):
def memorize(self, *args, **kwargs):
"""
Abstract method for memorizing.
Parameters
----------
*args : list
Observation, action, reward, etc.
Positional arguments to memorize.
**kwargs : dict
Keyword arguments to memorize.
"""
pass


class PolicyGradientAgent(Agent):
"""Policy-based Agent for reinforcement learning."""
def __init__(self,
inputs=4,
outputs=2,
optimizer=torch.optim.RMSprop,
lr=0.00025,
discount=0.99
):
"""
Policy-based Agent for reinforcement learning.
@@ -128,12 +129,8 @@ def __init__(self,
Optimizer for the Agent to learn.
lr : float, optional
Learning rate for the optimizer.
discount : float, optional
Discount factor for future rewards.
--> 0: only consider immediate rewards
--> 1: consider all future rewards equally
"""
super().__init__(inputs, outputs, optimizer, lr, discount)
super().__init__(inputs, outputs, optimizer, lr)

self.memory["logarithm"] = []
self.memory["reward"] = []
@@ -219,16 +216,18 @@ def learn(self):

return gradient.item()

def memorize(self, logarithm, reward):
def memorize(self, *args, **kwargs):
"""
Append observation, action and reward to Agent memory.
Parameters
----------
logarithm : torch.Tensor
Logarithm of the selected action probability.
reward : int
Reward from the chosen action.
*args : list
Positional arguments to memorize.
**kwargs : dict
Keyword arguments to memorize.
"""
logarithm, reward = args

self.memory["logarithm"].append(logarithm)
self.memory["reward"].append(reward)
