From 99f419675ee24bbe69b1f4df95f289be9d06fc2e Mon Sep 17 00:00:00 2001
From: Maksymilian Wojnar
Date: Tue, 6 Feb 2024 21:02:48 +0100
Subject: [PATCH] Remove reference to actor and critic from DDPG

---
 reinforced_lib/agents/deep/ddpg.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/reinforced_lib/agents/deep/ddpg.py b/reinforced_lib/agents/deep/ddpg.py
index cfaee81..13bbd32 100644
--- a/reinforced_lib/agents/deep/ddpg.py
+++ b/reinforced_lib/agents/deep/ddpg.py
@@ -22,9 +22,9 @@ class DDPGState(AgentState):
     Attributes
     ----------
     q_params : dict
-        Parameters of the Q-network (critic).
+        Parameters of the Q-network.
     q_net_state : dict
-        State of the Q-network (critic).
+        State of the Q-network.
     q_params_target : dict
         Parameters of the target Q-network.
     q_net_state_target : dict
@@ -32,9 +32,9 @@ class DDPGState(AgentState):
     q_opt_state : optax.OptState
         Optimizer state of the Q-network.
     a_params : dict
-        Parameters of the policy network (actor).
+        Parameters of the policy network.
     a_net_state : dict
-        State of the policy network (actor).
+        State of the policy network.
     a_params_target : dict
         Parameters of the target policy network.
     a_net_state_target : dict
@@ -71,17 +71,17 @@ class DDPG(BaseAgent):
     Deep deterministic policy gradient [3]_ [4]_ agent with white Gaussian noise exploration and experience replay
     buffer. The agent simultaneously learns a Q-function and a policy. The Q-function is updated using the Bellman
     equation. The policy is learned using the gradient of the Q-function with respect to the policy parameters
-    to maximize the Q-value. The agent uses two Q-networks (critics) and two policy networks (actors) to stabilize
-    the learning process and avoid overestimation. The target networks are updated with a soft update. This agent
-    follows the off-policy learning paradigm and is suitable for environments with continuous action spaces.
+    to maximize the Q-value. The agent uses two Q-networks and two policy networks to stabilize the learning process
+    and avoid overestimation. The target networks are updated with a soft update. This agent follows the off-policy
+    learning paradigm and is suitable for environments with continuous action spaces.
 
     Parameters
     ----------
     q_network : nn.Module
-        Architecture of the Q-networks (critics).
+        Architecture of the Q-networks.
         The input to the network should be two tensors of observations and actions respectively.
     a_network : nn.Module
-        Architecture of the policy networks (actors).
+        Architecture of the policy networks.
     obs_space_shape : Shape
         Shape of the observation space.
     act_space_shape : Shape
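
For reference, the "soft update" of the target networks mentioned in the docstring above is a Polyak
average of the online parameters into the target parameters. The snippet below is a minimal,
illustrative JAX sketch of that step; the soft_update function and the tau coefficient are names
assumed for this example only and are not taken from reinforced-lib's implementation.

    import jax
    import jax.numpy as jnp

    def soft_update(target_params, params, tau=0.005):
        # Polyak average: target <- (1 - tau) * target + tau * online.
        return jax.tree_util.tree_map(
            lambda t, p: (1.0 - tau) * t + tau * p, target_params, params
        )

    # Toy parameter pytrees standing in for the Q-network or policy-network parameters.
    params = {"w": jnp.ones((2, 2)), "b": jnp.zeros(2)}
    target_params = {"w": jnp.zeros((2, 2)), "b": jnp.zeros(2)}
    target_params = soft_update(target_params, params)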