diff --git a/moviebot/dialogue_manager/dialogue_manager.py b/moviebot/dialogue_manager/dialogue_manager.py index fdc1d17..e30d059 100644 --- a/moviebot/dialogue_manager/dialogue_manager.py +++ b/moviebot/dialogue_manager/dialogue_manager.py @@ -11,7 +11,9 @@ from moviebot.core.intents.agent_intents import AgentIntents from moviebot.dialogue_manager.dialogue_act import DialogueAct -from moviebot.dialogue_manager.dialogue_policy import DialoguePolicy +from moviebot.dialogue_manager.dialogue_policy.rb_dialogue_policy import ( + RuleBasedDialoguePolicy, +) from moviebot.dialogue_manager.dialogue_state import DialogueState from moviebot.dialogue_manager.dialogue_state_tracker import ( DialogueStateTracker, @@ -36,7 +38,9 @@ def __init__( self.isBot = isBot self.new_user = new_user self.dialogue_state_tracker = DialogueStateTracker(config, self.isBot) - self.dialogue_policy = DialoguePolicy(self.isBot, self.new_user) + self.dialogue_policy = RuleBasedDialoguePolicy( + self.isBot, self.new_user + ) self.recommender: RecommenderModel = config.get("recommender") def start_dialogue(self, new_user: bool = False) -> List[DialogueAct]: diff --git a/moviebot/dialogue_manager/dialogue_policy/__init__.py b/moviebot/dialogue_manager/dialogue_policy/__init__.py new file mode 100644 index 0000000..e1a9070 --- /dev/null +++ b/moviebot/dialogue_manager/dialogue_policy/__init__.py @@ -0,0 +1,19 @@ +from moviebot.dialogue_manager.dialogue_policy.a2c_dialogue_policy import ( + A2CDialoguePolicy, +) +from moviebot.dialogue_manager.dialogue_policy.dqn_dialogue_policy import ( + DQNDialoguePolicy, +) +from moviebot.dialogue_manager.dialogue_policy.neural_dialogue_policy import ( + NeuralDialoguePolicy, +) +from moviebot.dialogue_manager.dialogue_policy.rb_dialogue_policy import ( + RuleBasedDialoguePolicy, +) + +__all__ = [ + "A2CDialoguePolicy", + "DQNDialoguePolicy", + "NeuralDialoguePolicy", + "RuleBasedDialoguePolicy", +] diff --git a/moviebot/dialogue_manager/dialogue_policy/a2c_dialogue_policy.py b/moviebot/dialogue_manager/dialogue_policy/a2c_dialogue_policy.py new file mode 100644 index 0000000..6681f46 --- /dev/null +++ b/moviebot/dialogue_manager/dialogue_policy/a2c_dialogue_policy.py @@ -0,0 +1,206 @@ +"""Deep dialogue policy based on advantage actor-critic.""" +from __future__ import annotations + +from typing import Any, List, Optional, Tuple + +import torch + +from moviebot.dialogue_manager.dialogue_policy.neural_dialogue_policy import ( + NeuralDialoguePolicy, +) + + +class A2CDialoguePolicy(NeuralDialoguePolicy): + def __init__( + self, + input_size: int, + hidden_size: int, + output_size: int, + possible_actions: List[Any], + num_timesteps: Optional[int] = None, + n_envs: int = 1, + ) -> None: + """Initializes the policy. + + Args: + input_size: The size of the input vector. + hidden_size: The size of the hidden layer. + output_size: The size of the output vector. + possible_actions: The list of possible actions. + num_timesteps: The number of timesteps. Defaults to None. + n_envs: The number of environments. Defaults to 1. 
+ """ + super().__init__(input_size, hidden_size, output_size, possible_actions) + + self.n_envs = n_envs + + self.actor = torch.nn.Sequential( + torch.nn.Linear(input_size, hidden_size), + torch.nn.ReLU(), + torch.nn.Linear(hidden_size, output_size), + ) + + self.critic = torch.nn.Sequential( + torch.nn.Linear(input_size, hidden_size), + torch.nn.ReLU(), + torch.nn.Linear(hidden_size, 1), + ) + + self.actor_optimizer = torch.optim.Adam( + self.actor.parameters(), lr=0.001 + ) + self.actor_lr_scheduler = None + self.critic_optimizer = torch.optim.Adam( + self.critic.parameters(), lr=0.005 + ) + self.critic_lr_scheduler = None + + if num_timesteps is not None: + self.actor_lr_scheduler = torch.optim.lr_scheduler.LinearLR( + self.actor_optimizer, total_iters=num_timesteps + ) + self.critic_lr_scheduler = torch.optim.lr_scheduler.LinearLR( + self.critic_optimizer, total_iters=num_timesteps + ) + + def forward(self, state: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """Forward pass. + + Args: + state: A batched vector of dialogue states. + + Returns: + The output of the actor and the critic. + """ + state_values = self.critic(state) + actions_log_probs = self.actor(state) + return state_values, actions_log_probs + + def select_action( + self, state: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Returns the selected action and its log probability. + + Args: + state: Representation of dialogue state as a vector. + + Returns: + The selected action id, its log probability, the state value, and + the entropy. + """ + state_value, actions_log_prob = self.forward(state) + actions_distribution = torch.distributions.Categorical( + logits=actions_log_prob + ) + action = actions_distribution.sample() + actions_log_prob = actions_distribution.log_prob(action) + entropy = actions_distribution.entropy() + return action, actions_log_prob, state_value, entropy + + def get_losses( + self, + rewards: torch.Tensor, + action_log_probs: torch.Tensor, + value_preds: torch.Tensor, + entropy: torch.Tensor, + mask: torch.Tensor, + gamma: float = 0.99, + lam: float = 0.95, + entropy_coef: float = 0.01, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Computes the loss of a minibatch using the generalized advantage + estimator. + + Args: + rewards: The rewards. + action_log_probs: The log probabilities of the actions. + value_preds: The predicted values. + entropy: The entropy. + mask: The mask. + gamma: The discount factor. Defaults to 0.99. + lam: The GAE parameter (1 for Monte-Carlo sampling, 0 for normal + TD-learning). Defaults to 0.95. + entropy_coef: The entropy coefficient. Defaults to 0.01. + + Returns: + The critic and actor losses for the minibatch. + """ + T = len(rewards) + advantages = torch.zeros(T, self.n_envs) + + # Compute advantages with GAE + gae = 0.0 + for t in reversed(range(T - 1)): + td_error = ( + rewards[t] + + gamma * mask[t] * value_preds[t + 1] + - value_preds[t] + ) + gae = td_error + gamma * lam * mask[t] * gae + advantages[t] = gae + + # Compute losses + critic_loss = advantages.pow(2).mean() + actor_loss = ( + -(advantages.detach() * action_log_probs).mean() + - entropy_coef * entropy.mean() + ) + return critic_loss, actor_loss + + def update_parameters( + self, critic_loss: torch.Tensor, actor_loss: torch.Tensor + ) -> None: + """Updates the parameters of the policy. + + Args: + critic_loss: The critic loss. + actor_loss: The actor loss. 
+ """ + self.critic_optimizer.zero_grad() + critic_loss.backward() + self.critic_optimizer.step() + if self.critic_lr_scheduler is not None: + self.critic_lr_scheduler.step() + + self.actor_optimizer.zero_grad() + actor_loss.backward() + self.actor_optimizer.step() + if self.actor_lr_scheduler is not None: + self.actor_lr_scheduler.step() + + def save_policy(self, path: str) -> None: + """Saves the policy. + + Args: + path: The path to save the policy to. + """ + state_dict = { + "actor": self.actor.state_dict(), + "critic": self.critic.state_dict(), + "input_size": self.input_size, + "hidden_size": self.hidden_size, + "output_size": self.output_size, + "possible_actions": self.possible_actions, + } + torch.save(state_dict, path) + + @classmethod + def load_policy(cls, path: str) -> A2CDialoguePolicy: + """Loads the policy. + + Args: + path: The path to load the policy from. + + Returns: + The loaded policy. + """ + state_dict = torch.load(path) + policy = cls( + state_dict["input_size"], + state_dict["hidden_size"], + state_dict["output_size"], + state_dict["possible_actions"], + ) + policy.actor.load_state_dict(state_dict["actor"]) + policy.critic.load_state_dict(state_dict["critic"]) + return policy diff --git a/moviebot/dialogue_manager/dialogue_policy/dqn_dialogue_policy.py b/moviebot/dialogue_manager/dialogue_policy/dqn_dialogue_policy.py new file mode 100644 index 0000000..2ea204c --- /dev/null +++ b/moviebot/dialogue_manager/dialogue_policy/dqn_dialogue_policy.py @@ -0,0 +1,98 @@ +"""Deep dialogue policy based on Q network.""" + +from __future__ import annotations + +from typing import Any, List, Tuple + +import torch + +from moviebot.dialogue_manager.dialogue_policy.neural_dialogue_policy import ( + NeuralDialoguePolicy, +) + + +class DQNDialoguePolicy(NeuralDialoguePolicy): + def __init__( + self, + input_size: int, + hidden_size: int, + output_size: int, + possible_actions: List[Any], + ) -> None: + """Initializes the policy. + + Args: + input_size: The size of the input vector. + hidden_size: The size of the hidden layer. + output_size: The size of the output vector. + possible_actions: The list of possible actions. + """ + super().__init__(input_size, hidden_size, output_size, possible_actions) + + self.model = torch.nn.Sequential( + torch.nn.Linear(input_size, hidden_size), + torch.nn.ReLU(), + torch.nn.Linear(hidden_size, hidden_size), + torch.nn.ReLU(), + torch.nn.Linear(hidden_size, output_size), + ) + + def forward(self, state: torch.Tensor) -> torch.Tensor: + """Forward pass of the policy. + + Args: + state: State or batch of states. + + Returns: + Next action(s) probabilities. + """ + return self.model(state) + + def select_action(self, state: torch.Tensor) -> Tuple[int, Any]: + """Selects an action based on the current state. + + Args: + state: The current state. + + Returns: + The id of selected action and the action. + """ + with torch.no_grad(): + action = self.model(state).max(1)[1].view(1, 1) + + return action.item(), self.possible_actions[action.item()] + + def save_policy(self, path: str) -> None: + """Saves the policy to a file. + + Args: + path: The path to save the policy to. + """ + state_dict = { + "input_size": self.input_size, + "hidden_size": self.hidden_size, + "output_size": self.output_size, + "possible_actions": self.possible_actions, + "model_state_dict": self.model.state_dict(), + } + torch.save(state_dict, path) + + @classmethod + def load_policy(cls, path: str) -> DQNDialoguePolicy: + """Loads the policy from a file. 
+ + Args: + path: The path to load the policy from. + + Returns: + The loaded policy. + """ + state_dict = torch.load(path) + policy = cls( + state_dict["input_size"], + state_dict["hidden_size"], + state_dict["output_size"], + state_dict["possible_actions"], + ) + policy.load_state_dict(state_dict["model_state_dict"]) + return policy diff --git a/moviebot/dialogue_manager/dialogue_policy/neural_dialogue_policy.py b/moviebot/dialogue_manager/dialogue_policy/neural_dialogue_policy.py new file mode 100644 index 0000000..9692166 --- /dev/null +++ b/moviebot/dialogue_manager/dialogue_policy/neural_dialogue_policy.py @@ -0,0 +1,216 @@ +"""Neural dialogue policy built on top of PyTorch.""" + +from __future__ import annotations + +from abc import abstractmethod +from typing import Any, List + +import torch +from sklearn.preprocessing import MultiLabelBinarizer + +from moviebot.core.intents.agent_intents import AgentIntents +from moviebot.core.intents.user_intents import UserIntents +from moviebot.dialogue_manager.dialogue_state import DialogueState + + +class NeuralDialoguePolicy(torch.nn.Module): + user_label_encoder = MultiLabelBinarizer().fit( + [list(map(lambda x: x.value.label, UserIntents))] + ) + agent_label_encoder = MultiLabelBinarizer().fit( + [list(map(lambda x: x.value.label, AgentIntents))] + ) + + def __init__( + self, + input_size: int, + hidden_size: int, + output_size: int, + possible_actions: List[Any], + ) -> None: + """Initializes the policy. + + Args: + input_size: The size of the input vector. + hidden_size: The size of the hidden layer. + output_size: The size of the output vector. + possible_actions: The list of possible actions. + """ + super(NeuralDialoguePolicy, self).__init__() + + self.possible_actions = possible_actions + + self.input_size = input_size + self.hidden_size = hidden_size + self.output_size = output_size + + @abstractmethod + def forward(self, state: torch.Tensor) -> torch.Tensor: + """Forward pass of the policy. + + Args: + state: State or batch of states. + + Raises: + NotImplementedError: If the method is not implemented in the + subclass. + Returns: + Output of the policy. + """ + raise NotImplementedError + + @abstractmethod + def select_action(self, state: torch.Tensor) -> Any: + """Selects an action based on the current state. + + Args: + state: The current state. + + Raises: + NotImplementedError: If the method is not implemented in the + subclass. + + Returns: + Selected action and optionally other information. + """ + raise NotImplementedError + + @abstractmethod + def save_policy(self, path: str) -> None: + """Saves the policy. + + Args: + path: Path to save the policy. + + Raises: + NotImplementedError: If the method is not implemented in the + subclass. + """ + raise NotImplementedError + + @classmethod + @abstractmethod + def load_policy(cls, path: str) -> NeuralDialoguePolicy: + """Loads the policy. + + Args: + path: Path to load the policy from. + + Raises: + NotImplementedError: If the method is not implemented in the + subclass. + + Returns: + The loaded policy. + """ + raise NotImplementedError + + @classmethod + def build_input_from_dialogue_state( + cls, dialogue_state: DialogueState, **kwargs + ) -> torch.Tensor: + """Builds the input vector from the dialogue state. + + The markovian state representation is built from booleans in the + dialogue state (e.g., a recommendation was made, the agent should make + an offer, we are at the beginning of the conversation). It can be seen + as a one-hot encoding of the state. 
+ + Args: + dialogue_state: The dialogue state. + + Returns: + Input vector for the policy (i.e., markovian state representation). + """ + dialogue_state_tensor = torch.tensor( + [ + dialogue_state.is_beginning, + dialogue_state.agent_req_filled, + dialogue_state.agent_can_lookup, + dialogue_state.agent_made_partial_offer, + dialogue_state.agent_should_make_offer, + dialogue_state.agent_made_offer, + dialogue_state.agent_offer_no_results, + dialogue_state.at_terminal_state, + ], + dtype=torch.float, + ) + return dialogue_state_tensor + + @classmethod + def _encode_intents( + cls, intents: List[Any], label_encoder: MultiLabelBinarizer + ) -> torch.Tensor: + """Encodes the intents. + + Args: + intents: Intents to encode. + label_encoder: Label encoder to use. + + Returns: + Encoded intents. + """ + if len(intents) == 0: + intents_tensor = torch.zeros( + len(label_encoder.classes_), dtype=torch.float + ) + else: + intents_tensor = torch.tensor( + label_encoder.transform( + [list(map(lambda x: x.value.label, intents))] + )[0], + dtype=torch.float, + ) + return intents_tensor + + @classmethod + def build_input_from_dialogue_state_and_intents( + cls, + dialogue_state: DialogueState, + user_intents: List[UserIntents], + agent_intents: List[AgentIntents], + **kwargs, + ) -> torch.Tensor: + """Builds the input vector from the dialogue state and previous intents. + + Args: + dialogue_state: The dialogue state. + user_intents: The user intents. + agent_intents: The agent intents. + + Returns: + The input vector. + """ + dialogue_state_tensor = cls.build_input_from_dialogue_state( + dialogue_state + ) + + user_intents_tensor = cls._encode_intents( + user_intents, cls.user_label_encoder + ) + agent_intents_tensor = cls._encode_intents( + agent_intents, cls.agent_label_encoder + ) + + return torch.cat( + [dialogue_state_tensor, user_intents_tensor, agent_intents_tensor], + dim=0, + ) + + @classmethod + def build_input( + cls, dialogue_state: DialogueState, **kwargs + ) -> torch.Tensor: + """Builds the input vector. + + Args: + dialogue_state: The dialogue state. + + Returns: + The input vector. + """ + if kwargs.get("b_use_intents", False): + return cls.build_input_from_dialogue_state_and_intents( + dialogue_state, **kwargs + ) + return cls.build_input_from_dialogue_state(dialogue_state) diff --git a/moviebot/dialogue_manager/dialogue_policy.py b/moviebot/dialogue_manager/dialogue_policy/rb_dialogue_policy.py similarity index 99% rename from moviebot/dialogue_manager/dialogue_policy.py rename to moviebot/dialogue_manager/dialogue_policy/rb_dialogue_policy.py index 177e39e..26cb398 100644 --- a/moviebot/dialogue_manager/dialogue_policy.py +++ b/moviebot/dialogue_manager/dialogue_policy/rb_dialogue_policy.py @@ -15,7 +15,7 @@ from moviebot.nlu.annotation.slots import Slots -class DialoguePolicy: +class RuleBasedDialoguePolicy: def __init__(self, isBot: bool, new_user: bool) -> None: """Loads all necessary parameters for the policy. 
diff --git a/moviebot/dialogue_manager/dialogue_state.py b/moviebot/dialogue_manager/dialogue_state.py index 97be530..1d06a06 100644 --- a/moviebot/dialogue_manager/dialogue_state.py +++ b/moviebot/dialogue_manager/dialogue_state.py @@ -39,14 +39,15 @@ def __init__( {} ) # previous information needs of the user in case user want to go back self.prev_agent_dacts: List[DialogueAct] = [] # list of agent dacts - self.last_agent_dacts: DialogueAct = ( - None # the current agent dact (singular, must be updated carefully) - ) + # the current agent dact (singular, must be updated carefully) + self.last_agent_dacts: DialogueAct = None self.last_user_dacts: List[DialogueAct] = None # the current user act # Keep track of the recommended movies self.movies_recommended = {} + self.is_beginning = True + def _agent_offer_state(self) -> str: """Returns string representation of the agent's offer state.""" offer_state = { @@ -142,3 +143,5 @@ def initialize(self) -> None: 3 # number of CIN slots which remain empty before agent must make ) # an offer + + self.is_beginning = True diff --git a/moviebot/dialogue_manager/dialogue_state_tracker.py b/moviebot/dialogue_manager/dialogue_state_tracker.py index febd153..eeb5e03 100644 --- a/moviebot/dialogue_manager/dialogue_state_tracker.py +++ b/moviebot/dialogue_manager/dialogue_state_tracker.py @@ -272,6 +272,7 @@ def update_state_agent(self, agent_dacts: List[DialogueAct]) -> None: agent_dacts: List of dialogue acts which is the output of dialogue policy. """ + self.dialogue_state.is_beginning = False # re-filtering the dacts agent_dacts_copy = deepcopy(agent_dacts) agent_dacts = [] diff --git a/tests/dialogue_manager/test_dialogue_manager.py b/tests/dialogue_manager/test_dialogue_manager.py index ee27df2..3aea99f 100644 --- a/tests/dialogue_manager/test_dialogue_manager.py +++ b/tests/dialogue_manager/test_dialogue_manager.py @@ -6,7 +6,9 @@ from moviebot.core.intents.agent_intents import AgentIntents from moviebot.dialogue_manager.dialogue_act import DialogueAct from moviebot.dialogue_manager.dialogue_manager import DialogueManager -from moviebot.dialogue_manager.dialogue_policy import DialoguePolicy +from moviebot.dialogue_manager.dialogue_policy.rb_dialogue_policy import ( + RuleBasedDialoguePolicy, +) from moviebot.dialogue_manager.dialogue_state import DialogueState from moviebot.dialogue_manager.dialogue_state_tracker import ( DialogueStateTracker, @@ -76,7 +78,7 @@ def test_generate_output(dialogue_manager: DialogueManager): @mock.patch.object( - DialoguePolicy, + RuleBasedDialoguePolicy, "next_action", return_value=[DialogueAct(AgentIntents.ACKNOWLEDGE)], ) @@ -109,7 +111,7 @@ def test_generate_output_with_lookup( @mock.patch.object( - DialoguePolicy, + RuleBasedDialoguePolicy, "next_action", ) @mock.patch.object(DialogueStateTracker, "update_state_agent") diff --git a/tests/dialogue_manager/test_dialogue_policy.py b/tests/dialogue_manager/test_dialogue_policy.py index b812002..c5b349c 100644 --- a/tests/dialogue_manager/test_dialogue_policy.py +++ b/tests/dialogue_manager/test_dialogue_policy.py @@ -6,7 +6,9 @@ from moviebot.core.intents.agent_intents import AgentIntents from moviebot.core.intents.user_intents import UserIntents from moviebot.dialogue_manager.dialogue_act import DialogueAct -from moviebot.dialogue_manager.dialogue_policy import DialoguePolicy +from moviebot.dialogue_manager.dialogue_policy.rb_dialogue_policy import ( + RuleBasedDialoguePolicy, +) from moviebot.dialogue_manager.dialogue_state import DialogueState from 
moviebot.nlu.annotation.item_constraint import ItemConstraint from moviebot.nlu.annotation.operator import Operator @@ -42,8 +44,8 @@ def state(ontology, database_results, slots) -> DialogueState: @pytest.fixture -def policy() -> DialoguePolicy: - yield DialoguePolicy(isBot=False, new_user=True) +def policy() -> RuleBasedDialoguePolicy: + yield RuleBasedDialoguePolicy(isBot=False, new_user=True) @pytest.mark.parametrize( @@ -69,7 +71,7 @@ def policy() -> DialoguePolicy: ], ) def test_next_action_basic( - policy: DialoguePolicy, + policy: RuleBasedDialoguePolicy, state: DialogueState, last_agent_dacts, last_user_dacts, @@ -82,7 +84,9 @@ def test_next_action_basic( assert agent_dacts[0].intent == expected -def test_next_action_restart(policy: DialoguePolicy, state: DialogueState): +def test_next_action_restart( + policy: RuleBasedDialoguePolicy, state: DialogueState +): agent_dacts = policy.next_action(state, restart=True) assert len(agent_dacts) == 2 assert agent_dacts[0].intent == AgentIntents.RESTART @@ -90,7 +94,7 @@ def test_next_action_restart(policy: DialoguePolicy, state: DialogueState): def test_next_action_made_partial_offer( - policy: DialoguePolicy, state: DialogueState + policy: RuleBasedDialoguePolicy, state: DialogueState ): state.agent_made_partial_offer = True @@ -105,7 +109,7 @@ def test_next_action_made_partial_offer( def test_next_action_made_partial_offer_all_slots_filled( - policy: DialoguePolicy, state: DialogueState + policy: RuleBasedDialoguePolicy, state: DialogueState ): state.agent_made_partial_offer = True state.slot_left_unasked = 10 @@ -119,7 +123,7 @@ def test_next_action_made_partial_offer_all_slots_filled( def test_next_action_should_make_offer( - policy: DialoguePolicy, state: DialogueState, database_results + policy: RuleBasedDialoguePolicy, state: DialogueState, database_results ): state.agent_should_make_offer = True state.item_in_focus = database_results[1] @@ -134,7 +138,7 @@ def test_next_action_should_make_offer( def test_next_action_inquire_empty( - policy: DialoguePolicy, state: DialogueState, database_results + policy: RuleBasedDialoguePolicy, state: DialogueState, database_results ): state.agent_made_offer = True state.item_in_focus = database_results[2] @@ -150,7 +154,7 @@ def test_next_action_inquire_empty( def test_next_action_inquire( - policy: DialoguePolicy, state: DialogueState, database_results + policy: RuleBasedDialoguePolicy, state: DialogueState, database_results ): state.agent_made_offer = True state.item_in_focus = database_results[2] @@ -170,7 +174,7 @@ def test_next_action_inquire( def test_next_action_accept_recommendation( - policy: DialoguePolicy, state: DialogueState, database_results + policy: RuleBasedDialoguePolicy, state: DialogueState, database_results ): state.agent_made_offer = True state.item_in_focus = database_results[1] @@ -206,9 +210,12 @@ def test_next_action_accept_recommendation( ], ) @mock.patch( - "moviebot.dialogue_manager.dialogue_policy.set", mock.MagicMock(wraps=list) + "moviebot.dialogue_manager.dialogue_policy.rb_dialogue_policy.set", + mock.MagicMock(wraps=list), ) -def test__generate_examples(policy: DialoguePolicy, results, slot, expected): +def test__generate_examples( + policy: RuleBasedDialoguePolicy, results, slot, expected +): random.seed(42) examples = policy._generate_examples(results, slot) assert examples == expected
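
For reference, the generalized advantage estimator implemented by A2CDialoguePolicy.get_losses corresponds to the recursion below, with discount \gamma, GAE parameter \lambda, non-terminal mask m_t, rewards r_t, and critic values V(s_t):

    \delta_t = r_t + \gamma\, m_t\, V(s_{t+1}) - V(s_t)
    A_t = \delta_t + \gamma \lambda\, m_t\, A_{t+1}
    L_{\text{critic}} = \frac{1}{T} \sum_t A_t^2
    L_{\text{actor}} = -\frac{1}{T} \sum_t \bar{A}_t \log \pi(a_t \mid s_t) - c_H \frac{1}{T} \sum_t H_t

where \bar{A}_t is the detached advantage, H_t the entropy of the action distribution, and c_H the entropy coefficient. The backward loop runs over t = T-2, ..., 0, so the advantage of the final transition is left at zero rather than bootstrapped from a next-state value.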
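
A minimal sketch of driving the A2C policy through one rollout and one update, assuming a single environment (n_envs=1) and agent intents as the action set; the state vector and reward are stubbed with placeholders, since producing them requires a running DialogueStateTracker and a user simulator that are outside this patch:

import torch

from moviebot.core.intents.agent_intents import AgentIntents
from moviebot.dialogue_manager.dialogue_policy import A2CDialoguePolicy

# Assumption: one action per agent intent; the real action inventory may differ.
possible_actions = list(AgentIntents)
policy = A2CDialoguePolicy(
    input_size=8,  # size of build_input_from_dialogue_state's vector
    hidden_size=64,
    output_size=len(possible_actions),
    possible_actions=possible_actions,
)

log_probs, values, entropies, rewards, masks = [], [], [], [], []
for _ in range(5):  # hypothetical 5-turn rollout
    # In practice: state = A2CDialoguePolicy.build_input(dialogue_state),
    # with dialogue_state taken from the DialogueStateTracker.
    state = torch.zeros(8)
    action, log_prob, value, entropy = policy.select_action(state)
    log_probs.append(log_prob)
    values.append(value)
    entropies.append(entropy)
    rewards.append(torch.tensor([0.0]))  # placeholder reward from a simulator
    masks.append(torch.tensor([1.0]))  # 0.0 on the terminal turn

critic_loss, actor_loss = policy.get_losses(
    rewards=torch.stack(rewards),
    action_log_probs=torch.stack(log_probs).unsqueeze(-1),
    value_preds=torch.stack(values),
    entropy=torch.stack(entropies),
    mask=torch.stack(masks),
)
policy.update_parameters(critic_loss, actor_loss)
policy.save_policy("a2c_policy.pt")  # hypothetical path

Note that A2CDialoguePolicy.load_policy restores the actor and critic weights but re-creates the optimizers, so learning-rate schedules do not survive a save/load round trip.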
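
The intent-aware state representation concatenates the eight boolean state features with multi-hot encodings of the previous user and agent intents, so the input size can be derived from the label encoders rather than hard-coded. A sketch with the DQN policy, again with the DialogueState itself assumed to come from the tracker:

import torch

from moviebot.core.intents.agent_intents import AgentIntents
from moviebot.dialogue_manager.dialogue_policy import (
    DQNDialoguePolicy,
    NeuralDialoguePolicy,
)

# 8 boolean state features + one slot per known user/agent intent label.
input_size = (
    8
    + len(NeuralDialoguePolicy.user_label_encoder.classes_)
    + len(NeuralDialoguePolicy.agent_label_encoder.classes_)
)
possible_actions = list(AgentIntents)  # assumption: actions are agent intents
policy = DQNDialoguePolicy(
    input_size, 64, len(possible_actions), possible_actions
)

# In practice the input comes from the tracker's state and the last intents:
# state = NeuralDialoguePolicy.build_input(
#     dialogue_state,  # from the running DialogueStateTracker
#     b_use_intents=True,
#     user_intents=last_user_intents,  # intents of the last user utterance
#     agent_intents=[AgentIntents.ACKNOWLEDGE],
# ).unsqueeze(0)
state = torch.zeros(1, input_size)  # select_action expects a batched input
action_id, action = policy.select_action(state)

DQNDialoguePolicy.select_action is purely greedy (argmax over Q-values); any exploration strategy such as epsilon-greedy would have to be implemented in the training loop.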