diff --git a/data/policies/design_C.1/model_1.0/acrobot/evolsac/model.zip b/data/policies/design_C.1/model_1.0/acrobot/evolsac/model.zip
new file mode 100644
index 00000000..3773a162
Binary files /dev/null and b/data/policies/design_C.1/model_1.0/acrobot/evolsac/model.zip differ
diff --git a/data/policies/design_C.1/model_1.0/pendubot/evolsac/model.zip b/data/policies/design_C.1/model_1.0/pendubot/evolsac/model.zip
new file mode 100644
index 00000000..72526c82
Binary files /dev/null and b/data/policies/design_C.1/model_1.0/pendubot/evolsac/model.zip differ
diff --git a/docker/Dockerfile_evolsac b/docker/Dockerfile_evolsac
new file mode 100644
index 00000000..2db3aa05
--- /dev/null
+++ b/docker/Dockerfile_evolsac
@@ -0,0 +1,55 @@
+FROM ubuntu:22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && \
+    apt-get install wget -y && \
+    apt-get install unzip -y && \
+    apt-get install git -y && \
+    apt-get install vim -y && \
+    apt-get install python3-pip -y && \
+    apt-get install libyaml-cpp-dev -y && \
+    #apt install libeigen3-dev -y && \
+    apt-get install libpython3.10 -y && \
+    apt-get install libx11-6 -y && \
+    apt-get install libsm6 -y && \
+    apt-get install libxt6 -y && \
+    apt-get install libglib2.0-0 -y && \
+    apt-get install python3-sphinx -y && \
+    apt-get install python3-numpydoc -y && \
+    apt-get install python3-sphinx-rtd-theme -y && \
+    apt-get install python-is-python3
+
+# libeigen3-dev install does not work with apt
+RUN wget -O Eigen.zip https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip
+RUN unzip Eigen.zip
+RUN cp -r eigen-3.4.0/Eigen /usr/local/include
+
+#RUN python -m ensurepip --upgrade
+RUN pip install -U pip
+RUN pip3 install numpy dill
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install drake
+
+RUN python -m pip install torch
+RUN python -m pip install stable-baselines3==2.3.2
+RUN python -m pip install evotorch
+RUN python -m pip install gymnasium
+RUN python -m pip install ffmpeg-python
+
+# Copy everything
+COPY . ./double_pendulum/
+
+WORKDIR "/double_pendulum"
+
+# RUN git checkout v0.1.0
+
+RUN make install
+RUN make pythonfull
+
+RUN apt-get -y update
+RUN apt-get -y upgrade
+RUN apt-get install -y ffmpeg
+
+
+RUN python -m pip install stable-baselines3==2.3.2
\ No newline at end of file
diff --git a/examples/reinforcement_learning/evolsac/README.md b/examples/reinforcement_learning/evolsac/README.md
new file mode 100644
index 00000000..953a73cd
--- /dev/null
+++ b/examples/reinforcement_learning/evolsac/README.md
@@ -0,0 +1,7 @@
+# EvolSAC training
+To train the Evolutionary SAC agent for both the pendubot and the acrobot, first ensure that the variable `robot` is consistently set to either `acrobot` or `pendubot` in all three `main.py` files contained inside the folders `SAC_main_training`, `SAC_finetuning`, and `SNES_finetuning`.
+
+The scripts below must be run directly from the folders that contain them so that the relative paths resolve correctly.
+
+1. Run `python main.py 3.0 0 0 0` from `SAC_main_training`, which trains the agent according to the surrogate reward function defined in the same file
+2. Run `python main.py 3.0 0 0 0 [acrobot/pendubot]` from `SNES_finetuning`, which loads the agent produced in step 1 and further trains it based on the performance score defined by the competition's organizers
\ No newline at end of file
diff --git a/examples/reinforcement_learning/evolsac/SAC_finetuning/environment.py b/examples/reinforcement_learning/evolsac/SAC_finetuning/environment.py
new file mode 100644
index 00000000..fcf2bc5d
--- /dev/null
+++ b/examples/reinforcement_learning/evolsac/SAC_finetuning/environment.py
@@ -0,0 +1,109 @@
+import gymnasium as gym
+import numpy as np
+
+class CustomCustomEnv(gym.Env):
+    def __init__(
+        self,
+        dynamics_func,
+        reset_func,
+        obs_space=gym.spaces.Box(
+            np.array([-1.0, -1.0, -1.0, -1.0]), np.array([1.0, 1.0, 1.0, 1.0])
+        ),
+        act_space=gym.spaces.Box(np.array([-1.0, -1.0]), np.array([1.0, 1.0])),
+        max_episode_steps=1000,
+        scaling=True,
+        terminates=True,
+    ):
+        self.dynamics_func = dynamics_func
+        self.reset_func = reset_func
+        self.observation_space = obs_space
+        self.action_space = act_space
+        self.max_episode_steps = max_episode_steps
+
+        self.previous_action = 0
+        self.terminates = terminates
+
+        self.observation = self.reset_func()
+        self.step_counter = 0
+        self.stabilisation_mode = False
+        self.y = [0,0]
+        self.update_y()
+        self.scaling = scaling
+
+        l1 = self.dynamics_func.simulator.plant.l[0]
+        l2 = self.dynamics_func.simulator.plant.l[1]
+        self.max_height = l1 + l2
+
+        if self.dynamics_func.robot == "acrobot":
+            self.control_line = 0.75 * self.max_height
+        elif self.dynamics_func.robot == "pendubot":
+            self.control_line = 0.7 * self.max_height
+
+        self.old_obs = None
+
+    def step(self, action):
+        self.old_obs = np.copy(self.observation)
+        self.observation = self.dynamics_func(
+            self.observation, action, scaling=self.scaling
+        )
+
+        self.update_y()
+        self.stabilisation_mode = self.y[1] >= self.control_line
+        terminated = self.terminated_func()
+        reward = self.reward_func(terminated, action)
+        info = {}
+        truncated = False
+        self.step_counter += 1
+        if self.step_counter >= self.max_episode_steps:
+            truncated = True
+        self.previous_action = action[0]
+        return self.observation, reward, terminated, truncated, info
+
+    def reset(self, seed=None, options=None):
+        super().reset(seed=seed)
+        self.observation = self.reset_func()
+        self.step_counter = 0
+        info = {}
+        self.previous_action = 0
+        self.stabilisation_mode = False
+        self.old_obs = np.copy(self.observation)
+        return self.observation, info
+
+    def render(self, mode="human"):
+        pass
+
+    def reward_func(self, terminated, action):
+        raise NotImplementedError("You have to define the reward function")
+
+    def terminated_func(self):
+        if self.terminates:
+            # Checks if we're in stabilisation mode and the ee has fallen below the control line
+            if self.stabilisation_mode and self.y[1] < self.control_line:
+                return True
+        return False
+
+    # Update the y coordinate of the first joint and the end effector
+    def update_y(self):
+        theta1, theta2, _, _ = self.dynamics_func.unscale_state(self.observation)
+
+        link_end_points = self.dynamics_func.simulator.plant.forward_kinematics(
+            [theta1, theta2]
+        )
+        self.y[0] = link_end_points[0][1]
+        self.y[1] = link_end_points[1][1]
+
+    def gravitational_reward(self):
+        x = self.dynamics_func.unscale_state(self.observation)
+        V = self.dynamics_func.simulator.plant.potential_energy(x)
+        return V
+
+    def V(self):
+        return self.gravitational_reward()
+
+    def kinetic_reward(self):
+        x = self.dynamics_func.unscale_state(self.observation)
+        T =
self.dynamics_func.simulator.plant.kinetic_energy(x) + return T + + def T(self): + return self.kinetic_reward() diff --git a/examples/reinforcement_learning/evolsac/SAC_finetuning/magic.py b/examples/reinforcement_learning/evolsac/SAC_finetuning/magic.py new file mode 100644 index 00000000..5efe9be8 --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_finetuning/magic.py @@ -0,0 +1,255 @@ +import os +import shutil + +import numpy as np +import pandas +from double_pendulum.analysis.leaderboard import leaderboard_scores +from double_pendulum.controller.evolsac.evolsac_controller import EvolSACController +from double_pendulum.model.model_parameters import model_parameters +from sim_controller import simulate_controller +from stable_baselines3.common.callbacks import BaseCallback + + +def load_controller(dynamics_func, model, window_size, include_time): + name = "evolsac" + leaderboard_config = { + "csv_path": name + "/sim_swingup.csv", + "name": name, + "simple_name": name, + "short_description": "SAC finetuning for both swingup and stabilisation", + "readme_path": f"readmes/{name}.md", + "username": "MarcoCali0", + } + controller = EvolSACController( + model=model, + dynamics_func=dynamics_func, + window_size=window_size, + include_time=include_time, + ) + controller.init() + return controller, leaderboard_config + + +def magic_score( + dynamics_func, + model, + folder, + folder_id, + window_size, + max_torque, + include_time, + index=None, +): + design = "design_C.1" + integrator = "runge_kutta" + dt = 0.002 + t0 = 0.0 + t_final = 10.0 + x0 = [0.0, 0.0, 0.0, 0.0] + goal = [np.pi, 0.0, 0.0, 0.0] + + if folder == "acrobot": + model_par_path = ( + "../../../../data/system_identification/identified_parameters/" + + design + + "/" + + "model_1.1" + + "/model_parameters.yml" + ) + mpar = model_parameters(filepath=model_par_path) + mpar.set_torque_limit([0.0, max_torque]) + + else: # robot = "pendubot" + model_par_path = ( + "../../../../data/system_identification/identified_parameters/" + + design + + "/" + + "model_1.1" + + "/model_parameters.yml" + ) + mpar = model_parameters(filepath=model_par_path) + mpar.set_torque_limit([max_torque, 0.0]) + + controller, leaderboard_config = load_controller( + dynamics_func, model, window_size, include_time + ) + + data_dir = f"data_{folder}/{folder_id}" + + if not os.path.exists(data_dir): + os.makedirs(data_dir) + + controller_name = f"evolsac_{folder}" + save_dir = ( + f"{data_dir}/{controller_name}" + if index is None + else f"{data_dir}/{controller_name}/{index}" + ) + if index is not None: + leaderboard_config["csv_path"] = f"{controller_name}/{index}/sim_swingup.csv" + save_to = os.path.join(save_dir, "leaderboard_entry.csv") + + simulate_controller( + controller, save_dir, mpar, dt, t_final, t0, x0, goal, integrator + ) + + conf = leaderboard_config + + conf["csv_path"] = os.path.join(data_dir, leaderboard_config["csv_path"]) + data_paths = {} + data_paths[leaderboard_config["name"]] = conf + + leaderboard_scores( + data_paths=data_paths, + save_to=save_to, + mpar=mpar, + # weights={"swingup_time": 0.5, "max_tau": 0.1, "energy": 0.0, "integ_tau": 0.4, "tau_cost": 0.0, "tau_smoothness": 0.0}, + weights={ + "swingup_time": 1.0, + "max_tau": 0.0, + "energy": 1.0, + "integ_tau": 0.0, + "tau_cost": 1.0, + "tau_smoothness": 1.0, + "velocity_cost": 1.0, + }, + normalize={ + "swingup_time": 20.0, + "max_tau": 1.0, # not used + "energy": 60.0, + "integ_tau": 1.0, # not used + "tau_cost": 20.0, + "tau_smoothness": 0.1, + "velocity_cost": 400, + }, 
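+        # Note (assumption): the exact aggregation is defined by leaderboard_scores itself;
+        # presumably each raw criterion is divided by its entry in `normalize` before the
+        # weighted sum that yields the RealAI Score is computed.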
+ link_base="", + score_version="v2", + ) + df = pandas.read_csv(save_to) + + # print(df) + # print(np.array(df["RealAI Score"])[0]) + # print(df.sort_values(by=["RealAI Score"], ascending=False).to_markdown(index=False)) + + score = np.array(df["RealAI Score"])[0] + print("RealAI Score = ", score) + return score + + +def copy_files(src_folder, dest_folder): + if not os.path.exists(dest_folder): + os.makedirs(dest_folder) + + for filename in os.listdir(src_folder): + src_file = os.path.join(src_folder, filename) + dest_file = os.path.join(dest_folder, filename) + + if os.path.isfile(src_file): + shutil.copy2(src_file, dest_file) + + +class MagicCallback(BaseCallback): + def __init__( + self, + path, + folder_id, + dynamics_func, + robot, + window_size, + max_torque, + include_time, + ): + super().__init__(False) + self.path = f"{path}{folder_id}" + if not os.path.exists(path): + os.makedirs(path, exist_ok=True) + self.best = -np.inf + self.folder_id = folder_id + self.dynamics_func = dynamics_func + self.robot = robot + self.window_size = window_size + self.max_torque = max_torque + self.include_time = include_time + + def _on_step(self) -> bool: + score = magic_score( + self.dynamics_func, + self.model, + self.robot, + self.folder_id, + self.window_size, + self.max_torque, + self.include_time, + ) + if score >= self.best: + self.best = score + self.model.save(f"{self.path}/best_model") + copy_files(f"./data/{self.folder_id}/sac/", self.path) + return True + +import concurrent.futures + + +class BruteMagicCallback(BaseCallback): + def __init__( + self, + path, + folder_id, + dynamics_func, + robot, + window_size, + max_torque, + include_time, + ): + super().__init__(False) + self.path = f"{path}{folder_id}" + if not os.path.exists(path): + os.makedirs(path, exist_ok=True) + self.folder_id = folder_id + self.dynamics_func = dynamics_func + self.robot = robot + self.window_size = window_size + self.max_torque = max_torque + self.include_time = include_time + self.iteration = 0 + self.executor = concurrent.futures.ThreadPoolExecutor() + + def _on_step(self) -> bool: + self.iteration += 1 + self.executor.submit(lambda: async_store(self.iteration, self)) + return True + + +import tempfile + +from stable_baselines3 import SAC + + +def deepcopy_model(model): + with tempfile.TemporaryDirectory() as tmpdirname: + model_path = os.path.join(tmpdirname, "temp_model") + model.save(model_path) + copied_model = SAC.load(model_path) + return copied_model + + + +def async_store(iteration, callback: BruteMagicCallback): + model = deepcopy_model(callback.model) + _ = magic_score( + callback.dynamics_func, + model, + callback.robot, + callback.folder_id, + callback.window_size, + callback.max_torque, + callback.include_time, + index=iteration, + ) + if not os.path.exists(f"{callback.path}/{iteration}"): + os.makedirs(f"{callback.path}/{iteration}", exist_ok=True) + model.save(f"{callback.path}/{iteration}/best_model") + copy_files( + f"./data/{callback.folder_id}/evolsac/{iteration}", + f"{os.path.join(callback.path, str(iteration))}", + ) diff --git a/examples/reinforcement_learning/evolsac/SAC_finetuning/main.py b/examples/reinforcement_learning/evolsac/SAC_finetuning/main.py new file mode 100644 index 00000000..a640c52f --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_finetuning/main.py @@ -0,0 +1,215 @@ +import os +import sys + +import gymnasium as gym +import numpy as np +import stable_baselines3 +import torch +from double_pendulum.model.model_parameters import model_parameters +from 
double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.simulation.gym_env import double_pendulum_dynamics_func +from double_pendulum.simulation.simulation import Simulator +from environment import CustomCustomEnv +from magic import BruteMagicCallback, MagicCallback +from simulator import CustomSimulator +from stable_baselines3 import SAC +from stable_baselines3.common.callbacks import EvalCallback +from stable_baselines3.common.env_util import make_vec_env +from stable_baselines3.sac.policies import MlpPolicy +from wrappers import * + +np.random.seed(0) +torch.manual_seed(0) +torch.random.manual_seed(0) +torch.backends.cudnn.deterministic = True +stable_baselines3.common.utils.set_random_seed(0) + +class MyEnv(CustomCustomEnv): + def reward_func(self, terminated, action): + _, theta2, omega1, omega2 = self.dynamics_func.unscale_state(self.observation) + costheta2 = np.cos(theta2) + + a = action[0] + delta_action = np.abs(a - self.previous_action) + lambda_delta = 0.05 + lambda_action = 0.02 + lambda_velocities = 0.01 + if not terminated: + if self.stabilisation_mode: + reward = ( + self.V() + + 2 * (1 + costheta2) ** 2 * 2 + - self.T() + - 5 * lambda_action * np.square(a) * 2 + - 3 * lambda_delta * delta_action * 2 + ) + else: + reward = ( + (1 - np.abs(a)) * self.V() + - lambda_action * np.square(a) * 2 + - 2 * lambda_velocities * (omega1**2 + omega2**2) * 2 + - 3 * lambda_delta * delta_action * 2 + ) + else: + reward = -1.0 + return reward + + +assert ( + len(sys.argv) >= 3 +), "Please provide: [max torque] [robustness] [window_size (0 = no window)] [include_time]" +max_torque = float(sys.argv[1]) +robustness = float(sys.argv[2]) +WINDOW_SIZE = int(sys.argv[3]) +INCLUDE_TIME = bool(int(sys.argv[4])) +robot = str(sys.argv[5]) +FOLDER_ID = f"{os.path.basename(__file__)}-{max_torque}-{robustness}-{WINDOW_SIZE}-{int(INCLUDE_TIME)}" +TERMINATION = False + +# define robot variation +# robot = "acrobot" + +# setting log path for the training +log_dir = f"./log_data_{robot}/SAC_training/{FOLDER_ID}" +if not os.path.exists(log_dir): + os.makedirs(log_dir) + +design = "design_C.1" +model = "model_1.1" +model_par_path = ( + "../../../../data/system_identification/identified_parameters/" + + design + + "/" + + model + + "/model_parameters.yml" +) + +# model and reward parameter +max_velocity = 50 +torque_limit = [max_torque, 0] if robot == "pendubot" else [0, max_torque] + +mpar = model_parameters(filepath=model_par_path) +mpar.set_torque_limit(torque_limit) +dt = 0.01 +integrator = "runge_kutta" + +plant = SymbolicDoublePendulum(model_pars=mpar) +simulator = CustomSimulator(plant=plant, robustness=robustness, max_torque=max_torque, robot=robot) +eval_simulator = Simulator(plant=plant) + +# learning environment parameters +state_representation = 2 + +obs_space = gym.spaces.Box(np.array([-1.0] * 4), np.array([1.0] * 4)) +act_space = gym.spaces.Box(np.array([-1]), np.array([1])) +max_steps = 10 / dt + +############################################################################### + +n_envs = 50 +training_steps = 10_000_000 +verbose = 1 +eval_freq = 5000 +n_eval_episodes = 1 +# a patto che i reward istantanei siano piccoli +# 0.01 -> 1500000 -> 7 +# 0.003 -> 1500000 -> 46 +# 0.001 -> 1500000 -> 38 +# 0.0003 -> 1500000 -> 19 +learning_rate = 0.00008 + +############################################################################### + +# initialize double pendulum dynamics +dynamics_func = double_pendulum_dynamics_func( + simulator=simulator, + dt=dt, + 
integrator=integrator, + robot=robot, + state_representation=state_representation, + max_velocity=max_velocity, + torque_limit=torque_limit, +) + +eval_dynamics_func = double_pendulum_dynamics_func( + simulator=eval_simulator, + dt=dt, + integrator=integrator, + robot=robot, + state_representation=state_representation, + max_velocity=max_velocity, + torque_limit=torque_limit, +) + + +def zero_reset_func(): + observation = [-1.0, -1.0, 0.0, 0.0] + return observation + + +############################################################################### + + +def wrap(env): + if INCLUDE_TIME: + env = TimeAwareWrapper(env, dt=dt) + if WINDOW_SIZE > 0: + env = StateActionHistoryWrapper(env, history_length=WINDOW_SIZE) + return env + + +envs = make_vec_env( + env_id=MyEnv, + wrapper_class=wrap, + n_envs=n_envs, + env_kwargs={ + "dynamics_func": dynamics_func, + "reset_func": zero_reset_func, + "terminates": TERMINATION, + "obs_space": obs_space, + "act_space": act_space, + "max_episode_steps": max_steps, + }, +) + +eval_env = wrap( + MyEnv( + dynamics_func=eval_dynamics_func, + reset_func=zero_reset_func, + obs_space=obs_space, + act_space=act_space, + max_episode_steps=max_steps, + terminates=TERMINATION, + ) +) + +eval_callback = EvalCallback( + eval_env, + callback_after_eval=BruteMagicCallback( + f"./models_{robot}/", + folder_id=FOLDER_ID, + dynamics_func=eval_dynamics_func, + robot=robot, + window_size=WINDOW_SIZE, + max_torque=max_torque, + include_time=INCLUDE_TIME, + ), + best_model_save_path=os.path.join(log_dir, "best_model"), + log_path=log_dir, + eval_freq=eval_freq, + verbose=verbose, + n_eval_episodes=n_eval_episodes, +) + +############################################################################### + +agent = SAC( + MlpPolicy, + envs, + verbose=verbose, + learning_rate=learning_rate, +) + +loaded_model = "/home/alberto_sinigaglia/test_dp_clone/leaderboard/pendubot/pendubot_best_model_0567_0800.zip" if robot == "pendubot" else "/home/alberto_sinigaglia/test_dp_clone/leaderboard/acrobot/acrobot_best_model_0504_0700.zip" +agent.set_parameters(loaded_model) +agent.learn(total_timesteps=training_steps, callback=eval_callback) diff --git a/examples/reinforcement_learning/evolsac/SAC_finetuning/score_check.py b/examples/reinforcement_learning/evolsac/SAC_finetuning/score_check.py new file mode 100644 index 00000000..d5e4632e --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_finetuning/score_check.py @@ -0,0 +1,50 @@ +import os +import csv + +def extract_score_from_file(file_path): + """Extract the last value (RealAI Score) from the scores.csv file.""" + with open(file_path, mode='r') as file: + csv_reader = csv.reader(file) + next(csv_reader) # Skip the header + row = next(csv_reader) # Read the actual data + return float(row[-1]) # Return the last value as the score + +def find_highest_score(root_directory): + """Find the highest score and the corresponding directory.""" + highest_score = float('-inf') + best_directory = None + + # Iterate through directories named '1' to some number + for dir_name in os.listdir(root_directory): + dir_path = os.path.join(root_directory, dir_name) + + # Check if it's a directory + if os.path.isdir(dir_path): + file_path = os.path.join(dir_path, 'scores.csv') + + # Check if the scores.csv file exists in the directory + if os.path.exists(file_path): + try: + score = extract_score_from_file(file_path) + + # Check if this score is the highest + if score > highest_score: + highest_score = score + best_directory = dir_name + except Exception as 
e: + print(f"Error reading {file_path}: {e}") + + return highest_score, best_directory + +# Specify the root directory where the numbered folders are located + +robot = "acrobot" +root_directory = f'data_{robot}/main.py-3.0-0.0-0-0/evolsac_{robot}' + +# Find the highest score and the directory containing it +highest_score, best_directory = find_highest_score(root_directory) + +if best_directory is not None: + print(f"The highest score for {robot} is {highest_score}, found in directory: {best_directory}") +else: + print("No valid scores found.") \ No newline at end of file diff --git a/examples/reinforcement_learning/evolsac/SAC_finetuning/sim_controller.py b/examples/reinforcement_learning/evolsac/SAC_finetuning/sim_controller.py new file mode 100644 index 00000000..3d40d567 --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_finetuning/sim_controller.py @@ -0,0 +1,54 @@ +import os +import argparse +import importlib +import numpy as np +import matplotlib.pyplot as plt + +from double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.model.model_parameters import model_parameters +from double_pendulum.simulation.simulation import Simulator +from double_pendulum.utils.csv_trajectory import save_trajectory +from double_pendulum.utils.plotting import plot_timeseries + + + + +def simulate_controller(controller, save_dir, mpar, dt, t_final, t0, x0, goal, integrator, controller_name=""): + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + plant = SymbolicDoublePendulum(model_pars=mpar) + sim = Simulator(plant=plant) + + T, X, U = sim.simulate_and_animate( + t0=t0, + x0=x0, + tf=t_final, + dt=dt, + controller=controller, + integrator=integrator, + save_video=True, + video_name=os.path.join(save_dir, "sim_video.gif"), + plot_horizontal_line=True, + horizontal_line_height=0.9 * (mpar.l[0] + mpar.l[1]), + scale=0.25, + ) + + save_trajectory(os.path.join(save_dir, "sim_swingup.csv"), T=T, X_meas=X, U_con=U) + + plot_timeseries( + T, + X, + U, + X_meas=sim.meas_x_values, + pos_y_lines=[-np.pi, 0.0, np.pi], + vel_y_lines=[0.0], + tau_y_lines=[-mpar.tl[1], 0.0, mpar.tl[1]], + save_to=os.path.join(save_dir, "timeseries"), + show=False, + scale=0.5, + ) + + if os.path.exists(f"readmes/{controller_name}.md"): + os.system(f"cp readmes/{controller_name}.md {save_dir}/README.md") + diff --git a/examples/reinforcement_learning/evolsac/SAC_finetuning/simulator.py b/examples/reinforcement_learning/evolsac/SAC_finetuning/simulator.py new file mode 100644 index 00000000..416bffaa --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_finetuning/simulator.py @@ -0,0 +1,130 @@ +from copy import deepcopy + +import numpy as np + +from double_pendulum.model.model_parameters import model_parameters +from double_pendulum.simulation.simulation import Simulator + + +def get_par_list(x0, min_rel, max_rel, n): + if x0 != 0: + if n % 2 == 0: + n = n + 1 + li = np.linspace(min_rel, max_rel, n) + else: + li = np.linspace(0, max_rel, n) + par_list = li * x0 + return par_list + + +class CustomSimulator(Simulator): + def __init__(self, plant, robustness,max_torque, robot): + self.base_plant = deepcopy(plant) + design = "design_C.1" + model = "model_1.1" + # robot = "acrobot" + + model_par_path = ( + "../../../../data/system_identification/identified_parameters/" + + design + + "/" + + model + + "/model_parameters.yml" + ) + mpar = model_parameters(filepath=model_par_path) + # SHOULD USE MAX_TORQUE FROM TRAIN_SAC + torque_limit = [0.0, max_torque] if robot == "acrobot" 
else [max_torque,0.0] + mpar.set_torque_limit(torque_limit) + + self.mpar = mpar + self.robustness = robustness + self.max_torque = max_torque + super().__init__(plant=plant) + + def reset(self): + """ + Reset the Simulator + Resets + - the internal data recorder + - the filter + arguments + - the process noise + - the measurement parameters + - the motor parameters + - perturbations + """ + super().reset() + + self.plant = deepcopy(self.base_plant) + + if np.random.rand() < self.robustness: + N_var = 21 + mpar_vars = [ + "Ir", + "m1r1", + "I1", + "b1", + "cf1", + "m2r2", + "m2", + "I2", + "b2", + "cf2", + ] + + Ir_var_list = np.linspace(0.0, 1e-4, N_var) + m1r1_var_list = get_par_list( + self.mpar.m[0] * self.mpar.r[0], 0.75, 1.25, N_var + ) + I1_var_list = get_par_list(self.mpar.I[0], 0.75, 1.25, N_var) + b1_var_list = np.linspace(-0.1, 0.1, N_var) + cf1_var_list = np.linspace(-0.2, 0.2, N_var) + m2r2_var_list = get_par_list( + self.mpar.m[1] * self.mpar.r[1], 0.75, 1.25, N_var + ) + m2_var_list = get_par_list(self.mpar.m[1], 0.75, 1.25, N_var) + I2_var_list = get_par_list(self.mpar.I[1], 0.75, 1.25, N_var) + b2_var_list = np.linspace(-0.1, 0.1, N_var) + cf2_var_list = np.linspace(-0.2, 0.2, N_var) + + modelpar_var_lists = { + "Ir": Ir_var_list, + "m1r1": m1r1_var_list, + "I1": I1_var_list, + "b1": b1_var_list, + "cf1": cf1_var_list, + "m2r2": m2r2_var_list, + "m2": m2_var_list, + "I2": I2_var_list, + "b2": b2_var_list, + "cf2": cf2_var_list, + } + + mp = np.random.choice(mpar_vars) + var = np.random.choice(modelpar_var_lists[mp]) + + if mp == "Ir": + self.plant.Ir = var + elif mp == "m1r1": + m1 = self.mpar.m[0] + r1 = var / m1 + self.plant.m[0] = m1 + self.plant.com[0] = r1 + elif mp == "I1": + self.plant.I[0] = var + elif mp == "b1": + self.plant.b[0] = var + elif mp == "cf1": + self.plant.coulomb_fric[0] = var + elif mp == "m2r2": + m2 = self.mpar.m[1] + r2 = var / m2 + self.plant.m[1] = m2 + self.plant.com[1] = r2 + elif mp == "m2": + self.plant.m[1] = var + elif mp == "I2": + self.plant.I[1] = var + elif mp == "b2": + self.plant.b[1] = var + elif mp == "cf2": + self.plant.coulomb_fric[1] = var diff --git a/examples/reinforcement_learning/evolsac/SAC_finetuning/test_sac_swing_up.py b/examples/reinforcement_learning/evolsac/SAC_finetuning/test_sac_swing_up.py new file mode 100644 index 00000000..dc78550b --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_finetuning/test_sac_swing_up.py @@ -0,0 +1,92 @@ +import os + +import numpy as np +from controller import SACController +from stable_baselines3 import SAC + +from double_pendulum.model.model_parameters import model_parameters +from double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.simulation.gym_env import double_pendulum_dynamics_func +from double_pendulum.simulation.simulation import Simulator +from double_pendulum.utils.plotting import plot_timeseries + +# hyperparameters +robot = "acrobot" + +design = "design_C.1" +model = "model_1.0" +max_torque = 3.5 +scaling_state = True + +if robot == "pendubot": + torque_limit = [max_torque, 0.0] + active_act = 0 + +elif robot == "acrobot": + torque_limit = [0.0, max_torque] + # model_path = "models/max_torque_1.5_robustness_0.2" + model_path = "acrobot_score_0500" + +# import model parameter +model_par_path = ( + "../../../data/system_identification/identified_parameters/" + + design + + "/" + + model + + "/model_parameters.yml" +) +mpar = model_parameters(filepath=model_par_path) + +mpar.set_torque_limit(torque_limit) + +# simulation 
parameters +dt = 0.01 +t_final = 10.0 +integrator = "runge_kutta" +goal = [np.pi, 0.0, 0.0, 0.0] + +plant = SymbolicDoublePendulum(model_pars=mpar) + +sim = Simulator(plant=plant) + +# initialize double pendulum dynamics +dynamics_func = double_pendulum_dynamics_func( + simulator=sim, + dt=dt, + integrator=integrator, + robot=robot, + state_representation=2, + scaling=scaling_state, + max_velocity=50, + torque_limit=torque_limit, +) + +model = SAC.load(model_path) + +# initialize sac controller +controller = SACController(model, dynamics_func, include_time=False) +controller.init() + +# start simulation +T, X, U = sim.simulate_and_animate( + t0=0.0, + x0=[0.0, 0.0, 0.0, 0.0], + tf=t_final, + dt=dt, + controller=controller, + integrator=integrator, + save_video=True, +) + + +# plot time series +# plot_timeseries( +# T, +# X, +# U, +# X_meas=sim.meas_x_values, +# pos_y_lines=[np.pi], +# tau_y_lines=[-torque_limit[active_act], torque_limit[active_act]], +# save_to=os.path.join("./", "timeseries"), +# show=False, +# ) diff --git a/examples/reinforcement_learning/evolsac/SAC_finetuning/wrappers.py b/examples/reinforcement_learning/evolsac/SAC_finetuning/wrappers.py new file mode 100644 index 00000000..780a0b8e --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_finetuning/wrappers.py @@ -0,0 +1,87 @@ +import gymnasium as gym +from collections import deque +import numpy as np + + +class StateActionHistoryWrapper(gym.Wrapper): + def __init__(self, env, history_length=4): + super().__init__(env) + self.history_length = history_length + self.state_history = np.zeros((history_length, *env.observation_space.shape)) + self.action_history = np.zeros((history_length, *env.action_space.shape)) + + state_space = env.observation_space + action_space = env.action_space + low = np.concatenate( + [state_space.low] * history_length + [action_space.low] * history_length + ) + high = np.concatenate( + [state_space.high] * history_length + [action_space.high] * history_length + ) + self.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32) + + def reset(self, **kwargs): + observation = self.env.reset() + self.state_history.fill(0) + self.state_history[-1] = observation[0] + self.action_history.fill(0) + return self._get_observation(), {} + + def step(self, action): + observation, reward, terminated, truncated, info = self.env.step(action) + self.state_history[0:-1] = self.state_history[1:] + self.state_history[-1] = observation + self.action_history[0:-1] = self.action_history[1:] + self.action_history[-1] = action + return self._get_observation(), reward, terminated, truncated, info + + def _get_observation(self): + state_flat = self.state_history.reshape((-1)) + action_flat = self.action_history.reshape((-1)) + return np.concatenate([state_flat, action_flat]) + + +# adds noise to velocity +class PerturbStateWrapper(gym.ObservationWrapper): + def __init__(self, env, perturb_std=0.02): + super().__init__(env) + self.std = perturb_std + + def observation(self, observation): + observation = np.atleast_2d(observation) + observation[:, 2:] = ( + observation[:, 2:] + np.random.randn(*observation[:, 2:].shape) * self.std + ) + return observation + + +# probably useless since it's already perturbed by SAC_main_training +class PerturbActionWrapper(gym.ActionWrapper): + def __init__(self, env, perturb_std=0.01): + super().__init__(env) + self.noise = lambda: np.random.randn(*env.observation_space.shape) * perturb_std + + def observation(self, observation): + return observation + self.noise() 
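+    # NOTE: as written, this class subclasses gym.ActionWrapper but only defines observation();
+    # ActionWrapper applies noise through an action(self, action) override, so this wrapper does
+    # not actually perturb anything in its current form.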
+ + +class TimeAwareWrapper(gym.ObservationWrapper): + def __init__(self, env, dt): + super().__init__(env) + self.dt = dt + observation_space = env.observation_space + low = np.append(observation_space.low, 0.0) + high = np.append(observation_space.high, 1.0) + self.observation_space = gym.spaces.Box(low, high, dtype=np.float32) + self.t = 0 + + def observation(self, observation): + return np.append(observation, self.t / 10) + + def step(self, action): + self.t += self.dt + return super().step(action) + + def reset(self, **kwargs): + self.t = 0 + return super().reset(**kwargs) diff --git a/examples/reinforcement_learning/evolsac/SAC_main_training/environment.py b/examples/reinforcement_learning/evolsac/SAC_main_training/environment.py new file mode 100644 index 00000000..fcf2bc5d --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_main_training/environment.py @@ -0,0 +1,109 @@ +import gymnasium as gym +import numpy as np + +class CustomCustomEnv(gym.Env): + def __init__( + self, + dynamics_func, + reset_func, + obs_space=gym.spaces.Box( + np.array([-1.0, -1.0, -1.0, -1.0]), np.array([1.0, 1.0, 1.0, 1.0]) + ), + act_space=gym.spaces.Box(np.array([-1.0, -1.0]), np.array([1.0, 1.0])), + max_episode_steps=1000, + scaling=True, + terminates=True, + ): + self.dynamics_func = dynamics_func + self.reset_func = reset_func + self.observation_space = obs_space + self.action_space = act_space + self.max_episode_steps = max_episode_steps + + self.previous_action = 0 + self.terminates = terminates + + self.observation = self.reset_func() + self.step_counter = 0 + self.stabilisation_mode = False + self.y = [0,0] + self.update_y() + self.scaling = scaling + + l1 = self.dynamics_func.simulator.plant.l[0] + l2 = self.dynamics_func.simulator.plant.l[1] + self.max_height = l1 + l2 + + if self.dynamics_func.robot == "acrobot": + self.control_line = 0.75 * self.max_height + elif self.dynamics_func.robot == "pendubot": + self.control_line = 0.7 * self.max_height + + self.old_obs = None + + def step(self, action): + self.old_obs = np.copy(self.observation) + self.observation = self.dynamics_func( + self.observation, action, scaling=self.scaling + ) + + self.update_y() + self.stabilisation_mode = self.y[1] >= self.control_line + terminated = self.terminated_func() + reward = self.reward_func(terminated, action) + info = {} + truncated = False + self.step_counter += 1 + if self.step_counter >= self.max_episode_steps: + truncated = True + self.previous_action = action[0] + return self.observation, reward, terminated, truncated, info + + def reset(self, seed=None, options=None): + super().reset(seed=seed) + self.observation = self.reset_func() + self.step_counter = 0 + info = {} + self.previous_action = 0 + self.stabilisation_mode = False + self.old_obs = np.copy(self.observation) + return self.observation, info + + def render(self, mode="human"): + pass + + def reward_func(self, terminated, action): + raise NotImplementedError("You have to define the reward function") + + def terminated_func(self): + if self.terminates: + # Checks if we're in stabilisation mode and the ee has fallen below the control line + if self.stabilisation_mode and self.y[1] < self.control_line: + return True + return False + + # Update the y coordinate of the first joint and the end effector + def update_y(self): + theta1, theta2, _, _ = self.dynamics_func.unscale_state(self.observation) + + link_end_points = self.dynamics_func.simulator.plant.forward_kinematics( + [theta1, theta2] + ) + self.y[0] = link_end_points[0][1] + 
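+        # y[1] is the end-effector height; step() compares it against control_line
+        # to decide whether the stabilisation-mode reward is used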
self.y[1] = link_end_points[1][1] + + def gravitational_reward(self): + x = self.dynamics_func.unscale_state(self.observation) + V = self.dynamics_func.simulator.plant.potential_energy(x) + return V + + def V(self): + return self.gravitational_reward() + + def kinetic_reward(self): + x = self.dynamics_func.unscale_state(self.observation) + T = self.dynamics_func.simulator.plant.kinetic_energy(x) + return T + + def T(self): + return self.kinetic_reward() diff --git a/examples/reinforcement_learning/evolsac/SAC_main_training/magic.py b/examples/reinforcement_learning/evolsac/SAC_main_training/magic.py new file mode 100644 index 00000000..fb787bfe --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_main_training/magic.py @@ -0,0 +1,250 @@ +import os +import shutil + +import numpy as np +import pandas +from double_pendulum.analysis.leaderboard import leaderboard_scores +from double_pendulum.controller.evolsac.evolsac_controller import EvolSACController +from double_pendulum.model.model_parameters import model_parameters +from sim_controller import simulate_controller +from stable_baselines3.common.callbacks import BaseCallback + + +def load_controller(dynamics_func, model, window_size, include_time): + name = "evolsac" + leaderboard_config = { + "csv_path": name + "/sim_swingup.csv", + "name": name, + "simple_name": name, + "short_description": "SAC finetuning for both swingup and stabilisation", + "readme_path": f"readmes/{name}.md", + "username": "MarcoCali0", + } + controller = EvolSACController( + model=model, + dynamics_func=dynamics_func, + window_size=window_size, + include_time=include_time, + ) + controller.init() + return controller, leaderboard_config + + +def magic_score( + dynamics_func, + model, + folder, + folder_id, + window_size, + max_torque, + include_time, + index=None, +): + design = "design_C.1" + integrator = "runge_kutta" + dt = 0.002 + t0 = 0.0 + t_final = 10.0 + x0 = [0.0, 0.0, 0.0, 0.0] + goal = [np.pi, 0.0, 0.0, 0.0] + + if folder == "acrobot": + model_par_path = ( + f"../../../../data/system_identification/identified_parameters/" + + design + + "/" + + "model_1.1" + + "/model_parameters.yml" + ) + mpar = model_parameters(filepath=model_par_path) + mpar.set_torque_limit([0.0, max_torque]) + + else: # robot = "pendubot" + model_par_path = ( + f"../../../../data/system_identification/identified_parameters/" + + design + + "/" + + "model_1.1" + + "/model_parameters.yml" + ) + mpar = model_parameters(filepath=model_par_path) + mpar.set_torque_limit([max_torque, 0.0]) + + controller, leaderboard_config = load_controller( + dynamics_func, model, window_size, include_time + ) + + data_dir = f"data_{folder}/{folder_id}" + + if not os.path.exists(data_dir): + os.makedirs(data_dir) + + controller_name = f"evolsac_{folder}" + save_dir = ( + f"{data_dir}/{controller_name}" + if index is None + else f"{data_dir}/{controller_name}/{index}" + ) + if index is not None: + leaderboard_config["csv_path"] = f"{controller_name}/{index}/sim_swingup.csv" + save_to = os.path.join(save_dir, "leaderboard_entry.csv") + + simulate_controller( + controller, save_dir, mpar, dt, t_final, t0, x0, goal, integrator + ) + + conf = leaderboard_config + + conf["csv_path"] = os.path.join(data_dir, leaderboard_config["csv_path"]) + data_paths = {} + data_paths[leaderboard_config["name"]] = conf + + leaderboard_scores( + data_paths=data_paths, + save_to=save_to, + mpar=mpar, + # weights={"swingup_time": 0.5, "max_tau": 0.1, "energy": 0.0, "integ_tau": 0.4, "tau_cost": 0.0, "tau_smoothness": 
0.0}, + weights={ + "swingup_time": 1.0, + "max_tau": 0.0, + "energy": 1.0, + "integ_tau": 0.0, + "tau_cost": 1.0, + "tau_smoothness": 1.0, + "velocity_cost": 1.0, + }, + normalize={ + "swingup_time": 20.0, + "max_tau": 1.0, # not used + "energy": 60.0, + "integ_tau": 1.0, # not used + "tau_cost": 20.0, + "tau_smoothness": 0.1, + "velocity_cost": 400, + }, + link_base="", + score_version="v2", + ) + df = pandas.read_csv(save_to) + score = np.array(df["RealAI Score"])[0] + print("RealAI Score = ", score) + return score + + +def copy_files(src_folder, dest_folder): + if not os.path.exists(dest_folder): + os.makedirs(dest_folder) + + for filename in os.listdir(src_folder): + src_file = os.path.join(src_folder, filename) + dest_file = os.path.join(dest_folder, filename) + + if os.path.isfile(src_file): + shutil.copy2(src_file, dest_file) + + +class MagicCallback(BaseCallback): + def __init__( + self, + path, + folder_id, + dynamics_func, + robot, + window_size, + max_torque, + include_time, + ): + super().__init__(False) + self.path = f"{path}{folder_id}" + if not os.path.exists(path): + os.makedirs(path, exist_ok=True) + self.best = -np.inf + self.folder_id = folder_id + self.dynamics_func = dynamics_func + self.robot = robot + self.window_size = window_size + self.max_torque = max_torque + self.include_time = include_time + + def _on_step(self) -> bool: + score = magic_score( + self.dynamics_func, + self.model, + self.robot, + self.folder_id, + self.window_size, + self.max_torque, + self.include_time, + ) + if score >= self.best: + self.best = score + self.model.save(f"{self.path}/best_model") + copy_files(f"./data/{self.folder_id}/evolsac/", self.path) + return True + +import concurrent.futures + + +class BruteMagicCallback(BaseCallback): + def __init__( + self, + path, + folder_id, + dynamics_func, + robot, + window_size, + max_torque, + include_time, + ): + super().__init__(False) + self.path = f"{path}{folder_id}" + if not os.path.exists(path): + os.makedirs(path, exist_ok=True) + self.folder_id = folder_id + self.dynamics_func = dynamics_func + self.robot = robot + self.window_size = window_size + self.max_torque = max_torque + self.include_time = include_time + self.iteration = 0 + self.executor = concurrent.futures.ThreadPoolExecutor() + + def _on_step(self) -> bool: + self.iteration += 1 + self.executor.submit(lambda: async_store(self.iteration, self)) + return True + + +import tempfile + +from stable_baselines3 import SAC + + +def deepcopy_model(model): + with tempfile.TemporaryDirectory() as tmpdirname: + model_path = os.path.join(tmpdirname, "temp_model") + model.save(model_path) + copied_model = SAC.load(model_path) + return copied_model + + + +def async_store(iteration, callback: BruteMagicCallback): + model = deepcopy_model(callback.model) + _ = magic_score( + callback.dynamics_func, + model, + callback.robot, + callback.folder_id, + callback.window_size, + callback.max_torque, + callback.include_time, + index=iteration, + ) + if not os.path.exists(f"{callback.path}/{iteration}"): + os.makedirs(f"{callback.path}/{iteration}", exist_ok=True) + model.save(f"{callback.path}/{iteration}/best_model") + copy_files( + f"./data/{callback.folder_id}/evolsac/{iteration}", + f"{os.path.join(callback.path, str(iteration))}", + ) diff --git a/examples/reinforcement_learning/evolsac/SAC_main_training/main.py b/examples/reinforcement_learning/evolsac/SAC_main_training/main.py new file mode 100644 index 00000000..2721296b --- /dev/null +++ 
b/examples/reinforcement_learning/evolsac/SAC_main_training/main.py @@ -0,0 +1,214 @@ +import os +import sys + +import gymnasium as gym +import numpy as np +import stable_baselines3 +import torch +from environment import CustomCustomEnv +from magic import MagicCallback, BruteMagicCallback +from simulator import CustomSimulator +from stable_baselines3 import SAC +from stable_baselines3.common.callbacks import EvalCallback +from stable_baselines3.common.env_util import make_vec_env +from stable_baselines3.sac.policies import MlpPolicy +from wrappers import * + +from double_pendulum.model.model_parameters import model_parameters +from double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.simulation.gym_env import double_pendulum_dynamics_func +from double_pendulum.simulation.simulation import Simulator + +np.random.seed(0) +torch.manual_seed(0) +torch.random.manual_seed(0) +torch.backends.cudnn.deterministic = True +stable_baselines3.common.utils.set_random_seed(0) + + +class MyEnv(CustomCustomEnv): + def reward_func(self, terminated, action): + _, theta2, omega1, omega2 = self.dynamics_func.unscale_state(self.observation) + costheta2 = np.cos(theta2) + + a = action[0] + delta_action = np.abs(a - self.previous_action) + lambda_delta = 0.05 + lambda_action = 0.02 + lambda_velocities = 0.01 + if not terminated: + if self.stabilisation_mode: + reward = ( + self.V() + + 2 * (1 + costheta2) ** 2 + - self.T() + - 5 * lambda_action * np.square(a) + - 3 * lambda_delta * delta_action + ) + else: + reward = ( + self.V() + - lambda_action * np.square(a) + - 2 * lambda_velocities * (omega1**2 + omega2**2) + - 3 * lambda_delta * delta_action + ) + else: + reward = -1.0 + return reward + + +assert ( + len(sys.argv) >= 3 +), "Please provide: [max torque] [robustness] [window_size (0 = no window)] [include_time]" +max_torque = float(sys.argv[1]) +robustness = float(sys.argv[2]) +WINDOW_SIZE = int(sys.argv[3]) +INCLUDE_TIME = bool(int(sys.argv[4])) +FOLDER_ID = f"{os.path.basename(__file__)}-{max_torque}-{robustness}-{WINDOW_SIZE}-{int(INCLUDE_TIME)}" +TERMINATION = False + +# define robot variation +robot = "pendubot" + +# setting log path for the training +log_dir = f"./log_data_{robot}/SAC_training/{FOLDER_ID}" +if not os.path.exists(log_dir): + os.makedirs(log_dir) + +design = "design_C.1" +model = "model_1.1" +model_par_path = ( + "../../../../data/system_identification/identified_parameters/" + + design + + "/" + + model + + "/model_parameters.yml" +) + +# model and reward parameter +max_velocity = 50 +torque_limit = [max_torque, 0] if robot == "pendubot" else [0, max_torque] + +mpar = model_parameters(filepath=model_par_path) +mpar.set_torque_limit(torque_limit) +dt = 0.01 +integrator = "runge_kutta" + +plant = SymbolicDoublePendulum(model_pars=mpar) +simulator = CustomSimulator(plant=plant, robustness=robustness, max_torque=max_torque) +eval_simulator = Simulator(plant=plant) + +# learning environment parameters +state_representation = 2 + +obs_space = gym.spaces.Box(np.array([-1.0] * 4), np.array([1.0] * 4)) +act_space = gym.spaces.Box(np.array([-1]), np.array([1])) +max_steps = 10 / dt + +############################################################################### + +n_envs = 50 +training_steps = 30_000_000_000_000 +verbose = 1 +eval_freq = 10_000 +n_eval_episodes = 1 +# a patto che i reward istantanei siano piccoli +# 0.01 -> 1500000 -> 7 +# 0.003 -> 1500000 -> 46 +# 0.001 -> 1500000 -> 38 +# 0.0003 -> 1500000 -> 19 +learning_rate = 0.001 + 
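+# English translation of the Italian note above: "provided that the instantaneous rewards are small"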
+############################################################################### + +# initialize double pendulum dynamics +dynamics_func = double_pendulum_dynamics_func( + simulator=simulator, + dt=dt, + integrator=integrator, + robot=robot, + state_representation=state_representation, + max_velocity=max_velocity, + torque_limit=torque_limit, +) + +eval_dynamics_func = double_pendulum_dynamics_func( + simulator=eval_simulator, + dt=dt, + integrator=integrator, + robot=robot, + state_representation=state_representation, + max_velocity=max_velocity, + torque_limit=torque_limit, +) + + +def zero_reset_func(): + observation = [-1.0, -1.0, 0.0, 0.0] + return observation + + +############################################################################### + + +def wrap(env): + if INCLUDE_TIME: + env = TimeAwareWrapper(env, dt=dt) + if WINDOW_SIZE > 0: + env = StateActionHistoryWrapper(env, history_length=WINDOW_SIZE) + return env + + +envs = make_vec_env( + env_id=MyEnv, + wrapper_class=wrap, + n_envs=n_envs, + env_kwargs={ + "dynamics_func": dynamics_func, + "reset_func": zero_reset_func, + "terminates": TERMINATION, + "obs_space": obs_space, + "act_space": act_space, + "max_episode_steps": max_steps, + }, +) + +eval_env = wrap( + MyEnv( + dynamics_func=eval_dynamics_func, + reset_func=zero_reset_func, + obs_space=obs_space, + act_space=act_space, + max_episode_steps=max_steps, + terminates=TERMINATION, + ) +) + +eval_callback = EvalCallback( + eval_env, + callback_after_eval=BruteMagicCallback( + f"./models_{robot}/", + folder_id=FOLDER_ID, + dynamics_func=eval_dynamics_func, + robot=robot, + window_size=WINDOW_SIZE, + max_torque=max_torque, + include_time=INCLUDE_TIME, + ), + best_model_save_path=os.path.join(log_dir, "best_model"), + log_path=log_dir, + eval_freq=eval_freq, + verbose=verbose, + n_eval_episodes=n_eval_episodes, +) + +############################################################################### + +agent = SAC( + MlpPolicy, + envs, + verbose=verbose, + learning_rate=learning_rate, +) + +agent.learn(total_timesteps=training_steps, callback=eval_callback) diff --git a/examples/reinforcement_learning/evolsac/SAC_main_training/sim_controller.py b/examples/reinforcement_learning/evolsac/SAC_main_training/sim_controller.py new file mode 100644 index 00000000..f8cc2666 --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_main_training/sim_controller.py @@ -0,0 +1,50 @@ +import os +import numpy as np + +from double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.simulation.simulation import Simulator +from double_pendulum.utils.csv_trajectory import save_trajectory +from double_pendulum.utils.plotting import plot_timeseries + + + + +def simulate_controller(controller, save_dir, mpar, dt, t_final, t0, x0, goal, integrator, controller_name=""): + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + plant = SymbolicDoublePendulum(model_pars=mpar) + sim = Simulator(plant=plant) + + T, X, U = sim.simulate_and_animate( + t0=t0, + x0=x0, + tf=t_final, + dt=dt, + controller=controller, + integrator=integrator, + save_video=True, + video_name=os.path.join(save_dir, "sim_video.gif"), + plot_horizontal_line=True, + horizontal_line_height=0.9 * (mpar.l[0] + mpar.l[1]), + scale=0.25, + ) + + save_trajectory(os.path.join(save_dir, "sim_swingup.csv"), T=T, X_meas=X, U_con=U) + + plot_timeseries( + T, + X, + U, + X_meas=sim.meas_x_values, + pos_y_lines=[-np.pi, 0.0, np.pi], + vel_y_lines=[0.0], + tau_y_lines=[-mpar.tl[1], 0.0, mpar.tl[1]], + 
save_to=os.path.join(save_dir, "timeseries"), + show=False, + scale=0.5, + ) + + if os.path.exists(f"readmes/{controller_name}.md"): + os.system(f"cp readmes/{controller_name}.md {save_dir}/README.md") + diff --git a/examples/reinforcement_learning/evolsac/SAC_main_training/simulator.py b/examples/reinforcement_learning/evolsac/SAC_main_training/simulator.py new file mode 100644 index 00000000..e8b61f3c --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_main_training/simulator.py @@ -0,0 +1,129 @@ +from copy import deepcopy + +import numpy as np +from double_pendulum.model.model_parameters import model_parameters +from double_pendulum.simulation.simulation import Simulator + + +def get_par_list(x0, min_rel, max_rel, n): + if x0 != 0: + if n % 2 == 0: + n = n + 1 + li = np.linspace(min_rel, max_rel, n) + else: + li = np.linspace(0, max_rel, n) + par_list = li * x0 + return par_list + + +class CustomSimulator(Simulator): + def __init__(self, plant, robustness, max_torque): + self.base_plant = deepcopy(plant) + design = "design_C.1" + model = "model_1.1" + robot = "acrobot" + + model_par_path = ( + "../../../../data/system_identification/identified_parameters/" + + design + + "/" + + model + + "/model_parameters.yml" + ) + mpar = model_parameters(filepath=model_par_path) + # SHOULD USE MAX_TORQUE FROM TRAIN_SAC + torque_limit = [0.0, max_torque] if robot == "acrobot" else [max_torque,0.0] + mpar.set_torque_limit(torque_limit) + + self.mpar = mpar + self.robustness = robustness + self.max_torque = max_torque + super().__init__(plant=plant) + + def reset(self): + """ + Reset the Simulator + Resets + - the internal data recorder + - the filter + arguments + - the process noise + - the measurement parameters + - the motor parameters + - perturbations + """ + super().reset() + + self.plant = deepcopy(self.base_plant) + + if np.random.rand() < self.robustness: + N_var = 21 + mpar_vars = [ + "Ir", + "m1r1", + "I1", + "b1", + "cf1", + "m2r2", + "m2", + "I2", + "b2", + "cf2", + ] + + Ir_var_list = np.linspace(0.0, 1e-4, N_var) + m1r1_var_list = get_par_list( + self.mpar.m[0] * self.mpar.r[0], 0.75, 1.25, N_var + ) + I1_var_list = get_par_list(self.mpar.I[0], 0.75, 1.25, N_var) + b1_var_list = np.linspace(-0.1, 0.1, N_var) + cf1_var_list = np.linspace(-0.2, 0.2, N_var) + m2r2_var_list = get_par_list( + self.mpar.m[1] * self.mpar.r[1], 0.75, 1.25, N_var + ) + m2_var_list = get_par_list(self.mpar.m[1], 0.75, 1.25, N_var) + I2_var_list = get_par_list(self.mpar.I[1], 0.75, 1.25, N_var) + b2_var_list = np.linspace(-0.1, 0.1, N_var) + cf2_var_list = np.linspace(-0.2, 0.2, N_var) + + modelpar_var_lists = { + "Ir": Ir_var_list, + "m1r1": m1r1_var_list, + "I1": I1_var_list, + "b1": b1_var_list, + "cf1": cf1_var_list, + "m2r2": m2r2_var_list, + "m2": m2_var_list, + "I2": I2_var_list, + "b2": b2_var_list, + "cf2": cf2_var_list, + } + + mp = np.random.choice(mpar_vars) + var = np.random.choice(modelpar_var_lists[mp]) + + if mp == "Ir": + self.plant.Ir = var + elif mp == "m1r1": + m1 = self.mpar.m[0] + r1 = var / m1 + self.plant.m[0] = m1 + self.plant.com[0] = r1 + elif mp == "I1": + self.plant.I[0] = var + elif mp == "b1": + self.plant.b[0] = var + elif mp == "cf1": + self.plant.coulomb_fric[0] = var + elif mp == "m2r2": + m2 = self.mpar.m[1] + r2 = var / m2 + self.plant.m[1] = m2 + self.plant.com[1] = r2 + elif mp == "m2": + self.plant.m[1] = var + elif mp == "I2": + self.plant.I[1] = var + elif mp == "b2": + self.plant.b[1] = var + elif mp == "cf2": + self.plant.coulomb_fric[1] = var diff --git 
a/examples/reinforcement_learning/evolsac/SAC_main_training/test_sac_swing_up.py b/examples/reinforcement_learning/evolsac/SAC_main_training/test_sac_swing_up.py new file mode 100644 index 00000000..654b4b8c --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_main_training/test_sac_swing_up.py @@ -0,0 +1,92 @@ +import os + +import numpy as np +from controller import SACController +from stable_baselines3 import SAC + +from double_pendulum.model.model_parameters import model_parameters +from double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.simulation.gym_env import double_pendulum_dynamics_func +from double_pendulum.simulation.simulation import Simulator +from double_pendulum.utils.plotting import plot_timeseries + +# hyperparameters +robot = "acrobot" + +design = "design_C.1" +model = "model_1.1" +max_torque = 3 +scaling_state = True + +if robot == "pendubot": + torque_limit = [max_torque, 0.0] + active_act = 0 + +elif robot == "acrobot": + torque_limit = [0.0, max_torque] + # model_path = "models/max_torque_1.5_robustness_0.2" + model_path = "acrobot_score_0500" + +# import model parameter +model_par_path = ( + "../../../../data/system_identification/identified_parameters/" + + design + + "/" + + model + + "/model_parameters.yml" +) +mpar = model_parameters(filepath=model_par_path) + +mpar.set_torque_limit(torque_limit) + +# simulation parameters +dt = 0.01 +t_final = 10.0 +integrator = "runge_kutta" +goal = [np.pi, 0.0, 0.0, 0.0] + +plant = SymbolicDoublePendulum(model_pars=mpar) + +sim = Simulator(plant=plant) + +# initialize double pendulum dynamics +dynamics_func = double_pendulum_dynamics_func( + simulator=sim, + dt=dt, + integrator=integrator, + robot=robot, + state_representation=2, + scaling=scaling_state, + max_velocity=50, + torque_limit=torque_limit, +) + +model = SAC.load(model_path) + +# initialize sac controller +controller = SACController(model, dynamics_func, include_time=False) +controller.init() + +# start simulation +T, X, U = sim.simulate_and_animate( + t0=0.0, + x0=[0.0, 0.0, 0.0, 0.0], + tf=t_final, + dt=dt, + controller=controller, + integrator=integrator, + save_video=True, +) + + +# plot time series +# plot_timeseries( +# T, +# X, +# U, +# X_meas=sim.meas_x_values, +# pos_y_lines=[np.pi], +# tau_y_lines=[-torque_limit[active_act], torque_limit[active_act]], +# save_to=os.path.join("./", "timeseries"), +# show=False, +# ) diff --git a/examples/reinforcement_learning/evolsac/SAC_main_training/wrappers.py b/examples/reinforcement_learning/evolsac/SAC_main_training/wrappers.py new file mode 100644 index 00000000..780a0b8e --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SAC_main_training/wrappers.py @@ -0,0 +1,87 @@ +import gymnasium as gym +from collections import deque +import numpy as np + + +class StateActionHistoryWrapper(gym.Wrapper): + def __init__(self, env, history_length=4): + super().__init__(env) + self.history_length = history_length + self.state_history = np.zeros((history_length, *env.observation_space.shape)) + self.action_history = np.zeros((history_length, *env.action_space.shape)) + + state_space = env.observation_space + action_space = env.action_space + low = np.concatenate( + [state_space.low] * history_length + [action_space.low] * history_length + ) + high = np.concatenate( + [state_space.high] * history_length + [action_space.high] * history_length + ) + self.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32) + + def reset(self, **kwargs): + observation = 
self.env.reset() + self.state_history.fill(0) + self.state_history[-1] = observation[0] + self.action_history.fill(0) + return self._get_observation(), {} + + def step(self, action): + observation, reward, terminated, truncated, info = self.env.step(action) + self.state_history[0:-1] = self.state_history[1:] + self.state_history[-1] = observation + self.action_history[0:-1] = self.action_history[1:] + self.action_history[-1] = action + return self._get_observation(), reward, terminated, truncated, info + + def _get_observation(self): + state_flat = self.state_history.reshape((-1)) + action_flat = self.action_history.reshape((-1)) + return np.concatenate([state_flat, action_flat]) + + +# adds noise to velocity +class PerturbStateWrapper(gym.ObservationWrapper): + def __init__(self, env, perturb_std=0.02): + super().__init__(env) + self.std = perturb_std + + def observation(self, observation): + observation = np.atleast_2d(observation) + observation[:, 2:] = ( + observation[:, 2:] + np.random.randn(*observation[:, 2:].shape) * self.std + ) + return observation + + +# probably useless since it's already perturbed by SAC_main_training +class PerturbActionWrapper(gym.ActionWrapper): + def __init__(self, env, perturb_std=0.01): + super().__init__(env) + self.noise = lambda: np.random.randn(*env.observation_space.shape) * perturb_std + + def observation(self, observation): + return observation + self.noise() + + +class TimeAwareWrapper(gym.ObservationWrapper): + def __init__(self, env, dt): + super().__init__(env) + self.dt = dt + observation_space = env.observation_space + low = np.append(observation_space.low, 0.0) + high = np.append(observation_space.high, 1.0) + self.observation_space = gym.spaces.Box(low, high, dtype=np.float32) + self.t = 0 + + def observation(self, observation): + return np.append(observation, self.t / 10) + + def step(self, action): + self.t += self.dt + return super().step(action) + + def reset(self, **kwargs): + self.t = 0 + return super().reset(**kwargs) diff --git a/examples/reinforcement_learning/evolsac/SNES_finetuning/environment.py b/examples/reinforcement_learning/evolsac/SNES_finetuning/environment.py new file mode 100644 index 00000000..fcf2bc5d --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SNES_finetuning/environment.py @@ -0,0 +1,109 @@ +import gymnasium as gym +import numpy as np + +class CustomCustomEnv(gym.Env): + def __init__( + self, + dynamics_func, + reset_func, + obs_space=gym.spaces.Box( + np.array([-1.0, -1.0, -1.0, -1.0]), np.array([1.0, 1.0, 1.0, 1.0]) + ), + act_space=gym.spaces.Box(np.array([-1.0, -1.0]), np.array([1.0, 1.0])), + max_episode_steps=1000, + scaling=True, + terminates=True, + ): + self.dynamics_func = dynamics_func + self.reset_func = reset_func + self.observation_space = obs_space + self.action_space = act_space + self.max_episode_steps = max_episode_steps + + self.previous_action = 0 + self.terminates = terminates + + self.observation = self.reset_func() + self.step_counter = 0 + self.stabilisation_mode = False + self.y = [0,0] + self.update_y() + self.scaling = scaling + + l1 = self.dynamics_func.simulator.plant.l[0] + l2 = self.dynamics_func.simulator.plant.l[1] + self.max_height = l1 + l2 + + if self.dynamics_func.robot == "acrobot": + self.control_line = 0.75 * self.max_height + elif self.dynamics_func.robot == "pendubot": + self.control_line = 0.7 * self.max_height + + self.old_obs = None + + def step(self, action): + self.old_obs = np.copy(self.observation) + self.observation = self.dynamics_func( + 
self.observation, action, scaling=self.scaling + ) + + self.update_y() + self.stabilisation_mode = self.y[1] >= self.control_line + terminated = self.terminated_func() + reward = self.reward_func(terminated, action) + info = {} + truncated = False + self.step_counter += 1 + if self.step_counter >= self.max_episode_steps: + truncated = True + self.previous_action = action[0] + return self.observation, reward, terminated, truncated, info + + def reset(self, seed=None, options=None): + super().reset(seed=seed) + self.observation = self.reset_func() + self.step_counter = 0 + info = {} + self.previous_action = 0 + self.stabilisation_mode = False + self.old_obs = np.copy(self.observation) + return self.observation, info + + def render(self, mode="human"): + pass + + def reward_func(self, terminated, action): + raise NotImplementedError("You have to define the reward function") + + def terminated_func(self): + if self.terminates: + # Checks if we're in stabilisation mode and the ee has fallen below the control line + if self.stabilisation_mode and self.y[1] < self.control_line: + return True + return False + + # Update the y coordinate of the first joint and the end effector + def update_y(self): + theta1, theta2, _, _ = self.dynamics_func.unscale_state(self.observation) + + link_end_points = self.dynamics_func.simulator.plant.forward_kinematics( + [theta1, theta2] + ) + self.y[0] = link_end_points[0][1] + self.y[1] = link_end_points[1][1] + + def gravitational_reward(self): + x = self.dynamics_func.unscale_state(self.observation) + V = self.dynamics_func.simulator.plant.potential_energy(x) + return V + + def V(self): + return self.gravitational_reward() + + def kinetic_reward(self): + x = self.dynamics_func.unscale_state(self.observation) + T = self.dynamics_func.simulator.plant.kinetic_energy(x) + return T + + def T(self): + return self.kinetic_reward() diff --git a/examples/reinforcement_learning/evolsac/SNES_finetuning/magic.py b/examples/reinforcement_learning/evolsac/SNES_finetuning/magic.py new file mode 100644 index 00000000..3888e9cd --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SNES_finetuning/magic.py @@ -0,0 +1,254 @@ +import os +import shutil + +import numpy as np +import pandas +from double_pendulum.analysis.leaderboard import leaderboard_scores +from double_pendulum.controller.evolsac.evolsac_controller import EvolSACController +from double_pendulum.model.model_parameters import model_parameters +from sim_controller import simulate_controller +from stable_baselines3.common.callbacks import BaseCallback + + +def load_controller(dynamics_func, model, window_size, include_time, evaluating): + name = "sac" + leaderboard_config = { + "csv_path": name + "/sim_swingup.csv", + "name": name, + "simple_name": "sac", + "short_description": "SAC for both swingup and stabilisation", + "readme_path": f"readmes/{name}.md", + "username": "MarcoCali0", + } + controller = EvolSACController( + model=model, + dynamics_func=dynamics_func, + window_size=window_size, + include_time=include_time, + evaluating=evaluating, + SNES_phase=True, + ) + controller.init() + return controller, leaderboard_config + + +def magic_score( + dynamics_func, + model, + folder, + folder_id, + window_size, + max_torque, + include_time, + index=None, + evaluating=True, +): + design = "design_C.1" + integrator = "runge_kutta" + dt = 0.002 + t0 = 0.0 + t_final = 10.0 + x0 = [0.0, 0.0, 0.0, 0.0] + goal = [np.pi, 0.0, 0.0, 0.0] + + if folder == "acrobot": + model_par_path = ( + 
"../../../../data/system_identification/identified_parameters/" + + design + + "/" + + "model_1.1" + + "/model_parameters.yml" + ) + mpar = model_parameters(filepath=model_par_path) + mpar.set_torque_limit([0.0, max_torque]) + + else: # robot = "pendubot" + design = "design_C.1" + model_par_path = ( + "../../../../data/system_identification/identified_parameters/" + + design + + "/" + + "model_1.1" + + "/model_parameters.yml" + ) + mpar = model_parameters(filepath=model_par_path) + mpar.set_torque_limit([max_torque, 0.0]) + + controller, leaderboard_config = load_controller( + dynamics_func, model, window_size, include_time, evaluating=evaluating + ) + + data_dir = f"data_{folder}/{folder_id}" + + if not os.path.exists(data_dir): + os.makedirs(data_dir) + + controller_name = f"evolsac_{folder}" + save_dir = ( + f"{data_dir}/{controller_name}" + if index is None + else f"{data_dir}/{controller_name}/{index}" + ) + if index is not None: + leaderboard_config["csv_path"] = f"{controller_name}/{index}/sim_swingup.csv" + save_to = os.path.join(save_dir, "leaderboard_entry.csv") + + simulate_controller( + controller, save_dir, mpar, dt, t_final, t0, x0, goal, integrator + ) + + conf = leaderboard_config + + conf["csv_path"] = os.path.join(data_dir, leaderboard_config["csv_path"]) + data_paths = {} + data_paths[leaderboard_config["name"]] = conf + + leaderboard_scores( + data_paths=data_paths, + save_to=save_to, + mpar=mpar, + # weights={"swingup_time": 0.5, "max_tau": 0.1, "energy": 0.0, "integ_tau": 0.4, "tau_cost": 0.0, "tau_smoothness": 0.0}, + weights={ + "swingup_time": 1.0, + "max_tau": 0.0, + "energy": 1.0, + "integ_tau": 0.0, + "tau_cost": 1.0, + "tau_smoothness": 1.0, + "velocity_cost": 1.0, + }, + normalize={ + "swingup_time": 20.0, + "max_tau": 1.0, # not used + "energy": 60.0, + "integ_tau": 1.0, # not used + "tau_cost": 20.0, + "tau_smoothness": 0.1, + "velocity_cost": 400, + }, + link_base="", + score_version="v2", + ) + df = pandas.read_csv(save_to) + score = np.array(df["RealAI Score"])[0] + print("RealAI Score = ", score) + return score + + +def copy_files(src_folder, dest_folder): + if not os.path.exists(dest_folder): + os.makedirs(dest_folder) + + for filename in os.listdir(src_folder): + src_file = os.path.join(src_folder, filename) + dest_file = os.path.join(dest_folder, filename) + + if os.path.isfile(src_file): + shutil.copy2(src_file, dest_file) + + +class MagicCallback(BaseCallback): + def __init__( + self, + path, + folder_id, + dynamics_func, + robot, + window_size, + max_torque, + include_time, + ): + super().__init__(False) + self.path = f"{path}{folder_id}" + if not os.path.exists(path): + os.makedirs(path, exist_ok=True) + self.best = -np.inf + self.folder_id = folder_id + self.dynamics_func = dynamics_func + self.robot = robot + self.window_size = window_size + self.max_torque = max_torque + self.include_time = include_time + + def _on_step(self) -> bool: + score = magic_score( + self.dynamics_func, + self.model, + self.robot, + self.folder_id, + self.window_size, + self.max_torque, + self.include_time, + ) + if score >= self.best: + self.best = score + self.model.save(f"{self.path}/best_model") + copy_files(f"./data/{self.folder_id}/evolsac/", self.path) + return True + +import concurrent.futures + + +class BruteMagicCallback(BaseCallback): + def __init__( + self, + path, + folder_id, + dynamics_func, + robot, + window_size, + max_torque, + include_time, + ): + super().__init__(False) + self.path = f"{path}{folder_id}" + if not os.path.exists(path): + 
os.makedirs(path, exist_ok=True) + self.folder_id = folder_id + self.dynamics_func = dynamics_func + self.robot = robot + self.window_size = window_size + self.max_torque = max_torque + self.include_time = include_time + self.iteration = 0 + self.executor = concurrent.futures.ThreadPoolExecutor() + + def _on_step(self) -> bool: + self.iteration += 1 + self.executor.submit(lambda: async_store(self.iteration, self)) + return True + + +import tempfile + +from stable_baselines3 import SAC + + +def deepcopy_model(model): + with tempfile.TemporaryDirectory() as tmpdirname: + model_path = os.path.join(tmpdirname, "temp_model") + model.save(model_path) + copied_model = SAC.load(model_path) + return copied_model + + + +def async_store(iteration, callback: BruteMagicCallback): + model = deepcopy_model(callback.model) + _ = magic_score( + callback.dynamics_func, + model, + callback.robot, + callback.folder_id, + callback.window_size, + callback.max_torque, + callback.include_time, + index=iteration, + ) + if not os.path.exists(f"{callback.path}/{iteration}"): + os.makedirs(f"{callback.path}/{iteration}", exist_ok=True) + model.save(f"{callback.path}/{iteration}/best_model") + copy_files( + f"./data/{callback.folder_id}/evolsac/{iteration}", + f"{os.path.join(callback.path, str(iteration))}", + ) diff --git a/examples/reinforcement_learning/evolsac/SNES_finetuning/main.py b/examples/reinforcement_learning/evolsac/SNES_finetuning/main.py new file mode 100644 index 00000000..c97f7ebd --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SNES_finetuning/main.py @@ -0,0 +1,337 @@ +import os +import sys + +import gymnasium as gym +import numpy as np +import stable_baselines3 +import torch +from environment import CustomCustomEnv +from magic import MagicCallback, BruteMagicCallback +from simulator import CustomSimulator +from stable_baselines3 import SAC +from stable_baselines3.common.callbacks import EvalCallback +from stable_baselines3.common.env_util import make_vec_env +from stable_baselines3.sac.policies import MlpPolicy +from wrappers import * + +from double_pendulum.model.model_parameters import model_parameters +from double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.simulation.gym_env import double_pendulum_dynamics_func +from double_pendulum.simulation.simulation import Simulator + +np.random.seed(0) +torch.manual_seed(0) +torch.random.manual_seed(0) +torch.backends.cudnn.deterministic = True +stable_baselines3.common.utils.set_random_seed(0) + + +class MyEnv(CustomCustomEnv): + def reward_func(self, terminated, action): + _, theta2, omega1, omega2 = self.dynamics_func.unscale_state(self.observation) + costheta2 = np.cos(theta2) + + a = action[0] + delta_action = np.abs(a - self.previous_action) + lambda_delta = 0.05 + lambda_action = 0.02 + lambda_velocities = 0.00005 + if not terminated: + if self.stabilisation_mode: + reward = ( + self.V() + + 2 * (1 + costheta2) ** 2 + - self.T() + - 5 * lambda_action * np.square(a) + - 3 * lambda_delta * delta_action + ) + else: + reward = ( + (1 - np.abs(a)) * self.V() + - lambda_action * np.square(a) + - 2 * lambda_velocities * (omega1**2 + omega2**2) + - 3 * lambda_delta * delta_action + ) + else: + reward = -1.0 + return reward + + +assert ( + len(sys.argv) >= 3 +), "Please provide: [max torque] [robustness] [window_size (0 = no window)] [include_time]" +max_torque = float(sys.argv[1]) +robustness = float(sys.argv[2]) +WINDOW_SIZE = int(sys.argv[3]) +INCLUDE_TIME = bool(int(sys.argv[4])) +robot = 
str(sys.argv[5]) +FOLDER_ID = f"{os.path.basename(__file__)}-{max_torque}-{robustness}-{WINDOW_SIZE}-{int(INCLUDE_TIME)}" +TERMINATION = False + +# setting log path for the training +log_dir = f"./log_data_{robot}/SAC_training/{FOLDER_ID}" +if not os.path.exists(log_dir): + os.makedirs(log_dir) + +design = "design_C.1" +model = "model_1.1" +model_par_path = ( + "../../../../data/system_identification/identified_parameters/" + + design + + "/" + + model + + "/model_parameters.yml" +) + +# model and reward parameter +max_velocity = 50 +torque_limit = [max_torque, 0] if robot == "pendubot" else [0, max_torque] + +mpar = model_parameters(filepath=model_par_path) +mpar.set_torque_limit(torque_limit) +dt = 0.01 +integrator = "runge_kutta" + +plant = SymbolicDoublePendulum(model_pars=mpar) +simulator = CustomSimulator(plant=plant, robustness=robustness, max_torque=max_torque, robot=robot) +eval_simulator = Simulator(plant=plant) + +# learning environment parameters +state_representation = 2 + +obs_space = gym.spaces.Box(np.array([-1.0] * 4), np.array([1.0] * 4)) +act_space = gym.spaces.Box(np.array([-1]), np.array([1])) +max_steps = 10 / dt + +############################################################################### + +n_envs = 1 +training_steps = 30_000_000_000_000 +verbose = 1 +eval_freq = 10_000 +n_eval_episodes = 1 +# a patto che i reward istantanei siano piccoli +# 0.01 -> 1500000 -> 7 +# 0.003 -> 1500000 -> 46 +# 0.001 -> 1500000 -> 38 +# 0.0003 -> 1500000 -> 19 +learning_rate = 0.001 + +############################################################################### + +# initialize double pendulum dynamics +dynamics_func = double_pendulum_dynamics_func( + simulator=simulator, + dt=dt, + integrator=integrator, + robot=robot, + state_representation=state_representation, + max_velocity=max_velocity, + torque_limit=torque_limit, +) + +eval_dynamics_func = double_pendulum_dynamics_func( + simulator=eval_simulator, + dt=dt, + integrator=integrator, + robot=robot, + state_representation=state_representation, + max_velocity=max_velocity, + torque_limit=torque_limit, +) + + +def zero_reset_func(): + observation = [-1.0, -1.0, 0.0, 0.0] + return observation + + +############################################################################### + + +def wrap(env): + if INCLUDE_TIME: + env = TimeAwareWrapper(env, dt=dt) + if WINDOW_SIZE > 0: + env = StateActionHistoryWrapper(env, history_length=WINDOW_SIZE) + return env + + +envs = make_vec_env( + env_id=MyEnv, + wrapper_class=wrap, + n_envs=n_envs, + env_kwargs={ + "dynamics_func": dynamics_func, + "reset_func": zero_reset_func, + "terminates": TERMINATION, + "obs_space": obs_space, + "act_space": act_space, + "max_episode_steps": max_steps, + }, +) + +eval_env = wrap( + MyEnv( + dynamics_func=eval_dynamics_func, + reset_func=zero_reset_func, + obs_space=obs_space, + act_space=act_space, + max_episode_steps=max_steps, + terminates=TERMINATION, + ) +) + +eval_callback = EvalCallback( + eval_env, + callback_after_eval=MagicCallback( + f"./models_{robot}/", + folder_id=FOLDER_ID, + dynamics_func=eval_dynamics_func, + robot=robot, + window_size=WINDOW_SIZE, + max_torque=max_torque, + include_time=INCLUDE_TIME, + ), + best_model_save_path=os.path.join(log_dir, "best_model"), + log_path=log_dir, + eval_freq=eval_freq, + verbose=verbose, + n_eval_episodes=n_eval_episodes, +) + +############################################################################### + +# agent = SAC( +# MlpPolicy, +# envs, +# verbose=verbose, +# learning_rate=learning_rate, +# ) + 
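+# Note (added comment): in this SNES fine-tuning script the plain SAC training entry point is
+# intentionally left commented out. Instead, a pre-trained agent is loaded below as
+# REFERENCE_AGENT, the parameters of its actor's latent_pi network are flattened into a single
+# vector, and that vector is evolved with SNES using the leaderboard score from magic_score as
+# the fitness.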
+# agent.learn(total_timesteps=training_steps, callback=eval_callback) + + +REFERENCE_AGENT = SAC( + MlpPolicy, + envs, + verbose=verbose, + learning_rate=learning_rate, +) +reference_agent_ = "/home/alberto_sinigaglia/test_dp_clone/leaderboard/pendubot/pendubot_best_model_0567_0800.zip" if robot == "pendubot" else "/home/alberto_sinigaglia/test_dp_clone/leaderboard/acrobot/acrobot_best_model_0504_0700.zip" +REFERENCE_AGENT.set_parameters(reference_agent_) + +from evotorch import Problem +from evotorch.algorithms.distributed.gaussian import SNES +from magic import deepcopy_model, magic_score + +WORKERS = 40 +i = 0 + +import uuid +def simulate(policy_params): + global i + i += 1 + IDX = i + agent = deepcopy_model(REFERENCE_AGENT) + with torch.no_grad(): + state_dict = agent.policy.actor.latent_pi.state_dict() + keys = list(state_dict.keys()) + split_sizes = [torch.numel(state_dict[key]) for key in keys] + params_split = torch.split(policy_params.clone().detach(), split_sizes) + state_dict.update( + { + key: param.reshape(state_dict[key].shape) + for key, param in zip(keys, params_split) + } + ) + agent.policy.actor.latent_pi.load_state_dict(state_dict) + + index = uuid.uuid4().hex + score = np.mean([ + magic_score( + dynamics_func, + agent, + robot, + "./result", + 0, + max_torque, + 0, + index=index, + evaluating=False + ) + for _ in range(2) + ]) + return score if not np.isnan(score) else 0.0 + + +# Set up the EvoTorch problem +problem = Problem( + "max", + simulate, + solution_length=len( + torch.cat( + [ + p.data.view(-1) + for p in REFERENCE_AGENT.policy.actor.latent_pi.parameters() + ] + ) + ), + num_actors=WORKERS, +) + +initial_solution = np.concatenate( + [ + p.data.cpu().numpy().flatten() + for p in REFERENCE_AGENT.policy.actor.latent_pi.parameters() + ] +) + +optimizer = SNES(problem, popsize=WORKERS, center_init=initial_solution, stdev_init=0.0075) +for generation in range(1000): + optimizer.step() + print( + f"Generation {generation}: Best reward so far: {optimizer.status['best'].evals}" + ) + + best_params = optimizer.status["best"].values + score = optimizer.status["best_eval"] + + # Update the policy's parameters + with torch.no_grad(): + state_dict = REFERENCE_AGENT.policy.actor.latent_pi.state_dict() + keys = list(state_dict.keys()) + split_sizes = [torch.numel(state_dict[key]) for key in keys] + params_split = torch.split(best_params.clone().detach(), split_sizes) + state_dict.update( + { + key: param.reshape(state_dict[key].shape) + for key, param in zip(keys, params_split) + } + ) + REFERENCE_AGENT.policy.actor.latent_pi.load_state_dict(state_dict) + + + real_score = magic_score( + dynamics_func, + REFERENCE_AGENT, + robot, + "./result", + 0, + max_torque, + 0, + index=f"TEST-{max_torque}-{generation}", + ) + + + + # Save the model with the best parameters + import os + + if not os.path.exists(f"savings_{max_torque}"): + os.makedirs(f"savings_{max_torque}", exist_ok=True) + REFERENCE_AGENT.save(f"savings_{max_torque}/{generation}/best_model-{score}-{real_score}.zip") + torch.save( + REFERENCE_AGENT.policy.actor.latent_pi.state_dict(), + f"savings_{max_torque}/{generation}/optimized_policy-{score}-{real_score}.pth", + ) diff --git a/examples/reinforcement_learning/evolsac/SNES_finetuning/sim_controller.py b/examples/reinforcement_learning/evolsac/SNES_finetuning/sim_controller.py new file mode 100644 index 00000000..639ce76f --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SNES_finetuning/sim_controller.py @@ -0,0 +1,54 @@ +import os +from 
double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.simulation.simulation import Simulator +from double_pendulum.utils.csv_trajectory import save_trajectory + + +def simulate_controller(controller, save_dir, mpar, dt, t_final, t0, x0, goal, integrator, controller_name=""): + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + plant = SymbolicDoublePendulum(model_pars=mpar) + sim = Simulator(plant=plant) + + # T, X, U = sim.simulate_and_animate( + # t0=t0, + # x0=x0, + # tf=t_final, + # dt=dt, + # controller=controller, + # integrator=integrator, + # save_video=True, + # video_name=os.path.join(save_dir, "sim_video.gif"), + # plot_horizontal_line=True, + # horizontal_line_height=0.9 * (mpar.l[0] + mpar.l[1]), + # scale=0.25, + # ) + + T, X, U = sim.simulate( + t0=t0, + x0=x0, + tf=t_final, + dt=dt, + controller=controller, + integrator=integrator, + ) + + save_trajectory(os.path.join(save_dir, "sim_swingup.csv"), T=T, X_meas=X, U_con=U) + + # plot_timeseries( + # T, + # X, + # U, + # X_meas=sim.meas_x_values, + # pos_y_lines=[-np.pi, 0.0, np.pi], + # vel_y_lines=[0.0], + # tau_y_lines=[-mpar.tl[1], 0.0, mpar.tl[1]], + # save_to=os.path.join(save_dir, "timeseries"), + # show=False, + # scale=0.5, + # ) + + if os.path.exists(f"readmes/{controller_name}.md"): + os.system(f"cp readmes/{controller_name}.md {save_dir}/README.md") + diff --git a/examples/reinforcement_learning/evolsac/SNES_finetuning/simulator.py b/examples/reinforcement_learning/evolsac/SNES_finetuning/simulator.py new file mode 100644 index 00000000..193fb1e5 --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SNES_finetuning/simulator.py @@ -0,0 +1,128 @@ +from copy import deepcopy + +import numpy as np + +from double_pendulum.model.model_parameters import model_parameters +from double_pendulum.simulation.simulation import Simulator + + +def get_par_list(x0, min_rel, max_rel, n): + if x0 != 0: + if n % 2 == 0: + n = n + 1 + li = np.linspace(min_rel, max_rel, n) + else: + li = np.linspace(0, max_rel, n) + par_list = li * x0 + return par_list + + +class CustomSimulator(Simulator): + def __init__(self, plant, robustness, max_torque, robot): + self.base_plant = deepcopy(plant) + design = "design_C.1" + model = "model_1.1" + + model_par_path = ( + "../../../../data/system_identification/identified_parameters/" + + design + + "/" + + model + + "/model_parameters.yml" + ) + mpar = model_parameters(filepath=model_par_path) + torque_limit = [max_torque, 0] if robot == "pendubot" else [0, max_torque] + mpar.set_torque_limit(torque_limit) + + self.mpar = mpar + self.robustness = robustness + self.max_torque = max_torque + super().__init__(plant=plant) + + def reset(self): + """ + Reset the Simulator + Resets + - the internal data recorder + - the filter + arguments + - the process noise + - the measurement parameters + - the motor parameters + - perturbations + """ + super().reset() + + self.plant = deepcopy(self.base_plant) + + if np.random.rand() < self.robustness: + N_var = 21 + mpar_vars = [ + "Ir", + "m1r1", + "I1", + "b1", + "cf1", + "m2r2", + "m2", + "I2", + "b2", + "cf2", + ] + + Ir_var_list = np.linspace(0.0, 1e-4, N_var) + m1r1_var_list = get_par_list( + self.mpar.m[0] * self.mpar.r[0], 0.75, 1.25, N_var + ) + I1_var_list = get_par_list(self.mpar.I[0], 0.75, 1.25, N_var) + b1_var_list = np.linspace(-0.1, 0.1, N_var) + cf1_var_list = np.linspace(-0.2, 0.2, N_var) + m2r2_var_list = get_par_list( + self.mpar.m[1] * self.mpar.r[1], 0.75, 1.25, N_var + ) + m2_var_list = 
get_par_list(self.mpar.m[1], 0.75, 1.25, N_var) + I2_var_list = get_par_list(self.mpar.I[1], 0.75, 1.25, N_var) + b2_var_list = np.linspace(-0.1, 0.1, N_var) + cf2_var_list = np.linspace(-0.2, 0.2, N_var) + + modelpar_var_lists = { + "Ir": Ir_var_list, + "m1r1": m1r1_var_list, + "I1": I1_var_list, + "b1": b1_var_list, + "cf1": cf1_var_list, + "m2r2": m2r2_var_list, + "m2": m2_var_list, + "I2": I2_var_list, + "b2": b2_var_list, + "cf2": cf2_var_list, + } + + mp = np.random.choice(mpar_vars) + var = np.random.choice(modelpar_var_lists[mp]) + + if mp == "Ir": + self.plant.Ir = var + elif mp == "m1r1": + m1 = self.mpar.m[0] + r1 = var / m1 + self.plant.m[0] = m1 + self.plant.com[0] = r1 + elif mp == "I1": + self.plant.I[0] = var + elif mp == "b1": + self.plant.b[0] = var + elif mp == "cf1": + self.plant.coulomb_fric[0] = var + elif mp == "m2r2": + m2 = self.mpar.m[1] + r2 = var / m2 + self.plant.m[1] = m2 + self.plant.com[1] = r2 + elif mp == "m2": + self.plant.m[1] = var + elif mp == "I2": + self.plant.I[1] = var + elif mp == "b2": + self.plant.b[1] = var + elif mp == "cf2": + self.plant.coulomb_fric[1] = var diff --git a/examples/reinforcement_learning/evolsac/SNES_finetuning/wrappers.py b/examples/reinforcement_learning/evolsac/SNES_finetuning/wrappers.py new file mode 100644 index 00000000..51535c97 --- /dev/null +++ b/examples/reinforcement_learning/evolsac/SNES_finetuning/wrappers.py @@ -0,0 +1,87 @@ +import gymnasium as gym +from collections import deque +import numpy as np + + +class StateActionHistoryWrapper(gym.Wrapper): + def __init__(self, env, history_length=4): + super().__init__(env) + self.history_length = history_length + self.state_history = np.zeros((history_length, *env.observation_space.shape)) + self.action_history = np.zeros((history_length, *env.action_space.shape)) + + state_space = env.observation_space + action_space = env.action_space + low = np.concatenate( + [state_space.low] * history_length + [action_space.low] * history_length + ) + high = np.concatenate( + [state_space.high] * history_length + [action_space.high] * history_length + ) + self.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32) + + def reset(self, **kwargs): + observation = self.env.reset() + self.state_history.fill(0) + self.state_history[-1] = observation[0] + self.action_history.fill(0) + return self._get_observation(), {} + + def step(self, action): + observation, reward, terminated, truncated, info = self.env.step(action) + self.state_history[0:-1] = self.state_history[1:] + self.state_history[-1] = observation + self.action_history[0:-1] = self.action_history[1:] + self.action_history[-1] = action + return self._get_observation(), reward, terminated, truncated, info + + def _get_observation(self): + state_flat = self.state_history.reshape((-1)) + action_flat = self.action_history.reshape((-1)) + return np.concatenate([state_flat, action_flat]) + + +# adds noise to velocity +class PerturbStateWrapper(gym.ObservationWrapper): + def __init__(self, env, perturb_std=0.02): + super().__init__(env) + self.std = perturb_std + + def observation(self, observation): + observation = np.atleast_2d(observation) + observation[:, 2:] = ( + observation[:, 2:] + np.random.randn(*observation[:, 2:].shape) * self.std + ) + return observation + + +# probably useless since it's already perturbed by SAC +class PerturbActionWrapper(gym.ActionWrapper): + def __init__(self, env, perturb_std=0.01): + super().__init__(env) + self.noise = lambda: np.random.randn(*env.observation_space.shape) * 
perturb_std + + def observation(self, observation): + return observation + self.noise() + + +class TimeAwareWrapper(gym.ObservationWrapper): + def __init__(self, env, dt): + super().__init__(env) + self.dt = dt + observation_space = env.observation_space + low = np.append(observation_space.low, 0.0) + high = np.append(observation_space.high, 1.0) + self.observation_space = gym.spaces.Box(low, high, dtype=np.float32) + self.t = 0 + + def observation(self, observation): + return np.append(observation, self.t / 10) + + def step(self, action): + self.t += self.dt + return super().step(action) + + def reset(self, **kwargs): + self.t = 0 + return super().reset(**kwargs) diff --git a/experiments/design_C.1/saclqr.py b/experiments/design_C.1/saclqr.py index bac86bae..0bef0c23 100644 --- a/experiments/design_C.1/saclqr.py +++ b/experiments/design_C.1/saclqr.py @@ -29,14 +29,14 @@ active_act = 0 scaling = False load_path = "../data/controller_parameters/design_C.1/model_1.1/pendubot/lqr" - model_path = "../data/policies/design_C.1/model_1.0/pendubot/SAC/best_model.zip" + model_path = "../data/policies/design_C.1/model_1.0/pendubot/SAC_main_training/best_model.zip" elif robot == "acrobot": torque_limit = [0.5, 5.0] active_act = 1 scaling = True load_path = "../data/controller_parameters/design_C.1/model_1.1/acrobot/lqr" - model_path = "../data/policies/design_C.1/model_1.0/acrobot/SAC/sac_model.zip" + model_path = "../data/policies/design_C.1/model_1.0/acrobot/SAC_main_training/sac_model.zip" ## set model and controller parameters model_par_path = ( diff --git a/leaderboard/acrobot/robustness_v2/con_evolsac.py b/leaderboard/acrobot/robustness_v2/con_evolsac.py new file mode 100644 index 00000000..1002978e --- /dev/null +++ b/leaderboard/acrobot/robustness_v2/con_evolsac.py @@ -0,0 +1,62 @@ +import stable_baselines3 +from double_pendulum.controller.evolsac.evolsac_controller import EvolSACController +from double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.simulation.gym_env import double_pendulum_dynamics_func +from double_pendulum.simulation.simulation import Simulator +from sim_parameters import design, integrator, model, mpar, robot +from stable_baselines3 import SAC + +assert stable_baselines3.__version__ == "2.3.2" + +name = "evolsac" +leaderboard_config = { + "csv_path": name + "/sim_swingup.csv", + "name": name, + "simple_name": "evolsac", + "short_description": "Evolutionary SAC for both swingup and stabilisation", + "readme_path": f"readmes/{name}.md", + "username": "AlbertoSinigaglia", +} + +# All of the variables below are now imported from sim_parameters.py +# robot = "acrobot" +# design = "design_C.1" +# model = "model_1.0" +# mpar = model_parameters(filepath=model_par_path) +# mpar.set_torque_limit(torque_limit) + +# integrator = "runge_kutta" + +max_torque = 3.0 +max_velocity = 50.0 +torque_limit = [max_torque, 0.0] if robot == "pendubot" else [0.0, max_torque] +mpar.set_torque_limit(torque_limit) + +print(f"Loading {robot} trained model...") + +model_path = "../../../data/policies/design_C.1/model_1.0/acrobot/evolsac/model.zip" +# Simulation parameters +dt = 0.01 + +# learning environment parameters +state_representation = 2 + +plant = SymbolicDoublePendulum(model_pars=mpar) +print("Loading model parameters...") +simulator = Simulator(plant=plant) + +dynamics_func = double_pendulum_dynamics_func( + simulator=simulator, + dt=dt, + integrator=integrator, + robot=robot, + state_representation=state_representation, + max_velocity=max_velocity, + 
torque_limit=torque_limit, +) + +sac_model = SAC.load(model_path) +controller = EvolSACController( + model=sac_model, dynamics_func=dynamics_func, window_size=0, include_time=False +) +controller.init() diff --git a/leaderboard/acrobot/robustness_v2/readmes/evolsac.md b/leaderboard/acrobot/robustness_v2/readmes/evolsac.md new file mode 100644 index 00000000..06d0c201 --- /dev/null +++ b/leaderboard/acrobot/robustness_v2/readmes/evolsac.md @@ -0,0 +1,11 @@ +# Evolutionary SAC + +## Trajectory Learning, Optimization and Stabilization + +This controller uses a trajectory dictated by the policy learned in the following way: + + 1. SAC training with loose surrogate reward + 2. SAC training with stricter surrogate reward + 3. SNES training with challenge reward + injected noise in the action + +The controller uses the final policy network diff --git a/leaderboard/acrobot/robustness_v2/sim_parameters.py b/leaderboard/acrobot/robustness_v2/sim_parameters.py index aa6e2142..ef0b6407 100644 --- a/leaderboard/acrobot/robustness_v2/sim_parameters.py +++ b/leaderboard/acrobot/robustness_v2/sim_parameters.py @@ -15,7 +15,6 @@ ) mpar = model_parameters(filepath=model_par_path) mpar.set_torque_limit([0.0, 6.0]) -# print(mpar) mpar.set_motor_inertia(0.0) mpar.set_damping([0.0, 0.0]) mpar.set_cfric([0.0, 0.0]) @@ -26,4 +25,4 @@ t0 = 0.0 t_final = 10.0 x0 = [0.0, 0.0, 0.0, 0.0] -goal = [np.pi, 0.0, 0.0, 0.0] +goal = [np.pi, 0.0, 0.0, 0.0] \ No newline at end of file diff --git a/leaderboard/acrobot/simulation_v2/con_evolsac.py b/leaderboard/acrobot/simulation_v2/con_evolsac.py new file mode 100644 index 00000000..efa0afcf --- /dev/null +++ b/leaderboard/acrobot/simulation_v2/con_evolsac.py @@ -0,0 +1,60 @@ +import stable_baselines3 +from double_pendulum.controller.evolsac.evolsac_controller import EvolSACController +from double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.simulation.gym_env import double_pendulum_dynamics_func +from double_pendulum.simulation.simulation import Simulator +from sim_parameters import design, integrator, model, mpar, robot +from stable_baselines3 import SAC + +assert stable_baselines3.__version__ == "2.3.2" + +name = "evolsac" +leaderboard_config = { + "csv_path": name + "/sim_swingup.csv", + "name": name, + "simple_name": "evolsac", + "short_description": "Evolutionary SAC for both swingup and stabilisation", + "readme_path": f"readmes/{name}.md", + "username": "AlbertoSinigaglia", +} + +# All of the variables below are now imported from sim_parameters.py +# robot = "acrobot" +# design = "design_C.1" +# model = "model_1.0" + +# integrator = "runge_kutta" + +max_torque = 3.0 +max_velocity = 50.0 +torque_limit = [max_torque, 0.0] if robot == "pendubot" else [0.0, max_torque] +mpar.set_torque_limit(torque_limit) + +print(f"Loading {robot} trained model...") + +model_path = "../../../data/policies/design_C.1/model_1.0/acrobot/evolsac/model.zip" +# Simulation parameters +dt = 0.01 + +# learning environment parameters +state_representation = 2 + +plant = SymbolicDoublePendulum(model_pars=mpar) +print("Loading model parameters...") +simulator = Simulator(plant=plant) + +dynamics_func = double_pendulum_dynamics_func( + simulator=simulator, + dt=dt, + integrator=integrator, + robot=robot, + state_representation=state_representation, + max_velocity=max_velocity, + torque_limit=torque_limit, +) + +sac_model = SAC.load(model_path) +controller = EvolSACController( + model=sac_model, dynamics_func=dynamics_func, window_size=0, include_time=False +) 
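+# Note (added comment): at deployment the controller uses no observation history
+# (window_size=0) and no time feature, so the policy sees only the current scaled 4-D state.
+# Minimal local rollout sketch (commented out; the 10 s horizon is an assumption, and
+# controller.init() below must run first):
+#   T, X, U = simulator.simulate(t0=0.0, x0=[0.0, 0.0, 0.0, 0.0], tf=10.0, dt=dt,
+#                                controller=controller, integrator=integrator)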
+controller.init() diff --git a/leaderboard/acrobot/simulation_v2/readmes/evolsac.md b/leaderboard/acrobot/simulation_v2/readmes/evolsac.md new file mode 100644 index 00000000..06d0c201 --- /dev/null +++ b/leaderboard/acrobot/simulation_v2/readmes/evolsac.md @@ -0,0 +1,11 @@ +# Evolutionary SAC + +## Trajectory Learning, Optimization and Stabilization + +This controller uses a trajectory dictated by the policy learned in the following way: + + 1. SAC training with loose surrogate reward + 2. SAC training with stricter surrogate reward + 3. SNES training with challenge reward + injected noise in the action + +The controller uses the final policy network diff --git a/leaderboard/acrobot/simulation_v2/sim_parameters.py b/leaderboard/acrobot/simulation_v2/sim_parameters.py index cb3bfdc1..ef0b6407 100644 --- a/leaderboard/acrobot/simulation_v2/sim_parameters.py +++ b/leaderboard/acrobot/simulation_v2/sim_parameters.py @@ -25,4 +25,4 @@ t0 = 0.0 t_final = 10.0 x0 = [0.0, 0.0, 0.0, 0.0] -goal = [np.pi, 0.0, 0.0, 0.0] +goal = [np.pi, 0.0, 0.0, 0.0] \ No newline at end of file diff --git a/leaderboard/pendubot/robustness_v2/con_evolsac.py b/leaderboard/pendubot/robustness_v2/con_evolsac.py new file mode 100644 index 00000000..3b18050a --- /dev/null +++ b/leaderboard/pendubot/robustness_v2/con_evolsac.py @@ -0,0 +1,63 @@ +import stable_baselines3 +from double_pendulum.controller.evolsac.evolsac_controller import EvolSACController +from double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.simulation.gym_env import double_pendulum_dynamics_func +from double_pendulum.simulation.simulation import Simulator +from sim_parameters import design, integrator, model, mpar, robot +from stable_baselines3 import SAC + +assert stable_baselines3.__version__ == "2.3.2" + +name = "evolsac" +leaderboard_config = { + "csv_path": name + "/sim_swingup.csv", + "name": name, + "simple_name": "evolsac", + "short_description": "Evolutionary SAC for both swingup and stabilisation", + "readme_path": f"readmes/{name}.md", + "username": "AlbertoSinigaglia", +} + +# All of the variables below are now imported from sim_parameters.py +# robot = "pendubot" +# design = "design_C.1" +# model = "model_1.0" +# mpar = model_parameters(filepath=model_par_path) +# mpar.set_torque_limit(torque_limit) + +# integrator = "runge_kutta" + +max_torque = 3.0 +max_velocity = 50.0 +torque_limit = [max_torque, 0.0] if robot == "pendubot" else [0.0, max_torque] +mpar.set_torque_limit(torque_limit) + +print(f"Loading {robot} trained model...") + +model_path = "../../../data/policies/design_C.1/model_1.0/pendubot/evolsac/model.zip" + +# Simulation parameters +dt = 0.01 + +# learning environment parameters +state_representation = 2 + +plant = SymbolicDoublePendulum(model_pars=mpar) +print("Loading model parameters...") +simulator = Simulator(plant=plant) + +dynamics_func = double_pendulum_dynamics_func( + simulator=simulator, + dt=dt, + integrator=integrator, + robot=robot, + state_representation=state_representation, + max_velocity=max_velocity, + torque_limit=torque_limit, +) + +sac_model = SAC.load(model_path) +controller = EvolSACController( + model=sac_model, dynamics_func=dynamics_func, window_size=0, include_time=False +) +controller.init() diff --git a/leaderboard/pendubot/robustness_v2/readmes/evolsac.md b/leaderboard/pendubot/robustness_v2/readmes/evolsac.md new file mode 100644 index 00000000..06d0c201 --- /dev/null +++ b/leaderboard/pendubot/robustness_v2/readmes/evolsac.md @@ -0,0 +1,11 @@ +# Evolutionary 
SAC + +## Trajectory Learning, Optimization and Stabilization + +This controller uses a trajectory dictated by the policy learned in the following way: + + 1. SAC training with loose surrogate reward + 2. SAC training with stricter surrogate reward + 3. SNES training with challenge reward + injected noise in the action + +The controller uses the final policy network diff --git a/leaderboard/pendubot/robustness_v2/sim_parameters.py b/leaderboard/pendubot/robustness_v2/sim_parameters.py index 5743e83d..d834c78a 100644 --- a/leaderboard/pendubot/robustness_v2/sim_parameters.py +++ b/leaderboard/pendubot/robustness_v2/sim_parameters.py @@ -15,7 +15,6 @@ ) mpar = model_parameters(filepath=model_par_path) mpar.set_torque_limit([6.0, 0.0]) -# print(mpar) mpar.set_motor_inertia(0.0) mpar.set_damping([0.0, 0.0]) mpar.set_cfric([0.0, 0.0]) @@ -26,4 +25,4 @@ t0 = 0.0 t_final = 10.0 x0 = [0.0, 0.0, 0.0, 0.0] -goal = [np.pi, 0.0, 0.0, 0.0] +goal = [np.pi, 0.0, 0.0, 0.0] \ No newline at end of file diff --git a/leaderboard/pendubot/simulation_v2/con_evolsac.py b/leaderboard/pendubot/simulation_v2/con_evolsac.py new file mode 100644 index 00000000..47840d68 --- /dev/null +++ b/leaderboard/pendubot/simulation_v2/con_evolsac.py @@ -0,0 +1,62 @@ +import stable_baselines3 +from double_pendulum.controller.evolsac.evolsac_controller import EvolSACController +from double_pendulum.model.symbolic_plant import SymbolicDoublePendulum +from double_pendulum.simulation.gym_env import double_pendulum_dynamics_func +from double_pendulum.simulation.simulation import Simulator +from sim_parameters import design, integrator, model, mpar, robot +from stable_baselines3 import SAC + +assert stable_baselines3.__version__ == "2.3.2" + +name = "evolsac" +leaderboard_config = { + "csv_path": name + "/sim_swingup.csv", + "name": name, + "simple_name": "evolsac", + "short_description": "Evolutionary SAC for both swingup and stabilisation", + "readme_path": f"readmes/{name}.md", + "username": "AlbertoSinigaglia", +} + +# All of the variables below are now imported from sim_parameters.py +# robot = "pendubot" +# design = "design_C.1" +# model = "model_1.0" +# mpar = model_parameters(filepath=model_par_path) +# mpar.set_torque_limit(torque_limit) + +# integrator = "runge_kutta" + +max_torque = 3.0 +max_velocity = 50.0 +torque_limit = [max_torque, 0.0] if robot == "pendubot" else [0.0, max_torque] +mpar.set_torque_limit(torque_limit) + +print(f"Loading {robot} trained model...") + +model_path = "../../../data/policies/design_C.1/model_1.0/pendubot/evolsac/model.zip" +# Simulation parameters +dt = 0.01 + +# learning environment parameters +state_representation = 2 + +plant = SymbolicDoublePendulum(model_pars=mpar) +print("Loading model parameters...") +simulator = Simulator(plant=plant) + +dynamics_func = double_pendulum_dynamics_func( + simulator=simulator, + dt=dt, + integrator=integrator, + robot=robot, + state_representation=state_representation, + max_velocity=max_velocity, + torque_limit=torque_limit, +) + +sac_model = SAC.load(model_path) +controller = EvolSACController( + model=sac_model, dynamics_func=dynamics_func, window_size=0, include_time=False +) +controller.init() diff --git a/leaderboard/pendubot/simulation_v2/readmes/evolsac.md b/leaderboard/pendubot/simulation_v2/readmes/evolsac.md new file mode 100644 index 00000000..06d0c201 --- /dev/null +++ b/leaderboard/pendubot/simulation_v2/readmes/evolsac.md @@ -0,0 +1,11 @@ +# Evolutionary SAC + +## Trajectory Learning, Optimization and Stabilization + +This controller uses a 
trajectory dictated by the policy learned in the following way: + + 1. SAC training with loose surrogate reward + 2. SAC training with stricter surrogate reward + 3. SNES training with challenge reward + injected noise in the action + +The controller uses the final policy network diff --git a/leaderboard/pendubot/simulation_v2/sim_parameters.py b/leaderboard/pendubot/simulation_v2/sim_parameters.py index de357f10..d834c78a 100644 --- a/leaderboard/pendubot/simulation_v2/sim_parameters.py +++ b/leaderboard/pendubot/simulation_v2/sim_parameters.py @@ -25,4 +25,4 @@ t0 = 0.0 t_final = 10.0 x0 = [0.0, 0.0, 0.0, 0.0] -goal = [np.pi, 0.0, 0.0, 0.0] +goal = [np.pi, 0.0, 0.0, 0.0] \ No newline at end of file diff --git a/src/python/double_pendulum/controller/evolsac/__init__.py b/src/python/double_pendulum/controller/evolsac/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/python/double_pendulum/controller/evolsac/evolsac_controller.py b/src/python/double_pendulum/controller/evolsac/evolsac_controller.py new file mode 100644 index 00000000..93c7d0ab --- /dev/null +++ b/src/python/double_pendulum/controller/evolsac/evolsac_controller.py @@ -0,0 +1,66 @@ +import numpy as np +import torch +from double_pendulum.controller.abstract_controller import AbstractController + + +class EvolSACController(AbstractController): + def __init__( + self, + model, + dynamics_func, + scaling=True, + include_time=True, + window_size=0, + SNES_phase=False, + evaluating=True, + ): + super().__init__() + self.model = model + self.dynamics_func = dynamics_func + self.scaling = scaling + self.window_size = window_size + self.include_time = include_time + self.old_state = [ + [0] * (5 if include_time else 4) for _ in range(self.window_size) + ] + self.old_action = [[0.0] for _ in range(self.window_size)] + self.timestep = 0 + self.SNES_phase = SNES_phase + self.evaluating = evaluating + + def get_state(self, obs, time): + if self.window_size > 0: + return np.concatenate( + [np.reshape(self.old_state, (-1)), np.reshape(self.old_action, (-1))] + ) + else: + if self.include_time: + return list(obs) + [time / 10] + else: + return obs + + def update_old_state(self, obs, t): + if self.include_time: + self.old_state = self.old_state[1:] + [list(obs) + [t / 10]] + else: + self.old_state = self.old_state[1:] + [list(obs)] + + def update_old_action(self, action): + self.old_action = self.old_action[1:] + [action[0]] + + def get_control_output_(self, x, t=None): + self.timestep += 1 + obs = self.dynamics_func.normalize_state(x) + self.update_old_state(obs, t) + action = self.model.predict(self.get_state(obs, t), deterministic=True) + + # add gaussian noise to action + if not self.evaluating and self.SNES_phase: + a = action[0][0] + a = torch.atanh(torch.tensor(a)) + a += np.random.randn() * 0.1 + a = torch.tanh(a).numpy() + action[0][0] = np.clip(a, a_min=-1, a_max=+1) + + self.update_old_action(action) + return self.dynamics_func.unscale_action(action) diff --git a/src/python/setup.py b/src/python/setup.py index ea4f3177..40a9f200 100644 --- a/src/python/setup.py +++ b/src/python/setup.py @@ -1,4 +1,4 @@ -from setuptools import setup, find_packages +from setuptools import find_packages, setup setup( name="DoublePendulum", @@ -58,6 +58,7 @@ "flax", "tqdm", "cloudpickle==3.0.0", + "evotorch", ], }, classifiers=[