Merge pull request #15 from AlbertoSinigaglia/main
Submission for EvolSAC
Showing 41 changed files with 3,319 additions and 9 deletions.
(2 binary files changed, not shown)
@@ -0,0 +1,55 @@
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
    apt-get install wget -y && \
    apt-get install unzip -y && \
    apt-get install git -y && \
    apt-get install vim -y && \
    apt-get install python3-pip -y && \
    apt-get install libyaml-cpp-dev -y && \
    #apt install libeigen3-dev -y && \
    apt-get install libpython3.10 -y && \
    apt-get install libx11-6 -y && \
    apt-get install libsm6 -y && \
    apt-get install libxt6 -y && \
    apt-get install libglib2.0-0 -y && \
    apt-get install python3-sphinx -y && \
    apt-get install python3-numpydoc -y && \
    apt-get install python3-sphinx-rtd-theme -y && \
    apt-get install python-is-python3 -y

# libeigen3-dev install does not work with apt; fetch Eigen 3.4.0 manually
RUN wget -O Eigen.zip https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip
RUN unzip Eigen.zip
RUN cp -r eigen-3.4.0/Eigen /usr/local/include

#RUN python -m ensurepip --upgrade
RUN pip install -U pip
RUN pip3 install numpy dill
RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install drake

RUN python -m pip install torch
RUN python -m pip install stable-baselines3==2.3.2
RUN python -m pip install evotorch
RUN python -m pip install gymnasium
RUN python -m pip install ffmpeg-python

# Copy everything
COPY . ./double_pendulum/

WORKDIR "/double_pendulum"

# RUN git checkout v0.1.0

RUN make install
RUN make pythonfull

RUN apt-get -y update
RUN apt-get -y upgrade
RUN apt-get install -y ffmpeg

RUN python -m pip install stable-baselines3==2.3.2
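To build the image locally, a typical invocation (assuming this Dockerfile sits at the repository root; the image tag `evolsac` is hypothetical) would be `docker build -t evolsac .`, followed by `docker run -it evolsac` to get a shell inside the container.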
@@ -0,0 +1,7 @@
# EvolSAC training
To train the Evolutionary SAC agent for both the pendubot and the acrobot, first ensure that the variable `robot` is consistently set to either `acrobot` or `pendubot` in all three `main.py` files contained inside the folders `SAC_main_training` and `SNES_finetuning` (a minimal sketch of this switch follows the steps below).

The scripts below must be run directly from the folders that contain them to ensure path integrity.
1. Run `python main.py 3.0 0 0 0` from `SAC_main_training`; this trains the agent according to the surrogate reward function defined in the same file.
2. Run `python main.py 3.0 0 0 0 [acrobot/pendubot]` from `SNES_finetuning`; this loads the agent produced in step 1 and further trains it based on the performance score defined by the competition's organizers.
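A minimal sketch of the switch mentioned above (the exact placement inside each `main.py` may differ; only the value matters):

```python
# Must be set to the same value in all three main.py files
# before running either training step.
robot = "pendubot"  # or "acrobot"
```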
examples/reinforcement_learning/evolsac/SAC_finetuning/environment.py (109 additions, 0 deletions)
@@ -0,0 +1,109 @@
import gymnasium as gym
import numpy as np


class CustomCustomEnv(gym.Env):
    def __init__(
        self,
        dynamics_func,
        reset_func,
        obs_space=gym.spaces.Box(
            np.array([-1.0, -1.0, -1.0, -1.0]), np.array([1.0, 1.0, 1.0, 1.0])
        ),
        act_space=gym.spaces.Box(np.array([-1.0, -1.0]), np.array([1.0, 1.0])),
        max_episode_steps=1000,
        scaling=True,
        terminates=True,
    ):
        self.dynamics_func = dynamics_func
        self.reset_func = reset_func
        self.observation_space = obs_space
        self.action_space = act_space
        self.max_episode_steps = max_episode_steps

        self.previous_action = 0
        self.terminates = terminates

        self.observation = self.reset_func()
        self.step_counter = 0
        self.stabilisation_mode = False
        self.y = [0, 0]
        self.update_y()
        self.scaling = scaling

        l1 = self.dynamics_func.simulator.plant.l[0]
        l2 = self.dynamics_func.simulator.plant.l[1]
        self.max_height = l1 + l2

        if self.dynamics_func.robot == "acrobot":
            self.control_line = 0.75 * self.max_height
        elif self.dynamics_func.robot == "pendubot":
            self.control_line = 0.7 * self.max_height

        self.old_obs = None
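
    # control_line (set in __init__ above) is a fixed fraction of full arm
    # extension: 0.75 * max_height for the acrobot, 0.7 * max_height for the
    # pendubot. Once the end effector rises above it, the episode switches
    # into stabilisation mode (see step() below).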

    def step(self, action):
        self.old_obs = np.copy(self.observation)
        self.observation = self.dynamics_func(
            self.observation, action, scaling=self.scaling
        )

        self.update_y()
        self.stabilisation_mode = self.y[1] >= self.control_line
        terminated = self.terminated_func()
        reward = self.reward_func(terminated, action)
        info = {}
        truncated = False
        self.step_counter += 1
        if self.step_counter >= self.max_episode_steps:
            truncated = True
        self.previous_action = action[0]
        return self.observation, reward, terminated, truncated, info
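
    # step() follows the Gymnasium 5-tuple API: (observation, reward,
    # terminated, truncated, info). Termination is decided by
    # terminated_func() below; truncation fires once step_counter
    # reaches max_episode_steps.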

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.observation = self.reset_func()
        self.step_counter = 0
        info = {}
        self.previous_action = 0
        self.stabilisation_mode = False
        self.old_obs = np.copy(self.observation)
        return self.observation, info

    def render(self, mode="human"):
        pass

    def reward_func(self, terminated, action):
        raise NotImplementedError("You have to define the reward function")

    def terminated_func(self):
        if self.terminates:
            # Check whether we are in stabilisation mode and the end effector
            # has fallen back below the control line
            if self.stabilisation_mode and self.y[1] < self.control_line:
                return True
        return False

    # Update the y coordinates of the first joint and the end effector
    def update_y(self):
        theta1, theta2, _, _ = self.dynamics_func.unscale_state(self.observation)

        link_end_points = self.dynamics_func.simulator.plant.forward_kinematics(
            [theta1, theta2]
        )
        self.y[0] = link_end_points[0][1]
        self.y[1] = link_end_points[1][1]

    def gravitational_reward(self):
        x = self.dynamics_func.unscale_state(self.observation)
        V = self.dynamics_func.simulator.plant.potential_energy(x)
        return V

    def V(self):
        return self.gravitational_reward()

    def kinetic_reward(self):
        x = self.dynamics_func.unscale_state(self.observation)
        T = self.dynamics_func.simulator.plant.kinetic_energy(x)
        return T

    def T(self):
        return self.kinetic_reward()
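Since `reward_func` raises `NotImplementedError`, a concrete environment must subclass `CustomCustomEnv` and supply the reward. A minimal, hypothetical sketch (the surrogate reward actually used lives in the `main.py` training scripts; the subclass name and reward terms below are illustrative only):

```python
import numpy as np

class SwingUpEnv(CustomCustomEnv):  # hypothetical subclass name
    def reward_func(self, terminated, action):
        if terminated:
            # Illustrative penalty for falling back below the control line
            return -100.0
        # Reward end-effector height, plus an illustrative smoothness term
        # penalising jerky changes between consecutive actions.
        height_term = self.y[1] / self.max_height
        smoothness = -0.1 * float(np.abs(action[0] - self.previous_action))
        return height_term + smoothness
```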