
Commit

Merge pull request #15 from AlbertoSinigaglia/main
Submission for EvolSAC
fwiebe authored Aug 27, 2024
2 parents 143b222 + b1cdc9b commit 386098e
Showing 41 changed files with 3,319 additions and 9 deletions.
Two binary files changed (contents not shown).
55 changes: 55 additions & 0 deletions docker/Dockerfile_evolsac
@@ -0,0 +1,55 @@
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
    apt-get install wget -y && \
    apt-get install unzip -y && \
    apt-get install git -y && \
    apt-get install vim -y && \
    apt-get install python3-pip -y && \
    apt-get install libyaml-cpp-dev -y && \
    #apt install libeigen3-dev -y && \
    apt-get install libpython3.10 -y && \
    apt-get install libx11-6 -y && \
    apt-get install libsm6 -y && \
    apt-get install libxt6 -y && \
    apt-get install libglib2.0-0 -y && \
    apt-get install python3-sphinx -y && \
    apt-get install python3-numpydoc -y && \
    apt-get install python3-sphinx-rtd-theme -y && \
    apt-get install python-is-python3 -y

# libeigen3-dev install does not work with apt
RUN wget -O Eigen.zip https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip
RUN unzip Eigen.zip
RUN cp -r eigen-3.4.0/Eigen /usr/local/include

#RUN python -m ensurepip --upgrade
RUN pip install -U pip
RUN pip3 install numpy dill
RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install drake

RUN python -m pip install torch
RUN python -m pip install stable-baselines3==2.3.2
RUN python -m pip install evotorch
RUN python -m pip install gymnasium
RUN python -m pip install ffmpeg-python

# Copy everything
COPY . ./double_pendulum/

WORKDIR "/double_pendulum"

# RUN git checkout v0.1.0

RUN make install
RUN make pythonfull

RUN apt-get -y update
RUN apt-get -y upgrade
RUN apt-get install -y ffmpeg


# Re-pin stable-baselines3 in case the installs above changed its version
RUN python -m pip install stable-baselines3==2.3.2
7 changes: 7 additions & 0 deletions examples/reinforcement_learning/evolsac/README.md
@@ -0,0 +1,7 @@
# EvolSAC training
To train the Evolutionary SAC agent for both the pendubot and the acrobot, first ensure that the variable `robot` is set consistently to either `acrobot` or `pendubot` in all 3 `main.py` files contained in the `SAC_main_training` and `SNES_finetuning` folders (a minimal sketch follows the steps below).

The scripts below must be run directly from the folders that contain them to ensure path integrity.

1. Run `python main.py 3.0 0 0 0` from `SAC_main_training`, which trains the agent according to the surrogate reward function defined in the same file
2. Run `python main.py 3.0 0 0 0 [acrobot/pendubot]` from `SNES_finetuning`, which loads the agent produced in step 1 and further trains it based on the performance score defined by the competition's organizers
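
For reference, a minimal sketch of what a consistent `robot` setting might look like, assuming it is a plain module-level string near the top of each `main.py` (the actual variable layout in the repository may differ):

```python
# Hypothetical snippet: use the same value in every main.py before launching training.
robot = "pendubot"  # or "acrobot"; must match across SAC_main_training and SNES_finetuning
```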
109 changes: 109 additions & 0 deletions examples/reinforcement_learning/evolsac/SAC_finetuning/environment.py
@@ -0,0 +1,109 @@
import gymnasium as gym
import numpy as np


# Base Gymnasium environment for the double pendulum. The reward is not defined
# here: training environments subclass this and implement reward_func.
class CustomCustomEnv(gym.Env):
    def __init__(
        self,
        dynamics_func,
        reset_func,
        obs_space=gym.spaces.Box(
            np.array([-1.0, -1.0, -1.0, -1.0]), np.array([1.0, 1.0, 1.0, 1.0])
        ),
        act_space=gym.spaces.Box(np.array([-1.0, -1.0]), np.array([1.0, 1.0])),
        max_episode_steps=1000,
        scaling=True,
        terminates=True,
    ):
        self.dynamics_func = dynamics_func
        self.reset_func = reset_func
        self.observation_space = obs_space
        self.action_space = act_space
        self.max_episode_steps = max_episode_steps

        self.previous_action = 0
        self.terminates = terminates

        self.observation = self.reset_func()
        self.step_counter = 0
        self.stabilisation_mode = False
        self.y = [0, 0]
        self.update_y()
        self.scaling = scaling

        l1 = self.dynamics_func.simulator.plant.l[0]
        l2 = self.dynamics_func.simulator.plant.l[1]
        self.max_height = l1 + l2

        # Height threshold above which the controller is considered to be stabilising
        if self.dynamics_func.robot == "acrobot":
            self.control_line = 0.75 * self.max_height
        elif self.dynamics_func.robot == "pendubot":
            self.control_line = 0.7 * self.max_height

        self.old_obs = None

    def step(self, action):
        self.old_obs = np.copy(self.observation)
        self.observation = self.dynamics_func(
            self.observation, action, scaling=self.scaling
        )

        self.update_y()
        self.stabilisation_mode = self.y[1] >= self.control_line
        terminated = self.terminated_func()
        reward = self.reward_func(terminated, action)
        info = {}
        truncated = False
        self.step_counter += 1
        if self.step_counter >= self.max_episode_steps:
            truncated = True
        self.previous_action = action[0]
        return self.observation, reward, terminated, truncated, info

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.observation = self.reset_func()
        self.step_counter = 0
        info = {}
        self.previous_action = 0
        self.stabilisation_mode = False
        self.old_obs = np.copy(self.observation)
        return self.observation, info

    def render(self, mode="human"):
        pass

    def reward_func(self, terminated, action):
        raise NotImplementedError("You have to define the reward function")

    def terminated_func(self):
        if self.terminates:
            # Terminate if we are in stabilisation mode and the end effector
            # has fallen back below the control line
            if self.stabilisation_mode and self.y[1] < self.control_line:
                return True
        return False

    # Update the y coordinate of the first joint and the end effector
    def update_y(self):
        theta1, theta2, _, _ = self.dynamics_func.unscale_state(self.observation)

        link_end_points = self.dynamics_func.simulator.plant.forward_kinematics(
            [theta1, theta2]
        )
        self.y[0] = link_end_points[0][1]
        self.y[1] = link_end_points[1][1]

    def gravitational_reward(self):
        x = self.dynamics_func.unscale_state(self.observation)
        V = self.dynamics_func.simulator.plant.potential_energy(x)
        return V

    def V(self):
        return self.gravitational_reward()

    def kinetic_reward(self):
        x = self.dynamics_func.unscale_state(self.observation)
        T = self.dynamics_func.simulator.plant.kinetic_energy(x)
        return T

    def T(self):
        return self.kinetic_reward()
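
`reward_func` is deliberately left abstract above, so a concrete training environment has to subclass `CustomCustomEnv` and supply it. The sketch below is purely illustrative (it is not the reward used in the submission); it relies only on members defined in the class above, such as `V()`, `T()`, `stabilisation_mode`, and `previous_action`:

```python
# Illustrative only: a hypothetical reward shaping on top of CustomCustomEnv.
class ExampleSwingUpEnv(CustomCustomEnv):
    def reward_func(self, terminated, action):
        if terminated:
            # Dropping below the control line while stabilising ends the episode.
            return -100.0
        # Reward raising potential energy; penalise kinetic energy and action jitter.
        reward = self.V() - 0.1 * self.T()
        reward -= 0.5 * abs(action[0] - self.previous_action)
        if self.stabilisation_mode:
            reward += 10.0
        return reward
```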