Merge pull request #15 from AlbertoSinigaglia/main
Submission for EvolSAC
Showing 41 changed files with 3,319 additions and 9 deletions.
(2 binary files changed, not shown)
@@ -0,0 +1,55 @@
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
    apt-get install wget -y && \
    apt-get install unzip -y && \
    apt-get install git -y && \
    apt-get install vim -y && \
    apt-get install python3-pip -y && \
    apt-get install libyaml-cpp-dev -y && \
    #apt install libeigen3-dev -y && \
    apt-get install libpython3.10 -y && \
    apt-get install libx11-6 -y && \
    apt-get install libsm6 -y && \
    apt-get install libxt6 -y && \
    apt-get install libglib2.0-0 -y && \
    apt-get install python3-sphinx -y && \
    apt-get install python3-numpydoc -y && \
    apt-get install python3-sphinx-rtd-theme -y && \
    apt-get install python-is-python3 -y

# libeigen3-dev install does not work with apt; fetch Eigen 3.4.0 manually
RUN wget -O Eigen.zip https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip
RUN unzip Eigen.zip
RUN cp -r eigen-3.4.0/Eigen /usr/local/include

#RUN python -m ensurepip --upgrade
RUN pip install -U pip
RUN pip3 install numpy dill
RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install drake

RUN python -m pip install torch
RUN python -m pip install stable-baselines3==2.3.2
RUN python -m pip install evotorch
RUN python -m pip install gymnasium
RUN python -m pip install ffmpeg-python

# Copy everything
COPY . ./double_pendulum/

WORKDIR "/double_pendulum"

# RUN git checkout v0.1.0

RUN make install
RUN make pythonfull

RUN apt-get -y update
RUN apt-get -y upgrade
RUN apt-get install -y ffmpeg

RUN python -m pip install stable-baselines3==2.3.2
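To build the image locally, a typical invocation (assuming this Dockerfile sits at the repository root; the image tag `evolsac` is hypothetical) would be `docker build -t evolsac .`, followed by `docker run -it evolsac` to get a shell inside the container.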
@@ -0,0 +1,7 @@
# EvolSAC training
To train the Evolutionary SAC agent for both the pendubot and the acrobot, first ensure that the variable `robot` is consistently set to either `acrobot` or `pendubot` in all three `main.py` files contained inside the folders `SAC_main_training` and `SNES_finetuning` (a minimal sketch of this switch follows the steps below).

The scripts below must be run directly from the folders that contain them to ensure path integrity.
1. Run `python main.py 3.0 0 0 0` from `SAC_main_training`; this trains the agent according to the surrogate reward function defined in the same file.
2. Run `python main.py 3.0 0 0 0 [acrobot/pendubot]` from `SNES_finetuning`; this loads the agent produced in step 1 and further trains it based on the performance score defined by the competition's organizers.
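A minimal sketch of the switch mentioned above (the exact placement inside each `main.py` may differ; only the value matters):

```python
# Must be set to the same value in all three main.py files
# before running either training step.
robot = "pendubot"  # or "acrobot"
```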
examples/reinforcement_learning/evolsac/SAC_finetuning/environment.py (109 additions, 0 deletions)
@@ -0,0 +1,109 @@
import gymnasium as gym
import numpy as np


class CustomCustomEnv(gym.Env):
    def __init__(
        self,
        dynamics_func,
        reset_func,
        obs_space=gym.spaces.Box(
            np.array([-1.0, -1.0, -1.0, -1.0]), np.array([1.0, 1.0, 1.0, 1.0])
        ),
        act_space=gym.spaces.Box(np.array([-1.0, -1.0]), np.array([1.0, 1.0])),
        max_episode_steps=1000,
        scaling=True,
        terminates=True,
    ):
        self.dynamics_func = dynamics_func
        self.reset_func = reset_func
        self.observation_space = obs_space
        self.action_space = act_space
        self.max_episode_steps = max_episode_steps

        self.previous_action = 0
        self.terminates = terminates

        self.observation = self.reset_func()
        self.step_counter = 0
        self.stabilisation_mode = False
        self.y = [0, 0]
        self.update_y()
        self.scaling = scaling

        l1 = self.dynamics_func.simulator.plant.l[0]
        l2 = self.dynamics_func.simulator.plant.l[1]
        self.max_height = l1 + l2

        if self.dynamics_func.robot == "acrobot":
            self.control_line = 0.75 * self.max_height
        elif self.dynamics_func.robot == "pendubot":
            self.control_line = 0.7 * self.max_height

        self.old_obs = None
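
    # control_line (set in __init__ above) is a fixed fraction of full arm
    # extension: 0.75 * max_height for the acrobot, 0.7 * max_height for the
    # pendubot. Once the end effector rises above it, the episode switches
    # into stabilisation mode (see step() below).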

    def step(self, action):
        self.old_obs = np.copy(self.observation)
        self.observation = self.dynamics_func(
            self.observation, action, scaling=self.scaling
        )

        self.update_y()
        self.stabilisation_mode = self.y[1] >= self.control_line
        terminated = self.terminated_func()
        reward = self.reward_func(terminated, action)
        info = {}
        truncated = False
        self.step_counter += 1
        if self.step_counter >= self.max_episode_steps:
            truncated = True
        self.previous_action = action[0]
        return self.observation, reward, terminated, truncated, info
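
    # step() follows the Gymnasium 5-tuple API: (observation, reward,
    # terminated, truncated, info). Termination is decided by
    # terminated_func() below; truncation fires once step_counter
    # reaches max_episode_steps.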

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.observation = self.reset_func()
        self.step_counter = 0
        info = {}
        self.previous_action = 0
        self.stabilisation_mode = False
        self.old_obs = np.copy(self.observation)
        return self.observation, info

    def render(self, mode="human"):
        pass

    def reward_func(self, terminated, action):
        raise NotImplementedError("You have to define the reward function")

    def terminated_func(self):
        if self.terminates:
            # Check whether we are in stabilisation mode and the end effector
            # has fallen back below the control line
            if self.stabilisation_mode and self.y[1] < self.control_line:
                return True
        return False

    # Update the y coordinates of the first joint and the end effector
    def update_y(self):
        theta1, theta2, _, _ = self.dynamics_func.unscale_state(self.observation)

        link_end_points = self.dynamics_func.simulator.plant.forward_kinematics(
            [theta1, theta2]
        )
        self.y[0] = link_end_points[0][1]
        self.y[1] = link_end_points[1][1]

    def gravitational_reward(self):
        x = self.dynamics_func.unscale_state(self.observation)
        V = self.dynamics_func.simulator.plant.potential_energy(x)
        return V

    def V(self):
        return self.gravitational_reward()

    def kinetic_reward(self):
        x = self.dynamics_func.unscale_state(self.observation)
        T = self.dynamics_func.simulator.plant.kinetic_energy(x)
        return T

    def T(self):
        return self.kinetic_reward()
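Since `reward_func` raises `NotImplementedError`, a concrete environment must subclass `CustomCustomEnv` and supply the reward. A minimal, hypothetical sketch (the surrogate reward actually used lives in the `main.py` training scripts; the subclass name and reward terms below are illustrative only):

```python
import numpy as np

class SwingUpEnv(CustomCustomEnv):  # hypothetical subclass name
    def reward_func(self, terminated, action):
        if terminated:
            # Illustrative penalty for falling back below the control line
            return -100.0
        # Reward end-effector height, plus an illustrative smoothness term
        # penalising jerky changes between consecutive actions.
        height_term = self.y[1] / self.max_height
        smoothness = -0.1 * float(np.abs(action[0] - self.previous_action))
        return height_term + smoothness
```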