Add Mujoco v5 environments #85

Merged: 39 commits, Oct 28, 2024

Commits (39; the diff below shows changes from 32 of them)
531aa68
Hopper and HalfCheetah v5
LucasAlegre Feb 18, 2024
cf081b0
Merge branch 'main' into mujoco-v5
LucasAlegre May 21, 2024
67c50f3
Fix wrapper imports
LucasAlegre May 22, 2024
6285bf4
Fix mo-reacher-v0 reset
LucasAlegre May 22, 2024
8096b2c
Bump LunarLander to v3
LucasAlegre May 22, 2024
a20e9e7
Mario subclass Env
LucasAlegre May 22, 2024
bf2dcc9
Skip highway tests
LucasAlegre May 22, 2024
efe8bb7
Migrate wrappers
ffelten May 23, 2024
b2f2b53
WIP
ffelten May 23, 2024
062849a
Rollback Vector env contstructor
ffelten May 23, 2024
7fbaf38
Tests are passing
ffelten May 23, 2024
7e9f5b8
Remove comments
ffelten May 23, 2024
f6914a4
Export wrappers
ffelten May 23, 2024
bbaab1e
Update to use Gymnasium v1.0.0a1
pseudo-rnd-thoughts May 28, 2024
f72773a
Better doc and tests for vector wrappers
ffelten Aug 7, 2024
f442ea4
Enhance wrappers doc and tests
ffelten Aug 7, 2024
9b9a3ea
Remove print
ffelten Aug 7, 2024
98a695e
Fix test
ffelten Aug 8, 2024
c974b3b
Merge branch 'main' into gymnasium-v5
Aug 9, 2024
7480c64
Remove pybullet mo-reacher
Aug 9, 2024
4e39d18
Require highway-env >= 1.9.1
Aug 12, 2024
a870455
Merge main
ffelten Aug 13, 2024
dbddf3a
test type
ffelten Aug 13, 2024
57870fe
Merge branch 'gymnasium-v5' into mujoco-v5
Aug 14, 2024
fbac985
Add Mujoco v5 environments
Aug 16, 2024
eab4592
pre-commit
Aug 16, 2024
d615b48
Merge branch 'main' into mujoco-v5
LucasAlegre Oct 16, 2024
7931b98
Merge branch 'mujoco-v5' of https://github.com/Farama-Foundation/MO-G…
LucasAlegre Oct 16, 2024
f4261ba
Do not treat humanoid contact force as separate objective
LucasAlegre Oct 25, 2024
5188672
Do not treat reward conctact as separate objective in ant-v5
LucasAlegre Oct 25, 2024
311f378
Env ids and variable names refactor
LucasAlegre Oct 27, 2024
b18a31c
hotfix walker2d energy cost
LucasAlegre Oct 27, 2024
d7135c6
Get cost from info dict hopper-v5
LucasAlegre Oct 27, 2024
76e7dc9
Refactor _cost_objective variable
LucasAlegre Oct 27, 2024
b653155
Update HalfCheetah cost to be consistent with other envs
LucasAlegre Oct 28, 2024
c127ae2
Update ant-v5 docs
LucasAlegre Oct 28, 2024
88d0de5
Update docs about recoving original envs
LucasAlegre Oct 28, 2024
24f66b1
Document 2obj version of ant and hopper
LucasAlegre Oct 28, 2024
a515cf1
Fix typo
LucasAlegre Oct 28, 2024
75 changes: 66 additions & 9 deletions mo_gymnasium/envs/mujoco/__init__.py
@@ -3,56 +3,113 @@

register(
id="mo-halfcheetah-v4",
entry_point="mo_gymnasium.envs.mujoco.half_cheetah:MOHalfCheehtahEnv",
entry_point="mo_gymnasium.envs.mujoco.half_cheetah_v4:MOHalfCheehtahEnv",
max_episode_steps=1000,
)

register(
id="mo-halfcheetah-v5",
entry_point="mo_gymnasium.envs.mujoco.half_cheetah_v5:MOHalfCheehtahEnv",
max_episode_steps=1000,
)

register(
id="mo-hopper-v4",
entry_point="mo_gymnasium.envs.mujoco.hopper:MOHopperEnv",
entry_point="mo_gymnasium.envs.mujoco.hopper_v4:MOHopperEnv",
max_episode_steps=1000,
)

register(
id="mo-hopper-v5",
entry_point="mo_gymnasium.envs.mujoco.hopper_v5:MOHopperEnv",
max_episode_steps=1000,
)

register(
id="mo-hopper-2d-v4",
entry_point="mo_gymnasium.envs.mujoco.hopper:MOHopperEnv",
entry_point="mo_gymnasium.envs.mujoco.hopper_v4:MOHopperEnv",
max_episode_steps=1000,
kwargs={"cost_objective": False},
)

register(
id="mo-hopper-2obj-v5",
entry_point="mo_gymnasium.envs.mujoco.hopper_v5:MOHopperEnv",
max_episode_steps=1000,
kwargs={"cost_objective": False},
)

register(
id="mo-walker2d-v4",
entry_point="mo_gymnasium.envs.mujoco.walker2d:MOWalker2dEnv",
entry_point="mo_gymnasium.envs.mujoco.walker2d_v4:MOWalker2dEnv",
max_episode_steps=1000,
)

register(
id="mo-walker2d-v5",
entry_point="mo_gymnasium.envs.mujoco.walker2d_v5:MOWalker2dEnv",
max_episode_steps=1000,
)

register(
id="mo-ant-v4",
entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
entry_point="mo_gymnasium.envs.mujoco.ant_v4:MOAntEnv",
max_episode_steps=1000,
)

register(
id="mo-ant-2d-v4",
entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
entry_point="mo_gymnasium.envs.mujoco.ant_v4:MOAntEnv",
max_episode_steps=1000,
kwargs={"cost_objective": False},
)


register(
id="mo-ant-v5",
entry_point="mo_gymnasium.envs.mujoco.ant_v5:MOAntEnv",
max_episode_steps=1000,
)

register(
id="mo-ant-2obj-v5",
entry_point="mo_gymnasium.envs.mujoco.ant_v5:MOAntEnv",
max_episode_steps=1000,
kwargs={"cost_objective": False},
)

register(
id="mo-swimmer-v4",
entry_point="mo_gymnasium.envs.mujoco.swimmer:MOSwimmerEnv",
entry_point="mo_gymnasium.envs.mujoco.swimmer_v4:MOSwimmerEnv",
max_episode_steps=1000,
)

register(
id="mo-swimmer-v5",
entry_point="mo_gymnasium.envs.mujoco.swimmer_v5:MOSwimmerEnv",
max_episode_steps=1000,
)

register(
id="mo-humanoid-v4",
entry_point="mo_gymnasium.envs.mujoco.humanoid:MOHumanoidEnv",
entry_point="mo_gymnasium.envs.mujoco.humanoid_v4:MOHumanoidEnv",
max_episode_steps=1000,
)

register(
id="mo-humanoid-v5",
entry_point="mo_gymnasium.envs.mujoco.humanoid_v5:MOHumanoidEnv",
max_episode_steps=1000,
)

register(
id="mo-reacher-v4",
entry_point="mo_gymnasium.envs.mujoco.reacher:MOReacherEnv",
entry_point="mo_gymnasium.envs.mujoco.reacher_v4:MOReacherEnv",
max_episode_steps=50,
)

register(
id="mo-reacher-v5",
entry_point="mo_gymnasium.envs.mujoco.reacher_v5:MOReacherEnv",
max_episode_steps=50,
)
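
As a quick usage sketch for the registrations above (not part of this diff; assumes MO-Gymnasium is installed and imported as `mo_gym`, as in the docstrings below):

import numpy as np
import mo_gymnasium as mo_gym

# Instantiate one of the newly registered v5 environments.
env = mo_gym.make("mo-hopper-v5")
obs, info = env.reset(seed=42)

# step() returns a vector reward with one entry per objective.
returns = np.zeros(env.unwrapped.reward_dim)
for _ in range(100):
    obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())
    returns += vec_reward
    if terminated or truncated:
        obs, info = env.reset()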
File renamed without changes.
56 changes: 56 additions & 0 deletions mo_gymnasium/envs/mujoco/ant_v5.py
@@ -0,0 +1,56 @@
import numpy as np
from gymnasium.envs.mujoco.ant_v5 import AntEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOAntEnv(AntEnv, EzPickle):
"""
## Description
Multi-objective version of the AntEnv environment.

See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/ant/) for more information.

The original Gymnasium 'Ant-v5' is recovered by the following linear scalarization:

env = mo_gym.make('mo-ant-v5', cost_objective=False)
LinearReward(env, weight=np.array([1.0, 0.0]))

## Reward Space
The reward is 2- or 3-dimensional:
- 0: x-velocity
- 1: y-velocity
- 2: Control cost of the action
If the cost_objective flag is set to False, the reward is 2-dimensional, and the cost is added to the other objectives.
A healthy reward is added to all objectives.

## Version History
- v5: Now includes contact forces in the reward and observation.
See https://gymnasium.farama.org/environments/mujoco/ant/#version-history
"""

def __init__(self, cost_objective=True, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, cost_objective, **kwargs)
self.cost_objective = cost_objective
self.reward_dim = 3 if cost_objective else 2
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
x_velocity = info["x_velocity"]
y_velocity = info["y_velocity"]
cost = info["reward_ctrl"]
healthy_reward = info["reward_survive"]

if self.cost_objective:
cost /= self._ctrl_cost_weight # Ignore the weight in the original AntEnv
vec_reward = np.array([x_velocity, y_velocity, cost], dtype=np.float32)
else:
vec_reward = np.array([x_velocity, y_velocity], dtype=np.float32)
vec_reward += cost

vec_reward += healthy_reward
vec_reward += info["reward_contact"] # Do not treat contact forces as a separate objective

return observation, vec_reward, terminated, truncated, info
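
To illustrate the scalarization mentioned in the docstring, a minimal sketch of recovering a scalar Ant reward from the 2-objective variant; the `LinearReward` import path is an assumption here and may differ between releases:

import numpy as np
import mo_gymnasium as mo_gym
from mo_gymnasium import LinearReward  # assumed export; may live under mo_gymnasium.wrappers

env = mo_gym.make("mo-ant-v5", cost_objective=False)
scalar_env = LinearReward(env, weight=np.array([1.0, 0.0]))

obs, info = scalar_env.reset(seed=0)
# The wrapper collapses the vector reward into a single float each step.
obs, reward, terminated, truncated, info = scalar_env.step(scalar_env.action_space.sample())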
29 changes: 29 additions & 0 deletions mo_gymnasium/envs/mujoco/half_cheetah_v5.py
@@ -0,0 +1,29 @@
import numpy as np
from gymnasium.envs.mujoco.half_cheetah_v5 import HalfCheetahEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHalfCheehtahEnv(HalfCheetahEnv, EzPickle):
"""
## Description
Multi-objective version of the HalfCheetahEnv environment.

See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/half_cheetah/) for more information.

## Reward Space
The reward is 2-dimensional:
- 0: Reward for running forward
- 1: Control cost of the action
"""

def __init__(self, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, **kwargs)
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
self.reward_dim = 2

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
vec_reward = np.array([info["reward_forward"], info["reward_ctrl"]], dtype=np.float32)
return observation, vec_reward, terminated, truncated, info
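
As a sanity-check sketch (not part of the diff): since no rescaling is applied here, the two objectives should sum back to the scalar HalfCheetah-v5 reward.

import numpy as np
import mo_gymnasium as mo_gym

env = mo_gym.make("mo-halfcheetah-v5")
obs, info = env.reset(seed=1)
obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())

# reward_forward + reward_ctrl is the scalar reward of the underlying HalfCheetah-v5.
assert np.isclose(vec_reward.sum(), info["reward_forward"] + info["reward_ctrl"], atol=1e-4)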
42 changes: 42 additions & 0 deletions mo_gymnasium/envs/mujoco/hopper_v5.py
@@ -0,0 +1,42 @@
import numpy as np
from gymnasium.envs.mujoco.hopper_v5 import HopperEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHopperEnv(HopperEnv, EzPickle):
"""
## Description
Multi-objective version of the HopperEnv environment.

See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/hopper/) for more information.

## Reward Space
The reward is 3-dimensional:
- 0: Reward for going forward on the x-axis
- 1: Reward for jumping high on the z-axis
- 2: Control cost of the action
If the cost_objective flag is set to False, the reward is 2-dimensional, and the cost is added to the other objectives.
"""

def __init__(self, cost_objective=True, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, cost_objective, **kwargs)
self.cost_objective = cost_objective
self.reward_dim = 3 if cost_objective else 2
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
x_velocity = info["x_velocity"]
height = 10 * info["z_distance_from_origin"]
energy_cost = np.sum(np.square(action))
if self.cost_objective:
vec_reward = np.array([x_velocity, height, -energy_cost], dtype=np.float32)
else:
vec_reward = np.array([x_velocity, height], dtype=np.float32)
vec_reward -= self._ctrl_cost_weight * energy_cost

vec_reward += info["reward_survive"]

return observation, vec_reward, terminated, truncated, info
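
A short sketch contrasting the two registered Hopper variants (ids taken from the registrations earlier in this diff):

import mo_gymnasium as mo_gym

env3 = mo_gym.make("mo-hopper-v5")       # 3 objectives: x-velocity, height, control cost
env2 = mo_gym.make("mo-hopper-2obj-v5")  # 2 objectives: the control cost is folded into the others
assert env3.unwrapped.reward_dim == 3
assert env2.unwrapped.reward_dim == 2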
39 changes: 39 additions & 0 deletions mo_gymnasium/envs/mujoco/humanoid_v5.py
@@ -0,0 +1,39 @@
import numpy as np
from gymnasium.envs.mujoco.humanoid_v5 import HumanoidEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHumanoidEnv(HumanoidEnv, EzPickle):
"""
## Description
Multi-objective version of the HumanoidEnv environment.

See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/humanoid/) for more information.

## Reward Space
The reward is 2-dimensional:
- 0: Reward for running forward (x-velocity)
- 1: Control cost of the action

## Version History:
- v5: Now includes contact forces. See: https://gymnasium.farama.org/environments/mujoco/humanoid/#version-history
The scale of the control cost has changed from v4.
"""

def __init__(self, **kwargs):
super().__init__(**kwargs)
EzPickle.__init__(self, **kwargs)
self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
self.reward_dim = 2

def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
velocity = info["x_velocity"]
neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight # Revert the scale applied in the original environment
vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)

vec_reward += self.healthy_reward # All objectives are penalized when the agent falls
vec_reward += info["reward_contact"] # Do not treat contact forces as a separate objective

return observation, vec_reward, terminated, truncated, info
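
To illustrate the rescaling in step() above, a small check sketch (assumes the unwrapped env exposes `_ctrl_cost_weight` and the info keys used in the code):

import mo_gymnasium as mo_gym

env = mo_gym.make("mo-humanoid-v5")
obs, info = env.reset(seed=0)
obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())

# vec_reward[1] holds the unweighted energy term plus the shared survive/contact terms;
# re-applying the weight should approximately recover the env's reward_ctrl.
w = env.unwrapped._ctrl_cost_weight
energy = vec_reward[1] - info["reward_survive"] - info["reward_contact"]
assert abs(w * energy - info["reward_ctrl"]) < 1e-3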
101 changes: 101 additions & 0 deletions mo_gymnasium/envs/mujoco/reacher_v5.py
@@ -0,0 +1,101 @@
from os import path

import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.envs.mujoco.reacher_v5 import ReacherEnv
from gymnasium.spaces import Box, Discrete


DEFAULT_CAMERA_CONFIG = {"trackbodyid": 0}


class MOReacherEnv(ReacherEnv):
"""
## Description
Multi-objective version of the [`Reacher-v5` environment](https://gymnasium.farama.org/environments/mujoco/reacher/).

## Observation Space
The observation is 6-dimensional and contains:
- sin and cos of the angles of the central and elbow joints
- angular velocity of the central and elbow joints

## Action Space
The action space is discrete and contains the 3^2=9 possible actions based on applying positive (+1), negative (-1) or zero (0) torque to each of the two joints.

## Reward Space
The reward is 4-dimensional and is defined based on the distance between the tip of the arm and each of the four target locations.
For each i={1,2,3,4} it is computed as:
```math
r_i = 1 - 4 * || finger_tip_coord - target_i ||
```

## Version History:
See https://gymnasium.farama.org/environments/mujoco/reacher/#version-history
"""

def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
self.observation_space = Box(low=-np.inf, high=np.inf, shape=(6,), dtype=np.float64)
MujocoEnv.__init__(
self,
path.join(path.dirname(__file__), "assets", "mo_reacher.xml"),
2,
observation_space=self.observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
actions = [-1.0, 0.0, 1.0]
self.action_dict = dict()
for a1 in actions:
for a2 in actions:
self.action_dict[len(self.action_dict)] = (a1, a2)
self.action_space = Discrete(9)
# Target goals: x1, y1, x2, y2, ... x4, y4
self.goal = np.array([0.14, 0.0, -0.14, 0.0, 0.0, 0.14, 0.0, -0.14])
self.reward_space = Box(low=-1.0, high=1.0, shape=(4,))
self.reward_dim = 4

def step(self, a):
real_action = self.action_dict[int(a)]
vec_reward = np.array(
[
1 - 4 * np.linalg.norm(self.get_body_com("fingertip")[:2] - self.get_body_com("target1")[:2]),
1 - 4 * np.linalg.norm(self.get_body_com("fingertip")[:2] - self.get_body_com("target2")[:2]),
1 - 4 * np.linalg.norm(self.get_body_com("fingertip")[:2] - self.get_body_com("target3")[:2]),
1 - 4 * np.linalg.norm(self.get_body_com("fingertip")[:2] - self.get_body_com("target4")[:2]),
],
dtype=np.float32,
)

self._step_mujoco_simulation(real_action, self.frame_skip)
if self.render_mode == "human":
self.render()

ob = self._get_obs()
return (
ob,
vec_reward,
False,
False,
{},
)

def reset_model(self):
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
qpos[:2] = np.array([0, 3.1415 / 2]) # init position
qpos[-len(self.goal) :] = self.goal
qvel = self.init_qvel + self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv)
qvel[-len(self.goal) :] = 0
self.set_state(qpos, qvel)
return self._get_obs()

def _get_obs(self):
theta = self.data.qpos.flatten()[:2]
return np.concatenate(
[
np.cos(theta),
np.sin(theta),
self.data.qvel.flatten()[:2] * 0.1,
]
)
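
A short sketch of the discrete action encoding described above (indices enumerate all (-1, 0, +1) torque pairs; assumes a standard MO-Gymnasium install):

import mo_gymnasium as mo_gym

env = mo_gym.make("mo-reacher-v5")
obs, info = env.reset(seed=0)

# Index 0 maps to (-1, -1), index 4 to (0, 0), index 8 to (+1, +1).
print(env.unwrapped.action_dict[4])

# One reward entry per target (cf. the reward_space declared above).
obs, vec_reward, terminated, truncated, info = env.step(4)
print(vec_reward.shape)  # (4,)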