Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Mujoco v5 environments #85

Merged
merged 39 commits into from
Oct 28, 2024
Merged
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
531aa68
Hopper and HalfCheetah v5
LucasAlegre Feb 18, 2024
cf081b0
Merge branch 'main' into mujoco-v5
LucasAlegre May 21, 2024
67c50f3
Fix wrapper imports
LucasAlegre May 22, 2024
6285bf4
Fix mo-reacher-v0 reset
LucasAlegre May 22, 2024
8096b2c
Bump LunarLander to v3
LucasAlegre May 22, 2024
a20e9e7
Mario subclass Env
LucasAlegre May 22, 2024
bf2dcc9
Skip highway tests
LucasAlegre May 22, 2024
efe8bb7
Migrate wrappers
ffelten May 23, 2024
b2f2b53
WIP
ffelten May 23, 2024
062849a
Rollback Vector env constructor
ffelten May 23, 2024
7fbaf38
Tests are passing
ffelten May 23, 2024
7e9f5b8
Remove comments
ffelten May 23, 2024
f6914a4
Export wrappers
ffelten May 23, 2024
bbaab1e
Update to use Gymnasium v1.0.0a1
pseudo-rnd-thoughts May 28, 2024
f72773a
Better doc and tests for vector wrappers
ffelten Aug 7, 2024
f442ea4
Enhance wrappers doc and tests
ffelten Aug 7, 2024
9b9a3ea
Remove print
ffelten Aug 7, 2024
98a695e
Fix test
ffelten Aug 8, 2024
c974b3b
Merge branch 'main' into gymnasium-v5
Aug 9, 2024
7480c64
Remove pybullet mo-reacher
Aug 9, 2024
4e39d18
Require highway-env >= 1.9.1
Aug 12, 2024
a870455
Merge main
ffelten Aug 13, 2024
dbddf3a
test type
ffelten Aug 13, 2024
57870fe
Merge branch 'gymnasium-v5' into mujoco-v5
Aug 14, 2024
fbac985
Add Mujoco v5 environments
Aug 16, 2024
eab4592
pre-commit
Aug 16, 2024
d615b48
Merge branch 'main' into mujoco-v5
LucasAlegre Oct 16, 2024
7931b98
Merge branch 'mujoco-v5' of https://github.com/Farama-Foundation/MO-Gymnasium
LucasAlegre Oct 16, 2024
f4261ba
Do not treat humanoid contact force as separate objective
LucasAlegre Oct 25, 2024
5188672
Do not treat reward contact as separate objective in ant-v5
LucasAlegre Oct 25, 2024
311f378
Env ids and variable names refactor
LucasAlegre Oct 27, 2024
b18a31c
hotfix walker2d energy cost
LucasAlegre Oct 27, 2024
d7135c6
Get cost from info dict hopper-v5
LucasAlegre Oct 27, 2024
76e7dc9
Refactor _cost_objective variable
LucasAlegre Oct 27, 2024
b653155
Update HalfCheetah cost to be consistent with other envs
LucasAlegre Oct 28, 2024
c127ae2
Update ant-v5 docs
LucasAlegre Oct 28, 2024
88d0de5
Update docs about recovering original envs
LucasAlegre Oct 28, 2024
24f66b1
Document 2obj version of ant and hopper
LucasAlegre Oct 28, 2024
a515cf1
Fix typo
LucasAlegre Oct 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Better doc and tests for vector wrappers
  • Loading branch information
ffelten committed Aug 7, 2024
commit f72773a096e16307736760da1129aafb86fac2ae
28 changes: 26 additions & 2 deletions mo_gymnasium/wrappers/vector/wrappers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Vector wrappers."""

import time
from copy import deepcopy
from typing import Any, Dict, Iterator, Tuple
@@ -13,7 +14,29 @@


class MOSyncVectorEnv(SyncVectorEnv):
"""Vectorized environment that serially runs multiple environments."""
"""Vectorized environment that serially runs multiple environments.

Example:
>>> import mo_gymnasium as mo_gym

>>> envs = mo_gym.wrappers.vector.MOSyncVectorEnv([
... lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(4)
... ])
>>> envs
MOSyncVectorEnv(num_envs=4)
>>> obs, infos = envs.reset()
>>> obs
array([[0, 0], [0, 0], [0, 0], [0, 0]], dtype=int32)
>>> _ = envs.action_space.seed(42)
>>> actions = envs.action_space.sample()
>>> obs, rewards, terminateds, truncateds, infos = envs.step([0, 1, 2, 3])
>>> obs
array([[0, 0], [1, 0], [0, 0], [0, 3]], dtype=int32)
>>> rewards
array([[0., -1.], [0.7, -1.], [0., -1.], [0., -1.]], dtype=float32)
>>> terminateds
array([False, True, False, False])
"""

def __init__(
self,
@@ -124,6 +147,7 @@ def __init__(
"""
gym.utils.RecordConstructorArgs.__init__(self, buffer_length=buffer_length, stats_key=stats_key)
RecordEpisodeStatistics.__init__(self, env, buffer_length=buffer_length, stats_key=stats_key)
self.disc_episode_returns = None
self.reward_dim = self.env.unwrapped.reward_space.shape[0]
self.rewards_shape = (self.num_envs, self.reward_dim)
self.gamma = gamma
@@ -156,12 +180,12 @@ def step(self, actions: ActType) -> Tuple[ObsType, ArrayType, ArrayType, ArrayTy
self.episode_lengths[self.prev_dones] = 0
self.episode_start_times[self.prev_dones] = time.perf_counter()
self.episode_returns[~self.prev_dones] += rewards[~self.prev_dones]
self.episode_lengths[~self.prev_dones] += 1

# CHANGE: The discounted returns are also computed here
self.disc_episode_returns += rewards * np.repeat(self.gamma**self.episode_lengths, self.reward_dim).reshape(
self.episode_returns.shape
)
self.episode_lengths[~self.prev_dones] += 1

self.prev_dones = dones = np.logical_or(terminations, truncations)
num_dones = np.sum(dones)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -23,7 +23,7 @@ classifiers = [
]
dependencies = [
"gymnasium >=1.0.0a1",
"numpy >=1.21.0",
"numpy >=1.21.0,<2.0",
"pygame >=2.1.0",
"scipy >=1.7.3",
"pymoo >=0.6.0",
63 changes: 48 additions & 15 deletions tests/test_vector_wrappers.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,53 @@
import gymnasium as gym
import numpy as np

import mo_gymnasium as mo_gym
from mo_gymnasium.wrappers.vector import MORecordEpisodeStatistics, MOSyncVectorEnv


def test_mo_sync_wrapper():
def make_env(env_id):
def thunk():
env = mo_gym.make(env_id)
return env

return thunk

num_envs = 3
envs = MOSyncVectorEnv([make_env("deep-sea-treasure-v0") for _ in range(num_envs)])
envs = MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(num_envs)])

envs.reset()
obs, rewards, terminateds, truncateds, infos = envs.step(envs.action_space.sample())
assert len(obs) == num_envs, "Number of observations do not match the number of envs"
assert len(rewards) == num_envs, "Number of rewards do not match the number of envs"
assert len(terminateds) == num_envs, "Number of terminateds do not match the number of envs"
assert len(truncateds) == num_envs, "Number of truncateds do not match the number of envs"
envs.close()


def test_mo_record_ep_statistic_vector_env():
def make_env(env_id):
def thunk():
env = mo_gym.make(env_id)
return env
def test_mo_sync_autoreset():
num_envs = 2
envs = MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(num_envs)])

obs, infos = envs.reset()
assert (obs[0] == [0, 0]).all()
assert (obs[1] == [0, 0]).all()
obs, rewards, terminateds, truncateds, infos = envs.step([0, 1])
assert (obs[0] == [0, 0]).all()
assert (obs[1] == [1, 0]).all()
# Use np assert almost equal to avoid floating point errors
np.testing.assert_almost_equal(rewards[0], np.array([0.0, -1.0], dtype=np.float32), decimal=2)
np.testing.assert_almost_equal(rewards[1], np.array([0.7, -1.0], dtype=np.float32), decimal=2)
assert not terminateds[0]
assert terminateds[1] # This one is done
assert not truncateds[0]
assert not truncateds[1]
obs, rewards, terminateds, truncateds, infos = envs.step([0, 1])
assert (obs[0] == [0, 0]).all()
assert (obs[1] == [0, 0]).all()
assert (rewards[0] == [0.0, -1.0]).all()
assert (rewards[1] == [0.0, 0.0]).all() # Reset step
assert not terminateds[0]
assert not terminateds[1] # Not done anymore
envs.close()

return thunk

def test_mo_record_ep_statistic_vector_env():
num_envs = 3
envs = MOSyncVectorEnv([make_env("deep-sea-treasure-v0") for _ in range(num_envs)])
envs = MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(num_envs)])
envs = MORecordEpisodeStatistics(envs)

envs.reset()
@@ -48,3 +63,21 @@ def thunk():
assert info["episode"]["dr"].shape == (num_envs, 2)
assert isinstance(info["episode"]["l"], np.ndarray)
assert isinstance(info["episode"]["t"], np.ndarray)
envs.close()


def test_gym_wrapper_and_vector():
# This tests the integration of gym-wrapped envs with MO-Gymnasium vectorized envs
num_envs = 2
envs = MOSyncVectorEnv(
[lambda: gym.wrappers.NormalizeObservation(mo_gym.make("deep-sea-treasure-v0")) for _ in range(num_envs)]
)

envs.reset()
for i in range(30):
obs, rewards, terminateds, truncateds, infos = envs.step(envs.action_space.sample())
assert len(obs) == num_envs, "Number of observations do not match the number of envs"
assert len(rewards) == num_envs, "Number of rewards do not match the number of envs"
assert len(terminateds) == num_envs, "Number of terminateds do not match the number of envs"
assert len(truncateds) == num_envs, "Number of truncateds do not match the number of envs"
envs.close()
Loading
Oops, something went wrong.