Merge pull request #34 from Farama-Foundation/feature/pickle-all-envs

Add EzPickle to all envs
Farama-Foundation · Feb 10, 2023 · 6dc669c · 6dc669c
2 parents 9070a74 + 6d1b876
commit 6dc669c
Show file tree

Hide file tree

Showing 18 changed files with 97 additions and 23 deletions.
diff --git a/mo_gymnasium/envs/breakable_bottles/breakable_bottles.py b/mo_gymnasium/envs/breakable_bottles/breakable_bottles.py
@@ -3,9 +3,10 @@
 import numpy as np
 from gymnasium import Env
 from gymnasium.spaces import Box, Dict, Discrete, MultiBinary
+from gymnasium.utils import EzPickle
 
 
-class BreakableBottles(Env):
+class BreakableBottles(Env, EzPickle):
     """
     ## Description
     This environment implements the problems UnbreakableBottles and BreakableBottles defined in Section 4.1.2 of the paper
@@ -64,6 +65,8 @@ def __init__(
         bottle_reward=25,
         unbreakable_bottles=False,
     ):
+        EzPickle.__init__(self, render_mode, size, prob_drop, time_penalty, bottle_reward, unbreakable_bottles)
+
         self.render_mode = render_mode
 
         # settings

diff --git a/mo_gymnasium/envs/continuous_mountain_car/continuous_mountain_car.py b/mo_gymnasium/envs/continuous_mountain_car/continuous_mountain_car.py
@@ -6,9 +6,10 @@
 from gymnasium.envs.classic_control.continuous_mountain_car import (
     Continuous_MountainCarEnv,
 )
+from gymnasium.utils import EzPickle
 
 
-class MOContinuousMountainCar(Continuous_MountainCarEnv):
+class MOContinuousMountainCar(Continuous_MountainCarEnv, EzPickle):
     """
     A continuous version of the MountainCar environment, where the goal is to reach the top of the mountain.
 
@@ -22,6 +23,7 @@ class MOContinuousMountainCar(Continuous_MountainCarEnv):
 
     def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
         super().__init__(render_mode, goal_velocity)
+        EzPickle.__init__(self, render_mode, goal_velocity)
 
         self.reward_space = spaces.Box(low=np.array([-1.0, -1.0]), high=np.array([0.0, 0.0]), shape=(2,), dtype=np.float32)
 

diff --git a/mo_gymnasium/envs/deep_sea_treasure/deep_sea_treasure.py b/mo_gymnasium/envs/deep_sea_treasure/deep_sea_treasure.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pygame
 from gymnasium.spaces import Box, Discrete
+from gymnasium.utils import EzPickle
 
 
 # As in Yang et al. (2019):
@@ -42,7 +43,7 @@
 )
 
 
-class DeepSeaTreasure(gym.Env):
+class DeepSeaTreasure(gym.Env, EzPickle):
     """
     ## Description
     The Deep Sea Treasure environment is a classic MORL problem in which the agent controls a submarine in a 2D grid world.
@@ -79,6 +80,8 @@ class DeepSeaTreasure(gym.Env):
     metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}
 
     def __init__(self, render_mode: Optional[str] = None, dst_map=DEFAULT_MAP, float_state=False):
+        EzPickle.__init__(self, render_mode, dst_map, float_state)
+
         self.render_mode = render_mode
         self.size = 11
         self.window_size = 512

diff --git a/mo_gymnasium/envs/fishwood/fishwood.py b/mo_gymnasium/envs/fishwood/fishwood.py
@@ -3,9 +3,10 @@
 import gymnasium as gym
 import numpy as np
 from gymnasium import spaces
+from gymnasium.utils import EzPickle
 
 
-class FishWood(gym.Env):
+class FishWood(gym.Env, EzPickle):
     """
     ## Description
     The FishWood environment is a simple MORL problem in which the agent controls a fisherman who can either fish or go collect wood.
@@ -46,6 +47,8 @@ class FishWood(gym.Env):
     MAX_TS = 200
 
     def __init__(self, render_mode: Optional[str] = None, fishproba=0.1, woodproba=0.9):
+        EzPickle.__init__(self, render_mode, fishproba, woodproba)
+
         self.render_mode = render_mode
         self._fishproba = fishproba
         self._woodproba = woodproba

diff --git a/mo_gymnasium/envs/four_room/four_room.py b/mo_gymnasium/envs/four_room/four_room.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pygame
 from gymnasium.spaces import Box, Discrete
+from gymnasium.utils import EzPickle
 
 
 MAZE = np.array(
@@ -30,7 +31,7 @@
 BLACK = (0, 0, 0)
 
 
-class FourRoom(gym.Env):
+class FourRoom(gym.Env, EzPickle):
     """
     ## Description
     A discretized version of the gridworld environment introduced in [1]. Here, an agent learns to
@@ -85,6 +86,8 @@ def __init__(self, render_mode: Optional[str] = None, maze=MAZE):
                 0, 1, .... 9 indicates the type of shape to be placed in the corresponding cell
                 entries containing other characters are treated as regular empty cells
         """
+        EzPickle.__init__(self, render_mode, maze)
+
         self.render_mode = render_mode
         self.window_size = 512
         self.window = None

diff --git a/mo_gymnasium/envs/fruit_tree/fruit_tree.py b/mo_gymnasium/envs/fruit_tree/fruit_tree.py
@@ -2,6 +2,7 @@
 import gymnasium as gym
 import numpy as np
 from gymnasium import spaces
+from gymnasium.utils import EzPickle
 
 
 FRUITS = {
@@ -238,7 +239,7 @@
 }
 
 
-class FruitTreeEnv(gym.Env):
+class FruitTreeEnv(gym.Env, EzPickle):
     """
     ## Description
 
@@ -263,6 +264,8 @@ class FruitTreeEnv(gym.Env):
 
     def __init__(self, depth=6):
         assert depth in [5, 6, 7], "Depth must be 5, 6 or 7."
+        EzPickle.__init__(self, depth)
+
         self.reward_dim = 6
         self.tree_depth = depth  # zero based depth
         branches = np.zeros((int(2**self.tree_depth - 1), self.reward_dim))

diff --git a/mo_gymnasium/envs/highway/highway.py b/mo_gymnasium/envs/highway/highway.py
@@ -12,10 +12,11 @@
     Text,
     Tuple,
 )
+from gymnasium.utils import EzPickle
 from highway_env.envs import HighwayEnv, HighwayEnvFast
 
 
-class MOHighwayEnv(HighwayEnv):
+class MOHighwayEnv(HighwayEnv, EzPickle):
     """
     ## Description
     Multi-objective version of the HighwayEnv environment.
@@ -30,6 +31,8 @@ class MOHighwayEnv(HighwayEnv):
     """
 
     def __init__(self, *args, **kwargs):
+        EzPickle.__init__(self, *args, **kwargs)
+
         super().__init__(*args, **kwargs)
         self.reward_space = Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)
         self.observation_space = _convert_space(self.observation_space)

diff --git a/mo_gymnasium/envs/lunar_lander/lunar_lander.py b/mo_gymnasium/envs/lunar_lander/lunar_lander.py
@@ -16,7 +16,7 @@
 )
 
 
-class MOLunarLander(LunarLander):
+class MOLunarLander(LunarLander):  # no need for EzPickle, it's already in LunarLander
     """
     ## Description
     Multi-objective version of the LunarLander environment.

diff --git a/mo_gymnasium/envs/mario/mario.py b/mo_gymnasium/envs/mario/mario.py
@@ -4,7 +4,7 @@
 import numpy as np
 from gym_super_mario_bros import SuperMarioBrosEnv
 from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
-from gymnasium.utils import seeding
+from gymnasium.utils import EzPickle, seeding
 
 # from stable_baselines3.common.atari_wrappers import MaxAndSkipEnv
 from gymnasium.wrappers import GrayScaleObservation, ResizeObservation
@@ -16,7 +16,7 @@
 from mo_gymnasium.envs.mario.joypad_space import JoypadSpace
 
 
-class MOSuperMarioBros(SuperMarioBrosEnv):
+class MOSuperMarioBros(SuperMarioBrosEnv, EzPickle):
     """
     ## Description
     Multi-objective version of the SuperMarioBros environment.
@@ -45,6 +45,7 @@ def __init__(
         objectives=["x_pos", "time", "death", "coin", "enemy"],
         render_mode: Optional[str] = None,
     ):
+        EzPickle.__init__(self, rom_mode, lost_levels, target, objectives, render_mode)
         super().__init__(rom_mode, lost_levels, target)
 
         self.render_mode = render_mode

diff --git a/mo_gymnasium/envs/minecart/minecart.py b/mo_gymnasium/envs/minecart/minecart.py
@@ -11,6 +11,7 @@
 import pygame
 import scipy.stats
 from gymnasium.spaces import Box, Discrete
+from gymnasium.utils import EzPickle
 from scipy.spatial import ConvexHull
 
 
@@ -86,7 +87,7 @@
 MINE_IMG = str(Path(__file__).parent.absolute()) + "/assets/mine.png"
 
 
-class Minecart(gym.Env):
+class Minecart(gym.Env, EzPickle):
     """
     ## Description
     Agent must collect two types of ores and minimize fuel consumption.
@@ -133,6 +134,8 @@ def __init__(
         image_observation=False,
         config=str(Path(__file__).parent.absolute()) + "/mine_config.json",
     ):
+        EzPickle.__init__(self, render_mode, image_observation, config)
+
         self.render_mode = render_mode
         self.screen = None
         self.last_render_mode_used = None

diff --git a/mo_gymnasium/envs/mountain_car/mountain_car.py b/mo_gymnasium/envs/mountain_car/mountain_car.py
@@ -4,9 +4,10 @@
 import numpy as np
 from gymnasium import spaces
 from gymnasium.envs.classic_control.mountain_car import MountainCarEnv
+from gymnasium.utils import EzPickle
 
 
-class MOMountainCar(MountainCarEnv):
+class MOMountainCar(MountainCarEnv, EzPickle):
     """
     A multi-objective version of the MountainCar environment, where the goal is to reach the top of the mountain.
 
@@ -21,6 +22,7 @@ class MOMountainCar(MountainCarEnv):
 
     def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
         super().__init__(render_mode, goal_velocity)
+        EzPickle.__init__(self, render_mode, goal_velocity)
 
         self.reward_space = spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32)
 

diff --git a/mo_gymnasium/envs/mujoco/half_cheetah.py b/mo_gymnasium/envs/mujoco/half_cheetah.py
@@ -1,9 +1,10 @@
 import numpy as np
 from gymnasium.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv
 from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
 
 
-class MOHalfCheehtahEnv(HalfCheetahEnv):
+class MOHalfCheehtahEnv(HalfCheetahEnv, EzPickle):
     """
     ## Description
     Multi-objective version of the HalfCheetahEnv environment.
@@ -18,6 +19,7 @@ class MOHalfCheehtahEnv(HalfCheetahEnv):
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
+        EzPickle.__init__(self, **kwargs)
         self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
 
     def step(self, action):

diff --git a/mo_gymnasium/envs/mujoco/hopper.py b/mo_gymnasium/envs/mujoco/hopper.py
@@ -1,9 +1,10 @@
 import numpy as np
 from gymnasium.envs.mujoco.hopper_v4 import HopperEnv
 from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
 
 
-class MOHopperEnv(HopperEnv):
+class MOHopperEnv(HopperEnv, EzPickle):
     """
     ## Description
     Multi-objective version of the HopperEnv environment.
@@ -20,6 +21,7 @@ class MOHopperEnv(HopperEnv):
 
     def __init__(self, cost_objective=True, **kwargs):
         super().__init__(**kwargs)
+        EzPickle.__init__(self, cost_objective, **kwargs)
         self.cost_objetive = cost_objective
         self.rew_dim = 3 if cost_objective else 2
         self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.rew_dim,))

diff --git a/mo_gymnasium/envs/reacher/reacher.py b/mo_gymnasium/envs/reacher/reacher.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 from gymnasium import spaces
+from gymnasium.utils import EzPickle
 from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv
 from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
 from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene
@@ -10,7 +11,7 @@
 target_positions = list(map(lambda l: np.array(l), [(0.14, 0.0), (-0.14, 0.0), (0.0, 0.14), (0.0, -0.14)]))
 
 
-class ReacherBulletEnv(BaseBulletEnv):
+class ReacherBulletEnv(BaseBulletEnv, EzPickle):
 
     metadata = {"render_modes": ["human", "rgb_array"]}
 
@@ -20,6 +21,7 @@ def __init__(
         target=(0.14, 0.0),
         fixed_initial_state: Optional[tuple] = (3.14, 0),
     ):
+        EzPickle.__init__(self, render_mode, target, fixed_initial_state)
         self.robot = ReacherRobot(target, fixed_initial_state=fixed_initial_state)
         self.render_mode = render_mode
         BaseBulletEnv.__init__(self, self.robot, render=render_mode == "human")

diff --git a/mo_gymnasium/envs/resource_gathering/resource_gathering.py b/mo_gymnasium/envs/resource_gathering/resource_gathering.py
@@ -5,9 +5,10 @@
 import numpy as np
 import pygame
 from gymnasium.spaces import Box, Discrete
+from gymnasium.utils import EzPickle
 
 
-class ResourceGathering(gym.Env):
+class ResourceGathering(gym.Env, EzPickle):
     """
     ## Description
     From "Barrett, Leon & Narayanan, Srini. (2008). Learning all optimal policies with multiple criteria.
@@ -43,6 +44,8 @@ class ResourceGathering(gym.Env):
     metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}
 
     def __init__(self, render_mode: Optional[str] = None):
+        EzPickle.__init__(self, render_mode)
+
         self.render_mode = render_mode
         self.size = 5
         self.window_size = 512

diff --git a/mo_gymnasium/envs/water_reservoir/dam_env.py b/mo_gymnasium/envs/water_reservoir/dam_env.py
@@ -3,9 +3,10 @@
 import gymnasium as gym
 import numpy as np
 from gymnasium.spaces.box import Box
+from gymnasium.utils import EzPickle
 
 
-class DamEnv(gym.Env):
+class DamEnv(gym.Env, EzPickle):
     """
     ## Description
     A Water reservoir environment.
@@ -73,6 +74,8 @@ def __init__(
         nO=2,
         penalize: bool = False,
     ):
+        EzPickle.__init__(self, render_mode, time_limit, nO, penalize)
+
         self.observation_space = Box(low=0.0, high=np.inf, shape=(1,), dtype=np.float32)
         self.action_space = Box(low=0, high=np.inf, shape=(1,), dtype=np.float32)