Skip to content

Commit

Permalink
Compatibility with Gym>=0.26 (#10)
Browse files (browse the repository at this point in the history)
* Update gym envs to gym 0.25

* Fix LinearReward wrapper

* Fix hopper

* Fix breakable bottles

* Fix deep sea treasure

* Fix four room

* Fix fruit tree

* Fix mario

* Fix minecart

* Fix mountaincar

* Fix reacher

* Fix resource gathering

* Adapt wrappers to new API

* Fix hypervolume

* Requires gym>=0.26

* New version

* Update test.yml

* Remove comment

Co-authored-by: Florian Felten
  • Loading branch information
LucasAlegre authored Sep 25, 2022
1 parent e9a9b88 commit 12f372c
Show file tree
Hide file tree
Showing 14 changed files with 154 additions and 110 deletions.
30 changes: 15 additions & 15 deletions mo_gym/breakable_bottles/breakable_bottles.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
from os import terminal_size
from typing import Optional
from gym import Env
from gym.spaces import Dict, Discrete, MultiBinary, Box
import numpy as np


class BreakableBottles(Env):
metadata = {"render_modes": ["human", "rgb_array"]}
metadata = {"render_modes": ["human"]}

# actions
LEFT = 0
RIGHT = 1
PICKUP = 2

def __init__(self, size=5, prob_drop=0.1, time_penalty=-1, bottle_reward=25, unbreakable_bottles=False, seed=None):
def __init__(self, render_mode: Optional[str] = None, size=5, prob_drop=0.1, time_penalty=-1, bottle_reward=25, unbreakable_bottles=False):
self.render_mode = render_mode

# settings
self.prob_drop = prob_drop
self.time_penalty = time_penalty
Expand Down Expand Up @@ -101,18 +105,21 @@ def step(self, action):
reward[2] = self.potential(observation) - old_potential

info = {}
return observation, reward, terminal, info
if self.render_mode == "human":
self.render()
return observation, reward, terminal, False, info

def reset(self, seed=None, return_info=False, **kwargs):
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
super().reset(seed=seed)
self.np_random.seed(seed)
self.r_star = 0
self.location = self.size - 1
self.bottles_carrying = 0
self.bottles_delivered = 0
self.bottles_dropped = [0]*(self.size - 2)
state = self._get_obs()
return (state, {}) if return_info else state
if self.render_mode == "human":
self.render()
return state, {}

def get_obs_idx(self, obs):
multi_index = np.array([[obs["location"]],
Expand All @@ -127,22 +134,15 @@ def _get_obs(self):
"bottles_delivered": self.bottles_delivered,
"bottles_dropped": self.bottles_dropped.copy()}

def render(self, mode="human"):
if mode == 'rgb_array':
return np.array([self.state[:3], *self.state[3]]) # return RGB frame suitable for video
elif mode == 'human':
def render(self):
if self.render_mode == 'human':
print("-----")
print(f"Location: {self.location}\nCarrying {self.bottles_carrying} bottles.\nDelivered {self.bottles_delivered} so far.\nBottles have been dropped at tiles {'1' if self.bottles_dropped[0] > 0 else ''} {'2' if self.bottles_dropped[1] > 0 else ''} {'3' if self.bottles_dropped[2] > 0 else ''}")
print("-----")
else:
super(BreakableBottles, self).render(mode=mode) # just raise an exception

def close(self):
pass

def seed(self, seed=None):
self.seed = seed if not seed is None else np.random.randint(2**32)

def potential(self, obs):
if sum(obs["bottles_dropped"]) > 0:
return -1
Expand Down
26 changes: 15 additions & 11 deletions mo_gym/deep_sea_treasure/deep_sea_treasure.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path
from typing import Optional

import gym
import numpy as np
Expand Down Expand Up @@ -46,7 +47,8 @@ class DeepSeaTreasure(gym.Env):

metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

def __init__(self, dst_map=DEFAULT_MAP, float_state=False):
def __init__(self, render_mode: Optional[str] = None, dst_map=DEFAULT_MAP, float_state=False):
self.render_mode = render_mode
self.size = 11
self.window_size = 512
self.window = None
Expand Down Expand Up @@ -87,7 +89,7 @@ def is_valid_state(self, state):
return True
return False

def render(self, mode='human'):
def render(self):
# The size of a single grid square in pixels
pix_square_size = self.window_size / self.size
if self.window is None:
Expand All @@ -97,11 +99,11 @@ def render(self, mode='human'):
self.treasure_img = pygame.image.load(str(Path(__file__).parent.absolute()) + '/assets/treasure.png')
self.treasure_img = pygame.transform.scale(self.treasure_img, (pix_square_size, pix_square_size))

if self.window is None and mode == "human":
if self.window is None and self.render_mode == "human":
pygame.init()
pygame.display.init()
self.window = pygame.display.set_mode((self.window_size, self.window_size))
if self.clock is None and mode == "human":
if self.clock is None and self.render_mode == "human":
self.clock = pygame.time.Clock()

self.font = pygame.font.SysFont(None, 30)
Expand Down Expand Up @@ -142,7 +144,7 @@ def render(self, mode='human'):
width=1,
)

if mode == "human":
if self.render_mode == "human":
# The following line copies our drawings from `canvas` to the visible window
self.window.blit(canvas, canvas.get_rect())
pygame.event.pump()
Expand All @@ -151,7 +153,7 @@ def render(self, mode='human'):
# We need to ensure that human-rendering occurs at the predefined framerate.
# The following line will automatically add a delay to keep the framerate stable.
self.clock.tick(self.metadata["render_fps"])
else: # rgb_array
elif self.render_mode == 'rgb_array':
return np.transpose(
np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
)
Expand All @@ -163,14 +165,15 @@ def get_state(self):
state = self.current_state.copy()
return state

def reset(self, seed=None, return_info=False, **kwargs):
def reset(self, seed=None, **kwargs):
super().reset(seed=seed)
self.np_random.seed(seed)

self.current_state = np.array([0, 0], dtype=np.int32)
self.step_count = 0.0
state = self.get_state()
return (state, {}) if return_info else state
if self.render_mode == "human":
self.render()
return state, {}

def step(self, action):
next_state = self.current_state + self.dir[action]
Expand All @@ -188,8 +191,9 @@ def step(self, action):
vec_reward = np.array([treasure_value, time_penalty], dtype=np.float32)

state = self.get_state()

return state, vec_reward, terminal, {}
if self.render_mode == "human":
self.render()
return state, vec_reward, terminal, False, {}

def close(self):
if self.window is not None:
Expand Down
37 changes: 22 additions & 15 deletions mo_gym/four_room/four_room.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import random
from typing import Optional

import gym
import numpy as np
Expand All @@ -24,6 +25,7 @@
GREEN = (0, 128, 0)
BLACK = (0, 0, 0)


class FourRoom(gym.Env):
"""
A discretized version of the gridworld environment introduced in [1]. Here, an agent learns to
Expand All @@ -41,7 +43,7 @@ class FourRoom(gym.Env):

metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

def __init__(self, maze=MAZE):
def __init__(self, render_mode: Optional[str] = None, maze=MAZE):
"""
Creates a new instance of the shapes environment.
Expand All @@ -56,6 +58,7 @@ def __init__(self, maze=MAZE):
0, 1, .... 9 indicates the type of shape to be placed in the corresponding cell
entries containing other characters are treated as regular empty cells
"""
self.render_mode = render_mode
self.window_size = 512
self.window = None
self.clock = None
Expand Down Expand Up @@ -88,12 +91,13 @@ def state_to_array(self, state):
s = [element for tupl in state for element in tupl]
return np.array(s, dtype=np.int32)

def reset(self, seed=None, return_info=False, **kwargs):
def reset(self, seed=None, **kwargs):
super().reset(seed=seed)
self.np_random.seed(seed)

self.state = (random.choice(self.initial), tuple(0 for _ in range(len(self.shape_ids))))
return (self.state_to_array(self.state), {}) if return_info else self.state_to_array(self.state)
if self.render_mode == 'human':
self.render()
return self.state_to_array(self.state), {}

def step(self, action):
old_state = self.state
Expand All @@ -111,40 +115,43 @@ def step(self, action):
else:
raise Exception('bad action {}'.format(action))

terminated = False

# out of bounds, cannot move
if col < 0 or col >= self.width or row < 0 or row >= self.height:
return self.state_to_array(self.state), np.zeros(len(self.all_shapes), dtype=np.float32), False, {}
return self.state_to_array(self.state), np.zeros(len(self.all_shapes), dtype=np.float32), terminated, False, {}

# into a blocked cell, cannot move
s1 = (row, col)
if s1 in self.occupied:
return self.state_to_array(self.state), np.zeros(len(self.all_shapes), dtype=np.float32), False, {}
return self.state_to_array(self.state), np.zeros(len(self.all_shapes), dtype=np.float32), terminated, False, {}

# can now move
self.state = (s1, collected)

# into a goal cell
if s1 == self.goal:
phi = np.ones(len(self.all_shapes), dtype=np.float32)
return self.state_to_array(self.state), phi, True, {}
terminated = True
return self.state_to_array(self.state), phi, terminated, False, {}

# into a shape cell
if s1 in self.shape_ids:
shape_id = self.shape_ids[s1]
if collected[shape_id] == 1:
# already collected this flag
return self.state_to_array(self.state), np.zeros(len(self.all_shapes), dtype=np.float32), False, {}
return self.state_to_array(self.state), np.zeros(len(self.all_shapes), dtype=np.float32), terminated, False, {}
else:
# collect the new flag
collected = list(collected)
collected[shape_id] = 1
collected = tuple(collected)
self.state = (s1, collected)
phi = self.features(old_state, action, self.state)
return self.state_to_array(self.state), phi, False, {}
return self.state_to_array(self.state), phi, terminated, False, {}

# into an empty cell
return self.state_to_array(self.state), np.zeros(len(self.all_shapes), dtype=np.float32), False, {}
return self.state_to_array(self.state), np.zeros(len(self.all_shapes), dtype=np.float32), terminated, False, {}

def features(self, state, action, next_state):
s1, _ = next_state
Expand All @@ -160,15 +167,15 @@ def features(self, state, action, next_state):
phi[nc] = np.ones(nc, dtype=np.float32)
return phi

def render(self, mode='human'):
def render(self):
# The size of a single grid square in pixels
pix_square_size = self.window_size / 13

if self.window is None and mode == "human":
if self.window is None and self.render_mode == "human":
pygame.init()
pygame.display.init()
self.window = pygame.display.set_mode((self.window_size, self.window_size))
if self.clock is None and mode == "human":
if self.clock is None and self.render_mode == "human":
self.clock = pygame.time.Clock()

canvas = pygame.Surface((self.window_size, self.window_size))
Expand Down Expand Up @@ -232,7 +239,7 @@ def render(self, mode='human'):
width=1,
)

if mode == "human":
if self.render_mode == "human":
# The following line copies our drawings from `canvas` to the visible window
self.window.blit(canvas, canvas.get_rect())
pygame.event.pump()
Expand All @@ -241,7 +248,7 @@ def render(self, mode='human'):
# We need to ensure that human-rendering occurs at the predefined framerate.
# The following line will automatically add a delay to keep the framerate stable.
self.clock.tick(self.metadata["render_fps"])
else: # rgb_array
elif self.render_mode == 'rgb_array':
return np.transpose(
np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
)
Expand Down
7 changes: 3 additions & 4 deletions mo_gym/fruit_tree/fruit_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,13 +268,12 @@ def get_ind(self, pos):
def get_tree_value(self, pos):
return self.tree[self.get_ind(pos)]

def reset(self, seed=None, return_info=False, **kwargs):
def reset(self, seed=None, **kwargs):
super().reset(seed=seed)
self.np_random.seed(seed)

self.current_state = np.array([0, 0], dtype=np.int32)
self.terminal = False
return (self.current_state.copy(), {}) if return_info else self.current_state.copy()
return self.current_state.copy(), {}

def step(self, action):
direction = {
Expand All @@ -288,4 +287,4 @@ def step(self, action):
if self.current_state[0] == self.tree_depth:
self.terminal = True

return self.current_state.copy(), reward, self.terminal, {}
return self.current_state.copy(), reward, self.terminal, False, {}
6 changes: 3 additions & 3 deletions mo_gym/mario/mario.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ def __init__(self, rom_mode='pixel', lost_levels=False, target=None, objectives=
self.single_stage = True
self.done_when_dead = True

def reset(self, seed=None, return_info=False, **kwargs):
def reset(self, seed=None, **kwargs):
self._np_random, seed = seeding.np_random(seed) # this is not used
self.coin = 0
self.x_pos = 0
self.time = 0
self.score = 0
self.stage_bonus = 0
self.lives = 2
return (super().reset(), {}) if return_info else super().reset()
return super().reset(), {}

def step(self, action):
obs, reward, done, info = super().step(action)
Expand Down Expand Up @@ -97,7 +97,7 @@ def step(self, action):

info['score'] = info['score'] + self.stage_bonus

return obs, mor, bool(done), info
return obs, mor, bool(done), False, info


if __name__ == '__main__':
Expand Down
Loading

0 comments on commit 12f372c

Please sign in to comment.