diff --git a/ChangeLog.md b/ChangeLog.md
index b8c0d368..bebc67a4 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -1,5 +1,9 @@
 # ChangeLog
 
+### v2.2.0
+
+Upgraded to gymnasium format
+
 ### v2.1.9
 
 Fix bug where setuptools was causing runtime errors
diff --git a/docs/advanced_specs.md b/docs/advanced_specs.md
index ea8fcf88..f59b84b3 100644
--- a/docs/advanced_specs.md
+++ b/docs/advanced_specs.md
@@ -12,7 +12,7 @@ There are two ways to create the LuxAI environment, of which the recommended way
 from luxai_s2 import LuxAI_S2
 custom_env_cfg = dict()
 env = LuxAI_S2(collect_stats=False, **custom_env_cfg)
-env.reset()
+obs, _ = env.reset()
 ```
 
 where `collect_stats=True` will collect aggregate stats for an episode stored in `env.state.stats` and `custom_env_cfg` can be a custom env configuration to override the default. The custom env configuration may only replace existing keys as defined in [config.py](https://github.com/Lux-AI-Challenge/Lux-Design-S2/blob/main/luxai_s2/luxai_s2/config.py).
@@ -23,7 +23,7 @@ The other way to create an environment is to do
 import luxai_s2
 custom_env_cfg = dict()
 env = gym.make("LuxAI_S2-v0", collect_stats=False, **custom_env_cfg)
-env.reset()
+obs, _ = env.reset()
 ```
 
 Upon creation, an empty `State` object is created and the default agent names given are `"player_0", "player_1"`.
diff --git a/kits/js/main.py b/kits/js/main.py
index 8f8f47cc..d25eaf88 100644
--- a/kits/js/main.py
+++ b/kits/js/main.py
@@ -121,7 +121,7 @@ def agent(observation, configuration):
         env.env_steps = env.state.env_steps
         obs_inputs = [obs_inputs]
         for _ in range(FORWARD_SIM):
-            obs, _, _, _ = env.step(dict(player_0=dict(), player_1=dict()))
+            obs, _, _, _, _ = env.step(dict(player_0=dict(), player_1=dict()))
             obs_inputs.append(to_json(obs[observation.player]))
     # except:
     #     pass
diff --git a/kits/python/lux/forward_sim.py b/kits/python/lux/forward_sim.py
index d0545714..0e10da8c 100644
--- a/kits/python/lux/forward_sim.py
+++ b/kits/python/lux/forward_sim.py
@@ -22,6 +22,6 @@ def forward_sim(full_obs, env_cfg, n=2):
         if len(env.agents) == 0:
             # can't step any further
             return [full_obs]
-        obs, _, _, _ = env.step(empty_actions)
+        obs, _, _, _, _ = env.step(empty_actions)
         forward_obs.append(obs[agent])
     return forward_obs
\ No newline at end of file
diff --git a/kits/rl/sb3/train.py b/kits/rl/sb3/train.py
index 6a14f642..5a231c60 100644
--- a/kits/rl/sb3/train.py
+++ b/kits/rl/sb3/train.py
@@ -6,12 +6,12 @@
 import copy
 import os.path as osp
 
-import gym
+import gymnasium as gym
 import numpy as np
 import torch as th
 import torch.nn as nn
-from gym import spaces
-from gym.wrappers import TimeLimit
+from gymnasium import spaces
+from gymnasium.wrappers import TimeLimit
 from luxai_s2.state import ObservationStateDict, StatsStateDict
 from luxai_s2.utils.heuristics.factory_placement import place_near_random_ice
 from luxai_s2.wrappers import SB3Wrapper
@@ -54,9 +54,11 @@ def step(self, action):
         # submit actions for just one agent to make it single-agent
         # and save single-agent versions of the data below
         action = {agent: action}
-        obs, _, done, info = self.env.step(action)
+        obs, _, termination, truncation, info = self.env.step(action)
+        done = dict()
+        for k in termination:
+            done[k] = termination[k] | truncation[k]
         obs = obs[agent]
-        done = done[agent]
 
         # we collect stats on teams here. These are useful stats that can be used to help generate reward functions
         stats: StatsStateDict = self.env.state.stats[agent]
@@ -87,12 +89,13 @@ def step(self, action):
         reward = ice_dug_this_step / 100 + water_produced_this_step
 
         self.prev_step_metrics = copy.deepcopy(metrics)
-        return obs, reward, done, info
+        return obs, reward, termination[agent], truncation[agent], info
 
     def reset(self, **kwargs):
-        obs = self.env.reset(**kwargs)["player_0"]
+        obs, reset_info = self.env.reset(**kwargs)
+        obs = obs["player_0"]
         self.prev_step_metrics = None
-        return obs
+        return obs, reset_info
 
 
 def parse_args():
diff --git a/kits/rl/sb3/wrappers/controllers.py b/kits/rl/sb3/wrappers/controllers.py
index 899bfa1d..15a8e5fa 100644
--- a/kits/rl/sb3/wrappers/controllers.py
+++ b/kits/rl/sb3/wrappers/controllers.py
@@ -3,7 +3,7 @@
 import numpy as np
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 
 # Controller class copied here since you won't have access to the luxai_s2 package directly on the competition server
diff --git a/kits/rl/sb3/wrappers/obs_wrappers.py b/kits/rl/sb3/wrappers/obs_wrappers.py
index f6c889da..0e1e5354 100644
--- a/kits/rl/sb3/wrappers/obs_wrappers.py
+++ b/kits/rl/sb3/wrappers/obs_wrappers.py
@@ -1,9 +1,9 @@
 from typing import Any, Dict
 
-import gym
+import gymnasium as gym
 import numpy as np
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 
 class SimpleUnitObservationWrapper(gym.ObservationWrapper):
diff --git a/luxai_s2/luxai_runner/bot.py b/luxai_s2/luxai_runner/bot.py
index 81813f8b..2f020174 100644
--- a/luxai_s2/luxai_runner/bot.py
+++ b/luxai_s2/luxai_runner/bot.py
@@ -44,8 +44,8 @@ def __init__(
             direct_import_python_bots=direct_import_python_bots,
         )
         # timing
-        self.remainingOverageTime = 60
-        self.time_per_step = 3
+        self.remainingOverageTime = 600
+        self.time_per_step = 9
 
         self.log = Logger(
             identifier=f"{self.agent}, {self.main_file_path}", verbosity=verbose
diff --git a/luxai_s2/luxai_runner/episode.py b/luxai_s2/luxai_runner/episode.py
index d12531a6..f7fc8bdd 100644
--- a/luxai_s2/luxai_runner/episode.py
+++ b/luxai_s2/luxai_runner/episode.py
@@ -6,7 +6,7 @@
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional
 
-import gym
+import gymnasium as gym
 import numpy as np
 from luxai_runner.bot import Bot
 from luxai_runner.logger import Logger
@@ -105,7 +105,7 @@ async def run(self):
 
         metadata = dict()
 
-        obs = self.env.reset(seed=self.seed)
+        obs, _ = self.env.reset(seed=self.seed)
         env_cfg = self.env.state.env_cfg
         state_obs = self.env.state.get_compressed_obs()
         obs = to_json(state_obs)
@@ -165,7 +165,10 @@ async def run(self):
                     else:
                         print(f"{agent_id} sent a invalid action {action}")
                         actions[agent_id] = None
-            new_state_obs, rewards, dones, infos = self.env.step(actions)
+            new_state_obs, rewards, terminations, truncations, infos = self.env.step(actions)
+            dones = dict()
+            for k in terminations:
+                dones[k] = terminations[k] | truncations[k]
             change_obs = self.env.state.get_change_obs(state_obs)
             state_obs = new_state_obs["player_0"]
             obs = to_json(change_obs)
diff --git a/luxai_s2/luxai_s2/env.py b/luxai_s2/luxai_s2/env.py
index cd96248e..6a261bcc 100644
--- a/luxai_s2/luxai_s2/env.py
+++ b/luxai_s2/luxai_s2/env.py
@@ -207,7 +207,7 @@ def reset(self, seed=None):
             self.state.stats[agent] = create_empty_stats()
         obs = self.state.get_obs()
         observations = {agent: obs for agent in self.agents}
-        return observations
+        return observations, {}
 
     def log_error(self, *m):
         if self.env_cfg.verbose > 0:
@@ -762,7 +762,8 @@ def step(
         Dict[str, ObservationStateDict],
         Dict[str, float],
         Dict[str, bool],
-        Dict[str, Any],
+        Dict[str, bool],
+        Dict[str, dict],
     ]:
         """
         step(action) takes in an action for each agent and should return the
@@ -996,8 +997,8 @@ def step(
         env_done = (
             env_done or failed_agents["player_0"] or failed_agents["player_1"]
         )  # env is done if any agent fails.
-        dones = {agent: env_done or failed_agents[agent] for agent in self.agents}
-
+        terminations = {agent: env_done or failed_agents[agent] for agent in self.agents}
+        truncations = {agent: False or failed_agents[agent] for agent in self.agents}
         # generate observations
         obs = self.state.get_obs()
         observations = {}
@@ -1010,7 +1011,7 @@ def step(
 
         if env_done:
             self.agents = []
-        return observations, rewards, dones, infos
+        return observations, rewards, terminations, truncations, infos
 
     ### Game Logic ###
     def add_unit(self, team: Team, unit_type, pos: np.ndarray):
@@ -1110,7 +1111,7 @@ def raw_env() -> LuxAI_S2:
     return env
 
 
-import gym
+import gymnasium as gym
 
 gym.register(
     id="LuxAI_S2-v0",
diff --git a/luxai_s2/luxai_s2/spaces/act_space.py b/luxai_s2/luxai_s2/spaces/act_space.py
index 97c8490b..fc4dc4ad 100644
--- a/luxai_s2/luxai_s2/spaces/act_space.py
+++ b/luxai_s2/luxai_s2/spaces/act_space.py
@@ -1,9 +1,9 @@
 import random
 from typing import Any, Dict, List
 
-import gym
+import gymnasium as gym
 import numpy as np
-from gym import spaces
+from gymnasium import spaces
 from luxai_s2.config import EnvConfig
 from luxai_s2.factory import Factory
diff --git a/luxai_s2/luxai_s2/spaces/obs_space.py b/luxai_s2/luxai_s2/spaces/obs_space.py
index d3586a72..8bd1d3b6 100644
--- a/luxai_s2/luxai_s2/spaces/obs_space.py
+++ b/luxai_s2/luxai_s2/spaces/obs_space.py
@@ -2,7 +2,7 @@
 from typing import Any, List
 
 import numpy as np
-from gym import spaces
+from gymnasium import spaces
 from luxai_s2.config import EnvConfig
 from luxai_s2.spaces.act_space import ActionsQueue, FactionString
diff --git a/luxai_s2/luxai_s2/wrappers/controllers.py b/luxai_s2/luxai_s2/wrappers/controllers.py
index d463bfca..f975a94e 100644
--- a/luxai_s2/luxai_s2/wrappers/controllers.py
+++ b/luxai_s2/luxai_s2/wrappers/controllers.py
@@ -1,7 +1,7 @@
 from typing import Dict, Any
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 
 class Controller:
     def __init__(self, action_space: spaces.Space) -> None:
diff --git a/luxai_s2/luxai_s2/wrappers/sb3.py b/luxai_s2/luxai_s2/wrappers/sb3.py
index 9da6965f..af0399bb 100644
--- a/luxai_s2/luxai_s2/wrappers/sb3.py
+++ b/luxai_s2/luxai_s2/wrappers/sb3.py
@@ -1,9 +1,9 @@
 from typing import Callable, Dict
 
-import gym
+import gymnasium as gym
 import numpy as np
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 import luxai_s2.env
 from luxai_s2.env import LuxAI_S2
@@ -92,21 +92,21 @@ def step(self, action: Dict[str, npt.NDArray]):
                 lux_action[agent] = dict()
 
         # lux_action is now a dict mapping agent name to an action
-        obs, reward, done, info = self.env.step(lux_action)
+        obs, reward, terminated, truncated, info = self.env.step(lux_action)
         self.prev_obs = obs
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
     def reset(self, **kwargs):
         # we upgrade the reset function here
 
         # we call the original reset function first
-        obs = self.env.reset(**kwargs)
+        obs, _ = self.env.reset(**kwargs)
 
         # then use the bid policy to go through the bidding phase
         action = dict()
         for agent in self.env.agents:
             action[agent] = self.bid_policy(agent, obs[agent])
-        obs, _, _, _ = self.env.step(action)
+        obs, _, _, _, _ = self.env.step(action)
 
         # while real_env_steps < 0, we are in the factory placement phase
         # so we use the factory placement policy to step through this
@@ -120,7 +120,7 @@ def reset(self, **kwargs):
                     action[agent] = self.factory_placement_policy(agent, obs[agent])
                 else:
                     action[agent] = dict()
-            obs, _, _, _ = self.env.step(action)
+            obs, _, _, _, _ = self.env.step(action)
 
         self.prev_obs = obs
-        return obs
+        return obs, {}
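
For downstream users of this patch, the loop below is a minimal, illustrative sketch (not part of the diff) of how code now interacts with the upgraded environment: `reset` returns an `(obs, info)` pair and `step` returns a five-tuple with separate termination and truncation dicts, which callers combine per agent the same way the runner and kit files above do.

```python
from luxai_s2 import LuxAI_S2

env = LuxAI_S2(collect_stats=False)

# gymnasium-style reset: returns (observations, info) instead of just observations
obs, _ = env.reset(seed=0)

done = False
while not done:
    # empty per-agent actions purely for illustration; a real bot builds real actions
    actions = {agent: dict() for agent in env.agents}

    # gymnasium-style step: five-tuple with separate termination/truncation dicts
    obs, rewards, terminations, truncations, infos = env.step(actions)

    # an agent is finished when it is either terminated or truncated
    dones = {k: terminations[k] or truncations[k] for k in terminations}
    done = all(dones.values()) if dones else True
```

Any wrapper or kit that previously unpacked a four-tuple from `step`, or a bare observation dict from `reset`, needs the same adjustment shown in the hunks above.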