Merge pull request #232 from Lux-AI-Challenge/gymnasium

Gymnasium upgrade
Lux-AI-Challenge · Sep 20, 2023 · 555af09 · 555af09
2 parents c65a833 + 4159fd8
commit 555af09
Show file tree

Hide file tree

Showing 16 changed files with 55 additions and 45 deletions.
diff --git a/ChangeLog.md b/ChangeLog.md
@@ -1,5 +1,9 @@
 # ChangeLog
 
+### v2.2.0
+
+Upgraded to gymnasium format
+
 ### v2.1.9
 
 Fix bug where setuptools was causing runtime errors

diff --git a/README.md b/README.md
@@ -35,19 +35,20 @@ If you use the Lux AI Season 2 environment in your work, please cite this reposi
 
 ## Getting Started
 
-You will need Python >=3.7, <3.11  installed on your system. Once installed, you can install the Lux AI season 2 environment and optionally the GPU version with
+You will need Python >=3.8, <3.11 installed on your system. Once installed, you can install the Lux AI Season 2 environment and optionally the GPU version with
 
 ```
 pip install --upgrade luxai_s2
 pip install juxai-s2 # installs the GPU version, requires a compatible GPU
 ```
 
-If you have `gym` installation issues, we recommend running `pip install setuptools==59.8.0`. If you have issues installing `vec-noise`, make sure to read the error output, it's usually because you are missing some C/C++ build tools. If you use conda, we highly recommend creating an environment based on the [environment.yml file in this repo](https://github.com/Lux-AI-Challenge/Lux-Design-S2/blob/main/environment.yml). If you don't know how conda works, I highly recommend setting it up, see the [install instructions](https://conda.io/projects/conda/en/latest/user-guide/install/index.html#regular-installation).
 
-To create a conda environment and use it run
+If you don't know how conda works, I highly recommend setting it up; see the [install instructions](https://conda.io/projects/conda/en/latest/user-guide/install/index.html#regular-installation). You can then set up the environment as follows:
+
 ```
-conda env create -f environment.yml
+conda create -n "luxai_s2" "python==3.9"
 conda activate luxai_s2
+pip install --upgrade luxai-s2
 ```
 
 

diff --git a/docs/advanced_specs.md b/docs/advanced_specs.md
@@ -12,7 +12,7 @@ There are two ways to create the LuxAI environment, of which the recommended way
 from luxai_s2 import LuxAI_S2
 custom_env_cfg = dict()
 env = LuxAI_S2(collect_stats=False, **custom_env_cfg)
-env.reset()
+obs, _ = env.reset()
 ```
 
 where `collect_stats=True` will collect aggregate stats for an episode stored in `env.state.stats` and `custom_env_cfg` can be a custom env configuration to override the default. The custom env configuration may only replace existing keys as defined in [config.py](https://github.com/Lux-AI-Challenge/Lux-Design-S2/blob/main/luxai_s2/luxai_s2/config.py).
@@ -23,7 +23,7 @@ The other way to create an environment is to do
 import luxai_s2
 custom_env_cfg = dict()
 env = gym.make("LuxAI_S2-v0", collect_stats=False, **custom_env_cfg)
-env.reset()
+obs, _ = env.reset()
 ```
 
 Upon creation, an empty `State` object is created and the default agent names given are `"player_0", "player_1"`.

diff --git a/kits/js/main.py b/kits/js/main.py
@@ -121,7 +121,7 @@ def agent(observation, configuration):
             env.env_steps = env.state.env_steps
             obs_inputs = [obs_inputs]
             for _ in range(FORWARD_SIM):
-                obs, _, _, _ = env.step(dict(player_0=dict(), player_1=dict()))
+                obs, _, _, _, _ = env.step(dict(player_0=dict(), player_1=dict()))
                 obs_inputs.append(to_json(obs[observation.player]))
         # except:
             # pass

diff --git a/kits/python/lux/forward_sim.py b/kits/python/lux/forward_sim.py
@@ -22,6 +22,6 @@ def forward_sim(full_obs, env_cfg, n=2):
         if len(env.agents) == 0:
             # can't step any further
             return [full_obs]
-        obs, _, _, _ = env.step(empty_actions)
+        obs, _, _, _, _ = env.step(empty_actions)
         forward_obs.append(obs[agent])
     return forward_obs
diff --git a/kits/rl/sb3/train.py b/kits/rl/sb3/train.py
@@ -6,12 +6,12 @@
 import copy
 import os.path as osp
 
-import gym
+import gymnasium as gym
 import numpy as np
 import torch as th
 import torch.nn as nn
-from gym import spaces
-from gym.wrappers import TimeLimit
+from gymnasium import spaces
+from gymnasium.wrappers import TimeLimit
 from luxai_s2.state import ObservationStateDict, StatsStateDict
 from luxai_s2.utils.heuristics.factory_placement import place_near_random_ice
 from luxai_s2.wrappers import SB3Wrapper
@@ -54,9 +54,11 @@ def step(self, action):
         # submit actions for just one agent to make it single-agent
         # and save single-agent versions of the data below
         action = {agent: action}
-        obs, _, done, info = self.env.step(action)
+        obs, _, termination, truncation, info = self.env.step(action)
+        done = dict()
+        for k in termination:
+            done[k] = termination[k] | truncation[k]
         obs = obs[agent]
-        done = done[agent]
 
         # we collect stats on teams here. These are useful stats that can be used to help generate reward functions
         stats: StatsStateDict = self.env.state.stats[agent]
@@ -87,12 +89,12 @@ def step(self, action):
             reward = ice_dug_this_step / 100 + water_produced_this_step
 
         self.prev_step_metrics = copy.deepcopy(metrics)
-        return obs, reward, done, info
+        return obs, reward, termination[agent], truncation[agent], info
 
     def reset(self, **kwargs):
-        obs = self.env.reset(**kwargs)["player_0"]
+        obs, reset_info = self.env.reset(**kwargs)["player_0"]
         self.prev_step_metrics = None
-        return obs
+        return obs, reset_info
 
 
 def parse_args():

diff --git a/kits/rl/sb3/wrappers/controllers.py b/kits/rl/sb3/wrappers/controllers.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 
 # Controller class copied here since you won't have access to the luxai_s2 package directly on the competition server

diff --git a/kits/rl/sb3/wrappers/obs_wrappers.py b/kits/rl/sb3/wrappers/obs_wrappers.py
@@ -1,9 +1,9 @@
 from typing import Any, Dict
 
-import gym
+import gymnasium as gym
 import numpy as np
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 
 class SimpleUnitObservationWrapper(gym.ObservationWrapper):

diff --git a/luxai_s2/luxai_runner/bot.py b/luxai_s2/luxai_runner/bot.py
@@ -44,8 +44,8 @@ def __init__(
             direct_import_python_bots=direct_import_python_bots,
         )
         # timing
-        self.remainingOverageTime = 60
-        self.time_per_step = 3
+        self.remainingOverageTime = 600
+        self.time_per_step = 9
 
         self.log = Logger(
             identifier=f"{self.agent}, {self.main_file_path}", verbosity=verbose

diff --git a/luxai_s2/luxai_runner/episode.py b/luxai_s2/luxai_runner/episode.py
@@ -6,7 +6,7 @@
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional
 
-import gym
+import gymnasium as gym
 import numpy as np
 from luxai_runner.bot import Bot
 from luxai_runner.logger import Logger
@@ -105,7 +105,7 @@ async def run(self):
 
         metadata = dict()
 
-        obs = self.env.reset(seed=self.seed)
+        obs, _ = self.env.reset(seed=self.seed)
         env_cfg = self.env.state.env_cfg
         state_obs = self.env.state.get_compressed_obs()
         obs = to_json(state_obs)
@@ -165,7 +165,10 @@ async def run(self):
                         else:
                             print(f"{agent_id} sent a invalid action {action}")
                     actions[agent_id] = None
-            new_state_obs, rewards, dones, infos = self.env.step(actions)
+            new_state_obs, rewards, terminations, truncations, infos = self.env.step(actions)
+            dones = dict()
+            for k in terminations:
+                dones[k] = terminations[k] | truncations[k]
             change_obs = self.env.state.get_change_obs(state_obs)
             state_obs = new_state_obs["player_0"]
             obs = to_json(change_obs)

diff --git a/luxai_s2/luxai_s2/env.py b/luxai_s2/luxai_s2/env.py
@@ -207,7 +207,7 @@ def reset(self, seed=None):
                 self.state.stats[agent] = create_empty_stats()
         obs = self.state.get_obs()
         observations = {agent: obs for agent in self.agents}
-        return observations
+        return observations, {}
 
     def log_error(self, *m):
         if self.env_cfg.verbose > 0:
@@ -762,7 +762,8 @@ def step(
         Dict[str, ObservationStateDict],
         Dict[str, float],
         Dict[str, bool],
-        Dict[str, Any],
+        Dict[str, bool],
+        Dict[str, dict],
     ]:
         """
         step(action) takes in an action for each agent and should return the
@@ -996,8 +997,8 @@ def step(
         env_done = (
             env_done or failed_agents["player_0"] or failed_agents["player_1"]
         )  # env is done if any agent fails.
-        dones = {agent: env_done or failed_agents[agent] for agent in self.agents}
-
+        terminations = {agent: env_done or failed_agents[agent] for agent in self.agents}
+        truncations = {agent: False or failed_agents[agent] for agent in self.agents}
         # generate observations
         obs = self.state.get_obs()
         observations = {}
@@ -1010,7 +1011,7 @@ def step(
         if env_done:
             self.agents = []
 
-        return observations, rewards, dones, infos
+        return observations, rewards, terminations, truncations, infos
 
     ### Game Logic ###
     def add_unit(self, team: Team, unit_type, pos: np.ndarray):
@@ -1110,7 +1111,7 @@ def raw_env() -> LuxAI_S2:
     return env
 
 
-import gym
+import gymnasium as gym
 
 gym.register(
     id="LuxAI_S2-v0",

diff --git a/luxai_s2/luxai_s2/spaces/act_space.py b/luxai_s2/luxai_s2/spaces/act_space.py
@@ -1,9 +1,9 @@
 import random
 from typing import Any, Dict, List
 
-import gym
+import gymnasium as gym
 import numpy as np
-from gym import spaces
+from gymnasium import spaces
 
 from luxai_s2.config import EnvConfig
 from luxai_s2.factory import Factory

diff --git a/luxai_s2/luxai_s2/spaces/obs_space.py b/luxai_s2/luxai_s2/spaces/obs_space.py
@@ -2,7 +2,7 @@
 from typing import Any, List
 
 import numpy as np
-from gym import spaces
+from gymnasium import spaces
 
 from luxai_s2.config import EnvConfig
 from luxai_s2.spaces.act_space import ActionsQueue, FactionString

diff --git a/luxai_s2/luxai_s2/wrappers/controllers.py b/luxai_s2/luxai_s2/wrappers/controllers.py
@@ -1,7 +1,7 @@
 from typing import Dict, Any
 
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 class Controller:
     def __init__(self, action_space: spaces.Space) -> None:

diff --git a/luxai_s2/luxai_s2/wrappers/sb3.py b/luxai_s2/luxai_s2/wrappers/sb3.py
@@ -1,9 +1,9 @@
 from typing import Callable, Dict
 
-import gym
+import gymnasium as gym
 import numpy as np
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 import luxai_s2.env
 from luxai_s2.env import LuxAI_S2
@@ -92,21 +92,21 @@ def step(self, action: Dict[str, npt.NDArray]):
                 lux_action[agent] = dict()
 
         # lux_action is now a dict mapping agent name to an action
-        obs, reward, done, info = self.env.step(lux_action)
+        obs, reward, terminated, truncated, info = self.env.step(lux_action)
         self.prev_obs = obs
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
     def reset(self, **kwargs):
         # we upgrade the reset function here
 
         # we call the original reset function first
-        obs = self.env.reset(**kwargs)
+        obs, _ = self.env.reset(**kwargs)
 
         # then use the bid policy to go through the bidding phase
         action = dict()
         for agent in self.env.agents:
             action[agent] = self.bid_policy(agent, obs[agent])
-        obs, _, _, _ = self.env.step(action)
+        obs, _, _, _, _ = self.env.step(action)
 
         # while real_env_steps < 0, we are in the factory placement phase
         # so we use the factory placement policy to step through this
@@ -120,7 +120,7 @@ def reset(self, **kwargs):
                     action[agent] = self.factory_placement_policy(agent, obs[agent])
                 else:
                     action[agent] = dict()
-            obs, _, _, _ = self.env.step(action)
+            obs, _, _, _, _ = self.env.step(action)
         self.prev_obs = obs
 
-        return obs
+        return obs, {}
diff --git a/luxai_s2/setup.py b/luxai_s2/setup.py
@@ -24,16 +24,15 @@ def get_version(rel_path):
     packages=find_packages(exclude="kits"),
     entry_points={"console_scripts": ["luxai-s2 = luxai_runner.cli:main"]},
     version=get_version("luxai_s2/version.py"),
-    python_requires=">=3.7",
+    python_requires=">=3.8",
     install_requires=[
         "numpy",
         "pygame",
         "termcolor",
         "matplotlib",
         "pettingzoo",
         "vec_noise",
-        "gym==0.21.0",
+        "gymnasium",
         "scipy",
-        "importlib-metadata<5.0" # fixes bug where they deprecated an endpoint that openai gym uses
     ],
 )