Commit: init work

StoneT2000 committed Aug 31, 2023
1 parent c65a833 commit c4d6e4e
Showing 14 changed files with 48 additions and 38 deletions.
4 changes: 4 additions & 0 deletions ChangeLog.md
@@ -1,5 +1,9 @@
 # ChangeLog
 
+### v2.2.0
+
+Upgraded to gymnasium format
+
 ### v2.1.9
 
 Fix bug where setuptools was causing runtime errors
4 changes: 2 additions & 2 deletions docs/advanced_specs.md
@@ -12,7 +12,7 @@ There are two ways to create the LuxAI environment, of which the recommended way
 from luxai_s2 import LuxAI_S2
 custom_env_cfg = dict()
 env = LuxAI_S2(collect_stats=False, **custom_env_cfg)
-env.reset()
+obs, _ = env.reset()
 ```
 
 where `collect_stats=True` will collect aggregate stats for an episode stored in `env.state.stats` and `custom_env_cfg` can be a custom env configuration to override the default. The custom env configuration may only replace existing keys as defined in [config.py](https://github.com/Lux-AI-Challenge/Lux-Design-S2/blob/main/luxai_s2/luxai_s2/config.py).
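For reference, a minimal sketch (not part of this commit) of overriding one of those existing config keys and reading the collected stats under the new reset signature; `verbose` is used here only because it already appears in the env code, and any overridden key must likewise be defined in config.py:

```python
from luxai_s2 import LuxAI_S2

# override an existing key from config.py and turn on stat collection
custom_env_cfg = dict(verbose=0)
env = LuxAI_S2(collect_stats=True, **custom_env_cfg)
obs, _ = env.reset()
# aggregate per-agent stats accumulate here as the episode is stepped
print(env.state.stats.keys())  # expected: dict_keys(['player_0', 'player_1'])
```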
@@ -23,7 +23,7 @@ The other way to create an environment is to do
 import luxai_s2
 custom_env_cfg = dict()
 env = gym.make("LuxAI_S2-v0", collect_stats=False, **custom_env_cfg)
-env.reset()
+obs, _ = env.reset()
 ```
 
 Upon creation, an empty `State` object is created and the default agent names given are `"player_0", "player_1"`.
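Putting the two snippets together, a hedged sketch (not part of this commit) of a full loop under the new Gymnasium-style interface, using the directly constructed env from the first snippet. The empty per-player action dicts mirror the placeholder actions used elsewhere in this diff; with them both players forfeit almost immediately, so the point here is only the reset/step signatures:

```python
from luxai_s2 import LuxAI_S2

env = LuxAI_S2(collect_stats=False)
obs, _ = env.reset(seed=0)
for _ in range(1000):
    # placeholder actions; real agents submit bids, factory placements, and unit actions
    actions = dict(player_0=dict(), player_1=dict())
    obs, rewards, terminations, truncations, infos = env.step(actions)
    if all(terminations[a] | truncations[a] for a in terminations):
        break
```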
2 changes: 1 addition & 1 deletion kits/js/main.py
@@ -121,7 +121,7 @@ def agent(observation, configuration):
 env.env_steps = env.state.env_steps
 obs_inputs = [obs_inputs]
 for _ in range(FORWARD_SIM):
-    obs, _, _, _ = env.step(dict(player_0=dict(), player_1=dict()))
+    obs, _, _, _, _ = env.step(dict(player_0=dict(), player_1=dict()))
     obs_inputs.append(to_json(obs[observation.player]))
 # except:
 # pass
2 changes: 1 addition & 1 deletion kits/python/lux/forward_sim.py
@@ -22,6 +22,6 @@ def forward_sim(full_obs, env_cfg, n=2):
 if len(env.agents) == 0:
     # can't step any further
     return [full_obs]
-obs, _, _, _ = env.step(empty_actions)
+obs, _, _, _, _ = env.step(empty_actions)
 forward_obs.append(obs[agent])
 return forward_obs
18 changes: 10 additions & 8 deletions kits/rl/sb3/train.py
@@ -6,12 +6,12 @@
 import copy
 import os.path as osp
 
-import gym
+import gymnasium as gym
 import numpy as np
 import torch as th
 import torch.nn as nn
-from gym import spaces
-from gym.wrappers import TimeLimit
+from gymnasium import spaces
+from gymnasium.wrappers import TimeLimit
 from luxai_s2.state import ObservationStateDict, StatsStateDict
 from luxai_s2.utils.heuristics.factory_placement import place_near_random_ice
 from luxai_s2.wrappers import SB3Wrapper
@@ -54,9 +54,11 @@ def step(self, action):
 # submit actions for just one agent to make it single-agent
 # and save single-agent versions of the data below
 action = {agent: action}
-obs, _, done, info = self.env.step(action)
+obs, _, termination, truncation, info = self.env.step(action)
+done = dict()
+for k in termination:
+    done[k] = termination[k] | truncation[k]
 obs = obs[agent]
-done = done[agent]
 
 # we collect stats on teams here. These are useful stats that can be used to help generate reward functions
 stats: StatsStateDict = self.env.state.stats[agent]
@@ -87,12 +89,12 @@ def step(self, action):
 reward = ice_dug_this_step / 100 + water_produced_this_step
 
 self.prev_step_metrics = copy.deepcopy(metrics)
-return obs, reward, done, info
+return obs, reward, termination[agent], truncation[agent], info
 
 def reset(self, **kwargs):
-    obs = self.env.reset(**kwargs)["player_0"]
+    obs, reset_info = self.env.reset(**kwargs)["player_0"]
     self.prev_step_metrics = None
-    return obs
+    return obs, reset_info
 
 
 def parse_args():
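The wrapper above now follows Gymnasium's convention that an episode ends when either the terminated or the truncated flag is set. A hedged sketch (not part of this commit) of how a caller would consume the new five-tuple, assuming the wrapped environment exposes a single-agent `action_space` as the SB3 setup implies:

```python
def run_episode(env) -> float:
    """Roll out one episode against a Gymnasium-style single-agent env."""
    obs, _ = env.reset()
    episode_reward = 0.0
    while True:
        action = env.action_space.sample()  # stand-in for a trained policy
        obs, reward, terminated, truncated, info = env.step(action)
        episode_reward += float(reward)
        if terminated or truncated:
            return episode_reward
```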
2 changes: 1 addition & 1 deletion kits/rl/sb3/wrappers/controllers.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 
 # Controller class copied here since you won't have access to the luxai_s2 package directly on the competition server
4 changes: 2 additions & 2 deletions kits/rl/sb3/wrappers/obs_wrappers.py
@@ -1,9 +1,9 @@
 from typing import Any, Dict
 
-import gym
+import gymnasium as gym
 import numpy as np
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 
 class SimpleUnitObservationWrapper(gym.ObservationWrapper):
4 changes: 2 additions & 2 deletions luxai_s2/luxai_runner/bot.py
@@ -44,8 +44,8 @@ def __init__(
     direct_import_python_bots=direct_import_python_bots,
 )
 # timing
-self.remainingOverageTime = 60
-self.time_per_step = 3
+self.remainingOverageTime = 600
+self.time_per_step = 9
 
 self.log = Logger(
     identifier=f"{self.agent}, {self.main_file_path}", verbosity=verbose
9 changes: 6 additions & 3 deletions luxai_s2/luxai_runner/episode.py
@@ -6,7 +6,7 @@
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional
 
-import gym
+import gymnasium as gym
 import numpy as np
 from luxai_runner.bot import Bot
 from luxai_runner.logger import Logger
@@ -105,7 +105,7 @@ async def run(self):
 
 metadata = dict()
 
-obs = self.env.reset(seed=self.seed)
+obs, _ = self.env.reset(seed=self.seed)
 env_cfg = self.env.state.env_cfg
 state_obs = self.env.state.get_compressed_obs()
 obs = to_json(state_obs)
@@ -165,7 +165,10 @@ async def run(self):
 else:
     print(f"{agent_id} sent a invalid action {action}")
     actions[agent_id] = None
-new_state_obs, rewards, dones, infos = self.env.step(actions)
+new_state_obs, rewards, terminations, truncations, infos = self.env.step(actions)
+dones = dict()
+for k in terminations:
+    dones[k] = terminations[k] | truncations[k]
 change_obs = self.env.state.get_change_obs(state_obs)
 state_obs = new_state_obs["player_0"]
 obs = to_json(change_obs)
13 changes: 7 additions & 6 deletions luxai_s2/luxai_s2/env.py
@@ -207,7 +207,7 @@ def reset(self, seed=None):
     self.state.stats[agent] = create_empty_stats()
 obs = self.state.get_obs()
 observations = {agent: obs for agent in self.agents}
-return observations
+return observations, {}
 
 def log_error(self, *m):
     if self.env_cfg.verbose > 0:
@@ -762,7 +762,8 @@ def step(
     Dict[str, ObservationStateDict],
     Dict[str, float],
     Dict[str, bool],
-    Dict[str, Any],
+    Dict[str, bool],
+    Dict[str, dict],
 ]:
     """
     step(action) takes in an action for each agent and should return the
@@ -996,8 +997,8 @@ def step(
 env_done = (
     env_done or failed_agents["player_0"] or failed_agents["player_1"]
 ) # env is done if any agent fails.
-dones = {agent: env_done or failed_agents[agent] for agent in self.agents}
-
+terminations = {agent: env_done or failed_agents[agent] for agent in self.agents}
+truncations = {agent: False or failed_agents[agent] for agent in self.agents}
 # generate observations
 obs = self.state.get_obs()
 observations = {}
@@ -1010,7 +1011,7 @@ def step(
 if env_done:
     self.agents = []
 
-return observations, rewards, dones, infos
+return observations, rewards, terminations, truncations, infos
 
 ### Game Logic ###
 def add_unit(self, team: Team, unit_type, pos: np.ndarray):
@@ -1110,7 +1111,7 @@ def raw_env() -> LuxAI_S2:
     return env
 
 
-import gym
+import gymnasium as gym
 
 gym.register(
     id="LuxAI_S2-v0",
4 changes: 2 additions & 2 deletions luxai_s2/luxai_s2/spaces/act_space.py
@@ -1,9 +1,9 @@
 import random
 from typing import Any, Dict, List
 
-import gym
+import gymnasium as gym
 import numpy as np
-from gym import spaces
+from gymnasium import spaces
 
 from luxai_s2.config import EnvConfig
 from luxai_s2.factory import Factory
2 changes: 1 addition & 1 deletion luxai_s2/luxai_s2/spaces/obs_space.py
@@ -2,7 +2,7 @@
 from typing import Any, List
 
 import numpy as np
-from gym import spaces
+from gymnasium import spaces
 
 from luxai_s2.config import EnvConfig
 from luxai_s2.spaces.act_space import ActionsQueue, FactionString
2 changes: 1 addition & 1 deletion luxai_s2/luxai_s2/wrappers/controllers.py
@@ -1,7 +1,7 @@
 from typing import Dict, Any
 
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 class Controller:
     def __init__(self, action_space: spaces.Space) -> None:
16 changes: 8 additions & 8 deletions luxai_s2/luxai_s2/wrappers/sb3.py
@@ -1,9 +1,9 @@
 from typing import Callable, Dict
 
-import gym
+import gymnasium as gym
 import numpy as np
 import numpy.typing as npt
-from gym import spaces
+from gymnasium import spaces
 
 import luxai_s2.env
 from luxai_s2.env import LuxAI_S2
@@ -92,21 +92,21 @@ def step(self, action: Dict[str, npt.NDArray]):
     lux_action[agent] = dict()
 
 # lux_action is now a dict mapping agent name to an action
-obs, reward, done, info = self.env.step(lux_action)
+obs, reward, terminated, truncated, info = self.env.step(lux_action)
 self.prev_obs = obs
-return obs, reward, done, info
+return obs, reward, terminated, truncated, info
 
 def reset(self, **kwargs):
     # we upgrade the reset function here
 
     # we call the original reset function first
-    obs = self.env.reset(**kwargs)
+    obs, _ = self.env.reset(**kwargs)
 
     # then use the bid policy to go through the bidding phase
     action = dict()
     for agent in self.env.agents:
         action[agent] = self.bid_policy(agent, obs[agent])
-    obs, _, _, _ = self.env.step(action)
+    obs, _, _, _, _ = self.env.step(action)
 
     # while real_env_steps < 0, we are in the factory placement phase
     # so we use the factory placement policy to step through this
@@ -120,7 +120,7 @@ def reset(self, **kwargs):
     action[agent] = self.factory_placement_policy(agent, obs[agent])
 else:
     action[agent] = dict()
-obs, _, _, _ = self.env.step(action)
+obs, _, _, _, _ = self.env.step(action)
 self.prev_obs = obs
 
-return obs
+return obs, {}
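The reset loop above calls `self.bid_policy(agent, obs[agent])` and `self.factory_placement_policy(agent, obs[agent])`, so both are plain callables taking an agent name and that agent's observation and returning an action dict. A hedged sketch of what such callables might look like; the observation key `valid_spawns_mask` and the action keys `bid`, `faction`, `spawn`, `metal`, and `water` are assumptions based on the competition's action format, not taken from this commit:

```python
import numpy as np

def zero_bid_policy(agent: str, obs: dict) -> dict:
    # hypothetical bid-phase policy: bid nothing and pick a faction
    faction = "AlphaStrike" if agent == "player_0" else "MotherMars"
    return dict(bid=0, faction=faction)

def random_factory_placement(agent: str, obs: dict) -> dict:
    # hypothetical placement-phase policy: drop a factory on a random valid spawn tile
    potential_spawns = np.argwhere(obs["board"]["valid_spawns_mask"] == 1)
    spawn = potential_spawns[np.random.randint(len(potential_spawns))]
    return dict(spawn=spawn, metal=150, water=150)
```

The built-in `place_near_random_ice` heuristic imported in `train.py` plays the same role as the placement sketch here.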
