Merge pull request #140 from edbeeching/fix-rllib
Fix rllib
edbeeching authored Aug 3, 2023
2 parents b2034da + 8b21238 commit 2725bc8
Showing 22 changed files with 207 additions and 82 deletions.
58 changes: 58 additions & 0 deletions .github/workflows/test-ci.yml
@@ -55,3 +55,61 @@ jobs:
       - name: Test with pytest
         run: |
           make test
+  tests_ubuntu_rllib:
+    strategy:
+      matrix:
+        python-version: [3.8, 3.9, 3.10.10]
+        os: ['ubuntu-latest']
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip wheel==0.38.4
+          # cpu version of pytorch
+          pip install .[test]
+      - name: Clean up dependencies
+        run: |
+          pip uninstall -y stable-baselines3 gymnasium
+          pip install .[rllib]
+      - name: Download examples
+        run: |
+          make download_examples
+      - name: Test with pytest
+        run: |
+          make test
+  tests_windows_rllib:
+    strategy:
+      matrix:
+        python-version: [3.8, 3.9, 3.10.10]
+        os: ['windows-latest']
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip wheel==0.38.4
+          # cpu version of pytorch
+          pip install .[test]
+      - name: Clean up dependencies
+        run: |
+          pip uninstall -y stable-baselines3 gymnasium
+          pip install .[rllib]
+      - name: Download examples
+        run: |
+          make download_examples
+      - name: Test with pytest
+        run: |
+          make test
1 change: 1 addition & 0 deletions .gitignore
@@ -136,6 +136,7 @@ dmypy.json
 
 envs/unity/
 logs/
+logs.*/
 dump/
 tmp/
 Packaging Python Projects Python Packaging User Guide_files/
6 changes: 3 additions & 3 deletions docs/ADV_CLEAN_RL.md
@@ -17,11 +17,11 @@ You can read more about CleanRL in their [technical paper](https://arxiv.org/abs
 
 # Installation
 ```bash
-pip install godot-rl[clean-rl]
+pip install godot-rl[cleanrl]
 ```
 
-While the default options for clean-rl work reasonably well. You may be interested in changing the hyperparameters.
-We recommend taking the [clean-rl example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/clean_rl_example.py) and modifying to match your needs.
+While the default options for cleanrl work reasonably well, you may be interested in changing the hyperparameters.
+We recommend taking the [cleanrl example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/clean_rl_example.py) and modifying it to match your needs.
 
 ```python
 parser.add_argument("--gae-lambda", type=float, default=0.95,
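For context while reading the hunk above, here are two more flags in the same argparse style; a sketch only, with names and defaults that are common PPO knobs rather than part of this commit:

```python
# Illustrative sketch only: common PPO knobs in the cleanrl argparse style.
# Neither flag is added by this commit.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--learning-rate", type=float, default=3e-4,
                    help="the learning rate of the optimizer")
parser.add_argument("--clip-coef", type=float, default=0.2,
                    help="the PPO surrogate clipping coefficient")
args = parser.parse_args([])  # parse defaults; pass real CLI args in practice
```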
9 changes: 7 additions & 2 deletions docs/ADV_RLLIB.md
@@ -4,9 +4,14 @@
 
 ## Installation
 
+If you want to train with rllib, create a new environment, e.g. `python -m venv venv.rllib`, as rllib's dependencies can conflict with those of sb3 and other libraries.
+Due to a version clash with gymnasium, stable-baselines3 must be uninstalled before installing rllib.
 ```bash
-# remove sb3 installation with pip uninstall godot-rl[sb3]
-pip install godot-rl[rllib]
+pip install godot-rl
+# remove sb3 and gymnasium installations
+pip uninstall -y stable-baselines3 gymnasium
+# install rllib
+pip install ray[rllib]
 ```
 
 ## Basic Environment Usage
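The reordered install steps above are easy to get wrong in an existing environment; a minimal sanity check, assuming the commands were run in the fresh venv (this snippet is a sketch, not part of the commit):

```python
# Minimal post-install sanity check (a sketch, not part of this commit).
import importlib.util

# stable-baselines3 must be gone, otherwise its gymnasium pin clashes with rllib's.
assert importlib.util.find_spec("stable_baselines3") is None, "uninstall stable-baselines3 first"

import ray.rllib  # noqa: F401  # should import cleanly after the steps above
import ray
print("ray", ray.__version__, "with rllib is ready")
```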
7 changes: 4 additions & 3 deletions examples/clean_rl_example.py
@@ -5,7 +5,6 @@
 import time
 from distutils.util import strtobool
 from collections import deque
-import gym
 import numpy as np
 import torch
 import torch.nn as nn
@@ -17,6 +16,9 @@
 def parse_args():
     # fmt: off
     parser = argparse.ArgumentParser()
+    parser.add_argument("--viz", default=False, type=bool,
+        help="If set, the simulation will be displayed in a window during training. Otherwise "
+             "training will run without rendering the simulation. This setting does not apply to in-editor training.")
     parser.add_argument("--experiment_dir", default="logs/cleanrl", type=str,
         help="The name of the experiment directory, in which the tensorboard logs are getting stored")
     parser.add_argument("--experiment_name", default=os.path.basename(__file__).rstrip(".py"), type=str,
@@ -155,8 +157,7 @@ def get_action_and_value(self, x, action=None):
 
     # env setup
 
-    envs = env = CleanRLGodotEnv(env_path=args.env_path, show_window=True, speedup=args.speedup, convert_action_space=True)  # Godot envs are already vectorized
-    #assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"
+    envs = env = CleanRLGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, convert_action_space=True)  # Godot envs are already vectorized
     args.num_envs = envs.num_envs
     args.batch_size = int(args.num_envs * args.num_steps)
     args.minibatch_size = int(args.batch_size // args.num_minibatches)
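The upshot of the last hunk: window rendering in the cleanrl example is now opt-in via --viz instead of hard-coded. A sketch of the resulting env construction; the wrapper's module path and the binary path are assumptions for illustration, not shown in this diff:

```python
# Sketch of the patched env setup (module path and binary path are assumed).
from godot_rl.wrappers.clean_rl_wrapper import CleanRLGodotEnv

envs = CleanRLGodotEnv(
    env_path="examples/godot_rl_BallChase/bin/BallChase.x86_64",  # placeholder binary
    show_window=False,       # previously hard-coded to True, now driven by --viz
    speedup=8,               # run physics faster than real time
    convert_action_space=True,
)
print(envs.num_envs)  # Godot envs are already vectorized
```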
13 changes: 11 additions & 2 deletions examples/sample_factory_example.py
@@ -7,8 +7,17 @@ def get_args():
     parser.add_argument("--env_path", default=None, type=str, help="Godot binary to use")
     parser.add_argument("--eval", default=False, action="store_true", help="whether to eval the model")
     parser.add_argument("--speedup", default=1, type=int, help="whether to speed up the physics in the env")
-    parser.add_argument("--export", default=False, action="store_true", help="wheter to export the model")
+    parser.add_argument("--seed", default=0, type=int, help="environment seed")
+    parser.add_argument("--export", default=False, action="store_true", help="whether to export the model")
+    parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
+    parser.add_argument("--experiment_dir", default="logs/sf", type=str,
+        help="The name of the experiment directory, in which the tensorboard logs are getting stored")
+    parser.add_argument(
+        "--experiment_name",
+        default="experiment",
+        type=str,
+        help="The name of the experiment, which will be displayed in tensorboard. ",
+    )
 
     return parser.parse_known_args()
 
@@ -23,4 +32,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
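Taken together, the new flags make a sample-factory run fully specifiable from the command line. A hypothetical invocation, sketched via subprocess (the binary path and experiment name are placeholders):

```python
# Hypothetical invocation exercising the new flags (paths and names are placeholders).
import subprocess

subprocess.run([
    "python", "examples/sample_factory_example.py",
    "--env_path", "examples/godot_rl_BallChase/bin/BallChase.x86_64",
    "--seed", "42",
    "--experiment_dir", "logs/sf",
    "--experiment_name", "ballchase_sf",
    "--viz",
], check=True)
```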
16 changes: 12 additions & 4 deletions examples/stable_baselines3_example.py
@@ -3,6 +3,7 @@
 import pathlib
 
 from stable_baselines3.common.callbacks import CheckpointCallback
+from godot_rl.core.utils import can_import
 from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
 from godot_rl.wrappers.onnx.stable_baselines_export import export_ppo_model_as_onnx
 from stable_baselines3 import PPO
@@ -11,7 +12,8 @@
 # To download the env source and binary:
 # 1. gdrl.env_from_hub -r edbeeching/godot_rl_BallChase
 # 2. chmod +x examples/godot_rl_BallChase/bin/BallChase.x86_64
-
+if can_import("ray"):
+    print("WARNING, stable baselines and ray[rllib] are not compatible")
 
 parser = argparse.ArgumentParser(allow_abbrev=False)
 parser.add_argument(
@@ -34,6 +36,12 @@
     help="The name of the experiment, which will be displayed in tensorboard and "
          "for checkpoint directory and name (if enabled).",
 )
+parser.add_argument(
+    "--seed",
+    type=int,
+    default=0,
+    help="seed of the experiment"
+)
 parser.add_argument(
     "--resume_model_path",
     default=None,
@@ -80,8 +88,8 @@
 parser.add_argument(
     "--viz",
     action="store_true",
-    help="If set, the window(s) with the Godot environment(s) will be displayed, otherwise "
-         "training will run without rendering the game. Does not apply to in-editor training.",
+    help="If set, the simulation will be displayed in a window during training. Otherwise "
+         "training will run without rendering the simulation. This setting does not apply to in-editor training.",
     default=False
 )
 parser.add_argument("--speedup", default=1, type=int, help="Whether to speed up the physics in the env")
@@ -105,7 +113,7 @@
 if args.env_path is None and args.viz:
     print("Info: Using --viz without --env_path set has no effect, in-editor training will always render.")
 
-env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, n_parallel=args.n_parallel,
+env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, seed=args.seed, n_parallel=args.n_parallel,
                               speedup=args.speedup)
 env = VecMonitor(env)
 
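With the last hunk, the seed flows from the CLI into the env. A sketch of the equivalent direct construction, with keyword names taken from the diff and a placeholder binary path:

```python
# Sketch: seeded env construction equivalent to the patched example.
from stable_baselines3.common.vec_env import VecMonitor
from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv

env = StableBaselinesGodotEnv(
    env_path="examples/godot_rl_BallChase/bin/BallChase.x86_64",  # placeholder
    show_window=False,  # --viz
    seed=0,             # --seed, new in this commit
    n_parallel=1,
    speedup=8,
)
env = VecMonitor(env)
```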
2 changes: 1 addition & 1 deletion examples/stable_baselines3_hp_tuning.py
@@ -23,7 +23,7 @@
 from typing import Any
 from typing import Dict
 
-import gym
+import gymnasium as gym
 
 from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
 from godot_rl.core.godot_env import GodotEnv
2 changes: 1 addition & 1 deletion godot_rl/core/godot_env.py
@@ -225,7 +225,7 @@ def reset(self, seed=None):
         response["obs"] = self._process_obs(response["obs"])
         assert response["type"] == "reset"
         obs = response["obs"]
-        return obs, {}
+        return obs, [{}] * self.num_envs
 
     def call(self, method):
         message = {
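The reset contract changes from a single info dict to one per sub-env, which is what vectorized consumers such as the ray wrapper below iterate over. A sketch of the caller's view, assuming `env` is a GodotEnv already connected to a running Godot instance:

```python
# Sketch of the new reset contract (assumes `env` is a connected GodotEnv).
obs, infos = env.reset()
assert isinstance(infos, list) and len(infos) == env.num_envs  # one info dict per sub-env
```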
1 change: 1 addition & 0 deletions godot_rl/main.py
@@ -62,6 +62,7 @@ def get_args():
     parser.add_argument("--experiment_dir", default=None, type=str, help="The name of the experiment directory, in which the tensorboard logs are getting stored")
     parser.add_argument("--experiment_name", default="experiment", type=str, help="The name of the experiment, which will be displayed in tensorboard")
     parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
+    parser.add_argument("--seed", default=0, type=int, help="seed of the experiment")
 
     args, extras = parser.parse_known_args()
     if args.experiment_dir is None:
2 changes: 1 addition & 1 deletion godot_rl/wrappers/clean_rl_wrapper.py
@@ -1,6 +1,6 @@
 
 import numpy as np
-import gym
+import gymnasium as gym
 from godot_rl.core.utils import lod_to_dol
 from godot_rl.core.godot_env import GodotEnv
 
28 changes: 21 additions & 7 deletions godot_rl/wrappers/ray_wrapper.py
@@ -20,6 +20,7 @@ def __init__(
         show_window=False,
         framerate=None,
         action_repeat=None,
+        speedup=None,
         timeout_wait=60,
         config=None,
     ) -> None:
@@ -31,30 +32,36 @@
             show_window=show_window,
             framerate=framerate,
             action_repeat=action_repeat,
+            speedup=speedup
         )
         super().__init__(
             observation_space=self._env.observation_space,
             action_space=self._env.action_space,
             num_envs=self._env.num_envs,
         )
 
-    def vector_reset(self) -> List[EnvObsType]:
-        obs, info = self._env.reset()
-        return obs
+    def vector_reset(self, *, seeds: Optional[List[int]] = None, options: Optional[List[dict]] = None) -> List[EnvObsType]:
+        self.obs, info = self._env.reset()
+        return self.obs, info
 
     def vector_step(
         self, actions: List[EnvActionType]
     ) -> Tuple[List[EnvObsType], List[float], List[bool], List[EnvInfoDict]]:
-        actions = np.array(actions)
+        actions = np.array(actions, dtype=np.dtype(object))
         self.obs, reward, term, trunc, info = self._env.step(actions, order_ij=True)
-        return self.obs, reward, term, info
+        return self.obs, reward, term, trunc, info
 
     def get_unwrapped(self):
         return [self._env]
 
-    def reset_at(self, index: Optional[int]) -> EnvObsType:
+    def reset_at(self,
+        index: Optional[int] = None,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None,
+    ) -> EnvObsType:
         # the env is reset automatically, no need to reset it
-        return self.obs[index]
+        return self.obs[index], {}
 
 
 def register_env():
@@ -68,6 +75,7 @@ def register_env():
             framerate=c["framerate"],
             seed=c.worker_index + c["seed"],
             action_repeat=c["framerate"],
+            speedup=c["speedup"],
         ),
     )
 
@@ -118,6 +126,8 @@ def rllib_training(args, extras):
     register_env()
 
     exp["config"]["env_config"]["env_path"] = args.env_path
+    exp["config"]["env_config"]["seed"] = args.seed
+
     if args.env_path is not None:
         run_name = exp["algorithm"] + "/" + pathlib.Path(args.env_path).stem
     else:
@@ -133,6 +143,10 @@
 
     checkpoint_freq = 10
     checkpoint_at_end = True
+
+    exp["config"]["env_config"]["show_window"] = args.viz
+    exp["config"]["env_config"]["speedup"] = args.speedup
+
     if args.eval or args.export:
         checkpoint_freq = 0
         exp["config"]["env_config"]["show_window"] = True
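Net effect of the wrapper changes: vector_reset and vector_step now follow the newer rllib VectorEnv interface (reset also returns per-env infos, step returns a gymnasium-style 5-tuple with truncations), and show_window, seed, and speedup all ride through env_config. A sketch of the contract; the class name is assumed from the repo and the binary path is a placeholder:

```python
# Sketch of the updated VectorEnv contract (class name assumed, path is a placeholder).
from godot_rl.wrappers.ray_wrapper import RayVectorGodotEnv

env = RayVectorGodotEnv(env_path="envs/example/bin/example.x86_64", speedup=8)

obs, infos = env.vector_reset()  # reset now returns per-env infos as well
actions = [env.action_space.sample() for _ in range(env.num_envs)]
obs, rewards, terms, truncs, infos = env.vector_step(actions)  # 5-tuple with truncations
```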
