Merge pull request #140 from edbeeching/fix-rllib
Fix rllib
edbeeching authored Aug 3, 2023
2 parents b2034da + 8b21238 commit 2725bc8
Showing 22 changed files with 207 additions and 82 deletions.
58 changes: 58 additions & 0 deletions .github/workflows/test-ci.yml
@@ -55,3 +55,61 @@ jobs:
       - name: Test with pytest
         run: |
           make test
+  tests_ubuntu_rllib:
+    strategy:
+      matrix:
+        python-version: [3.8, 3.9, 3.10.10]
+        os: ['ubuntu-latest']
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip wheel==0.38.4
+          # cpu version of pytorch
+          pip install .[test]
+      - name: Clean up dependencies
+        run: |
+          pip uninstall -y stable-baselines3 gymnasium
+          pip install .[rllib]
+      - name: Download examples
+        run: |
+          make download_examples
+      - name: Test with pytest
+        run: |
+          make test
+  tests_windows_rllib:
+    strategy:
+      matrix:
+        python-version: [3.8, 3.9, 3.10.10]
+        os: ['windows-latest']
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip wheel==0.38.4
+          # cpu version of pytorch
+          pip install .[test]
+      - name: Clean up dependencies
+        run: |
+          pip uninstall -y stable-baselines3 gymnasium
+          pip install .[rllib]
+      - name: Download examples
+        run: |
+          make download_examples
+      - name: Test with pytest
+        run: |
+          make test
1 change: 1 addition & 0 deletions .gitignore
@@ -136,6 +136,7 @@ dmypy.json
 
 envs/unity/
 logs/
+logs.*/
 dump/
 tmp/
 Packaging Python Projects Python Packaging User Guide_files/
6 changes: 3 additions & 3 deletions docs/ADV_CLEAN_RL.md
@@ -17,11 +17,11 @@ You can read more about CleanRL in their [technical paper](https://arxiv.org/abs
 
 # Installation
 ```bash
-pip install godot-rl[clean-rl]
+pip install godot-rl[cleanrl]
 ```
 
-While the default options for clean-rl work reasonably well. You may be interested in changing the hyperparameters.
-We recommend taking the [clean-rl example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/clean_rl_example.py) and modifying to match your needs.
+While the default options for cleanrl work reasonably well, you may be interested in changing the hyperparameters.
+We recommend taking the [cleanrl example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/clean_rl_example.py) and modifying it to match your needs.
 
 ```python
 parser.add_argument("--gae-lambda", type=float, default=0.95,
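For context while reading the hunk above, here are two more flags in the same argparse style; a sketch only, with names and defaults that are common PPO knobs rather than part of this commit:

```python
# Illustrative sketch only: common PPO knobs in the cleanrl argparse style.
# Neither flag is added by this commit.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--learning-rate", type=float, default=3e-4,
                    help="the learning rate of the optimizer")
parser.add_argument("--clip-coef", type=float, default=0.2,
                    help="the PPO surrogate clipping coefficient")
args = parser.parse_args([])  # parse defaults; pass real CLI args in practice
```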
9 changes: 7 additions & 2 deletions docs/ADV_RLLIB.md
@@ -4,9 +4,14 @@
 
 ## Installation
 
+If you want to train with rllib, create a new environment, e.g. `python -m venv venv.rllib`, as rllib's dependencies can conflict with those of sb3 and other libraries.
+Due to a version clash with gymnasium, stable-baselines3 must be uninstalled before installing rllib.
 ```bash
-# remove sb3 installation with pip uninstall godot-rl[sb3]
-pip install godot-rl[rllib]
+pip install godot-rl
+# remove sb3 and gymnasium installations
+pip uninstall -y stable-baselines3 gymnasium
+# install rllib
+pip install ray[rllib]
 ```
 
 ## Basic Environment Usage
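The reordered install steps above are easy to get wrong in an existing environment; a minimal sanity check, assuming the commands were run in the fresh venv (this snippet is a sketch, not part of the commit):

```python
# Minimal post-install sanity check (a sketch, not part of this commit).
import importlib.util

# stable-baselines3 must be gone, otherwise its gymnasium pin clashes with rllib's.
assert importlib.util.find_spec("stable_baselines3") is None, "uninstall stable-baselines3 first"

import ray.rllib  # noqa: F401  # should import cleanly after the steps above
import ray
print("ray", ray.__version__, "with rllib is ready")
```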
7 changes: 4 additions & 3 deletions examples/clean_rl_example.py
@@ -5,7 +5,6 @@
 import time
 from distutils.util import strtobool
 from collections import deque
-import gym
 import numpy as np
 import torch
 import torch.nn as nn
@@ -17,6 +16,9 @@
 def parse_args():
     # fmt: off
     parser = argparse.ArgumentParser()
+    parser.add_argument("--viz", default=False, type=bool,
+        help="If set, the simulation will be displayed in a window during training. Otherwise "
+             "training will run without rendering the simulation. This setting does not apply to in-editor training.")
     parser.add_argument("--experiment_dir", default="logs/cleanrl", type=str,
         help="The name of the experiment directory, in which the tensorboard logs are getting stored")
     parser.add_argument("--experiment_name", default=os.path.basename(__file__).rstrip(".py"), type=str,
@@ -155,8 +157,7 @@ def get_action_and_value(self, x, action=None):
 
     # env setup
 
-    envs = env = CleanRLGodotEnv(env_path=args.env_path, show_window=True, speedup=args.speedup, convert_action_space=True)  # Godot envs are already vectorized
-    #assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"
+    envs = env = CleanRLGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, convert_action_space=True)  # Godot envs are already vectorized
     args.num_envs = envs.num_envs
     args.batch_size = int(args.num_envs * args.num_steps)
     args.minibatch_size = int(args.batch_size // args.num_minibatches)
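The upshot of the last hunk: window rendering in the cleanrl example is now opt-in via --viz instead of hard-coded. A sketch of the resulting env construction; the wrapper's module path and the binary path are assumptions for illustration, not shown in this diff:

```python
# Sketch of the patched env setup (module path and binary path are assumed).
from godot_rl.wrappers.clean_rl_wrapper import CleanRLGodotEnv

envs = CleanRLGodotEnv(
    env_path="examples/godot_rl_BallChase/bin/BallChase.x86_64",  # placeholder binary
    show_window=False,       # previously hard-coded to True, now driven by --viz
    speedup=8,               # run physics faster than real time
    convert_action_space=True,
)
print(envs.num_envs)  # Godot envs are already vectorized
```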
13 changes: 11 additions & 2 deletions examples/sample_factory_example.py
@@ -7,8 +7,17 @@ def get_args():
     parser.add_argument("--env_path", default=None, type=str, help="Godot binary to use")
     parser.add_argument("--eval", default=False, action="store_true", help="whether to eval the model")
     parser.add_argument("--speedup", default=1, type=int, help="whether to speed up the physics in the env")
-    parser.add_argument("--export", default=False, action="store_true", help="wheter to export the model")
+    parser.add_argument("--seed", default=0, type=int, help="environment seed")
+    parser.add_argument("--export", default=False, action="store_true", help="whether to export the model")
+    parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
+    parser.add_argument("--experiment_dir", default="logs/sf", type=str,
+        help="The name of the experiment directory, in which the tensorboard logs are getting stored")
+    parser.add_argument(
+        "--experiment_name",
+        default="experiment",
+        type=str,
+        help="The name of the experiment, which will be displayed in tensorboard. ",
+    )
 
     return parser.parse_known_args()
 
@@ -23,4 +32,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
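Taken together, the new flags make a sample-factory run fully specifiable from the command line. A hypothetical invocation, sketched via subprocess (the binary path and experiment name are placeholders):

```python
# Hypothetical invocation exercising the new flags (paths and names are placeholders).
import subprocess

subprocess.run([
    "python", "examples/sample_factory_example.py",
    "--env_path", "examples/godot_rl_BallChase/bin/BallChase.x86_64",
    "--seed", "42",
    "--experiment_dir", "logs/sf",
    "--experiment_name", "ballchase_sf",
    "--viz",
], check=True)
```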
16 changes: 12 additions & 4 deletions examples/stable_baselines3_example.py
@@ -3,6 +3,7 @@
 import pathlib
 
 from stable_baselines3.common.callbacks import CheckpointCallback
+from godot_rl.core.utils import can_import
 from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
 from godot_rl.wrappers.onnx.stable_baselines_export import export_ppo_model_as_onnx
 from stable_baselines3 import PPO
@@ -11,7 +12,8 @@
 # To download the env source and binary:
 # 1. gdrl.env_from_hub -r edbeeching/godot_rl_BallChase
 # 2. chmod +x examples/godot_rl_BallChase/bin/BallChase.x86_64
-
+if can_import("ray"):
+    print("WARNING, stable baselines and ray[rllib] are not compatible")
 
 parser = argparse.ArgumentParser(allow_abbrev=False)
 parser.add_argument(
@@ -34,6 +36,12 @@
     help="The name of the experiment, which will be displayed in tensorboard and "
          "for checkpoint directory and name (if enabled).",
 )
+parser.add_argument(
+    "--seed",
+    type=int,
+    default=0,
+    help="seed of the experiment"
+)
 parser.add_argument(
     "--resume_model_path",
     default=None,
@@ -80,8 +88,8 @@
 parser.add_argument(
     "--viz",
     action="store_true",
-    help="If set, the window(s) with the Godot environment(s) will be displayed, otherwise "
-         "training will run without rendering the game. Does not apply to in-editor training.",
+    help="If set, the simulation will be displayed in a window during training. Otherwise "
+         "training will run without rendering the simulation. This setting does not apply to in-editor training.",
     default=False
 )
 parser.add_argument("--speedup", default=1, type=int, help="Whether to speed up the physics in the env")
@@ -105,7 +113,7 @@
 if args.env_path is None and args.viz:
     print("Info: Using --viz without --env_path set has no effect, in-editor training will always render.")
 
-env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, n_parallel=args.n_parallel,
+env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, seed=args.seed, n_parallel=args.n_parallel,
                               speedup=args.speedup)
 env = VecMonitor(env)
 
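With the last hunk, the seed flows from the CLI into the env. A sketch of the equivalent direct construction, with keyword names taken from the diff and a placeholder binary path:

```python
# Sketch: seeded env construction equivalent to the patched example.
from stable_baselines3.common.vec_env import VecMonitor
from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv

env = StableBaselinesGodotEnv(
    env_path="examples/godot_rl_BallChase/bin/BallChase.x86_64",  # placeholder
    show_window=False,  # --viz
    seed=0,             # --seed, new in this commit
    n_parallel=1,
    speedup=8,
)
env = VecMonitor(env)
```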
2 changes: 1 addition & 1 deletion examples/stable_baselines3_hp_tuning.py
@@ -23,7 +23,7 @@
 from typing import Any
 from typing import Dict
 
-import gym
+import gymnasium as gym
 
 from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
 from godot_rl.core.godot_env import GodotEnv
2 changes: 1 addition & 1 deletion godot_rl/core/godot_env.py
@@ -225,7 +225,7 @@ def reset(self, seed=None):
         response["obs"] = self._process_obs(response["obs"])
         assert response["type"] == "reset"
         obs = response["obs"]
-        return obs, {}
+        return obs, [{}] * self.num_envs
 
     def call(self, method):
         message = {
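The reset contract changes from a single info dict to one per sub-env, which is what vectorized consumers such as the ray wrapper below iterate over. A sketch of the caller's view, assuming `env` is a GodotEnv already connected to a running Godot instance:

```python
# Sketch of the new reset contract (assumes `env` is a connected GodotEnv).
obs, infos = env.reset()
assert isinstance(infos, list) and len(infos) == env.num_envs  # one info dict per sub-env
```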
1 change: 1 addition & 0 deletions godot_rl/main.py
@@ -62,6 +62,7 @@ def get_args():
     parser.add_argument("--experiment_dir", default=None, type=str, help="The name of the experiment directory, in which the tensorboard logs are getting stored")
     parser.add_argument("--experiment_name", default="experiment", type=str, help="The name of the experiment, which will be displayed in tensorboard")
     parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
+    parser.add_argument("--seed", default=0, type=int, help="seed of the experiment")
 
     args, extras = parser.parse_known_args()
     if args.experiment_dir is None:
2 changes: 1 addition & 1 deletion godot_rl/wrappers/clean_rl_wrapper.py
@@ -1,6 +1,6 @@
 
 import numpy as np
-import gym
+import gymnasium as gym
 from godot_rl.core.utils import lod_to_dol
 from godot_rl.core.godot_env import GodotEnv
 
28 changes: 21 additions & 7 deletions godot_rl/wrappers/ray_wrapper.py
@@ -20,6 +20,7 @@ def __init__(
         show_window=False,
         framerate=None,
         action_repeat=None,
+        speedup=None,
         timeout_wait=60,
         config=None,
     ) -> None:
@@ -31,30 +32,36 @@
             show_window=show_window,
             framerate=framerate,
             action_repeat=action_repeat,
+            speedup=speedup
         )
         super().__init__(
             observation_space=self._env.observation_space,
             action_space=self._env.action_space,
             num_envs=self._env.num_envs,
         )
 
-    def vector_reset(self) -> List[EnvObsType]:
-        obs, info = self._env.reset()
-        return obs
+    def vector_reset(self, *, seeds: Optional[List[int]] = None, options: Optional[List[dict]] = None) -> List[EnvObsType]:
+        self.obs, info = self._env.reset()
+        return self.obs, info
 
     def vector_step(
         self, actions: List[EnvActionType]
     ) -> Tuple[List[EnvObsType], List[float], List[bool], List[EnvInfoDict]]:
-        actions = np.array(actions)
+        actions = np.array(actions, dtype=np.dtype(object))
         self.obs, reward, term, trunc, info = self._env.step(actions, order_ij=True)
-        return self.obs, reward, term, info
+        return self.obs, reward, term, trunc, info
 
     def get_unwrapped(self):
         return [self._env]
 
-    def reset_at(self, index: Optional[int]) -> EnvObsType:
+    def reset_at(self,
+        index: Optional[int] = None,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None,
+    ) -> EnvObsType:
         # the env is reset automatically, no need to reset it
-        return self.obs[index]
+        return self.obs[index], {}
 
 
 def register_env():
@@ -68,6 +75,7 @@ def register_env():
             framerate=c["framerate"],
             seed=c.worker_index + c["seed"],
             action_repeat=c["framerate"],
+            speedup=c["speedup"],
         ),
     )
 
@@ -118,6 +126,8 @@ def rllib_training(args, extras):
     register_env()
 
     exp["config"]["env_config"]["env_path"] = args.env_path
+    exp["config"]["env_config"]["seed"] = args.seed
+
     if args.env_path is not None:
         run_name = exp["algorithm"] + "/" + pathlib.Path(args.env_path).stem
     else:
@@ -133,6 +143,10 @@
 
     checkpoint_freq = 10
     checkpoint_at_end = True
+
+    exp["config"]["env_config"]["show_window"] = args.viz
+    exp["config"]["env_config"]["speedup"] = args.speedup
+
     if args.eval or args.export:
         checkpoint_freq = 0
         exp["config"]["env_config"]["show_window"] = True
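Net effect of the wrapper changes: vector_reset and vector_step now follow the newer rllib VectorEnv interface (reset also returns per-env infos, step returns a gymnasium-style 5-tuple with truncations), and show_window, seed, and speedup all ride through env_config. A sketch of the contract; the class name is assumed from the repo and the binary path is a placeholder:

```python
# Sketch of the updated VectorEnv contract (class name assumed, path is a placeholder).
from godot_rl.wrappers.ray_wrapper import RayVectorGodotEnv

env = RayVectorGodotEnv(env_path="envs/example/bin/example.x86_64", speedup=8)

obs, infos = env.vector_reset()  # reset now returns per-env infos as well
actions = [env.action_space.sample() for _ in range(env.num_envs)]
obs, rewards, terms, truncs, infos = env.vector_step(actions)  # 5-tuple with truncations
```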
