diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml
new file mode 100644
index 00000000..a790eafc
--- /dev/null
+++ b/.github/workflows/quality.yml
@@ -0,0 +1,29 @@
+name: Quality
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+
+  check_code_quality:
+    name: Check code quality
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+      - name: Setup Python environment
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.10.10
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install ".[dev]"
+      - name: Code quality
+        run: |
+          make quality
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
deleted file mode 100644
index 7c9f21cc..00000000
--- a/.vscode/settings.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "editor.formatOnSave": true,
-    "python.formatting.provider": "black",
-    "python.formatting.blackArgs": [
-        "-l 120"
-    ],
-    "python.testing.pytestArgs": [
-        "tests"
-    ],
-    "python.testing.unittestEnabled": false,
-    "python.testing.pytestEnabled": true,
-}
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 0aa8753e..b939b2aa 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,14 @@
 .PHONY: quality style test unity-test
 
-# Check that source code meets quality standards
-quality:
-	black --check --line-length 119 --target-version py38 tests godot_rl
-	isort --check-only tests godot_rl
-	flake8 tests godot_rl
-
 # Format source code automatically
 style:
-	black --line-length 119 --target-version py38 tests godot_rl
-	isort tests godot_rl
+	black --line-length 120 --target-version py310 tests godot_rl examples
+	isort -w 120 tests godot_rl examples
+# Check that source code meets quality standards
+quality:
+	black --check --line-length 120 --target-version py310 tests godot_rl examples
+	isort -w 120 --check-only tests godot_rl examples
+	flake8 --max-line-length 120 tests godot_rl examples
 
 # Run tests for the library
 test:
diff --git a/README.md b/README.md
index 7ecfd696..1c92926e 100644
--- a/README.md
+++ b/README.md
@@ -83,6 +83,33 @@ Godot RL Agents supports 4 different RL training frameworks, the links below det
 - [CleanRL](docs/ADV_CLEAN_RL.md) (Windows, Mac, Linux)
 - [Ray rllib](docs/ADV_RLLIB.md) (Windows, Mac, Linux)
 
+## Contributing
+We welcome new contributions to the library, such as:
+- New environments made in Godot
+- Improvements to the readme files
+- Additions to the python codebase
+
+Start by forking the repo and then cloning it to your machine, creating a venv and performing an editable installation.
+
+```
+# If you want to PR, you should fork the lib or ask to be a contributor
+git clone git@github.com:YOUR_USERNAME/godot_rl_agents.git
+cd godot_rl_agents
+python -m venv venv
+pip install -e ".[dev]"
+# check tests run
+make test
+```
+
+Then add your features.
+Format your code with:
+```
+make style
+make quality
+```
+Then make a PR against main on the original repo.
+
+
 ## FAQ
 ### Why have we developed Godot RL Agents?
 
diff --git a/examples/clean_rl_example.py b/examples/clean_rl_example.py index 8b061fb8..b95c2ed0 100644 --- a/examples/clean_rl_example.py +++ b/examples/clean_rl_example.py @@ -4,14 +4,16 @@ import pathlib import random import time -from distutils.util import strtobool from collections import deque +from distutils.util import strtobool + import numpy as np import torch import torch.nn as nn import torch.optim as optim from torch.distributions.normal import Normal from torch.utils.tensorboard import SummaryWriter + from godot_rl.wrappers.clean_rl_wrapper import CleanRLGodotEnv @@ -167,8 +169,9 @@ def get_action_and_value(self, x, action=None): # env setup - envs = env = CleanRLGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, seed=args.seed, - n_parallel=args.n_parallel) + envs = env = CleanRLGodotEnv( + env_path=args.env_path, show_window=args.viz, speedup=args.speedup, seed=args.seed, n_parallel=args.n_parallel + ) args.num_envs = envs.num_envs args.batch_size = int(args.num_envs * args.num_steps) args.minibatch_size = int(args.batch_size // args.num_minibatches) @@ -334,7 +337,6 @@ def get_action_and_value(self, x, action=None): agent.eval().to("cpu") - class OnnxPolicy(torch.nn.Module): def __init__(self, actor_mean): super().__init__() @@ -344,7 +346,6 @@ def forward(self, obs, state_ins): action_mean = self.actor_mean(obs) return action_mean, state_ins - onnx_policy = OnnxPolicy(agent.actor_mean) dummy_input = torch.unsqueeze(torch.tensor(envs.single_observation_space.sample()), 0) @@ -355,9 +356,10 @@ def forward(self, obs, state_ins): opset_version=15, input_names=["obs", "state_ins"], output_names=["output", "state_outs"], - dynamic_axes={'obs': {0: 'batch_size'}, - 'state_ins': {0: 'batch_size'}, # variable length axes - 'output': {0: 'batch_size'}, - 'state_outs': {0: 'batch_size'}} - + dynamic_axes={ + "obs": {0: "batch_size"}, + "state_ins": {0: "batch_size"}, # variable length axes + "output": {0: "batch_size"}, + "state_outs": {0: "batch_size"}, + }, ) diff --git a/examples/sample_factory_example.py b/examples/sample_factory_example.py index 2c4e10a6..bd86aaaf 100644 --- a/examples/sample_factory_example.py +++ b/examples/sample_factory_example.py @@ -1,5 +1,6 @@ import argparse -from godot_rl.wrappers.sample_factory_wrapper import sample_factory_training, sample_factory_enjoy + +from godot_rl.wrappers.sample_factory_wrapper import sample_factory_enjoy, sample_factory_training def get_args(): @@ -10,8 +11,12 @@ def get_args(): parser.add_argument("--seed", default=0, type=int, help="environment seed") parser.add_argument("--export", default=False, action="store_true", help="whether to export the model") parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process") - parser.add_argument("--experiment_dir", default="logs/sf", type=str, - help="The name of the experiment directory, in which the tensorboard logs are getting stored") + parser.add_argument( + "--experiment_dir", + default="logs/sf", + type=str, + help="The name of the experiment directory, in which the tensorboard logs are getting stored", + ) parser.add_argument( "--experiment_name", default="experiment", @@ -22,14 +27,13 @@ def get_args(): return parser.parse_known_args() - def main(): args, extras = get_args() if args.eval: sample_factory_enjoy(args, extras) else: sample_factory_training(args, extras) - - + + if __name__ == "__main__": main() diff --git a/examples/stable_baselines3_example.py b/examples/stable_baselines3_example.py index 
7358f991..b19b0a9d 100644 --- a/examples/stable_baselines3_example.py +++ b/examples/stable_baselines3_example.py @@ -3,12 +3,13 @@ import pathlib from typing import Callable +from stable_baselines3 import PPO from stable_baselines3.common.callbacks import CheckpointCallback +from stable_baselines3.common.vec_env.vec_monitor import VecMonitor + from godot_rl.core.utils import can_import -from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv from godot_rl.wrappers.onnx.stable_baselines_export import export_ppo_model_as_onnx -from stable_baselines3 import PPO -from stable_baselines3.common.vec_env.vec_monitor import VecMonitor +from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv # To download the env source and binary: # 1. gdrl.env_from_hub -r edbeeching/godot_rl_BallChase @@ -28,42 +29,39 @@ default="logs/sb3", type=str, help="The name of the experiment directory, in which the tensorboard logs and checkpoints (if enabled) are " - "getting stored." + "getting stored.", ) parser.add_argument( "--experiment_name", default="experiment", type=str, help="The name of the experiment, which will be displayed in tensorboard and " - "for checkpoint directory and name (if enabled).", -) -parser.add_argument( - "--seed", - type=int, - default=0, - help="seed of the experiment" + "for checkpoint directory and name (if enabled).", ) +parser.add_argument("--seed", type=int, default=0, help="seed of the experiment") parser.add_argument( "--resume_model_path", default=None, type=str, help="The path to a model file previously saved using --save_model_path or a checkpoint saved using " - "--save_checkpoints_frequency. Use this to resume training or infer from a saved model.", + "--save_checkpoints_frequency. Use this to resume training or infer from a saved model.", ) parser.add_argument( "--save_model_path", default=None, type=str, help="The path to use for saving the trained sb3 model after training is complete. Saved model can be used later " - "to resume training. Extension will be set to .zip", + "to resume training. Extension will be set to .zip", ) parser.add_argument( "--save_checkpoint_frequency", default=None, type=int, - help=("If set, will save checkpoints every 'frequency' environment steps. " - "Requires a unique --experiment_name or --experiment_dir for each run. " - "Does not need --save_model_path to be set. "), + help=( + "If set, will save checkpoints every 'frequency' environment steps. " + "Requires a unique --experiment_name or --experiment_dir for each run. " + "Does not need --save_model_path to be set. " + ), ) parser.add_argument( "--onnx_export_path", @@ -76,34 +74,38 @@ default=1_000_000, type=int, help="The number of environment steps to train for, default is 1_000_000. If resuming from a saved model, " - "it will continue training for this amount of steps from the saved state without counting previously trained " - "steps", + "it will continue training for this amount of steps from the saved state without counting previously trained " + "steps", ) parser.add_argument( "--inference", default=False, action="store_true", help="Instead of training, it will run inference on a loaded model for --timesteps steps. " - "Requires --resume_model_path to be set." + "Requires --resume_model_path to be set.", ) parser.add_argument( "--linear_lr_schedule", default=False, action="store_true", help="Use a linear LR schedule for training. If set, learning rate will decrease until it reaches 0 at " - "--timesteps" - "value. 
Note: On resuming training, the schedule will reset. If disabled, constant LR will be used." + "--timesteps" + "value. Note: On resuming training, the schedule will reset. If disabled, constant LR will be used.", ) parser.add_argument( "--viz", action="store_true", help="If set, the simulation will be displayed in a window during training. Otherwise " - "training will run without rendering the simulation. This setting does not apply to in-editor training.", - default=False + "training will run without rendering the simulation. This setting does not apply to in-editor training.", + default=False, ) parser.add_argument("--speedup", default=1, type=int, help="Whether to speed up the physics in the env") -parser.add_argument("--n_parallel", default=1, type=int, help="How many instances of the environment executable to " - "launch - requires --env_path to be set if > 1.") +parser.add_argument( + "--n_parallel", + default=1, + type=int, + help="How many instances of the environment executable to " "launch - requires --env_path to be set if > 1.", +) args, extras = parser.parse_known_args() @@ -136,10 +138,12 @@ def close_env(): # Prevent overwriting existing checkpoints when starting a new experiment if checkpoint saving is enabled if args.save_checkpoint_frequency is not None and os.path.isdir(path_checkpoint): - raise RuntimeError(abs_path_checkpoint + " folder already exists. " - "Use a different --experiment_dir, or --experiment_name," - "or if previous checkpoints are not needed anymore, " - "remove the folder containing the checkpoints. ") + raise RuntimeError( + abs_path_checkpoint + " folder already exists. " + "Use a different --experiment_dir, or --experiment_name," + "or if previous checkpoints are not needed anymore, " + "remove the folder containing the checkpoints. " + ) if args.inference and args.resume_model_path is None: raise parser.error("Using --inference requires --resume_model_path to be set.") @@ -147,8 +151,9 @@ def close_env(): if args.env_path is None and args.viz: print("Info: Using --viz without --env_path set has no effect, in-editor training will always render.") -env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, seed=args.seed, n_parallel=args.n_parallel, - speedup=args.speedup) +env = StableBaselinesGodotEnv( + env_path=args.env_path, show_window=args.viz, seed=args.seed, n_parallel=args.n_parallel, speedup=args.speedup +) env = VecMonitor(env) @@ -177,13 +182,15 @@ def func(progress_remaining: float) -> float: if args.resume_model_path is None: learning_rate = 0.0003 if not args.linear_lr_schedule else linear_schedule(0.0003) - model: PPO = PPO("MultiInputPolicy", - env, - ent_coef=0.0001, - verbose=2, - n_steps=32, - tensorboard_log=args.experiment_dir, - learning_rate=learning_rate) + model: PPO = PPO( + "MultiInputPolicy", + env, + ent_coef=0.0001, + verbose=2, + n_steps=32, + tensorboard_log=args.experiment_dir, + learning_rate=learning_rate, + ) else: path_zip = pathlib.Path(args.resume_model_path) print("Loading model: " + os.path.abspath(path_zip)) @@ -201,13 +208,16 @@ def func(progress_remaining: float) -> float: checkpoint_callback = CheckpointCallback( save_freq=(args.save_checkpoint_frequency // env.num_envs), save_path=path_checkpoint, - name_prefix=args.experiment_name + name_prefix=args.experiment_name, ) - learn_arguments['callback'] = checkpoint_callback + learn_arguments["callback"] = checkpoint_callback try: model.learn(**learn_arguments) except KeyboardInterrupt: - print("Training interrupted by user. 
Will save if --save_model_path was used and/or export if --onnx_export_path was used.") + print( + """Training interrupted by user. Will save if --save_model_path was + used and/or export if --onnx_export_path was used.""" + ) close_env() handle_onnx_export() diff --git a/examples/stable_baselines3_hp_tuning.py b/examples/stable_baselines3_hp_tuning.py index 7e280f8c..5e8754bb 100644 --- a/examples/stable_baselines3_hp_tuning.py +++ b/examples/stable_baselines3_hp_tuning.py @@ -8,8 +8,8 @@ You can run this example as follows: $ python examples/stable_baselines3_hp_tuning.py --env_path= --speedup=8 --n_parallel=1 - -Feel free to copy this script and update, add or remove the hp values to your liking. + +Feel free to copy this script and update, add or remove the hp values to your liking. """ try: @@ -17,30 +17,33 @@ from optuna.pruners import MedianPruner from optuna.samplers import TPESampler except ImportError as e: + print(e) print("You need to install optuna to use the hyperparameter tuning script. Try: pip install optuna") exit() -from typing import Any -from typing import Dict +import argparse +from typing import Any, Dict import gymnasium as gym - -from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv -from godot_rl.core.godot_env import GodotEnv - +import torch from stable_baselines3 import PPO from stable_baselines3.common.callbacks import EvalCallback from stable_baselines3.common.vec_env.vec_monitor import VecMonitor -import torch -import torch.nn as nn - -import argparse +from godot_rl.core.godot_env import GodotEnv +from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv parser = argparse.ArgumentParser(allow_abbrev=False) -parser.add_argument("--env_path", default=None, type=str, help="The Godot binary to use, do not include for in editor training") +parser.add_argument( + "--env_path", default=None, type=str, help="The Godot binary to use, do not include for in editor training" +) parser.add_argument("--speedup", default=8, type=int, help="whether to speed up the physics in the env") -parser.add_argument("--n_parallel", default=1, type=int, help="How many instances of the environment executable to launch - requires --env_path to be set if > 1.") +parser.add_argument( + "--n_parallel", + default=1, + type=int, + help="How many instances of the environment executable to launch - requires --env_path to be set if > 1.", +) args, extras = parser.parse_known_args() @@ -61,6 +64,7 @@ "ent_coef": 0.005, } + def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]: """Sampler for PPO hyperparameters.""" learning_rate = trial.suggest_loguniform("learning_rate", 0.0003, 0.003) @@ -118,10 +122,19 @@ def objective(trial: optuna.Trial) -> float: print("args:", kwargs) # Create the RL model. training_port = GodotEnv.DEFAULT_PORT + 1 - model = PPO("MultiInputPolicy", VecMonitor(StableBaselinesGodotEnv(env_path=args.env_path, speedup=args.speedup, n_parallel=args.n_parallel, port=training_port)), tensorboard_log="logs/optuna", **kwargs) + model = PPO( + "MultiInputPolicy", + VecMonitor( + StableBaselinesGodotEnv( + env_path=args.env_path, speedup=args.speedup, n_parallel=args.n_parallel, port=training_port + ) + ), + tensorboard_log="logs/optuna", + **kwargs, + ) # Create env used for evaluation. eval_env = VecMonitor(StableBaselinesGodotEnv(env_path=args.env_path, speedup=args.speedup)) - + # Create the callback that will periodically evaluate and report the performance. 
eval_callback = TrialEvalCallback( eval_env, trial, n_eval_episodes=N_EVAL_EPISODES, eval_freq=EVAL_FREQ, deterministic=True @@ -142,12 +155,12 @@ def objective(trial: optuna.Trial) -> float: # Tell the optimizer that the trial failed. if nan_encountered: - #return 0 + # return 0 return float("nan") if eval_callback.is_pruned: raise optuna.exceptions.TrialPruned() - + return eval_callback.last_mean_reward @@ -178,4 +191,4 @@ def objective(trial: optuna.Trial) -> float: print(" User attrs:") for key, value in trial.user_attrs.items(): - print(" {}: {}".format(key, value)) \ No newline at end of file + print(" {}: {}".format(key, value)) diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py index 8c37ccec..9f0d1c83 100644 --- a/godot_rl/core/godot_env.py +++ b/godot_rl/core/godot_env.py @@ -10,10 +10,10 @@ from typing import Optional import numpy as np -from godot_rl.core.utils import ActionSpaceProcessor, convert_macos_path from gymnasium import spaces -from collections import OrderedDict +from godot_rl.core.utils import ActionSpaceProcessor, convert_macos_path + class GodotEnv: MAJOR_VERSION = "0" # Versioning for the environment @@ -22,15 +22,15 @@ class GodotEnv: DEFAULT_TIMEOUT = 60 # Default socket timeout TODO def __init__( - self, - env_path: str = None, - port: int = DEFAULT_PORT, - show_window: bool = False, - seed: int = 0, - framerate: Optional[int] = None, - action_repeat: Optional[int] = None, - speedup: Optional[int] = None, - convert_action_space: bool = False, + self, + env_path: str = None, + port: int = DEFAULT_PORT, + show_window: bool = False, + seed: int = 0, + framerate: Optional[int] = None, + action_repeat: Optional[int] = None, + speedup: Optional[int] = None, + convert_action_space: bool = False, ): """ Initialize a new instance of GodotEnv @@ -98,18 +98,18 @@ def check_platform(self, filename: str): if platform == "linux" or platform == "linux2": # Linux assert ( - pathlib.Path(filename).suffix == ".x86_64" - ), f"Incorrect file suffix for filename {filename} suffix {pathlib.Path(filename).suffix}. Please provide a .x86_64 file" + pathlib.Path(filename).suffix == ".x86_64" + ), f"Incorrect file suffix for {filename=} {pathlib.Path(filename).suffix=}. Please provide a .x86_64 file" elif platform == "darwin": # OSX assert ( - pathlib.Path(filename).suffix == ".app" - ), f"Incorrect file suffix for filename {filename} suffix {pathlib.Path(filename).suffix}. Please provide a .app file" + pathlib.Path(filename).suffix == ".app" + ), f"Incorrect file suffix for {filename=} {pathlib.Path(filename).suffix=}. Please provide a .app file" elif platform == "win32": # Windows... assert ( - pathlib.Path(filename).suffix == ".exe" - ), f"Incorrect file suffix for filename {filename} suffix {pathlib.Path(filename).suffix}. Please provide a .exe file" + pathlib.Path(filename).suffix == ".exe" + ), f"Incorrect file suffix for {filename=} {pathlib.Path(filename).suffix=}. 
Please provide a .exe file" else: assert 0, f"unknown filetype {pathlib.Path(filename).suffix}" @@ -132,7 +132,7 @@ def from_numpy(self, action, order_ij=False): env_action = {} for j, k in enumerate(self._action_space.keys()): - if order_ij == True: + if order_ij is True: v = action[i][j] else: v = action[j][i] @@ -263,7 +263,7 @@ def _launch_env(self, env_path, port, show_window, framerate, seed, action_repea launch_cmd = f"{path} --port={port} --env_seed={seed}" - if show_window == False: + if show_window is False: launch_cmd += " --disable-render-loop --headless" if framerate is not None: launch_cmd += f" --fixed-fps {framerate}" @@ -359,13 +359,10 @@ def _get_env_info(self): @staticmethod def _decode_2d_obs_from_string( - hex_string, - shape, + hex_string, + shape, ): - return ( - np.frombuffer(bytes.fromhex(hex_string), dtype=np.uint8) - .reshape(shape) - ) + return np.frombuffer(bytes.fromhex(hex_string), dtype=np.uint8).reshape(shape) def _send_as_json(self, dictionary): message_json = json.dumps(dictionary) @@ -385,7 +382,7 @@ def _clear_socket(self): data = self.connection.recv(4) if not data: break - except BlockingIOError as e: + except BlockingIOError: pass self.connection.setblocking(True) diff --git a/godot_rl/core/utils.py b/godot_rl/core/utils.py index edca98d7..ed8d5a1f 100644 --- a/godot_rl/core/utils.py +++ b/godot_rl/core/utils.py @@ -5,7 +5,6 @@ import numpy as np - def lod_to_dol(lod): return {k: [dic[k] for dic in lod] for k in lod[0]} @@ -13,6 +12,7 @@ def lod_to_dol(lod): def dol_to_lod(dol): return [dict(zip(dol, t)) for t in zip(*dol.values())] + def convert_macos_path(env_path): """ On MacOs the user is supposed to provide a application.app file to env_path. @@ -23,12 +23,11 @@ def convert_macos_path(env_path): Example output: ./Demo.app/Contents/Macos/Demo """ - filenames = re.findall(r'[^\/]+(?=\.)', env_path) - assert ( - len(filenames) == 1 - ), f"An error occured while converting the env path for MacOS." + filenames = re.findall(r"[^\/]+(?=\.)", env_path) + assert len(filenames) == 1, "An error occured while converting the env path for MacOS." 
return env_path + "/Contents/MacOS/" + filenames[0] + class ActionSpaceProcessor: # can convert tuple action dists to a single continuous action distribution # eg (Box(a), Box(b)) -> Box(a+b) @@ -36,7 +35,6 @@ class ActionSpaceProcessor: # etc # does not yet work with discrete dists of n>2 def __init__(self, action_space: gym.spaces.Tuple, convert) -> None: - self._original_action_space = action_space self._convert = convert @@ -46,7 +44,6 @@ def __init__(self, action_space: gym.spaces.Tuple, convert) -> None: use_multi_discrete_spaces = False multi_discrete_spaces = np.array([]) if isinstance(action_space, gym.spaces.Tuple): - if all(isinstance(space, gym.spaces.Discrete) for space in action_space.spaces): use_multi_discrete_spaces = True for space in action_space.spaces: @@ -58,7 +55,7 @@ def __init__(self, action_space: gym.spaces.Tuple, convert) -> None: space_size += space.shape[0] elif isinstance(space, gym.spaces.Discrete): if space.n > 2: - #for now only binary actions are supported if you mix different spaces + # for now only binary actions are supported if you mix different spaces # need to add support for the n>2 case raise NotImplementedError space_size += 1 @@ -96,7 +93,6 @@ def to_original_dist(self, action): counter += space.shape[0] elif isinstance(space, gym.spaces.Discrete): - discrete_actions = np.greater(action[:, counter], 0.0) discrete_actions = discrete_actions.astype(np.float32) original_action.append(discrete_actions) @@ -107,12 +103,14 @@ def to_original_dist(self, action): return original_action + def can_import(module_name): return not cant_import(module_name) + def cant_import(module_name): try: importlib.import_module(module_name) return False except ImportError: - return True \ No newline at end of file + return True diff --git a/godot_rl/custom_models/__init__.py b/godot_rl/custom_models/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/godot_rl/custom_models/attention_model.py b/godot_rl/custom_models/attention_model.py deleted file mode 100644 index e5a101a2..00000000 --- a/godot_rl/custom_models/attention_model.py +++ /dev/null @@ -1,101 +0,0 @@ -import logging - -import gym -import numpy as np -from gym.spaces import Box, Discrete, MultiDiscrete -from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFCNet -from ray.rllib.models.torch.misc import (AppendBiasLayer, SlimFC, - normc_initializer) -from ray.rllib.models.torch.modules import (GRUGate, - RelativeMultiHeadAttention, - SkipConnection) -from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 -from ray.rllib.policy.sample_batch import SampleBatch -from ray.rllib.policy.view_requirement import ViewRequirement -from ray.rllib.utils.annotations import override -from ray.rllib.utils.framework import try_import_torch -from ray.rllib.utils.typing import Dict, List, ModelConfigDict, TensorType - -torch, nn = try_import_torch() -logger = logging.getLogger(__name__) - - -# defines the attention model used in the bullet hell environment -# first a feed forward to test that observations are being handled correctly - - -class MyAttentionModel(TorchModelV2, nn.Module): - """Generic fully connected network.""" - - def __init__( - self, - obs_space: gym.spaces.Space, - action_space: gym.spaces.Space, - num_outputs: int, - model_config: ModelConfigDict, - name: str, - ): - - TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name) - - nn.Module.__init__(self) - # simple baseline, fc all inputs and sum then value and policy head - - # if 
isinstance(action_space, Discrete): - # self.action_dim = action_space.n - # elif isinstance(action_space, MultiDiscrete): - # self.action_dim = np.product(action_space.nvec) - # elif action_space.shape is not None: - # self.action_dim = int(np.product(action_space.shape)) - # else: - # self.action_dim = int(len(action_space)) - # print("action space", action_space, self.action_dim, num_outputs) - prev_layer_size = 3 # int(np.product(obs_space.shape)) - # obs_space["obs"]["max_length"] = 1 - self.model = TorchFCNet(obs_space, action_space, num_outputs, model_config, name) - print(self.model) - - print(obs_space, prev_layer_size, self.num_outputs) - self._logits_branch = SlimFC( - in_size=prev_layer_size, - out_size=self.num_outputs, - activation_fn=None, - initializer=torch.nn.init.xavier_uniform_, - ) - self._value_branch = SlimFC( - in_size=prev_layer_size, - out_size=1, - activation_fn=None, - initializer=torch.nn.init.xavier_uniform_, - ) - # torch.set_printoptions(profile="full") - - @override(TorchModelV2) - def forward( - self, - input_dict: Dict[str, TensorType], - state: List[TensorType], - seq_lens: TensorType, - ) -> (TensorType, List[TensorType]): - - observations = input_dict[SampleBatch.OBS] - # print("unbatch", input_dict["obs"]["obs"].unbatch_all()[0]) - # print(input_dict["obs"]) - self._debug_batch_size = len(input_dict["obs"]["obs"].unbatch_all()) - if not input_dict["obs"]["obs"].unbatch_all()[0]: - return ( - np.zeros((self._debug_batch_size, 4)), - [], - ) - - results = [] - for obs in input_dict["obs"]["obs"].unbatch_all(): - batch = torch.cat(obs) - out = self.model({"obs": batch}) - print(out.size()) - - return np.zeros((self._debug_batch_size, 4)), state - - @override(TorchModelV2) - def value_function(self) -> TensorType: - return torch.zeros(self._debug_batch_size) diff --git a/godot_rl/download_utils/download_examples.py b/godot_rl/download_utils/download_examples.py index aaa8d791..09cd5c94 100644 --- a/godot_rl/download_utils/download_examples.py +++ b/godot_rl/download_utils/download_examples.py @@ -2,36 +2,35 @@ import os import shutil -from sys import platform -import wget from zipfile import ZipFile -BANCHES = {"4" : "main", - "3" : "godot3.5"} +import wget + +BRANCHES = {"4": "main", "3": "godot3.5"} + +BASE_URL = "https://github.com/edbeeching/godot_rl_agents_examples" -BASE_URL="https://github.com/edbeeching/godot_rl_agents_examples" def download_examples(): - #select branch + # select branch print("Select Godot version:") - for key in BANCHES.keys(): - print(f"{key} : {BANCHES[key]}") - + for key in BRANCHES.keys(): + print(f"{key} : {BRANCHES[key]}") + branch = input("Enter your choice: ") - BRANCH = BANCHES[branch] + BRANCH = BRANCHES[branch] os.makedirs("examples", exist_ok=True) - URL=f"{BASE_URL}/archive/refs/heads/{BRANCH}.zip" + URL = f"{BASE_URL}/archive/refs/heads/{BRANCH}.zip" print(f"downloading examples from {URL}") wget.download(URL, out="") print() - print(f"unzipping") - with ZipFile(f"{BRANCH}.zip", 'r') as zipObj: - # Extract all the contents of zip file in different directory - zipObj.extractall('examples/') - print(f"cleaning up") + print("unzipping") + with ZipFile(f"{BRANCH}.zip", "r") as zipObj: + # Extract all the contents of zip file in different directory + zipObj.extractall("examples/") + print("cleaning up") os.remove(f"{BRANCH}.zip") - print(f"moving files") + print("moving files") for file in os.listdir(f"examples/godot_rl_agents_examples-{BRANCH}"): shutil.move(f"examples/godot_rl_agents_examples-{BRANCH}/{file}", 
"examples") os.rmdir(f"examples/godot_rl_agents_examples-{BRANCH}") - \ No newline at end of file diff --git a/godot_rl/download_utils/download_godot_editor.py b/godot_rl/download_utils/download_godot_editor.py index 170e6c8e..c0bd0ae9 100644 --- a/godot_rl/download_utils/download_godot_editor.py +++ b/godot_rl/download_utils/download_godot_editor.py @@ -1,17 +1,15 @@ import os -import shutil from sys import platform -import wget from zipfile import ZipFile -BASE_URL="https://downloads.tuxfamily.org/godotengine/" -VERSIONS = { - "3": "3.5.1", - "4": "4.0" -} +import wget + +BASE_URL = "https://downloads.tuxfamily.org/godotengine/" +VERSIONS = {"3": "3.5.1", "4": "4.0"} MOST_RECENT_VERSION = "rc5" + def get_version(): while True: version = input("Which Godot version do you want to download (3 or 4)? ") @@ -19,6 +17,7 @@ def get_version(): return version print("Invalid version. Please enter 3 or 4.") + def download_editor(): version = get_version() VERSION = VERSIONS[version] @@ -27,17 +26,17 @@ def download_editor(): if VERSION == "4.0": NEW_BASE_URL = f"{BASE_URL}{VERSION}/{MOST_RECENT_VERSION}/" NAME = MOST_RECENT_VERSION - LINUX_FILENAME=f"Godot_v{VERSION}-{NAME}_linux.x86_64.zip" + LINUX_FILENAME = f"Godot_v{VERSION}-{NAME}_linux.x86_64.zip" if VERSION == "4.0": - MAC_FILENAME=f"Godot_v{VERSION}-{NAME}_macos.universal.zip" + MAC_FILENAME = f"Godot_v{VERSION}-{NAME}_macos.universal.zip" else: - MAC_FILENAME=f"Godot_v{VERSION}-{NAME}_osx.universal.64.zip" - WINDOWS_FILENAME=f"Godot_v{VERSION}-{NAME}_win64.exe.zip" + MAC_FILENAME = f"Godot_v{VERSION}-{NAME}_osx.universal.64.zip" + WINDOWS_FILENAME = f"Godot_v{VERSION}-{NAME}_win64.exe.zip" os.makedirs("editor", exist_ok=True) - FILENAME="" + FILENAME = "" if platform == "linux" or platform == "linux2": - FILENAME = LINUX_FILENAME + FILENAME = LINUX_FILENAME elif platform == "darwin": FILENAME = MAC_FILENAME elif platform == "win32" or platform == "win64": @@ -45,14 +44,14 @@ def download_editor(): else: raise NotImplementedError - URL=f"{NEW_BASE_URL}{FILENAME}" + URL = f"{NEW_BASE_URL}{FILENAME}" print(f"downloading editor {FILENAME} for platform: {platform}") wget.download(URL, out="") print() - print(f"unzipping") - with ZipFile(FILENAME, 'r') as zipObj: - # Extract all the contents of zip file in different directory - zipObj.extractall('editor/') - print(f"cleaning up") - os.remove(FILENAME) \ No newline at end of file + print("unzipping") + with ZipFile(FILENAME, "r") as zipObj: + # Extract all the contents of zip file in different directory + zipObj.extractall("editor/") + print("cleaning up") + os.remove(FILENAME) diff --git a/godot_rl/download_utils/from_hub.py b/godot_rl/download_utils/from_hub.py index b60a773b..c0bb36d3 100644 --- a/godot_rl/download_utils/from_hub.py +++ b/godot_rl/download_utils/from_hub.py @@ -1,5 +1,6 @@ import argparse import os + from huggingface_hub import Repository @@ -17,7 +18,7 @@ def main(): parser.add_argument( "-r", "--hf_repository", - help="Repo id of the dataset / environment repository from the Hugging Face Hub in the form user_name/repo_name", + help="Repo id of the dataset / environment repo from the Hugging Face Hub in the form user_name/repo_name", type=str, ) parser.add_argument( diff --git a/godot_rl/main.py b/godot_rl/main.py index f856e673..3de3027d 100644 --- a/godot_rl/main.py +++ b/godot_rl/main.py @@ -1,14 +1,14 @@ """ This is the main entrypoint to the Godot RL Agents interface -Example usage is best found in the documentation: +Example usage is best found in the documentation: 
https://github.com/edbeeching/godot_rl_agents/blob/main/docs/EXAMPLE_ENVIRONMENTS.md Hyperparameters and training algorithm can be defined in a .yaml file, see ppo_test.yaml as an example. Interactive Training: -With the Godot editor open, type gdrl in the terminal to launch training and +With the Godot editor open, type gdrl in the terminal to launch training and then press PLAY in the Godot editor. Training can be stopped with CTRL+C or by pressing STOP in the editor. @@ -25,52 +25,70 @@ try: from godot_rl.wrappers.ray_wrapper import rllib_training except ImportError as e: + error_message = str(e) + def rllib_training(args, extras): - print("Import error when trying to use rllib. If you have not installed the package, try: pip install godot-rl[rllib]") - print("Otherwise try fixing the error above.") + print("Import error importing rllib. If you have not installed the package, try: pip install godot-rl[rllib]") + print("Otherwise try fixing the error.", error_message) try: from godot_rl.wrappers.stable_baselines_wrapper import stable_baselines_training except ImportError as e: + error_message = str(e) + def stable_baselines_training(args, extras): - print( - "Import error when trying to use sb3. If you have not installed the package, try: pip install godot-rl[sb3]" - ) - print("Otherwise try fixing the error above.") + print("Import error importing sb3. If you have not installed the package, try: pip install godot-rl[sb3]") + print("Otherwise try fixing the error.", error_message) + try: - from godot_rl.wrappers.sample_factory_wrapper import sample_factory_training, sample_factory_enjoy + from godot_rl.wrappers.sample_factory_wrapper import sample_factory_enjoy, sample_factory_training except ImportError as e: + error_message = str(e) + def sample_factory_training(args, extras): print( - "Import error when trying to use sample-factory If you have not installed the package, try: pip install godot-rl[sf]" + "Import error importing sample-factory If you have not installed the package, try: pip install godot-rl[sf]" ) - print("Otherwise try fixing the error above.") + print("Otherwise try fixing the error.", error_message) def get_args(): parser = argparse.ArgumentParser(allow_abbrev=False) - parser.add_argument("--trainer", default="sb3", choices=["sb3", "sf", "rllib"], type=str, help="framework to use (rllib, sf, sb3)") + parser.add_argument( + "--trainer", default="sb3", choices=["sb3", "sf", "rllib"], type=str, help="framework to use (rllib, sf, sb3)" + ) parser.add_argument("--env_path", default=None, type=str, help="Godot binary to use") - parser.add_argument("--config_file", default="ppo_test.yaml", type=str, help="The yaml config file [only for rllib]") + parser.add_argument( + "--config_file", default="ppo_test.yaml", type=str, help="The yaml config file [only for rllib]" + ) parser.add_argument("--restore", default=None, type=str, help="the location of a checkpoint to restore from") parser.add_argument("--eval", default=False, action="store_true", help="whether to eval the model") parser.add_argument("--speedup", default=1, type=int, help="whether to speed up the physics in the env") parser.add_argument("--export", default=False, action="store_true", help="wheter to export the model") parser.add_argument("--num_gpus", default=None, type=int, help="Number of GPUs to use [only for rllib]") - parser.add_argument("--experiment_dir", default=None, type=str, help="The name of the the experiment directory, in which the tensorboard logs are getting stored") - 
parser.add_argument("--experiment_name", default="experiment", type=str, help="The name of the the experiment, which will be displayed in tensborboard") + parser.add_argument( + "--experiment_dir", + default=None, + type=str, + help="The name of the the experiment directory, in which the tensorboard logs are getting stored", + ) + parser.add_argument( + "--experiment_name", + default="experiment", + type=str, + help="The name of the the experiment, which will be displayed in tensborboard", + ) parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process") parser.add_argument("--seed", default=0, type=int, help="seed of the experiment") - - args, extras = parser.parse_known_args() + + args, extras = parser.parse_known_args() if args.experiment_dir is None: args.experiment_dir = f"logs/{args.trainer}" - + if args.trainer == "sf" and args.env_path is None: - print("WARNING: the sample-factory intergration is not designed to run in interactive mode, please export you game to use this trainer") - + print("WARNING: the sample-factory intergration is not designed to run in interactive mode, export you game") return args, extras diff --git a/godot_rl/wrappers/clean_rl_wrapper.py b/godot_rl/wrappers/clean_rl_wrapper.py index 0c13fdfe..0059dfc2 100644 --- a/godot_rl/wrappers/clean_rl_wrapper.py +++ b/godot_rl/wrappers/clean_rl_wrapper.py @@ -1,15 +1,15 @@ -import numpy as np +from typing import Any, Optional + import gymnasium as gym +import numpy as np from numpy import ndarray -from godot_rl.core.utils import lod_to_dol from godot_rl.core.godot_env import GodotEnv -from typing import Any, Dict, List, Optional, Tuple, Union +from godot_rl.core.utils import lod_to_dol class CleanRLGodotEnv: def __init__(self, env_path: Optional[str] = None, n_parallel: int = 1, seed: int = 0, **kwargs: object) -> None: - # If we are doing editor training, n_parallel must be 1 if env_path is None and n_parallel > 1: raise ValueError("You must provide the path to a exported game executable if n_parallel > 1") @@ -18,8 +18,10 @@ def __init__(self, env_path: Optional[str] = None, n_parallel: int = 1, seed: in port = kwargs.pop("port", GodotEnv.DEFAULT_PORT) # Create a list of GodotEnv instances - self.envs = [GodotEnv(env_path=env_path, convert_action_space=True, port=port + p, seed=seed + p, **kwargs) for - p in range(n_parallel)] + self.envs = [ + GodotEnv(env_path=env_path, convert_action_space=True, port=port + p, seed=seed + p, **kwargs) + for p in range(n_parallel) + ] # Store the number of parallel environments self.n_parallel = n_parallel @@ -29,7 +31,7 @@ def _check_valid_action_space(self) -> None: action_space = self.envs[0].action_space if isinstance(action_space, gym.spaces.Tuple): assert ( - len(action_space.spaces) == 1 + len(action_space.spaces) == 1 ), f"sb3 supports a single action space, this env contains multiple spaces {action_space}" def step(self, action: np.ndarray) -> tuple[ndarray, list[Any], list[Any], list[Any], list[Any]]: @@ -45,7 +47,7 @@ def step(self, action: np.ndarray) -> tuple[ndarray, list[Any], list[Any], list[ # Send actions to each environment for i in range(self.n_parallel): - self.envs[i].step_send(action[i * num_envs:(i + 1) * num_envs]) + self.envs[i].step_send(action[i * num_envs : (i + 1) * num_envs]) # Receive results from each environment for i in range(self.n_parallel): diff --git a/godot_rl/wrappers/onnx/stable_baselines_export.py b/godot_rl/wrappers/onnx/stable_baselines_export.py index f39d0b32..19f679f9 100644 --- 
a/godot_rl/wrappers/onnx/stable_baselines_export.py +++ b/godot_rl/wrappers/onnx/stable_baselines_export.py @@ -41,11 +41,12 @@ def export_ppo_model_as_onnx(ppo: PPO, onnx_model_path: str): opset_version=9, input_names=["obs", "state_ins"], output_names=["output", "state_outs"], - dynamic_axes={'obs' : {0 : 'batch_size'}, - 'state_ins' : {0 : 'batch_size'}, # variable length axes - 'output' : {0 : 'batch_size'}, - 'state_outs' : {0 : 'batch_size'}} - + dynamic_axes={ + "obs": {0: "batch_size"}, + "state_ins": {0: "batch_size"}, # variable length axes + "output": {0: "batch_size"}, + "state_outs": {0: "batch_size"}, + }, ) verify_onnx_export(ppo, onnx_model_path) @@ -59,7 +60,7 @@ def verify_onnx_export(ppo: PPO, onnx_model_path: str, num_tests=10): onnx.checker.check_model(onnx_model) sb3_model = ppo.policy.to("cpu") - ort_sess = ort.InferenceSession(onnx_model_path, providers=['CPUExecutionProvider']) + ort_sess = ort.InferenceSession(onnx_model_path, providers=["CPUExecutionProvider"]) for i in range(num_tests): obs = dict(ppo.observation_space.sample()) diff --git a/godot_rl/wrappers/ray_wrapper.py b/godot_rl/wrappers/ray_wrapper.py index e163fdd4..d1fc68a1 100644 --- a/godot_rl/wrappers/ray_wrapper.py +++ b/godot_rl/wrappers/ray_wrapper.py @@ -1,6 +1,6 @@ import os import pathlib -from typing import Callable, List, Optional, Tuple +from typing import List, Optional, Tuple import numpy as np import ray @@ -25,7 +25,6 @@ def __init__( timeout_wait=60, config=None, ) -> None: - self._env = GodotEnv( env_path=env_path, port=port, @@ -33,7 +32,7 @@ def __init__( show_window=show_window, framerate=framerate, action_repeat=action_repeat, - speedup=speedup + speedup=speedup, ) super().__init__( observation_space=self._env.observation_space, @@ -41,9 +40,11 @@ def __init__( num_envs=self._env.num_envs, ) - def vector_reset(self, *, seeds: Optional[List[int]] = None, options: Optional[List[dict]] = None) -> List[EnvObsType]: + def vector_reset( + self, *, seeds: Optional[List[int]] = None, options: Optional[List[dict]] = None + ) -> List[EnvObsType]: self.obs, info = self._env.reset() - return self.obs, info + return self.obs, info def vector_step( self, actions: List[EnvActionType] @@ -55,12 +56,13 @@ def vector_step( def get_unwrapped(self): return [self._env] - def reset_at(self, - index: Optional[int] = None, - *, - seed: Optional[int] = None, - options: Optional[dict] = None, - ) -> EnvObsType: + def reset_at( + self, + index: Optional[int] = None, + *, + seed: Optional[int] = None, + options: Optional[dict] = None, + ) -> EnvObsType: # the env is reset automatically, no need to reset it return self.obs[index], {} @@ -81,44 +83,48 @@ def register_env(): ) -def rllib_export(model_path): - #get path from the config file and remove the file name - path = model_path #full path with file name - path = path.split("/") #split the path into a list - path = path[:-1] #remove the file name from the list - #duplicate the path for the export - export_path = path.copy() - export_path.append("onnx") - export_path = "/".join(export_path) #join the list into a string - #duplicate the last element of the list - path.append(path[-1]) - #change format from checkpoint_000500 to checkpoint-500 - temp = path[-1].split("_") - temp = temp[-1] - #parse the number - temp = int(temp) - #back to string - temp = str(temp) - #join the string with the new format - path[-1] = "checkpoint-" + temp - path = "/".join(path) #join the list into a string - #best_checkpoint = results.get_best_checkpoint(results.trials[0], 
mode="max") - #print(f".. best checkpoint was: {best_checkpoint}") - - #From here on, the relevant part to exporting the model - new_trainer = PPOTrainer(config=exp["config"]) - new_trainer.restore(path) - #policy = new_trainer.get_policy() - new_trainer.export_policy_model(export_dir=export_path, onnx = 9) #This works for version 1.11.X - #Running with: gdrl --env_path envs/builds/JumperHard/jumper_hard.exe --export --restore envs/checkpoints/jumper_hard/checkpoint_000500/checkpoint-500 - #model = policy.model - #export the model to onnx using torch.onnx.export - #dummy_input = torch.randn(1, 3, 84, 84) - #input is dictionary with key "obs" and value is a tensor of shape [...,8] - #tensor = torch.randn([1, 2, 4, 6, 8, 10, 12, 14]) - #dummy_input = {"obs": tensor} - #torch.onnx.export(model, dummy_input, "model.onnx", verbose=True, - #dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}) +# TODO: fix this implementation +# def rllib_export(model_path): +# # get path from the config file and remove the file name +# path = model_path # full path with file name +# path = path.split("/") # split the path into a list +# path = path[:-1] # remove the file name from the list +# # duplicate the path for the export +# export_path = path.copy() +# export_path.append("onnx") +# export_path = "/".join(export_path) # join the list into a string +# # duplicate the last element of the list +# path.append(path[-1]) +# # change format from checkpoint_000500 to checkpoint-500 +# temp = path[-1].split("_") +# temp = temp[-1] +# # parse the number +# temp = int(temp) +# # back to string +# temp = str(temp) +# # join the string with the new format +# path[-1] = "checkpoint-" + temp +# path = "/".join(path) # join the list into a string +# # best_checkpoint = results.get_best_checkpoint(results.trials[0], mode="max") +# # print(f".. 
best checkpoint was: {best_checkpoint}") + +# # From here on, the relevant part to exporting the model +# new_trainer = PPOTrainer(config=exp["config"]) +# new_trainer.restore(path) +# # policy = new_trainer.get_policy() +# new_trainer.export_policy_model(export_dir=export_path, onnx=9) # This works for version 1.11.X + + +# Running with: gdrl --env_path envs/builds/JumperHard/jumper_hard.exe --export \ +# --restore envs/checkpoints/jumper_hard/checkpoint_000500/checkpoint-500 +# model = policy.model +# export the model to onnx using torch.onnx.export +# dummy_input = torch.randn(1, 3, 84, 84) +# input is dictionary with key "obs" and value is a tensor of shape [...,8] +# tensor = torch.randn([1, 2, 4, 6, 8, 10, 12, 14]) +# dummy_input = {"obs": tensor} +# torch.onnx.export(model, dummy_input, "model.onnx", verbose=True, +# dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}) def rllib_training(args, extras): @@ -135,7 +141,7 @@ def rllib_training(args, extras): run_name = exp["algorithm"] + "/editor" print("run_name", run_name) - if args.num_gpus != None: + if args.num_gpus is not None: exp["config"]["num_gpus"] = args.num_gpus if args.env_path is None: @@ -143,11 +149,10 @@ def rllib_training(args, extras): exp["config"]["num_workers"] = 1 checkpoint_freq = 10 - checkpoint_at_end = True - + exp["config"]["env_config"]["show_window"] = args.viz exp["config"]["env_config"]["speedup"] = args.speedup - + if args.eval or args.export: checkpoint_freq = 0 exp["config"]["env_config"]["show_window"] = True @@ -166,21 +171,22 @@ def rllib_training(args, extras): ray.init(num_gpus=exp["config"]["num_gpus"] or 1) if not args.export: - results = tune.run( - exp["algorithm"], - name=run_name, - config=exp["config"], - stop=exp["stop"], - verbose=3, - checkpoint_freq=checkpoint_freq, - checkpoint_at_end=not args.eval, - restore=args.restore, - local_dir=os.path.abspath(args.experiment_dir) or os.path.abspath("logs/rllib"), - trial_name_creator=lambda trial: f"{args.experiment_name}" if args.experiment_name else f"{trial.trainable_name}_{trial.trial_id}" - ) + tune.run( + exp["algorithm"], + name=run_name, + config=exp["config"], + stop=exp["stop"], + verbose=3, + checkpoint_freq=checkpoint_freq, + checkpoint_at_end=not args.eval, + restore=args.restore, + local_dir=os.path.abspath(args.experiment_dir) or os.path.abspath("logs/rllib"), + trial_name_creator=lambda trial: f"{args.experiment_name}" + if args.experiment_name + else f"{trial.trainable_name}_{trial.trial_id}", + ) if args.export: - rllib_export(args.restore) + raise NotImplementedError("Exporting is not (re)implemented yet") + # rllib_export(args.restore) ray.shutdown() - - diff --git a/godot_rl/wrappers/sample_factory_wrapper.py b/godot_rl/wrappers/sample_factory_wrapper.py index c38f8b44..2c01a984 100644 --- a/godot_rl/wrappers/sample_factory_wrapper.py +++ b/godot_rl/wrappers/sample_factory_wrapper.py @@ -1,15 +1,16 @@ import argparse from functools import partial -import random + import numpy as np +from gymnasium import Env from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args +from sample_factory.enjoy import enjoy from sample_factory.envs.env_utils import register_env from sample_factory.train import run_rl -from sample_factory.enjoy import enjoy from godot_rl.core.godot_env import GodotEnv from godot_rl.core.utils import lod_to_dol -from gymnasium import Env + class SampleFactoryEnvWrapperBatched(GodotEnv, Env): @property @@ -32,7 +33,6 @@ def step(self, action): @staticmethod def to_numpy(lod): - 
for d in lod: for k, v in d.items(): d[k] = np.array(v) @@ -51,6 +51,7 @@ def unwrapped(self): @property def num_agents(self): return self.num_envs + def reset(self, seed=None, options=None): obs, info = super().reset(seed=seed) return self.to_numpy(obs), info @@ -61,7 +62,6 @@ def step(self, action): @staticmethod def to_numpy(lod): - for d in lod: for k, v in d.items(): d[k] = np.array(v) @@ -72,7 +72,9 @@ def render(): return -def make_godot_env_func(env_path, full_env_name, cfg=None, env_config=None, render_mode=None, seed=0, speedup=1, viz=False): +def make_godot_env_func( + env_path, full_env_name, cfg=None, env_config=None, render_mode=None, seed=0, speedup=1, viz=False +): port = cfg.base_port print("BASE PORT ", cfg.base_port) show_window = False @@ -168,7 +170,7 @@ def parse_gdrl_args(args, argv=None, evaluation=False): add_gdrl_env_args(partial_cfg.env, parser, evaluation=evaluation) gdrl_override_defaults(partial_cfg.env, parser) final_cfg = parse_full_cfg(parser, argv) - + final_cfg.train_dir = args.experiment_dir or "logs/sf" final_cfg.experiment = args.experiment_name or final_cfg.experiment return final_cfg @@ -177,7 +179,7 @@ def parse_gdrl_args(args, argv=None, evaluation=False): def sample_factory_training(args, extras): register_gdrl_env(args) cfg = parse_gdrl_args(args=args, argv=extras, evaluation=args.eval) - #cfg.base_port = random.randint(20000, 22000) + # cfg.base_port = random.randint(20000, 22000) status = run_rl(cfg) return status diff --git a/godot_rl/wrappers/sbg_single_obs_wrapper.py b/godot_rl/wrappers/sbg_single_obs_wrapper.py index f4840cd1..d4136430 100644 --- a/godot_rl/wrappers/sbg_single_obs_wrapper.py +++ b/godot_rl/wrappers/sbg_single_obs_wrapper.py @@ -2,11 +2,15 @@ import gymnasium as gym import numpy as np + from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv +# A variant of the Stable Baselines Godot Env that only supports a single +# obs space from the dictionary - obs["obs"] by default. + +# This provides some basic support for using envs that have a single obs +# space with policies other than MultiInputPolicy. -# A variant of the Stable Baselines Godot Env that only supports a single obs space from the dictionary - obs["obs"] by default. -# This provides some basic support for using envs that have a single obs space with policies other than MultiInputPolicy. 
class SBGSingleObsEnv(StableBaselinesGodotEnv): def __init__(self, obs_key="obs", *args, **kwargs) -> None: diff --git a/godot_rl/wrappers/stable_baselines_wrapper.py b/godot_rl/wrappers/stable_baselines_wrapper.py index fb723e3d..8d7493cf 100644 --- a/godot_rl/wrappers/stable_baselines_wrapper.py +++ b/godot_rl/wrappers/stable_baselines_wrapper.py @@ -1,9 +1,10 @@ +from typing import Any, Dict, List, Optional, Tuple + import gymnasium as gym import numpy as np from stable_baselines3 import PPO from stable_baselines3.common.vec_env.base_vec_env import VecEnv from stable_baselines3.common.vec_env.vec_monitor import VecMonitor -from typing import Any, Dict, List, Optional, Tuple, Union from godot_rl.core.godot_env import GodotEnv from godot_rl.core.utils import can_import, lod_to_dol @@ -14,13 +15,16 @@ def __init__(self, env_path: Optional[str] = None, n_parallel: int = 1, seed: in # If we are doing editor training, n_parallel must be 1 if env_path is None and n_parallel > 1: raise ValueError("You must provide the path to a exported game executable if n_parallel > 1") - + # Define the default port port = kwargs.pop("port", GodotEnv.DEFAULT_PORT) # Create a list of GodotEnv instances - self.envs = [GodotEnv(env_path=env_path, convert_action_space=True, port=port+p, seed=seed+p, **kwargs) for p in range(n_parallel)] - + self.envs = [ + GodotEnv(env_path=env_path, convert_action_space=True, port=port + p, seed=seed + p, **kwargs) + for p in range(n_parallel) + ] + # Store the number of parallel environments self.n_parallel = n_parallel @@ -51,7 +55,7 @@ def step(self, action: np.ndarray) -> Tuple[Dict[str, np.ndarray], np.ndarray, n # Send actions to each environment for i in range(self.n_parallel): - self.envs[i].step_send(action[i*num_envs:(i+1)*num_envs]) + self.envs[i].step_send(action[i * num_envs : (i + 1) * num_envs]) # Receive results from each environment for i in range(self.n_parallel): @@ -109,12 +113,12 @@ def env_is_wrapped(self, wrapper_class: type, indices: Optional[List[int]] = Non def env_method(self): raise NotImplementedError() - def get_attr(self, attr_name: str, indices = None) -> List[Any]: + def get_attr(self, attr_name: str, indices=None) -> List[Any]: if attr_name == "render_mode": return [None for _ in range(self.num_envs)] raise AttributeError("get attr not fully implemented in godot-rl StableBaselinesWrapper") - def seed(self, seed = None): + def seed(self, seed=None): raise NotImplementedError() def set_attr(self): @@ -128,6 +132,7 @@ def step_wait(self) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, List # Wait for the results from the asynchronous step return self.results + def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) -> None: if can_import("ray"): print("WARNING, stable baselines and ray[rllib] are not compatable") diff --git a/setup.cfg b/setup.cfg index c0329fec..b0d7eb2a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,13 +35,12 @@ console_scripts = test = pytest>=6.0 pytest-xdist - dev = pytest>=6.0 pytest-xdist - black[jupyter]~=22.0 - flake8>=3.8.3 - isort>=5.0.0 + black + flake8 + isort pyyaml>=5.3.1 sf = @@ -53,3 +52,5 @@ rllib = cleanrl = wandb +[flake8] +ignore = E203, E501, E741, W503, W605 diff --git a/tests/benchmark_env.py b/tests/benchmark_env.py index 0043708a..d5cc4f6a 100644 --- a/tests/benchmark_env.py +++ b/tests/benchmark_env.py @@ -31,7 +31,6 @@ results = {} for framerate, port in zip(framerates, ports): - env = GodotEnv( env_path=env_path, port=port, @@ -44,7 +43,6 @@ action_space = env.action_space 
start = time.time() for i in range(N_STEPS): - actions = [action_space.sample() for _ in range(n_envs)] _ = env.step(actions) diff --git a/tests/test_action_space_preprocessor.py b/tests/test_action_space_preprocessor.py index 4496bbe5..a9650e2a 100644 --- a/tests/test_action_space_preprocessor.py +++ b/tests/test_action_space_preprocessor.py @@ -1,26 +1,24 @@ import pytest -from gymnasium.spaces import Tuple, Dict, Box, Discrete -from godot_rl.core.godot_env import GodotEnv +from gymnasium.spaces import Box, Discrete, Tuple + from godot_rl.core.utils import ActionSpaceProcessor -@pytest.mark.parametrize("action_space", + +@pytest.mark.parametrize( + "action_space", [ - Tuple([Box(-1,1, shape=[7]), Box(-1,1, shape=[11])]), - Tuple([Box(-1,1, shape=[7]), Discrete(2)]), + Tuple([Box(-1, 1, shape=[7]), Box(-1, 1, shape=[11])]), + Tuple([Box(-1, 1, shape=[7]), Discrete(2)]), Tuple([Discrete(2), Discrete(2)]), - Tuple([Discrete(2), Discrete(2), Box(-1,1, shape=[11])]), - ] - - - + Tuple([Discrete(2), Discrete(2), Box(-1, 1, shape=[11])]), + ], ) def test_action_space_preprocessor(action_space): - expected_output = 0 for space in action_space.spaces: if isinstance(space, Box): - assert len(space.shape) ==1 + assert len(space.shape) == 1 expected_output += space.shape[0] elif isinstance(space, Discrete): if space.n > 2: diff --git a/tests/test_call_method.py b/tests/test_call_method.py index 5a1d9fbb..cd0e78de 100644 --- a/tests/test_call_method.py +++ b/tests/test_call_method.py @@ -1,5 +1,3 @@ -import time - from godot_rl.core.godot_env import GodotEnv if __name__ == "__main__": diff --git a/tests/test_godot_env.py b/tests/test_godot_env.py index 2b9435af..7d7e9361 100644 --- a/tests/test_godot_env.py +++ b/tests/test_godot_env.py @@ -18,7 +18,6 @@ def test_env_ij(env_name, port, n_agents): env = GodotEnv(env_path=env_path, port=port) action_space = env.action_space - observation_space = env.observation_space n_envs = env.num_envs for j in range(2): @@ -37,12 +36,8 @@ def test_env_ij(env_name, port, n_agents): assert isinstance( reward[0], (float, int) ), f"The reward returned by 'step()' must be a float or int, and is {reward[0]} of type {type(reward[0])}" - assert isinstance( - term[0], bool - ), f"The 'done' signal {term[0]} {type(term[0])} must be a boolean" - assert isinstance( - info[0], dict - ), "The 'info' returned by 'step()' must be a python dictionary" + assert isinstance(term[0], bool), f"The 'done' signal {term[0]} {type(term[0])} must be a boolean" + assert isinstance(info[0], dict), "The 'info' returned by 'step()' must be a python dictionary" env.close() @@ -62,7 +57,6 @@ def test_env_ji(env_name, port, n_agents): env = GodotEnv(env_path=env_path, port=port) action_space = env.action_space - observation_space = env.observation_space n_envs = env.num_envs assert n_envs == n_agents for j in range(2): @@ -82,11 +76,7 @@ def test_env_ji(env_name, port, n_agents): assert isinstance( reward[0], (float, int) ), f"The reward returned by 'step()' must be a float or int, and is {reward[0]} of type {type(reward[0])}" - assert isinstance( - term[0], bool - ), f"The 'done' signal {term[0]} {type(term[0])} must be a boolean" - assert isinstance( - info[0], dict - ), "The 'info' returned by 'step()' must be a python dictionary" + assert isinstance(term[0], bool), f"The 'done' signal {term[0]} {type(term[0])} must be a boolean" + assert isinstance(info[0], dict), "The 'info' returned by 'step()' must be a python dictionary" env.close() diff --git a/tests/test_rllib.py b/tests/test_rllib.py 
index 574d2b82..3ed48a0e 100644 --- a/tests/test_rllib.py +++ b/tests/test_rllib.py @@ -2,13 +2,14 @@ from godot_rl.core.utils import cant_import + @pytest.mark.skipif(cant_import("ray"), reason="ray[rllib] is not available") def test_rllib_training(): - from godot_rl.wrappers.ray_wrapper import rllib_training from godot_rl.main import get_args + from godot_rl.wrappers.ray_wrapper import rllib_training + args, extras = get_args() args.config_file = "tests/fixtures/test_rllib.yaml" args.env_path = "examples/godot_rl_JumperHard/bin/JumperHard.x86_64" - rllib_training(args, extras) diff --git a/tests/test_sample_factory.py b/tests/test_sample_factory.py index eaa9c826..86e72b4e 100644 --- a/tests/test_sample_factory.py +++ b/tests/test_sample_factory.py @@ -2,16 +2,17 @@ from godot_rl.core.utils import cant_import + @pytest.mark.skipif(cant_import("sample_factory"), reason="sample_factory is not available") def test_sample_factory_training(): - from godot_rl.wrappers.sample_factory_wrapper import sample_factory_training from examples.sample_factory_example import get_args + from godot_rl.wrappers.sample_factory_wrapper import sample_factory_training + args, extras = get_args() args.env_path = "examples/godot_rl_JumperHard/bin/JumperHard.x86_64" extras = [] - extras.append('--env=gdrl') - extras.append('--train_for_env_steps=1000') - extras.append('--device=cpu') - + extras.append("--env=gdrl") + extras.append("--train_for_env_steps=1000") + extras.append("--device=cpu") + sample_factory_training(args, extras) - diff --git a/tests/test_sb3_onnx_export.py b/tests/test_sb3_onnx_export.py index d3160492..d452518d 100644 --- a/tests/test_sb3_onnx_export.py +++ b/tests/test_sb3_onnx_export.py @@ -22,8 +22,9 @@ ) def test_pytorch_vs_onnx(env_name, port): from stable_baselines3 import PPO - from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv + from godot_rl.wrappers.onnx.stable_baselines_export import export_ppo_model_as_onnx, verify_onnx_export + from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv env_path = f"examples/godot_rl_{env_name}/bin/{env_name}.x86_64" env = StableBaselinesGodotEnv(env_path, port=port) diff --git a/tests/test_sb3_training.py b/tests/test_sb3_training.py index 4b72cbc9..e0561a54 100644 --- a/tests/test_sb3_training.py +++ b/tests/test_sb3_training.py @@ -1,7 +1,7 @@ import pytest -from godot_rl.main import get_args from godot_rl.core.utils import can_import +from godot_rl.main import get_args @pytest.mark.skipif(can_import("ray"), reason="rllib and sb3 are not compatable") @@ -30,6 +30,4 @@ def test_sb3_training(env_name, port, n_parallel): args.speedup = 8 starting_port = port + n_parallel - stable_baselines_training( - args, extras, n_steps=2, port=starting_port, n_parallel=n_parallel - ) + stable_baselines_training(args, extras, n_steps=2, port=starting_port, n_parallel=n_parallel)