Merge pull request #168 from edbeeching/code-quality
Adds code quality
edbeeching authored Jan 24, 2024
2 parents cfaaf15 + ed8407f commit 39960d8
Showing 31 changed files with 427 additions and 438 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/quality.yml
@@ -0,0 +1,29 @@
name: Quality

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:

  check_code_quality:
    name: Check code quality
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Setup Python environment
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.10
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install ".[dev]"
      - name: Code quality
        run: |
          make quality
12 changes: 0 additions & 12 deletions .vscode/settings.json

This file was deleted.

15 changes: 7 additions & 8 deletions Makefile
@@ -1,15 +1,14 @@
.PHONY: quality style test unity-test

-# Check that source code meets quality standards
-quality:
-	black --check --line-length 119 --target-version py38 tests godot_rl
-	isort --check-only tests godot_rl
-	flake8 tests godot_rl
-
# Format source code automatically
style:
-	black --line-length 119 --target-version py38 tests godot_rl
-	isort tests godot_rl
+	black --line-length 120 --target-version py310 tests godot_rl examples
+	isort -w 120 tests godot_rl examples
+
+# Check that source code meets quality standards
+quality:
+	black --check --line-length 120 --target-version py310 tests godot_rl examples
+	isort -w 120 --check-only tests godot_rl examples
+	flake8 --max-line-length 120 tests godot_rl examples

# Run tests for the library
test:
27 changes: 27 additions & 0 deletions README.md
@@ -83,6 +83,33 @@ Godot RL Agents supports 4 different RL training frameworks, the links below det
- [CleanRL](docs/ADV_CLEAN_RL.md) (Windows, Mac, Linux)
- [Ray rllib](docs/ADV_RLLIB.md) (Windows, Mac, Linux)

## Contributing
We welcome new contributions to the library, such as:
- New environments made in Godot
- Improvements to the README files
- Additions to the Python codebase

Start by forking the repo, then clone it to your machine, create a venv, and perform an editable installation.

```
# If you want to PR, you should fork the lib or ask to be a contributor
git clone git@github.com:YOUR_USERNAME/godot_rl_agents.git
cd godot_rl_agents
python -m venv venv
source venv/bin/activate  # on Windows: venv\Scripts\activate
pip install -e ".[dev]"
# check tests run
make test
```

Then add your features.
Format your code with:
```
make style
make quality
```
Then make a PR against main on the original repo.


## FAQ

### Why have we developed Godot RL Agents?
22 changes: 12 additions & 10 deletions examples/clean_rl_example.py
@@ -4,14 +4,16 @@
import pathlib
import random
import time
-from distutils.util import strtobool
from collections import deque
+from distutils.util import strtobool

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions.normal import Normal
from torch.utils.tensorboard import SummaryWriter
+
from godot_rl.wrappers.clean_rl_wrapper import CleanRLGodotEnv


@@ -167,8 +169,9 @@ def get_action_and_value(self, x, action=None):

# env setup

-envs = env = CleanRLGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, seed=args.seed,
-                             n_parallel=args.n_parallel)
+envs = env = CleanRLGodotEnv(
+    env_path=args.env_path, show_window=args.viz, speedup=args.speedup, seed=args.seed, n_parallel=args.n_parallel
+)
args.num_envs = envs.num_envs
args.batch_size = int(args.num_envs * args.num_steps)
args.minibatch_size = int(args.batch_size // args.num_minibatches)
@@ -334,7 +337,6 @@

agent.eval().to("cpu")

-
class OnnxPolicy(torch.nn.Module):
    def __init__(self, actor_mean):
        super().__init__()
@@ -344,7 +346,6 @@ def forward(self, obs, state_ins):
        action_mean = self.actor_mean(obs)
        return action_mean, state_ins

-
onnx_policy = OnnxPolicy(agent.actor_mean)
dummy_input = torch.unsqueeze(torch.tensor(envs.single_observation_space.sample()), 0)
@@ -355,9 +356,10 @@ def forward(self, obs, state_ins):
    opset_version=15,
    input_names=["obs", "state_ins"],
    output_names=["output", "state_outs"],
-    dynamic_axes={'obs': {0: 'batch_size'},
-                  'state_ins': {0: 'batch_size'},  # variable length axes
-                  'output': {0: 'batch_size'},
-                  'state_outs': {0: 'batch_size'}}
-
+    dynamic_axes={
+        "obs": {0: "batch_size"},
+        "state_ins": {0: "batch_size"},  # variable length axes
+        "output": {0: "batch_size"},
+        "state_outs": {0: "batch_size"},
+    },
)
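The `dynamic_axes` mapping above marks the batch dimension of every input and output as variable, so the exported network accepts any batch size at inference time. As a rough usage sketch (not part of this commit), the exported file could be driven with `onnxruntime`; the model path, observation size, and the shape of `state_ins` are assumptions and depend on the environment and export:

```
# Hypothetical inference sketch — file name, observation size, and state
# shape are placeholders, not values from this repository.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("model.onnx")
obs = np.zeros((1, 8), dtype=np.float32)        # assumed observation vector of size 8
state_ins = np.zeros((1, 1), dtype=np.float32)  # placeholder recurrent state
action_mean, state_outs = session.run(
    ["output", "state_outs"], {"obs": obs, "state_ins": state_ins}
)
print(action_mean.shape)  # leading dimension follows the batch size of obs
```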
16 changes: 10 additions & 6 deletions examples/sample_factory_example.py
@@ -1,5 +1,6 @@
import argparse
-from godot_rl.wrappers.sample_factory_wrapper import sample_factory_training, sample_factory_enjoy
+
+from godot_rl.wrappers.sample_factory_wrapper import sample_factory_enjoy, sample_factory_training


def get_args():
@@ -10,8 +11,12 @@ def get_args():
    parser.add_argument("--seed", default=0, type=int, help="environment seed")
    parser.add_argument("--export", default=False, action="store_true", help="whether to export the model")
    parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
-    parser.add_argument("--experiment_dir", default="logs/sf", type=str,
-                        help="The name of the experiment directory, in which the tensorboard logs are getting stored")
+    parser.add_argument(
+        "--experiment_dir",
+        default="logs/sf",
+        type=str,
+        help="The name of the experiment directory, in which the tensorboard logs are getting stored",
+    )
    parser.add_argument(
        "--experiment_name",
        default="experiment",
@@ -22,14 +27,13 @@
    return parser.parse_known_args()


-
def main():
    args, extras = get_args()
    if args.eval:
        sample_factory_enjoy(args, extras)
    else:
        sample_factory_training(args, extras)


if __name__ == "__main__":
    main()
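Both branches receive `extras` because the script parses with `parse_known_args()`: flags the script does not define are collected rather than rejected, so they can be forwarded to the underlying trainer. A standalone illustration of that argparse behaviour (the extra flag name here is made up, not from this repo):

```
# Standalone illustration of parse_known_args(); "--made_up_flag" is hypothetical.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--seed", default=0, type=int)
args, extras = parser.parse_known_args(["--seed", "1", "--made_up_flag", "1000"])
print(args.seed)  # 1
print(extras)     # ['--made_up_flag', '1000'] — unknown flags survive for forwarding
```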
92 changes: 51 additions & 41 deletions examples/stable_baselines3_example.py
@@ -3,12 +3,13 @@
import pathlib
from typing import Callable

+from stable_baselines3 import PPO
+from stable_baselines3.common.callbacks import CheckpointCallback
+from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
+
from godot_rl.core.utils import can_import
+from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
from godot_rl.wrappers.onnx.stable_baselines_export import export_ppo_model_as_onnx
-from stable_baselines3 import PPO
-from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
-from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv

# To download the env source and binary:
# 1. gdrl.env_from_hub -r edbeeching/godot_rl_BallChase
@@ -28,42 +29,39 @@
    default="logs/sb3",
    type=str,
    help="The name of the experiment directory, in which the tensorboard logs and checkpoints (if enabled) are "
-    "getting stored."
+    "getting stored.",
)
parser.add_argument(
    "--experiment_name",
    default="experiment",
    type=str,
    help="The name of the experiment, which will be displayed in tensorboard and "
-         "for checkpoint directory and name (if enabled).",
+    "for checkpoint directory and name (if enabled).",
)
-parser.add_argument(
-    "--seed",
-    type=int,
-    default=0,
-    help="seed of the experiment"
-)
+parser.add_argument("--seed", type=int, default=0, help="seed of the experiment")
parser.add_argument(
    "--resume_model_path",
    default=None,
    type=str,
    help="The path to a model file previously saved using --save_model_path or a checkpoint saved using "
-         "--save_checkpoints_frequency. Use this to resume training or infer from a saved model.",
+    "--save_checkpoints_frequency. Use this to resume training or infer from a saved model.",
)
parser.add_argument(
    "--save_model_path",
    default=None,
    type=str,
    help="The path to use for saving the trained sb3 model after training is complete. Saved model can be used later "
-         "to resume training. Extension will be set to .zip",
+    "to resume training. Extension will be set to .zip",
)
parser.add_argument(
    "--save_checkpoint_frequency",
    default=None,
    type=int,
-    help=("If set, will save checkpoints every 'frequency' environment steps. "
-          "Requires a unique --experiment_name or --experiment_dir for each run. "
-          "Does not need --save_model_path to be set. "),
+    help=(
+        "If set, will save checkpoints every 'frequency' environment steps. "
+        "Requires a unique --experiment_name or --experiment_dir for each run. "
+        "Does not need --save_model_path to be set. "
+    ),
)
parser.add_argument(
    "--onnx_export_path",
@@ -76,34 +74,38 @@
    default=1_000_000,
    type=int,
    help="The number of environment steps to train for, default is 1_000_000. If resuming from a saved model, "
-         "it will continue training for this amount of steps from the saved state without counting previously trained "
-         "steps",
+    "it will continue training for this amount of steps from the saved state without counting previously trained "
+    "steps",
)
parser.add_argument(
    "--inference",
    default=False,
    action="store_true",
    help="Instead of training, it will run inference on a loaded model for --timesteps steps. "
-         "Requires --resume_model_path to be set."
+    "Requires --resume_model_path to be set.",
)
parser.add_argument(
    "--linear_lr_schedule",
    default=False,
    action="store_true",
    help="Use a linear LR schedule for training. If set, learning rate will decrease until it reaches 0 at "
-         "--timesteps"
-         "value. Note: On resuming training, the schedule will reset. If disabled, constant LR will be used."
+    "--timesteps"
+    "value. Note: On resuming training, the schedule will reset. If disabled, constant LR will be used.",
)
parser.add_argument(
    "--viz",
    action="store_true",
    help="If set, the simulation will be displayed in a window during training. Otherwise "
-         "training will run without rendering the simulation. This setting does not apply to in-editor training.",
-    default=False
+    "training will run without rendering the simulation. This setting does not apply to in-editor training.",
+    default=False,
)
parser.add_argument("--speedup", default=1, type=int, help="Whether to speed up the physics in the env")
-parser.add_argument("--n_parallel", default=1, type=int, help="How many instances of the environment executable to "
-                                                              "launch - requires --env_path to be set if > 1.")
+parser.add_argument(
+    "--n_parallel",
+    default=1,
+    type=int,
+    help="How many instances of the environment executable to " "launch - requires --env_path to be set if > 1.",
+)
args, extras = parser.parse_known_args()


@@ -136,19 +138,22 @@ def close_env():

# Prevent overwriting existing checkpoints when starting a new experiment if checkpoint saving is enabled
if args.save_checkpoint_frequency is not None and os.path.isdir(path_checkpoint):
-    raise RuntimeError(abs_path_checkpoint + " folder already exists. "
-                       "Use a different --experiment_dir, or --experiment_name,"
-                       "or if previous checkpoints are not needed anymore, "
-                       "remove the folder containing the checkpoints. ")
+    raise RuntimeError(
+        abs_path_checkpoint + " folder already exists. "
+        "Use a different --experiment_dir, or --experiment_name,"
+        "or if previous checkpoints are not needed anymore, "
+        "remove the folder containing the checkpoints. "
+    )

if args.inference and args.resume_model_path is None:
    raise parser.error("Using --inference requires --resume_model_path to be set.")

if args.env_path is None and args.viz:
    print("Info: Using --viz without --env_path set has no effect, in-editor training will always render.")

-env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, seed=args.seed, n_parallel=args.n_parallel,
-                              speedup=args.speedup)
+env = StableBaselinesGodotEnv(
+    env_path=args.env_path, show_window=args.viz, seed=args.seed, n_parallel=args.n_parallel, speedup=args.speedup
+)
env = VecMonitor(env)


@@ -177,13 +182,15 @@ def func(progress_remaining: float) -> float:

if args.resume_model_path is None:
    learning_rate = 0.0003 if not args.linear_lr_schedule else linear_schedule(0.0003)
-    model: PPO = PPO("MultiInputPolicy",
-                     env,
-                     ent_coef=0.0001,
-                     verbose=2,
-                     n_steps=32,
-                     tensorboard_log=args.experiment_dir,
-                     learning_rate=learning_rate)
+    model: PPO = PPO(
+        "MultiInputPolicy",
+        env,
+        ent_coef=0.0001,
+        verbose=2,
+        n_steps=32,
+        tensorboard_log=args.experiment_dir,
+        learning_rate=learning_rate,
+    )
else:
    path_zip = pathlib.Path(args.resume_model_path)
    print("Loading model: " + os.path.abspath(path_zip))
@@ -201,13 +208,16 @@ def func(progress_remaining: float) -> float:
    checkpoint_callback = CheckpointCallback(
        save_freq=(args.save_checkpoint_frequency // env.num_envs),
        save_path=path_checkpoint,
-        name_prefix=args.experiment_name
+        name_prefix=args.experiment_name,
    )
-    learn_arguments['callback'] = checkpoint_callback
+    learn_arguments["callback"] = checkpoint_callback
try:
    model.learn(**learn_arguments)
except KeyboardInterrupt:
-    print("Training interrupted by user. Will save if --save_model_path was used and/or export if --onnx_export_path was used.")
+    print(
+        """Training interrupted by user. Will save if --save_model_path was
+    used and/or export if --onnx_export_path was used."""
+    )

close_env()
handle_onnx_export()
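The hunk above references a `linear_schedule` helper whose definition is collapsed out of this diff. For context, a minimal sketch of what such a helper typically looks like in stable-baselines3 scripts (an assumption, not this file's exact code): SB3 calls the returned function with `progress_remaining`, which decays from 1.0 to 0.0 over training, so the learning rate falls linearly to zero at `--timesteps`.

```
# Sketch only — the actual helper in this file is not shown in the diff.
from typing import Callable


def linear_schedule(initial_value: float) -> Callable[[float], float]:
    def func(progress_remaining: float) -> float:
        # progress_remaining goes 1.0 -> 0.0, so LR goes initial_value -> 0
        return progress_remaining * initial_value

    return func
```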