Commit

init v0.2.0
huangshiyu13 authored Dec 20, 2023
2 parents e864a08 + 2b798c0 commit a50c041
Showing 15 changed files with 74 additions and 61 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -28,7 +28,7 @@
[![Embark](https://img.shields.io/badge/discord-OpenRL-%237289da.svg?logo=discord)](https://discord.gg/qMbVT2qBhr)
[![slack badge](https://img.shields.io/badge/Slack-join-blueviolet?logo=slack&amp)](https://join.slack.com/t/openrlhq/shared_invite/zt-1tqwpvthd-Eeh0IxQ~DIaGqYXoW2IUQg)

OpenRL-v0.1.10 is updated on Oct 27, 2023
OpenRL-v0.2.0 is updated on Dec 20, 2023

The main branch is the latest version of OpenRL, which is under active development. If you just want to have a try with
OpenRL, you can switch to the stable branch.
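
For context, a minimal training sketch in the spirit of OpenRL's quickstart, assuming the `make`, `PPONet`, and `PPOAgent` entry points that also appear in the files changed below (exact module paths and arguments may vary between versions):

from openrl.envs.common import make
from openrl.modules.common import PPONet as Net
from openrl.runners.common import PPOAgent as Agent

env = make("CartPole-v1", env_num=9)  # vectorized environment with 9 parallel copies
net = Net(env)                        # PPO network built from the environment spaces
agent = Agent(net)                    # PPO agent wrapping the network
agent.train(total_time_steps=20000)   # run training for 20k environment steps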
9 changes: 4 additions & 5 deletions examples/envpool/envpool_wrappers.py
@@ -9,8 +9,7 @@
from packaging import version
from stable_baselines3.common.vec_env import VecEnvWrapper as BaseWrapper
from stable_baselines3.common.vec_env import VecMonitor
from stable_baselines3.common.vec_env.base_vec_env import (VecEnvObs,
VecEnvStepReturn)
from stable_baselines3.common.vec_env.base_vec_env import VecEnvObs, VecEnvStepReturn

is_legacy_gym = version.parse(gym.__version__) < version.parse("0.26.0")

@@ -114,9 +113,9 @@ def __init__(

if is_wrapped_with_monitor:
warnings.warn(
"The environment is already wrapped with a `Monitor` wrapper"
"but you are wrapping it with a `VecMonitor` wrapper, the `Monitor` statistics will be"
"overwritten by the `VecMonitor` ones.",
"The environment is already wrapped with a `Monitor` wrapperbut you are"
" wrapping it with a `VecMonitor` wrapper, the `Monitor` statistics"
" will beoverwritten by the `VecMonitor` ones.",
UserWarning,
)

11 changes: 7 additions & 4 deletions examples/envpool/make_env.py
@@ -5,9 +5,12 @@
import envpool
from gymnasium import Env


from openrl.envs.vec_env import (AsyncVectorEnv, RewardWrapper,
SyncVectorEnv, VecMonitorWrapper)
from openrl.envs.vec_env import (
AsyncVectorEnv,
RewardWrapper,
SyncVectorEnv,
VecMonitorWrapper,
)
from openrl.envs.vec_env.vec_info import VecInfoFactory
from openrl.envs.wrappers.base_wrapper import BaseWrapper
from openrl.rewards import RewardFactory
@@ -76,7 +79,7 @@ def make_envpool_envs(
assert kwargs.get("env_type") in ["gym", "dm", "gymnasium"]
kwargs["envpool"] = True

if 'env_wrappers' in kwargs:
if "env_wrappers" in kwargs:
env_wrappers = kwargs.pop("env_wrappers")
else:
env_wrappers = []
4 changes: 2 additions & 2 deletions examples/envpool/train_ppo.py
@@ -16,10 +16,10 @@

""""""
import numpy as np

from openrl.configs.config import create_config_parser
from make_env import make

from examples.envpool.envpool_wrappers import VecAdapter, VecMonitor
from openrl.configs.config import create_config_parser
from openrl.modules.common import PPONet as Net
from openrl.modules.common.ppo_net import PPONet as Net
from openrl.runners.common import PPOAgent as Agent
2 changes: 1 addition & 1 deletion openrl/__init__.py
@@ -1,5 +1,5 @@
__TITLE__ = "openrl"
__VERSION__ = "v0.1.10"
__VERSION__ = "v0.2.0"
__DESCRIPTION__ = "Distributed Deep RL Framework"
__AUTHOR__ = "OpenRL Contributors"
__EMAIL__ = "huangshiyu@4paradigm.com"
2 changes: 1 addition & 1 deletion openrl/envs/common/build_envs.py
@@ -69,4 +69,4 @@ def _make_env() -> Env:
return _make_env

env_fns = [create_env(env_id, env_num, need_env_id) for env_id in range(env_num)]
return env_fns
return env_fns
2 changes: 1 addition & 1 deletion openrl/envs/common/registration.py
@@ -173,4 +173,4 @@ def make(
vec_info_class = VecInfoFactory.get_vec_info_class(vec_info_class, env)
env = VecMonitorWrapper(vec_info_class, env)

return env
return env
23 changes: 12 additions & 11 deletions openrl/envs/nlp/daily_dialog_env.py
@@ -72,16 +72,18 @@ def __init__(
# set the observation and action space here
self._vocab_size = self.tokenizer.vocab_size

self.observation_space = DictSpace({
"input_encoded_pt": spaces.Box(
low=0,
high=self._vocab_size,
shape=(self._max_text_length + self.max_steps,),
),
"input_attention_mask_pt": spaces.Box(
low=0, high=1, shape=(self._max_text_length + self.max_steps,)
),
})
self.observation_space = DictSpace(
{
"input_encoded_pt": spaces.Box(
low=0,
high=self._vocab_size,
shape=(self._max_text_length + self.max_steps,),
),
"input_attention_mask_pt": spaces.Box(
low=0, high=1, shape=(self._max_text_length + self.max_steps,)
),
}
)
self.action_space = Discrete(n=self._vocab_size)
# see https://github.com/huggingface/transformers/issues/4875 : rounding up to nearest power of 2 for better GPU efficiency
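
# --- Illustrative sketch, not part of this commit: the comment above cites rounding
# a vocabulary-sized dimension up to the nearest power of 2 for GPU efficiency.
# One way to compute such a rounding (helper name is hypothetical):
def _next_power_of_two(n: int) -> int:
    return 1 if n <= 1 else 1 << (n - 1).bit_length()

assert _next_power_of_two(50257) == 65536  # e.g. GPT-2's 50257-token vocabulary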

@@ -112,7 +114,6 @@ def __init__(
self.reward_function = None

def set_reward(self, reward_fn=None):

self.reward_function = reward_fn

def step_word(self, word: str) -> Tuple[Dict[str, torch.tensor], int, bool, dict]:
22 changes: 12 additions & 10 deletions openrl/envs/nlp/fake_dialog_env.py
@@ -30,16 +30,18 @@ def __init__(
# set the observation and action space here
self._vocab_size = 2

self.observation_space = DictSpace({
"input_encoded_pt": spaces.Box(
low=0,
high=self._vocab_size,
shape=(self._max_text_length + self.max_steps,),
),
"input_attention_mask_pt": spaces.Box(
low=0, high=1, shape=(self._max_text_length + self.max_steps,)
),
})
self.observation_space = DictSpace(
{
"input_encoded_pt": spaces.Box(
low=0,
high=self._vocab_size,
shape=(self._max_text_length + self.max_steps,),
),
"input_attention_mask_pt": spaces.Box(
low=0, high=1, shape=(self._max_text_length + self.max_steps,)
),
}
)
self.action_space = Discrete(n=self._vocab_size)

n = 2
5 changes: 2 additions & 3 deletions openrl/envs/nlp/rewards/intent.py
@@ -41,10 +41,9 @@ def __init__(
self.use_model_parallel = False

if intent_model == "builtin_intent":

self._device = "cpu"
self.use_data_parallel = False
self.use_data_parallel = False

from transformers import GPT2Config, GPT2LMHeadModel

class TestTokenizer:
15 changes: 8 additions & 7 deletions openrl/envs/nlp/rewards/kl_penalty.py
@@ -47,10 +47,9 @@ def __init__(

# reference model
if ref_model == "builtin_ref":

self.device = "cpu"
self.use_data_parallel = False
self.use_data_parallel = False

from transformers import GPT2Config, GPT2LMHeadModel

config = GPT2Config()
@@ -146,10 +145,12 @@ def __call__(
rew = -self._alpha * kl_div
infos = []
for kl in kl_div:
infos.append({
"alpha": self._alpha,
"kl_div": kl.mean(),
})
infos.append(
{
"alpha": self._alpha,
"kl_div": kl.mean(),
}
)
return rew, infos

def _prepare_inputs_for_model(
24 changes: 14 additions & 10 deletions openrl/envs/nlp/utils/metrics/meteor.py
@@ -88,16 +88,20 @@ def _info(self):
citation=_CITATION,
inputs_description=_KWARGS_DESCRIPTION,
features=[
datasets.Features({
"predictions": datasets.Value("string", id="sequence"),
"references": datasets.Sequence(
datasets.Value("string", id="sequence"), id="references"
),
}),
datasets.Features({
"predictions": datasets.Value("string", id="sequence"),
"references": datasets.Value("string", id="sequence"),
}),
datasets.Features(
{
"predictions": datasets.Value("string", id="sequence"),
"references": datasets.Sequence(
datasets.Value("string", id="sequence"), id="references"
),
}
),
datasets.Features(
{
"predictions": datasets.Value("string", id="sequence"),
"references": datasets.Value("string", id="sequence"),
}
),
],
codebase_urls=[
"https://github.com/nltk/nltk/blob/develop/nltk/translate/meteor_score.py"
1 change: 0 additions & 1 deletion openrl/modules/networks/policy_network_gpt.py
@@ -46,7 +46,6 @@ def __init__(
disable_drop_out: bool = True,
extra_args=None,
) -> None:

self.device = device
self.use_fp16 = cfg.use_fp16
self.use_deepspeed = cfg.use_deepspeed
1 change: 0 additions & 1 deletion openrl/modules/networks/value_network_gpt.py
@@ -44,7 +44,6 @@ def __init__(
device=torch.device("cpu"),
extra_args=None,
):

self.device = device

self.use_fp16 = cfg.use_fp16
12 changes: 9 additions & 3 deletions openrl/modules/utils/valuenorm.py
@@ -24,9 +24,15 @@ def __init__(
self.per_element_update = per_element_update
self.tpdv = dict(dtype=torch.float32, device=device)

self.running_mean = nn.Parameter(torch.zeros(input_shape), requires_grad=False).to(**self.tpdv)
self.running_mean_sq = nn.Parameter(torch.zeros(input_shape), requires_grad=False).to(**self.tpdv)
self.debiasing_term = nn.Parameter(torch.tensor(0.0), requires_grad=False).to(**self.tpdv)
self.running_mean = nn.Parameter(
torch.zeros(input_shape), requires_grad=False
).to(**self.tpdv)
self.running_mean_sq = nn.Parameter(
torch.zeros(input_shape), requires_grad=False
).to(**self.tpdv)
self.debiasing_term = nn.Parameter(torch.tensor(0.0), requires_grad=False).to(
**self.tpdv
)

# self.running_mean = nn.Parameter(torch.zeros(input_shape), requires_grad=False)
# self.running_mean_sq = nn.Parameter(