From 022ba0139b45392c92920f0430d97e5b753f24e0 Mon Sep 17 00:00:00 2001
From: DONNOT Benjamin
Date: Wed, 10 Jan 2024 16:22:15 +0100
Subject: [PATCH 1/3] improving the doc to fix issue rte-france#568 [skip ci]

---
 docs/action.rst                        |  2 +-
 docs/environment.rst                   |  2 +-
 grid2op/Environment/baseEnv.py         | 59 ++++++++++++++++++++++++++
 grid2op/Environment/environment.py     |  8 ++++
 grid2op/Observation/baseObservation.py | 33 +++++++++++++-
 grid2op/tests/BaseBackendTest.py       |  2 +-
 6 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/docs/action.rst b/docs/action.rst
index a370d4d8b..90abdaa57 100644
--- a/docs/action.rst
+++ b/docs/action.rst
@@ -85,7 +85,7 @@ you want to perform on the grid. For more information you can consult the help o
 To avoid extremely verbose things, as of grid2op 1.5.0, we introduced some convenience
 functions to allow easier action construction. You can now do `act.load_set_bus = ...` instead of the previously way
-more verbose `act.update({"set_bus": {"loads_id": ...}}`
+more verbose `act.update({"set_bus": {"loads_id": ...}})`
 
 .. _action-module-examples:
diff --git a/docs/environment.rst b/docs/environment.rst
index 11cac0a59..88213ffec 100644
--- a/docs/environment.rst
+++ b/docs/environment.rst
@@ -101,7 +101,7 @@ be equivalent to starting into the "middle" of a video game. If that is the case
 Finally, you might have noticed that each call to "env.reset" might take a while. This can
 dramatically increase the training time, especially at the beginning. This is due to the fact that each time
 `env.reset` is called, the whole chronics is read from the hard drive. If you want to lower this
-impact then you might consult the `Optimize the data pipeline`_ section.
+impact then you might consult the :ref:`environment-module-data-pipeline` page of the doc.
 
 .. _environment-module-chronics-info:
diff --git a/grid2op/Environment/baseEnv.py b/grid2op/Environment/baseEnv.py
index 3f8ccf757..e0cbeea38 100644
--- a/grid2op/Environment/baseEnv.py
+++ b/grid2op/Environment/baseEnv.py
@@ -84,6 +84,65 @@ class BaseEnv(GridObjects, RandomObject, ABC):
     The documentation is showed here to document the common attributes of an
     "BaseEnvironment".
 
+    .. _danger-env-ownership:
+
+    Notes
+    ------------------------
+
+    Note on environment data ownership
+
+    .. danger::
+
+        A non-pythonic decision has been implemented in grid2op for various reasons: an environment
+        owns everything created from it.
+
+        This means that if you (or the python interpreter) deletes the environment, you might not
+        be able to use some data generated with this environment.
+
+        More precisely, you cannot do something like:
+
+        .. code-block:: python
+
+            import grid2op
+            env = grid2op.make("l2rpn_case14_sandbox")
+
+            saved_obs = []
+
+            obs = env.reset()
+            saved_obs.append(obs)
+            obs2, reward, done, info = env.step(env.action_space())
+            saved_obs.append(obs2)
+
+            saved_obs[0].simulate(env.action_space())  # works
+            del env
+            saved_obs[0].simulate(env.action_space())  # DOES NOT WORK
+
+        It will raise an error like `Grid2OpException EnvError "This environment is closed. You cannot use it anymore."`
+
+        This will also happen if you do things inside functions, for example like this:
+
+        .. code-block:: python
+
+            import grid2op
+
+            def foo(manager):
+                env = grid2op.make("l2rpn_case14_sandbox")
+                obs = env.reset()
+                manager.append(obs)
+                obs2, reward, done, info = env.step(env.action_space())
+                manager.append(obs2)
+                manager[0].simulate(env.action_space())  # works
+                return manager
+
+            manager = []
+            manager = foo(manager)
+            manager[0].simulate(env.action_space())  # DOES NOT WORK
+
+        The same error is raised because the environment `env` is automatically deleted by python when the function `foo` ends
+        (it might work in some cases, if the function is called before the variable `env` is actually deleted, but you
+        should not rely on this behaviour.)
+
     Attributes
     ----------
diff --git a/grid2op/Environment/environment.py b/grid2op/Environment/environment.py
index ed613c4a9..09df00f97 100644
--- a/grid2op/Environment/environment.py
+++ b/grid2op/Environment/environment.py
@@ -37,6 +37,14 @@ class Environment(BaseEnv):
     """
     This class is the grid2op implementation of the "Environment" entity in the RL framework.
 
+    .. danger::
+
+        Long story short, once an environment is deleted, you cannot use anything it "holds" including,
+        but not limited to, the capacity to perform `obs.simulate(...)` even if the `obs` is still
+        referenced.
+
+        See :ref:`danger-env-ownership` (first danger block).
+
     Attributes
     ----------
diff --git a/grid2op/Observation/baseObservation.py b/grid2op/Observation/baseObservation.py
index 1c0a259fa..6b401502b 100644
--- a/grid2op/Observation/baseObservation.py
+++ b/grid2op/Observation/baseObservation.py
@@ -4207,7 +4207,18 @@ def get_forecast_env(self) -> "grid2op.Environment.Environment":
             f_obs_3, *_ = forecast_env.step(act_3)
             sim_obs_3, *_ = sim_obs_2.simulate(act_3)  # f_obs_3 should be sim_obs_3
-
+
+        .. danger::
+
+            Long story short, once an environment (and a forecast_env is one)
+            is deleted, you cannot use anything it "holds" including,
+            but not limited to, the capacity to perform `obs.simulate(...)` even if the `obs` is still
+            referenced.
+
+            See :ref:`danger-env-ownership` (first danger block).
+
+            This caused issue https://github.com/rte-france/Grid2Op/issues/568 for example.
+
         Returns
         -------
         grid2op.Environment.Environment
@@ -4339,8 +4350,26 @@ def get_env_from_external_forecasts(self,
         you have 100 rows then you have 100 steps.
 
         .. warning::
-            We remind that, if you provide some forecasts, it is expected that
+            We remind that, if you provide some forecasts, it is expected that
             they allow some powerflow to converge.
+            The balance between total generation on one side and total demand and losses on the other should also
+            be "as close as possible" to reduce modeling artifacts (introduced by the backend; grid2op does not check
+            anything here).
+
+            Finally, make sure that your input data meet the constraints on the generators (pmin, pmax and ramps),
+            otherwise you might end up with incorrect behaviour. Grid2op supposes that data fed to it
+            is consistent with its model. If not, it's "undefined behaviour".
+
+        .. danger::
+
+            Long story short, once an environment (and a forecast_env is one)
+            is deleted, you cannot use anything it "holds" including,
+            but not limited to, the capacity to perform `obs.simulate(...)` even if the `obs` is still
+            referenced.
+
+            See :ref:`danger-env-ownership` (first danger block).
+            This caused issue https://github.com/rte-france/Grid2Op/issues/568 for example.
+
         Examples
         --------
         A typical use might look like
diff --git a/grid2op/tests/BaseBackendTest.py b/grid2op/tests/BaseBackendTest.py
index 3a3bb46e6..b8f99b617 100644
--- a/grid2op/tests/BaseBackendTest.py
+++ b/grid2op/tests/BaseBackendTest.py
@@ -2741,7 +2741,7 @@ def test_issue_134(self):
             }
         )
         obs, reward, done, info = env.step(action)
-        assert not done
+        assert not done, f"Episode should not have ended here, error: {info['exception']}"
         assert obs.line_status[LINE_ID] == False
         assert obs.topo_vect[obs.line_or_pos_topo_vect[LINE_ID]] == -1
         assert obs.topo_vect[obs.line_ex_pos_topo_vect[LINE_ID]] == -1

From 3c5196f7fcf1e5b2a595889eb5158da8cbff1a6e Mon Sep 17 00:00:00 2001
From: DONNOT Benjamin
Date: Wed, 10 Jan 2024 16:37:04 +0100
Subject: [PATCH 2/3] addressing issue rte-france#569 [skip ci]

---
 CHANGELOG.rst                    |  1 +
 grid2op/Episode/EpisodeReplay.py | 13 +++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index e24666144..1486819a0 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -39,6 +39,7 @@ Change Log
 - [FIXED] `PandapowerBackend`, when no slack was present
 - [FIXED] the "BaseBackendTest" class did not correctly detect divergence in most cases (which lead to weird
   bugs in failing tests)
+- [FIXED] an issue with imageio having deprecated the `fps` kwarg (see https://github.com/rte-france/Grid2Op/issues/569)
 - [ADDED] A type of environment that does not perform the "emulation of the protections"
   for some part of the grid (`MaskedEnvironment`) see https://github.com/rte-france/Grid2Op/issues/571
 - [IMPROVED] the CI speed: by not testing every possible numpy version but only most ancient and most recent
diff --git a/grid2op/Episode/EpisodeReplay.py b/grid2op/Episode/EpisodeReplay.py
index 6213bf450..b21f21fc7 100644
--- a/grid2op/Episode/EpisodeReplay.py
+++ b/grid2op/Episode/EpisodeReplay.py
@@ -102,15 +102,15 @@ def replay_episode(
         load_info: ``str``
             Defaults to "p". What kind of values to show on loads.
-            Can be oneof `["p", "v", None]`
+            Can be one of `["p", "v", None]`
 
         gen_info: ``str``
             Defaults to "p". What kind of values to show on generators.
-            Can be oneof `["p", "v", None]`
+            Can be one of `["p", "v", None]`
 
         line_info: ``str``
             Defaults to "rho". What kind of values to show on lines.
-            Can be oneof `["rho", "a", "p", "v", None]`
+            Can be one of `["rho", "a", "p", "v", None]`
 
         resolution: ``tuple``
             Defaults to (1280, 720). The resolution to use for the gif.
@@ -187,7 +187,12 @@ def replay_episode( # Export all frames as gif if enabled if gif_name is not None and len(frames) > 0: try: - imageio.mimwrite(gif_path, frames, fps=fps) + try: + # with imageio > 2.5 you need to compute the duration + imageio.mimwrite(gif_path, frames, duration=1000./fps) + except TypeError: + # imageio <= 2.5 can be given fps directly + imageio.mimwrite(gif_path, frames, fps=fps) # Try to compress try: from pygifsicle import optimize From f1310c5538e3f277e364e8816609483071271bda Mon Sep 17 00:00:00 2001 From: DONNOT Benjamin Date: Thu, 11 Jan 2024 12:31:23 +0100 Subject: [PATCH 3/3] implementing some tests and some bugfix for MaskedEnvironment --- grid2op/Environment/maskedEnvironment.py | 21 +- grid2op/Runner/runner.py | 1 + grid2op/tests/test_MaskedEnvironment.py | 283 +++++++++++------------ 3 files changed, 153 insertions(+), 152 deletions(-) diff --git a/grid2op/Environment/maskedEnvironment.py b/grid2op/Environment/maskedEnvironment.py index 7b2ad5cea..b97bf986c 100644 --- a/grid2op/Environment/maskedEnvironment.py +++ b/grid2op/Environment/maskedEnvironment.py @@ -27,7 +27,14 @@ class MaskedEnvironment(Environment): # TODO heritage ou alors on met un truc d .. warning:: At time of writing, the behaviour of "obs.simulate" is not modified """ - CAN_SKIP_TS = False # some steps can be more than one time steps + # some kind of infinity value + # NB we multiply np.finfo(dt_float).max by a small number (1e-7) to avoid overflow + # indeed, _hard_overflow_threshold is multiply by the flow on the lines + INF_VAL_THM_LIM = 1e-7 * np.finfo(dt_float).max + + # some kind of infinity value + INF_VAL_TS_OVERFLOW_ALLOW = np.iinfo(dt_int).max - 1 + def __init__(self, grid2op_env: Union[Environment, dict], lines_of_interest): @@ -38,7 +45,7 @@ def __init__(self, elif isinstance(grid2op_env, dict): super().__init__(**grid2op_env) else: - raise EnvError(f"For TimedOutEnvironment you need to provide " + raise EnvError(f"For MaskedEnvironment you need to provide " f"either an Environment or a dict " f"for grid2op_env. 
You provided: {type(grid2op_env)}") @@ -62,10 +69,8 @@ def _make_lines_of_interest(self, lines_of_interest): def _reset_vectors_and_timings(self): super()._reset_vectors_and_timings() - self._hard_overflow_threshold[~self._lines_of_interest] = 1e-7 * np.finfo(dt_float).max # some kind of infinity value - # NB we multiply np.finfo(dt_float).max by a small number to avoid overflow - # indeed, _hard_overflow_threshold is multiply by the flow on the lines - self._nb_timestep_overflow_allowed[~self._lines_of_interest] = np.iinfo(dt_int).max - 1 # some kind of infinity value + self._hard_overflow_threshold[~self._lines_of_interest] = type(self).INF_VAL_THM_LIM + self._nb_timestep_overflow_allowed[~self._lines_of_interest] = type(self).INF_VAL_TS_OVERFLOW_ALLOW def get_kwargs(self, with_backend=True, with_chronics_handler=True): res = {} @@ -79,6 +84,10 @@ def get_params_for_runner(self): res["other_env_kwargs"] = {"lines_of_interest": copy.deepcopy(self._lines_of_interest)} return res + def _custom_deepcopy_for_copy(self, new_obj): + super()._custom_deepcopy_for_copy(new_obj) + new_obj._lines_of_interest = copy.deepcopy(self._lines_of_interest) + @classmethod def init_obj_from_kwargs(cls, other_env_kwargs, diff --git a/grid2op/Runner/runner.py b/grid2op/Runner/runner.py index c790b0883..59747a116 100644 --- a/grid2op/Runner/runner.py +++ b/grid2op/Runner/runner.py @@ -1137,6 +1137,7 @@ def run( returned list are not necessarily sorted by this value) - "cum_reward" the cumulative reward obtained by the :attr:`Runner.Agent` on this episode i - "nb_time_step": the number of time steps played in this episode. + - "total_step": the total number of time steps possible in this episode. - "episode_data" : [Optional] The :class:`EpisodeData` corresponding to this episode run only if `add_detailed_output=True` - "add_nb_highres_sim": [Optional] The estimated number of calls to high resolution simulator made diff --git a/grid2op/tests/test_MaskedEnvironment.py b/grid2op/tests/test_MaskedEnvironment.py index 11cd2f96a..41ed76110 100644 --- a/grid2op/tests/test_MaskedEnvironment.py +++ b/grid2op/tests/test_MaskedEnvironment.py @@ -20,8 +20,9 @@ MultiDiscreteActSpace) -class TestMaskedEnvironment(unittest.TestCase): - def get_mask(self): +class TestMaskedEnvironment(unittest.TestCase): + @staticmethod + def get_mask(): mask = np.full(20, fill_value=False, dtype=bool) mask[[0, 1, 4, 2, 3, 6, 5]] = True # THT part return mask @@ -30,9 +31,9 @@ def setUp(self) -> None: with warnings.catch_warnings(): warnings.filterwarnings("ignore") self.env_in = MaskedEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__), - lines_of_interest=self.get_mask()) + lines_of_interest=TestMaskedEnvironment.get_mask()) self.env_out = MaskedEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__), - lines_of_interest=~self.get_mask()) + lines_of_interest=~TestMaskedEnvironment.get_mask()) self.line_id = 3 th_lim = self.env_in.get_thermal_limit() * 2. # avoid all problem in general th_lim[self.line_id] /= 10. 
# make sure to get trouble in line 3 @@ -41,10 +42,11 @@ def setUp(self) -> None: # env_out: line is out of the area self.env_out.set_thermal_limit(th_lim) - self._init_env(self.env_in) - self._init_env(self.env_out) - - def _init_env(self, env): + TestMaskedEnvironment._init_env(self.env_in) + TestMaskedEnvironment._init_env(self.env_out) + + @staticmethod + def _init_env(env): env.set_id(0) env.seed(0) env.reset() @@ -69,13 +71,13 @@ def test_ok(self): obs_out, reward, done, info = self.env_out.step(act) if i < 2: # 2 : 2 full steps already assert obs_in.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}" - assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}" + assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}" else: # cooldown applied for line 3: # - it disconnect stuff in `self.env_in` # - it does not affect anything in `self.env_out` assert not obs_in.line_status[self.line_id] - assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}" + assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}" def test_reset(self): # timestep_overflow should be 0 initially even if the flow is too high @@ -84,155 +86,144 @@ def test_reset(self): assert obs.rho[self.line_id] > 1. -class TestTimedOutEnvironmentCpy(TestMaskedEnvironment): +class TestMaskedEnvironmentCpy(TestMaskedEnvironment): def setUp(self) -> None: super().setUp() - init_int = self.env_in.copy() - init_out = self.env_out.copy() - self.env0 = self.env_in.copy() - self.env1 = self.env_out.copy() + init_int = self.env_in + init_out = self.env_out + self.env_in = self.env_in.copy() + self.env_out = self.env_out.copy() init_int.close() init_out.close() -# class TestTOEnvRunner(unittest.TestCase): -# def get_timeout_ms(self): -# return 200 - -# def setUp(self) -> None: -# with warnings.catch_warnings(): -# warnings.filterwarnings("ignore") -# self.env1 = TimedOutEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__), -# time_out_ms=self.get_timeout_ms()) -# params = self.env1.parameters -# params.NO_OVERFLOW_DISCONNECTION = True -# self.env1.change_parameters(params) -# self.cum_reward = 645.70208 -# self.max_iter = 10 +class TestMaskedEnvironmentRunner(unittest.TestCase): + def setUp(self) -> None: + TestMaskedEnvironment.setUp(self) + self.max_iter = 10 -# def tearDown(self) -> None: -# self.env1.close() -# return super().tearDown() + def tearDown(self) -> None: + self.env_in.close() + self.env_out.close() + return super().tearDown() -# def test_runner_can_make(self): -# runner = Runner(**self.env1.get_params_for_runner()) -# env2 = runner.init_env() -# assert isinstance(env2, TimedOutEnvironment) -# assert env2.time_out_ms == self.get_timeout_ms() - -# def test_runner_noskip(self): -# agent = AgentOK(self.env1) -# runner = Runner(**self.env1.get_params_for_runner(), -# agentClass=None, -# agentInstance=agent) -# res = runner.run(nb_episode=1, -# max_iter=self.max_iter) -# _, _, cum_reward, timestep, max_ts = res[0] -# assert abs(cum_reward - self.cum_reward) <= 1e-5 - -# def test_runner_skip1(self): -# agent = AgentKO(self.env1) -# runner = Runner(**self.env1.get_params_for_runner(), -# agentClass=None, -# agentInstance=agent) -# res = runner.run(nb_episode=1, -# 
max_iter=self.max_iter) -# _, _, cum_reward, timestep, max_ts = res[0] -# assert abs(cum_reward - self.cum_reward) <= 1e-5 - -# def test_runner_skip2(self): -# agent = AgentKO2(self.env1) -# runner = Runner(**self.env1.get_params_for_runner(), -# agentClass=None, -# agentInstance=agent) -# res = runner.run(nb_episode=1, -# max_iter=self.max_iter) -# _, _, cum_reward, timestep, max_ts = res[0] -# assert abs(cum_reward - self.cum_reward) <= 1e-5 - -# def test_runner_skip2_2ep(self): -# agent = AgentKO2(self.env1) -# runner = Runner(**self.env1.get_params_for_runner(), -# agentClass=None, -# agentInstance=agent) -# res = runner.run(nb_episode=2, -# max_iter=self.max_iter) -# _, _, cum_reward, timestep, max_ts = res[0] -# assert abs(cum_reward - self.cum_reward) <= 1e-5 -# _, _, cum_reward, timestep, max_ts = res[1] -# assert abs(cum_reward - 648.90795) <= 1e-5 - - -# class TestTOEnvGym(unittest.TestCase): -# def get_timeout_ms(self): -# return 400. + def test_runner_can_make(self): + runner = Runner(**self.env_in.get_params_for_runner()) + env2 = runner.init_env() + assert isinstance(env2, MaskedEnvironment) + assert (env2._lines_of_interest == self.env_in._lines_of_interest).all() + + def test_runner(self): + # create the runner + runner_in = Runner(**self.env_in.get_params_for_runner()) + runner_out = Runner(**self.env_out.get_params_for_runner()) + res_in, *_ = runner_in.run(nb_episode=1, max_iter=self.max_iter, env_seeds=[0], episode_id=[0], add_detailed_output=True) + res_out, *_ = runner_out.run(nb_episode=1, max_iter=self.max_iter, env_seeds=[0], episode_id=[0], add_detailed_output=True) + res_in2, *_ = runner_in.run(nb_episode=1, max_iter=self.max_iter, env_seeds=[0], episode_id=[0]) + # check correct results are obtained when agregated + assert res_in[3] == 10 + assert res_in2[3] == 10 + assert res_out[3] == 10 + assert np.allclose(res_in[2], 645.4992065) + assert np.allclose(res_in2[2], 645.4992065) + assert np.allclose(res_out[2], 645.7020874) + + # check detailed results + ep_data_in = res_in[-1] + ep_data_out = res_out[-1] + for i in range(self.max_iter + 1): + obs_in = ep_data_in.observations[i] + obs_out = ep_data_out.observations[i] + if i < 3: + assert obs_in.timestep_overflow[self.line_id] == i, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}" + assert obs_out.timestep_overflow[self.line_id] == i, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}" + else: + # cooldown applied for line 3: + # - it disconnect stuff in `self.env_in` + # - it does not affect anything in `self.env_out` + assert not obs_in.line_status[self.line_id], f"error for step {i}: line is not disconnected" + assert obs_out.timestep_overflow[self.line_id] == i, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}" -# def setUp(self) -> None: -# with warnings.catch_warnings(): -# warnings.filterwarnings("ignore") -# self.env1 = TimedOutEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__), -# time_out_ms=self.get_timeout_ms()) + + +class TestMaskedEnvironmentGym(unittest.TestCase): + def setUp(self) -> None: + TestMaskedEnvironment.setUp(self) -# def tearDown(self) -> None: -# self.env1.close() -# return super().tearDown() + def tearDown(self) -> None: + self.env_in.close() + self.env_out.close() + return super().tearDown() -# def test_gym_with_step(self): -# """test the step function also makes the 'do nothing'""" -# self.skipTest("On docker execution time is too unstable") -# env_gym = GymEnv(self.env1) -# env_gym.reset() - -# 
agentok = AgentOK(env_gym) -# for i in range(10): -# act = agentok.act_gym(None, None, None) -# for k in act: -# act[k][:] = 0 -# *_, info = env_gym.step(act) -# assert info["nb_do_nothing"] == 0 -# assert info["nb_do_nothing_made"] == 0 -# assert env_gym.init_env._nb_dn_last == 0 - -# env_gym.reset() -# agentko = AgentKO1(env_gym) -# for i in range(10): -# act = agentko.act_gym(None, None, None) -# for k in act: -# act[k][:] = 0 -# *_, info = env_gym.step(act) -# assert info["nb_do_nothing"] == 1 -# assert info["nb_do_nothing_made"] == 1 -# assert env_gym.init_env._nb_dn_last == 1 + def _aux_run_envs(self, act, env_gym_in, env_gym_out): + for i in range(10): + obs_in, reward, done, truncated, info = env_gym_in.step(act) + obs_out, reward, done, truncated, info = env_gym_out.step(act) + if i < 2: # 2 : 2 full steps already + assert obs_in["timestep_overflow"][self.line_id] == i + 1, f"error for step {i}: {obs_in['timestep_overflow'][self.line_id]}" + assert obs_out['timestep_overflow'][self.line_id] == i + 1, f"error for step {i}: {obs_out['timestep_overflow'][self.line_id]}" + else: + # cooldown applied for line 3: + # - it disconnect stuff in `self.env_in` + # - it does not affect anything in `self.env_out` + assert not obs_in["line_status"][self.line_id] + assert obs_out["timestep_overflow"][self.line_id] == i + 1, f"error for step {i}: {obs_out['timestep_overflow'][self.line_id]}" + + def test_gym_with_step(self): + """test the step function also disconnects (or not) the lines""" + env_gym_in = GymEnv(self.env_in) + env_gym_out = GymEnv(self.env_out) + act = {} + self._aux_run_envs(act, env_gym_in, env_gym_out) + env_gym_in.reset() + env_gym_out.reset() + self._aux_run_envs(act, env_gym_in, env_gym_out) -# def test_gym_normal(self): -# """test I can create the gym env""" -# env_gym = GymEnv(self.env1) -# env_gym.reset() - -# def test_gym_box(self): -# """test I can create the gym env with box ob space and act space""" -# env_gym = GymEnv(self.env1) -# with warnings.catch_warnings(): -# warnings.filterwarnings("ignore") -# env_gym.action_space = BoxGymActSpace(self.env1.action_space) -# env_gym.observation_space = BoxGymObsSpace(self.env1.observation_space) -# env_gym.reset() - -# def test_gym_discrete(self): -# """test I can create the gym env with discrete act space""" -# env_gym = GymEnv(self.env1) -# with warnings.catch_warnings(): -# warnings.filterwarnings("ignore") -# env_gym.action_space = DiscreteActSpace(self.env1.action_space) -# env_gym.reset() + def test_gym_normal(self): + """test I can create the gym env""" + env_gym = GymEnv(self.env_in) + env_gym.reset() + + def test_gym_box(self): + """test I can create the gym env with box ob space and act space""" + env_gym_in = GymEnv(self.env_in) + env_gym_out = GymEnv(self.env_out) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env_gym_in.action_space = BoxGymActSpace(self.env_in.action_space) + env_gym_in.observation_space = BoxGymObsSpace(self.env_in.observation_space) + env_gym_out.action_space = BoxGymActSpace(self.env_out.action_space) + env_gym_out.observation_space = BoxGymObsSpace(self.env_out.observation_space) + env_gym_in.reset() + env_gym_out.reset() + + def test_gym_discrete(self): + """test I can create the gym env with discrete act space""" + env_gym_in = GymEnv(self.env_in) + env_gym_out = GymEnv(self.env_out) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env_gym_in.action_space = DiscreteActSpace(self.env_in.action_space) + env_gym_out.action_space = 
DiscreteActSpace(self.env_out.action_space) + env_gym_in.reset() + env_gym_out.reset() + act = 0 + self._aux_run_envs(act, env_gym_in, env_gym_out) + -# def test_gym_multidiscrete(self): -# """test I can create the gym env with multi discrete act space""" -# env_gym = GymEnv(self.env1) -# with warnings.catch_warnings(): -# warnings.filterwarnings("ignore") -# env_gym.action_space = MultiDiscreteActSpace(self.env1.action_space) -# env_gym.reset() + def test_gym_multidiscrete(self): + """test I can create the gym env with multi discrete act space""" + env_gym_in = GymEnv(self.env_in) + env_gym_out = GymEnv(self.env_out) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env_gym_in.action_space = MultiDiscreteActSpace(self.env_in.action_space) + env_gym_out.action_space = MultiDiscreteActSpace(self.env_out.action_space) + env_gym_in.reset() + env_gym_out.reset() + act = env_gym_in.action_space.sample() + act[:] = 0 + self._aux_run_envs(act, env_gym_in, env_gym_out) if __name__ == "__main__":