From f1310c5538e3f277e364e8816609483071271bda Mon Sep 17 00:00:00 2001 From: DONNOT Benjamin Date: Thu, 11 Jan 2024 12:31:23 +0100 Subject: [PATCH] implementing some tests and some bugfix for MaskedEnvironment --- grid2op/Environment/maskedEnvironment.py | 21 +- grid2op/Runner/runner.py | 1 + grid2op/tests/test_MaskedEnvironment.py | 283 +++++++++++------------ 3 files changed, 153 insertions(+), 152 deletions(-) diff --git a/grid2op/Environment/maskedEnvironment.py b/grid2op/Environment/maskedEnvironment.py index 7b2ad5cea..b97bf986c 100644 --- a/grid2op/Environment/maskedEnvironment.py +++ b/grid2op/Environment/maskedEnvironment.py @@ -27,7 +27,14 @@ class MaskedEnvironment(Environment): # TODO heritage ou alors on met un truc d .. warning:: At time of writing, the behaviour of "obs.simulate" is not modified """ - CAN_SKIP_TS = False # some steps can be more than one time steps + # some kind of infinity value + # NB we multiply np.finfo(dt_float).max by a small number (1e-7) to avoid overflow + # indeed, _hard_overflow_threshold is multiply by the flow on the lines + INF_VAL_THM_LIM = 1e-7 * np.finfo(dt_float).max + + # some kind of infinity value + INF_VAL_TS_OVERFLOW_ALLOW = np.iinfo(dt_int).max - 1 + def __init__(self, grid2op_env: Union[Environment, dict], lines_of_interest): @@ -38,7 +45,7 @@ def __init__(self, elif isinstance(grid2op_env, dict): super().__init__(**grid2op_env) else: - raise EnvError(f"For TimedOutEnvironment you need to provide " + raise EnvError(f"For MaskedEnvironment you need to provide " f"either an Environment or a dict " f"for grid2op_env. You provided: {type(grid2op_env)}") @@ -62,10 +69,8 @@ def _make_lines_of_interest(self, lines_of_interest): def _reset_vectors_and_timings(self): super()._reset_vectors_and_timings() - self._hard_overflow_threshold[~self._lines_of_interest] = 1e-7 * np.finfo(dt_float).max # some kind of infinity value - # NB we multiply np.finfo(dt_float).max by a small number to avoid overflow - # indeed, _hard_overflow_threshold is multiply by the flow on the lines - self._nb_timestep_overflow_allowed[~self._lines_of_interest] = np.iinfo(dt_int).max - 1 # some kind of infinity value + self._hard_overflow_threshold[~self._lines_of_interest] = type(self).INF_VAL_THM_LIM + self._nb_timestep_overflow_allowed[~self._lines_of_interest] = type(self).INF_VAL_TS_OVERFLOW_ALLOW def get_kwargs(self, with_backend=True, with_chronics_handler=True): res = {} @@ -79,6 +84,10 @@ def get_params_for_runner(self): res["other_env_kwargs"] = {"lines_of_interest": copy.deepcopy(self._lines_of_interest)} return res + def _custom_deepcopy_for_copy(self, new_obj): + super()._custom_deepcopy_for_copy(new_obj) + new_obj._lines_of_interest = copy.deepcopy(self._lines_of_interest) + @classmethod def init_obj_from_kwargs(cls, other_env_kwargs, diff --git a/grid2op/Runner/runner.py b/grid2op/Runner/runner.py index c790b0883..59747a116 100644 --- a/grid2op/Runner/runner.py +++ b/grid2op/Runner/runner.py @@ -1137,6 +1137,7 @@ def run( returned list are not necessarily sorted by this value) - "cum_reward" the cumulative reward obtained by the :attr:`Runner.Agent` on this episode i - "nb_time_step": the number of time steps played in this episode. + - "total_step": the total number of time steps possible in this episode. - "episode_data" : [Optional] The :class:`EpisodeData` corresponding to this episode run only if `add_detailed_output=True` - "add_nb_highres_sim": [Optional] The estimated number of calls to high resolution simulator made diff --git a/grid2op/tests/test_MaskedEnvironment.py b/grid2op/tests/test_MaskedEnvironment.py index 11cd2f96a..41ed76110 100644 --- a/grid2op/tests/test_MaskedEnvironment.py +++ b/grid2op/tests/test_MaskedEnvironment.py @@ -20,8 +20,9 @@ MultiDiscreteActSpace) -class TestMaskedEnvironment(unittest.TestCase): - def get_mask(self): +class TestMaskedEnvironment(unittest.TestCase): + @staticmethod + def get_mask(): mask = np.full(20, fill_value=False, dtype=bool) mask[[0, 1, 4, 2, 3, 6, 5]] = True # THT part return mask @@ -30,9 +31,9 @@ def setUp(self) -> None: with warnings.catch_warnings(): warnings.filterwarnings("ignore") self.env_in = MaskedEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__), - lines_of_interest=self.get_mask()) + lines_of_interest=TestMaskedEnvironment.get_mask()) self.env_out = MaskedEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__), - lines_of_interest=~self.get_mask()) + lines_of_interest=~TestMaskedEnvironment.get_mask()) self.line_id = 3 th_lim = self.env_in.get_thermal_limit() * 2. # avoid all problem in general th_lim[self.line_id] /= 10. # make sure to get trouble in line 3 @@ -41,10 +42,11 @@ def setUp(self) -> None: # env_out: line is out of the area self.env_out.set_thermal_limit(th_lim) - self._init_env(self.env_in) - self._init_env(self.env_out) - - def _init_env(self, env): + TestMaskedEnvironment._init_env(self.env_in) + TestMaskedEnvironment._init_env(self.env_out) + + @staticmethod + def _init_env(env): env.set_id(0) env.seed(0) env.reset() @@ -69,13 +71,13 @@ def test_ok(self): obs_out, reward, done, info = self.env_out.step(act) if i < 2: # 2 : 2 full steps already assert obs_in.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}" - assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}" + assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}" else: # cooldown applied for line 3: # - it disconnect stuff in `self.env_in` # - it does not affect anything in `self.env_out` assert not obs_in.line_status[self.line_id] - assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}" + assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}" def test_reset(self): # timestep_overflow should be 0 initially even if the flow is too high @@ -84,155 +86,144 @@ def test_reset(self): assert obs.rho[self.line_id] > 1. -class TestTimedOutEnvironmentCpy(TestMaskedEnvironment): +class TestMaskedEnvironmentCpy(TestMaskedEnvironment): def setUp(self) -> None: super().setUp() - init_int = self.env_in.copy() - init_out = self.env_out.copy() - self.env0 = self.env_in.copy() - self.env1 = self.env_out.copy() + init_int = self.env_in + init_out = self.env_out + self.env_in = self.env_in.copy() + self.env_out = self.env_out.copy() init_int.close() init_out.close() -# class TestTOEnvRunner(unittest.TestCase): -# def get_timeout_ms(self): -# return 200 - -# def setUp(self) -> None: -# with warnings.catch_warnings(): -# warnings.filterwarnings("ignore") -# self.env1 = TimedOutEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__), -# time_out_ms=self.get_timeout_ms()) -# params = self.env1.parameters -# params.NO_OVERFLOW_DISCONNECTION = True -# self.env1.change_parameters(params) -# self.cum_reward = 645.70208 -# self.max_iter = 10 +class TestMaskedEnvironmentRunner(unittest.TestCase): + def setUp(self) -> None: + TestMaskedEnvironment.setUp(self) + self.max_iter = 10 -# def tearDown(self) -> None: -# self.env1.close() -# return super().tearDown() + def tearDown(self) -> None: + self.env_in.close() + self.env_out.close() + return super().tearDown() -# def test_runner_can_make(self): -# runner = Runner(**self.env1.get_params_for_runner()) -# env2 = runner.init_env() -# assert isinstance(env2, TimedOutEnvironment) -# assert env2.time_out_ms == self.get_timeout_ms() - -# def test_runner_noskip(self): -# agent = AgentOK(self.env1) -# runner = Runner(**self.env1.get_params_for_runner(), -# agentClass=None, -# agentInstance=agent) -# res = runner.run(nb_episode=1, -# max_iter=self.max_iter) -# _, _, cum_reward, timestep, max_ts = res[0] -# assert abs(cum_reward - self.cum_reward) <= 1e-5 - -# def test_runner_skip1(self): -# agent = AgentKO(self.env1) -# runner = Runner(**self.env1.get_params_for_runner(), -# agentClass=None, -# agentInstance=agent) -# res = runner.run(nb_episode=1, -# max_iter=self.max_iter) -# _, _, cum_reward, timestep, max_ts = res[0] -# assert abs(cum_reward - self.cum_reward) <= 1e-5 - -# def test_runner_skip2(self): -# agent = AgentKO2(self.env1) -# runner = Runner(**self.env1.get_params_for_runner(), -# agentClass=None, -# agentInstance=agent) -# res = runner.run(nb_episode=1, -# max_iter=self.max_iter) -# _, _, cum_reward, timestep, max_ts = res[0] -# assert abs(cum_reward - self.cum_reward) <= 1e-5 - -# def test_runner_skip2_2ep(self): -# agent = AgentKO2(self.env1) -# runner = Runner(**self.env1.get_params_for_runner(), -# agentClass=None, -# agentInstance=agent) -# res = runner.run(nb_episode=2, -# max_iter=self.max_iter) -# _, _, cum_reward, timestep, max_ts = res[0] -# assert abs(cum_reward - self.cum_reward) <= 1e-5 -# _, _, cum_reward, timestep, max_ts = res[1] -# assert abs(cum_reward - 648.90795) <= 1e-5 - - -# class TestTOEnvGym(unittest.TestCase): -# def get_timeout_ms(self): -# return 400. + def test_runner_can_make(self): + runner = Runner(**self.env_in.get_params_for_runner()) + env2 = runner.init_env() + assert isinstance(env2, MaskedEnvironment) + assert (env2._lines_of_interest == self.env_in._lines_of_interest).all() + + def test_runner(self): + # create the runner + runner_in = Runner(**self.env_in.get_params_for_runner()) + runner_out = Runner(**self.env_out.get_params_for_runner()) + res_in, *_ = runner_in.run(nb_episode=1, max_iter=self.max_iter, env_seeds=[0], episode_id=[0], add_detailed_output=True) + res_out, *_ = runner_out.run(nb_episode=1, max_iter=self.max_iter, env_seeds=[0], episode_id=[0], add_detailed_output=True) + res_in2, *_ = runner_in.run(nb_episode=1, max_iter=self.max_iter, env_seeds=[0], episode_id=[0]) + # check correct results are obtained when agregated + assert res_in[3] == 10 + assert res_in2[3] == 10 + assert res_out[3] == 10 + assert np.allclose(res_in[2], 645.4992065) + assert np.allclose(res_in2[2], 645.4992065) + assert np.allclose(res_out[2], 645.7020874) + + # check detailed results + ep_data_in = res_in[-1] + ep_data_out = res_out[-1] + for i in range(self.max_iter + 1): + obs_in = ep_data_in.observations[i] + obs_out = ep_data_out.observations[i] + if i < 3: + assert obs_in.timestep_overflow[self.line_id] == i, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}" + assert obs_out.timestep_overflow[self.line_id] == i, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}" + else: + # cooldown applied for line 3: + # - it disconnect stuff in `self.env_in` + # - it does not affect anything in `self.env_out` + assert not obs_in.line_status[self.line_id], f"error for step {i}: line is not disconnected" + assert obs_out.timestep_overflow[self.line_id] == i, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}" -# def setUp(self) -> None: -# with warnings.catch_warnings(): -# warnings.filterwarnings("ignore") -# self.env1 = TimedOutEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__), -# time_out_ms=self.get_timeout_ms()) + + +class TestMaskedEnvironmentGym(unittest.TestCase): + def setUp(self) -> None: + TestMaskedEnvironment.setUp(self) -# def tearDown(self) -> None: -# self.env1.close() -# return super().tearDown() + def tearDown(self) -> None: + self.env_in.close() + self.env_out.close() + return super().tearDown() -# def test_gym_with_step(self): -# """test the step function also makes the 'do nothing'""" -# self.skipTest("On docker execution time is too unstable") -# env_gym = GymEnv(self.env1) -# env_gym.reset() - -# agentok = AgentOK(env_gym) -# for i in range(10): -# act = agentok.act_gym(None, None, None) -# for k in act: -# act[k][:] = 0 -# *_, info = env_gym.step(act) -# assert info["nb_do_nothing"] == 0 -# assert info["nb_do_nothing_made"] == 0 -# assert env_gym.init_env._nb_dn_last == 0 - -# env_gym.reset() -# agentko = AgentKO1(env_gym) -# for i in range(10): -# act = agentko.act_gym(None, None, None) -# for k in act: -# act[k][:] = 0 -# *_, info = env_gym.step(act) -# assert info["nb_do_nothing"] == 1 -# assert info["nb_do_nothing_made"] == 1 -# assert env_gym.init_env._nb_dn_last == 1 + def _aux_run_envs(self, act, env_gym_in, env_gym_out): + for i in range(10): + obs_in, reward, done, truncated, info = env_gym_in.step(act) + obs_out, reward, done, truncated, info = env_gym_out.step(act) + if i < 2: # 2 : 2 full steps already + assert obs_in["timestep_overflow"][self.line_id] == i + 1, f"error for step {i}: {obs_in['timestep_overflow'][self.line_id]}" + assert obs_out['timestep_overflow'][self.line_id] == i + 1, f"error for step {i}: {obs_out['timestep_overflow'][self.line_id]}" + else: + # cooldown applied for line 3: + # - it disconnect stuff in `self.env_in` + # - it does not affect anything in `self.env_out` + assert not obs_in["line_status"][self.line_id] + assert obs_out["timestep_overflow"][self.line_id] == i + 1, f"error for step {i}: {obs_out['timestep_overflow'][self.line_id]}" + + def test_gym_with_step(self): + """test the step function also disconnects (or not) the lines""" + env_gym_in = GymEnv(self.env_in) + env_gym_out = GymEnv(self.env_out) + act = {} + self._aux_run_envs(act, env_gym_in, env_gym_out) + env_gym_in.reset() + env_gym_out.reset() + self._aux_run_envs(act, env_gym_in, env_gym_out) -# def test_gym_normal(self): -# """test I can create the gym env""" -# env_gym = GymEnv(self.env1) -# env_gym.reset() - -# def test_gym_box(self): -# """test I can create the gym env with box ob space and act space""" -# env_gym = GymEnv(self.env1) -# with warnings.catch_warnings(): -# warnings.filterwarnings("ignore") -# env_gym.action_space = BoxGymActSpace(self.env1.action_space) -# env_gym.observation_space = BoxGymObsSpace(self.env1.observation_space) -# env_gym.reset() - -# def test_gym_discrete(self): -# """test I can create the gym env with discrete act space""" -# env_gym = GymEnv(self.env1) -# with warnings.catch_warnings(): -# warnings.filterwarnings("ignore") -# env_gym.action_space = DiscreteActSpace(self.env1.action_space) -# env_gym.reset() + def test_gym_normal(self): + """test I can create the gym env""" + env_gym = GymEnv(self.env_in) + env_gym.reset() + + def test_gym_box(self): + """test I can create the gym env with box ob space and act space""" + env_gym_in = GymEnv(self.env_in) + env_gym_out = GymEnv(self.env_out) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env_gym_in.action_space = BoxGymActSpace(self.env_in.action_space) + env_gym_in.observation_space = BoxGymObsSpace(self.env_in.observation_space) + env_gym_out.action_space = BoxGymActSpace(self.env_out.action_space) + env_gym_out.observation_space = BoxGymObsSpace(self.env_out.observation_space) + env_gym_in.reset() + env_gym_out.reset() + + def test_gym_discrete(self): + """test I can create the gym env with discrete act space""" + env_gym_in = GymEnv(self.env_in) + env_gym_out = GymEnv(self.env_out) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env_gym_in.action_space = DiscreteActSpace(self.env_in.action_space) + env_gym_out.action_space = DiscreteActSpace(self.env_out.action_space) + env_gym_in.reset() + env_gym_out.reset() + act = 0 + self._aux_run_envs(act, env_gym_in, env_gym_out) + -# def test_gym_multidiscrete(self): -# """test I can create the gym env with multi discrete act space""" -# env_gym = GymEnv(self.env1) -# with warnings.catch_warnings(): -# warnings.filterwarnings("ignore") -# env_gym.action_space = MultiDiscreteActSpace(self.env1.action_space) -# env_gym.reset() + def test_gym_multidiscrete(self): + """test I can create the gym env with multi discrete act space""" + env_gym_in = GymEnv(self.env_in) + env_gym_out = GymEnv(self.env_out) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env_gym_in.action_space = MultiDiscreteActSpace(self.env_in.action_space) + env_gym_out.action_space = MultiDiscreteActSpace(self.env_out.action_space) + env_gym_in.reset() + env_gym_out.reset() + act = env_gym_in.action_space.sample() + act[:] = 0 + self._aux_run_envs(act, env_gym_in, env_gym_out) if __name__ == "__main__":