a new type of env that does not perform line disconnection on a masked…

… part of the grid, see rte-france#571
Grid2op · Dec 15, 2023 · e867bc1 · e867bc1
1 parent 03ad59c
commit e867bc1
Show file tree

Hide file tree

Showing 8 changed files with 451 additions and 39 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -39,6 +39,8 @@ Change Log
 - [FIXED] `PandapowerBackend`, when no slack was present
 - [FIXED] the "BaseBackendTest" class did not correctly detect divergence in most cases (which lead 
   to weird bugs in failing tests)
+- [ADDED] A type of environment that does not perform the "emulation of the protections"
+  for some part of the grid (`MaskedEnvironment`)
 - [IMPROVED] the CI speed: by not testing every possible numpy version but only most ancient and most recent
 - [IMPROVED] Runner now test grid2op version 1.9.6 and 1.9.7
 - [IMPROVED] refacto `gridobj_cls._clear_class_attribute` and `gridobj_cls._clear_grid_dependant_class_attributes`

diff --git a/grid2op/Backend/backend.py b/grid2op/Backend/backend.py
@@ -1023,10 +1023,12 @@ def next_grid_state(self,
             ] = True
 
             # disconnect the current power lines
-            if to_disc[lines_status].sum() == 0:
-                # no powerlines have been disconnected at this time step, i stop the computation there
+            if to_disc[lines_status].any() == 0:
+                # no powerlines have been disconnected at this time step, 
+                # i stop the computation there
                 break
             disconnected_during_cf[to_disc] = ts
+
             # perform the disconnection action
             for i, el in enumerate(to_disc):
                 if el:

diff --git a/grid2op/Environment/__init__.py b/grid2op/Environment/__init__.py
@@ -5,7 +5,8 @@
     "SingleEnvMultiProcess",
     "MultiEnvMultiProcess",
     "MultiMixEnvironment",
-    "TimedOutEnvironment"
+    "TimedOutEnvironment",
+    "MaskedEnvironment"
 ]
 
 from grid2op.Environment.baseEnv import BaseEnv
@@ -15,3 +16,4 @@
 from grid2op.Environment.multiEnvMultiProcess import MultiEnvMultiProcess
 from grid2op.Environment.multiMixEnv import MultiMixEnvironment
 from grid2op.Environment.timedOutEnv import TimedOutEnvironment
+from grid2op.Environment.maskedEnvironment import MaskedEnvironment
diff --git a/grid2op/Environment/baseEnv.py b/grid2op/Environment/baseEnv.py
@@ -342,7 +342,7 @@ def __init__(
         )
         self._timestep_overflow: np.ndarray = None
         self._nb_timestep_overflow_allowed: np.ndarray = None
-        self._hard_overflow_threshold: float = self._parameters.HARD_OVERFLOW_THRESHOLD
+        self._hard_overflow_threshold: np.ndarray  = None
 
         # store actions "cooldown"
         self._times_before_line_status_actionable: np.ndarray = None
@@ -626,7 +626,7 @@ def _custom_deepcopy_for_copy(self, new_obj, dict_=None):
         new_obj._nb_timestep_overflow_allowed = copy.deepcopy(
             self._nb_timestep_overflow_allowed
         )
-        new_obj._hard_overflow_threshold = self._hard_overflow_threshold
+        new_obj._hard_overflow_threshold = copy.deepcopy(self._hard_overflow_threshold)
 
         # store actions "cooldown"
         new_obj._times_before_line_status_actionable = copy.deepcopy(
@@ -1204,7 +1204,6 @@ def _has_been_initialized(self):
         self._gen_downtime = np.zeros(self.n_gen, dtype=dt_int)
         self._gen_activeprod_t = np.zeros(self.n_gen, dtype=dt_float)
         self._gen_activeprod_t_redisp = np.zeros(self.n_gen, dtype=dt_float)
-        self._nb_timestep_overflow_allowed = np.ones(shape=self.n_line, dtype=dt_int)
         self._max_timestep_line_status_deactivated = (
             self._parameters.NB_TIMESTEP_COOLDOWN_LINE
         )
@@ -1220,6 +1219,11 @@ def _has_been_initialized(self):
             fill_value=self._parameters.NB_TIMESTEP_OVERFLOW_ALLOWED,
             dtype=dt_int,
         )
+        self._hard_overflow_threshold = np.full(
+            shape=(self.n_line,),
+            fill_value=self._parameters.HARD_OVERFLOW_THRESHOLD,
+            dtype=dt_float,
+        )
         self._timestep_overflow = np.zeros(shape=(self.n_line,), dtype=dt_int)
 
         # update the parameters
@@ -1261,7 +1265,6 @@ def _update_parameters(self):
         # type of power flow to play
         # if True, then it will not disconnect lines above their thermal limits
         self._no_overflow_disconnection = self._parameters.NO_OVERFLOW_DISCONNECTION
-        self._hard_overflow_threshold = self._parameters.HARD_OVERFLOW_THRESHOLD
 
         # store actions "cooldown"
         self._max_timestep_line_status_deactivated = (
@@ -1275,7 +1278,7 @@ def _update_parameters(self):
         self._nb_timestep_overflow_allowed[
             :
         ] = self._parameters.NB_TIMESTEP_OVERFLOW_ALLOWED
-
+        self._hard_overflow_threshold[:] = self._parameters.HARD_OVERFLOW_THRESHOLD
         # hard overflow part
         self._env_dc = self._parameters.ENV_DC
 
@@ -2957,6 +2960,10 @@ def _aux_register_env_converged(self, disc_lines, action, init_line_status, new_
         # TODO is non zero and disconnected, this should be ok.
         self._time_extract_obs += time.perf_counter() - beg_res
 
+    def _backend_next_grid_state(self):
+        """Ask the backend for the next grid state (hook meant to be overloaded, e.g. in MaskedEnv)"""
+        return self.backend.next_grid_state(env=self, is_dc=self._env_dc)
+
     def _aux_run_pf_after_state_properly_set(
         self, action, init_line_status, new_p, except_
     ):
@@ -2965,9 +2972,7 @@ def _aux_run_pf_after_state_properly_set(
         try:
             # compute the next _grid state
             beg_pf = time.perf_counter()
-            disc_lines, detailed_info, conv_ = self.backend.next_grid_state(
-                env=self, is_dc=self._env_dc
-            )
+            disc_lines, detailed_info, conv_ = self._backend_next_grid_state()
             self._disc_lines[:] = disc_lines
             self._time_powerflow += time.perf_counter() - beg_pf
             if conv_ is None:
@@ -3328,7 +3333,7 @@ def _reset_vectors_and_timings(self):
         ] = self._parameters.NB_TIMESTEP_OVERFLOW_ALLOWED
 
         self.nb_time_step = 0  # to have the first step at 0
-        self._hard_overflow_threshold = self._parameters.HARD_OVERFLOW_THRESHOLD
+        self._hard_overflow_threshold[:] = self._parameters.HARD_OVERFLOW_THRESHOLD
         self._env_dc = self._parameters.ENV_DC
 
         self._times_before_line_status_actionable[:] = 0

diff --git a/grid2op/Environment/maskedEnvironment.py b/grid2op/Environment/maskedEnvironment.py
@@ -0,0 +1,200 @@
+# Copyright (c) 2023, RTE (https://www.rte-france.com)
+# See AUTHORS.txt
+# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
+# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
+# you can obtain one at http://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0
+# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
+
+import copy
+import numpy as np
+from typing import Tuple, Union, List
+from grid2op.Environment.environment import Environment
+from grid2op.Action import BaseAction
+from grid2op.Observation import BaseObservation
+from grid2op.Exceptions import EnvError
+from grid2op.dtypes import dt_bool, dt_float, dt_int
+
+
+class MaskedEnvironment(Environment):  # TODO inheritance, or else use some common base class
+    """This class is the grid2op implementation of a "masked" environment: lines not in the
+    `lines_of_interest` mask will NOT be disconnected by the environment if the flow is too high
+    (or moderately high for too long).
+
+    To do so, each time the internal vectors are reset, the hard overflow threshold and the
+    number of overflow time steps allowed are set to "some kind of infinity" values for
+    the lines that are not in the mask.
+
+    .. warning::
+        This class might not behave normally if used with TimedOutEnvironment, MultiEnv, MultiMixEnv etc.
+
+    .. warning::
+        At time of writing, the behaviour of "obs.simulate" is not modified
+
+    Parameters
+    ----------
+    grid2op_env: ``Environment`` or ``dict``
+        Either an environment (the result of its `get_kwargs()` is then used) or
+        directly the keyword arguments given to the constructor of `Environment`.
+
+    lines_of_interest: ``numpy.ndarray``
+        The mask, converted to booleans: ``True`` for the lines that can still be
+        disconnected in case of overflow. At least one component must be ``True``.
+    """
+    CAN_SKIP_TS = False  # steps of this environment are not flagged as skipping time steps
+
+    def __init__(self,
+                 grid2op_env: Union[Environment, dict],
+                 lines_of_interest: np.ndarray):
+        # NB the mask is built BEFORE the underlying environment is created
+        self._lines_of_interest = self._make_lines_of_interest(lines_of_interest)
+        if isinstance(grid2op_env, Environment):
+            super().__init__(**grid2op_env.get_kwargs())
+        elif isinstance(grid2op_env, dict):
+            super().__init__(**grid2op_env)
+        else:
+            raise EnvError(f"For MaskedEnvironment you need to provide "
+                           f"either an Environment or a dict "
+                           f"for grid2op_env. You provided: {type(grid2op_env)}")
+
+    def _make_lines_of_interest(self, lines_of_interest):
+        """Check the mask provided by the user and convert it to a vector of booleans.
+
+        NB this is called BEFORE the env has been created: the number of lines on the
+        grid cannot be checked here, this is done in `_reset_vectors_and_timings`.
+
+        Raises
+        ------
+        EnvError
+            If `lines_of_interest` is not a numpy array or if none of its components is ``True``.
+        """
+        if not isinstance(lines_of_interest, np.ndarray):
+            raise EnvError("Format of lines_of_interest is not understood. "
+                           "Please provide a vector of the size of the "
+                           "number of lines on the grid.")
+        res = lines_of_interest.astype(dt_bool)
+        if not res.any():
+            raise EnvError("You cannot use MaskedEnvironment and masking all "
+                           "the grid. If you don't want to simulate powerline "
+                           "disconnection when they are game over, please "
+                           "set params.NO_OVERFLOW_DISCONNECTION=True (see doc)")
+        return res
+
+    def _reset_vectors_and_timings(self):
+        """Reset the vectors of the parent class, then "deactivate" the protections of the masked lines.
+
+        Raises
+        ------
+        EnvError
+            If the mask does not have the same shape as the per-line vectors of the environment.
+        """
+        super()._reset_vectors_and_timings()
+        if self._lines_of_interest.shape != self._hard_overflow_threshold.shape:
+            # this cannot be checked in __init__: the mask is built before the env is created
+            raise EnvError("Impossible to init A masked environment when the number of lines "
+                           "of the mask do not match the number of lines on the grid.")
+        masked = ~self._lines_of_interest
+        # NOTE(review): `_update_parameters` also overwrites these two vectors, confirm it
+        # is never called after this method without a reset in between.
+        # some kind of infinity value.
+        # NB we multiply np.finfo(dt_float).max by a small number to avoid overflow
+        # indeed, _hard_overflow_threshold is multiplied by the flow on the lines
+        self._hard_overflow_threshold[masked] = 1e-7 * np.finfo(dt_float).max
+        # some kind of infinity value
+        self._nb_timestep_overflow_allowed[masked] = np.iinfo(dt_int).max - 1
+
+    def get_kwargs(self, with_backend=True, with_chronics_handler=True):
+        """Return the keyword arguments of `MaskedEnvironment.__init__` describing this environment.
+
+        "lines_of_interest" is a deep copy of the mask and "grid2op_env" is the
+        result of the `get_kwargs` method of the parent class.
+        """
+        return {
+            "lines_of_interest": copy.deepcopy(self._lines_of_interest),
+            "grid2op_env": super().get_kwargs(with_backend, with_chronics_handler),
+        }
+
+    def get_params_for_runner(self):
+        """Return the parameters of the parent class with "envClass" and "other_env_kwargs" overridden.
+
+        "other_env_kwargs" carries a deep copy of the mask, see `init_obj_from_kwargs`.
+        """
+        res = super().get_params_for_runner()
+        res["envClass"] = MaskedEnvironment
+        res["other_env_kwargs"] = {"lines_of_interest": copy.deepcopy(self._lines_of_interest)}
+        return res
+
+    @classmethod
+    def init_obj_from_kwargs(cls,
+                             other_env_kwargs,
+                             init_env_path,
+                             init_grid_path,
+                             chronics_handler,
+                             backend,
+                             parameters,
+                             name,
+                             names_chronics_to_backend,
+                             actionClass,
+                             observationClass,
+                             rewardClass,
+                             legalActClass,
+                             voltagecontrolerClass,
+                             other_rewards,
+                             opponent_space_type,
+                             opponent_action_class,
+                             opponent_class,
+                             opponent_init_budget,
+                             opponent_budget_per_ts,
+                             opponent_budget_class,
+                             opponent_attack_duration,
+                             opponent_attack_cooldown,
+                             kwargs_opponent,
+                             with_forecast,
+                             attention_budget_cls,
+                             kwargs_attention_budget,
+                             has_attention_budget,
+                             logger,
+                             kwargs_observation,
+                             observation_bk_class,
+                             observation_bk_kwargs,
+                             _raw_backend_class,
+                             _read_from_local_dir):
+        """Build a `MaskedEnvironment` from keyword arguments given one by one.
+
+        `other_env_kwargs` is forwarded, as keyword arguments, to the constructor (this is
+        where "lines_of_interest" is expected, see `get_params_for_runner`). All the other
+        arguments are gathered in the `grid2op_env` dictionary.
+        """
+        grid2op_env = {"init_env_path": init_env_path,
+                       "init_grid_path": init_grid_path,
+                       "chronics_handler": chronics_handler,
+                       "backend": backend,
+                       "parameters": parameters,
+                       "name": name,
+                       "names_chronics_to_backend": names_chronics_to_backend,
+                       "actionClass": actionClass,
+                       "observationClass": observationClass,
+                       "rewardClass": rewardClass,
+                       "legalActClass": legalActClass,
+                       "voltagecontrolerClass": voltagecontrolerClass,
+                       "other_rewards": other_rewards,
+                       "opponent_space_type": opponent_space_type,
+                       "opponent_action_class": opponent_action_class,
+                       "opponent_class": opponent_class,
+                       "opponent_init_budget": opponent_init_budget,
+                       "opponent_budget_per_ts": opponent_budget_per_ts,
+                       "opponent_budget_class": opponent_budget_class,
+                       "opponent_attack_duration": opponent_attack_duration,
+                       "opponent_attack_cooldown": opponent_attack_cooldown,
+                       "kwargs_opponent": kwargs_opponent,
+                       "with_forecast": with_forecast,
+                       "attention_budget_cls": attention_budget_cls,
+                       "kwargs_attention_budget": kwargs_attention_budget,
+                       "has_attention_budget": has_attention_budget,
+                       "logger": logger,
+                       "kwargs_observation": kwargs_observation,
+                       "observation_bk_class": observation_bk_class,
+                       "observation_bk_kwargs": observation_bk_kwargs,
+                       "_raw_backend_class": _raw_backend_class,
+                       "_read_from_local_dir": _read_from_local_dir}
+        return MaskedEnvironment(grid2op_env=grid2op_env, **other_env_kwargs)
diff --git a/grid2op/Environment/timedOutEnv.py b/grid2op/Environment/timedOutEnv.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
+# Copyright (c) 2023, RTE (https://www.rte-france.com)
 # See AUTHORS.txt
 # This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
 # If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
@@ -23,7 +23,10 @@ class TimedOutEnvironment(Environment):  # TODO heritage ou alors on met un truc
     of the `step` function. 
     
     For more information, see the documentation of 
-    :func:`TimedOutEnvironment.step` for 
+    :func:`TimedOutEnvironment.step` 
+    
+    .. warning::
+        This class might not behave normally if used with MaskedEnvironment, MultiEnv, MultiMixEnv etc.
     
     Attributes
     ----------