
Commit

Merge pull request #9 from jbussemaker/feature/arch_sbo_precision
Feature/arch sbo precision
jbussemaker authored Nov 21, 2023
2 parents 0238477 + 74f6a21 commit 9590894
Showing 25 changed files with 856 additions and 161 deletions.
1 change: 1 addition & 0 deletions docs/api/problem.md
@@ -37,6 +37,7 @@
members:
- all_discrete_x
- correct_x
- corrector
- quick_sample_discrete_x
- des_vars
- imputation_ratio
13 changes: 11 additions & 2 deletions docs/index.md
@@ -19,6 +19,7 @@ This library hopes to support in doing this.
The library provides:

- A common interface for defining architecture optimization problems based on [pymoo](https://pymoo.org/)
- Sampling and correction algorithms for hierarchical design spaces
- Support in using Surrogate-Based Optimization (SBO) algorithms:
- Implementation of a basic SBO algorithm
- Connectors to various external SBO libraries
@@ -71,6 +72,8 @@ You then need to implement the following functionality:
- Design variable definition in the `__init__` function using `Variable` classes (in `pymoo.core.variable`)
- Evaluation of a design vector in `_arch_evaluate`
- Correction (imputation/repair) of a design vector in `_correct_x`
- Note that this function is only used if `all_discrete_x` (as returned by `_gen_all_discrete_x`) is not available
or `design_space.use_auto_correction` is set to `False`
- Return which variables are conditionally active in `_is_conditionally_active`
- An (optional) interface for implementing intermediate storage of problem-specific results (`store_results`), and
restarting an optimization from these previous results (`load_previous_results`)
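
To make the design-variable definition from the list above concrete, here is a minimal sketch of a problem class. The `sb_arch_opt.problem` import path and the exact constructor and `_arch_evaluate` signatures are assumptions for illustration, not taken from this diff:

```python
import numpy as np
from pymoo.core.variable import Real, Integer, Choice
from sb_arch_opt.problem import ArchOptProblemBase  # import path assumed


class MySubSystemProblem(ArchOptProblemBase):

    def __init__(self):
        # Mixed continuous / integer / categorical design variables via pymoo's Variable classes
        des_vars = [
            Real(bounds=(0., 1.)),
            Integer(bounds=(0, 10)),
            Choice(options=['A', 'B', 'C']),
        ]
        super().__init__(des_vars, n_obj=1)  # constructor arguments assumed

    def _arch_evaluate(self, x, is_active_out, f_out, g_out, h_out, *args, **kwargs):
        f_out[:, 0] = np.sum(x**2, axis=1)  # placeholder objective

    def _correct_x(self, x, is_active):
        pass  # no hierarchy in this sketch: all design variables are always active

    def _is_conditionally_active(self):
        return [False]*3  # no conditionally-active design variables in this sketch
```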
@@ -138,8 +141,14 @@ class DemoArchOptProblem(CachedParetoFrontMixin, ArchOptProblemBase):
f_out[:, 0] = np.sum(x ** 2, axis=1)

def _correct_x(self, x: np.ndarray, is_active: np.ndarray):
"""Fill the activeness matrix and (if needed) impute any design variables that are partially inactive.
Imputation of inactive design variables is always applied after this function."""
"""
Fill the activeness matrix and (if needed) correct any design variables that are partially inactive.
Imputation of inactive design variables is always applied after this function.
Only needed if no explicit design space model is given.
        Only used if the set of all discrete design vectors (`all_discrete_x`) is not available OR
`self.design_space.use_auto_corrector = False`.
"""

# Get categorical values associated to the third design variables (i_dv = 2)
categorical_values = self.get_categorical_values(x, i_dv=2)
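
The hunk above is cut off right after fetching the categorical values; a hypothetical continuation (illustrative, not part of this diff) could then set the activeness matrix based on those values:

```python
# Hypothetical continuation of _correct_x: deactivate the fourth design variable
# (i_dv = 3) for all design vectors where the categorical value is 'A'
is_active[categorical_values == 'A', 3] = False
```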
16 changes: 8 additions & 8 deletions docs/test_problems.md
@@ -60,14 +60,14 @@ An overview of available test problems in `sb_arch_opt.problems` (* = recommende
| `MOHierarchicalRosenbrock` | 5 | 8 | 2 | 2 | | Y | Y | Y | | 32 | 1.5 | | |
| ~~`MOHierarchicalTestProblem`~~ | 11 | 16 | 2 | 2 | | Y | Y | Y | | 64 | 72 | | |
| Module: `gnc` |
| `GNCNoActNrType` | 18 | | 1 | | Y | | | Y | | 265 | 989 | | |
| `GNCNoActType` | 20 | | 2 | | Y | | Y | Y | | 327 | 7.2e3 | | |
| `GNCNoActNr` | 24 | | 2 | | Y | | Y | Y | | 26500 | 7.2e3 | | |
| `GNCNoAct` | 26 | | 2 | | Y | | Y | Y | | 29857 | 57.6e3 | | |
| `GNCNoNrType` | 27 | | 1 | | Y | | | Y | | 70225 | 1911 | | |
| `GNCNoType` | 30 | | 2 | | Y | | Y | Y | | 85779 | 42.2e3 | | |
| `GNCNoNr` | 36 | | 2 | | Y | | Y | Y | | 70225000 | 37.6e3 | | |
| `GNC` | 39 | | 2 | | Y | | Y | Y | | 79091323 | 901e3 | | |
| `GNCNoActNrType` | 18 | | 1 | | Y | | | Y | | 265 | 1.9 | | |
| `GNCNoActType` | 20 | | 2 | | Y | | Y | Y | | 327 | 14.1 | | |
| `GNCNoActNr` | 24 | | 2 | | Y | | Y | Y | | 26500 | 14.1 | | |
| `GNCNoAct` | 26 | | 2 | | Y | | Y | Y | | 29857 | 113 | | |
| `GNCNoNrType` | 27 | | 1 | | Y | | | Y | | 70225 | 3.7 | | |
| `GNCNoType` | 30 | | 2 | | Y | | Y | Y | | 85779 | 83 | | |
| `GNCNoNr` | 36 | | 2 | | Y | | Y | Y | | 70225000 | 74 | | |
| `GNC` | 39 | | 2 | | Y | | Y | Y | | 79091323 | 1761 | | |
| Module: `hidden_constraints` |
| `Mueller01` | | 5 | 1 | | | | | | Y | | | | fail_rate: 67% |
| `Mueller02` | | 4 | 1 | | | | | | Y | | | | fail_rate: 40% |
2 changes: 1 addition & 1 deletion sb_arch_opt/__init__.py
@@ -1 +1 @@
__version__ = '1.2.1'
__version__ = '1.3.0'
12 changes: 6 additions & 6 deletions sb_arch_opt/algo/arch_sbo/hc_strategy.py
@@ -49,16 +49,16 @@
log = logging.getLogger('sb_arch_opt.sbo_hc')


def get_hc_strategy(kpls_n_dim: Optional[int] = 10, min_pov: float = .5):
def get_hc_strategy(kpls_n_dim: Optional[int] = 10, min_pov: float = .25):
"""
Get a hidden constraints strategy that works well for most problems.
    The minimum Probability of Viability (min_pov) can be used to determine how much the algorithm is pushed
    towards exploration over exploitation. Values between 10% and 50% have been shown to give good optimization results.
"""

# Get the predictor: RF works best but requires scikit-learn
try:
predictor = RandomForestClassifier(n=100, n_dim=10)
except ImportError:
predictor = MDGPRegressor(kpls_n_dim=kpls_n_dim)
# Get the predictor: MD-GP works best
predictor = MDGPRegressor(kpls_n_dim=kpls_n_dim)

    # Create the strategy: use the predicted Probability of Viability as an additional constraint (>= min_pov)
return PredictionHCStrategy(predictor, constraint=True, min_pov=min_pov)
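
For reference, a minimal usage sketch of the updated function; only the signature shown above is relied on, and how the returned strategy is plugged into an SBO algorithm is not shown here:

```python
from sb_arch_opt.algo.arch_sbo.hc_strategy import get_hc_strategy

# The new, lower default min_pov of 0.25 accepts candidate points with a lower predicted
# Probability of Viability, i.e. it pushes the search further towards exploration
hc_strategy = get_hc_strategy(kpls_n_dim=10, min_pov=.25)
```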
27 changes: 19 additions & 8 deletions sb_arch_opt/algo/arch_sbo/infill.py
@@ -188,6 +188,13 @@ def select_infill(self, population: Population, infill_problem: Problem, n_infil

# Improve selected points by local optimization
return self._increase_precision(sel_pop)
# print(f'SEL POP x = {sel_pop.get("X")}')
# print(f'SEL POP f_in = {sel_pop.get("F")}')
# improved_sel_pop = self._increase_precision(sel_pop)
# print(f'IMP POP x = {improved_sel_pop.get("X")}')
# x, is_active = self.problem.correct_x(improved_sel_pop.get("X"))
# print(f'IMP POP f_in = {self.evaluate(x, is_active)[0]}')
# return improved_sel_pop

def _increase_precision(self, pop: Population) -> Population:
"""Increase the precision of the continuous variables by running a local gradient-based optimization
@@ -201,7 +208,7 @@ def _increase_precision(self, pop: Population) -> Population:
if not np.any(is_cont_mask):
return pop

def get_y_precision_funcs(x_ref: np.ndarray, is_act_ref: np.ndarray, i_opt):
def get_y_precision_funcs(x_ref: np.ndarray, is_act_ref: np.ndarray, f_ref: np.ndarray, i_opt):
last_g: Optional[np.ndarray] = None
x_norm_opt = x_norm[i_opt]
xl_opt = xl[i_opt]
@@ -212,13 +219,16 @@ def y_precision(x_norm_):
x_eval[0, i_opt] = x_norm_*x_norm_opt + xl_opt

f, g = self.evaluate(x_eval, is_active=is_act_ref)
# print(f'EVAL {x_norm_}: {f}, {g}')

# Infill objectives are normalized so we can just add them to get a correctly-weighted single objective:
# - For function-based infills (prediction mean), the surrogates are trained on normalized y values
# - Expected Improvement and other probability-based infills are also normalized
f_so = np.sum(f)
# Objective is the improvement in the direction of the negative unit vector
f_diff = f-f_ref
f_abs_diff = np.sum(f_diff)

# Penalize deviation from unit vector to ensure that the design point stays in the same area of the PF
f_deviation = np.ptp(f_diff)
f_so = f_abs_diff + 100*f_deviation**2

# print(f'EVAL {x_norm_}: {f} --> {f_so}, {g}')
last_g = g[0, :] if g is not None else None
return f_so
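
To illustrate the new precision objective, a small standalone numerical sketch (values purely illustrative):

```python
import numpy as np

f_ref = np.array([[.40, .60]])  # infill objectives of the selected infill point
f = np.array([[.35, .50]])      # infill objectives after a candidate local step

f_diff = f - f_ref              # ideally negative and equal in all objectives
f_abs_diff = np.sum(f_diff)     # total improvement (more negative is better): -0.15
f_deviation = np.ptp(f_diff)    # deviation from the negative unit vector direction: 0.05
f_so = f_abs_diff + 100*f_deviation**2  # penalized single objective: -0.15 + 0.25 = 0.10
```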

@@ -242,6 +252,7 @@ def _g(_):
x_norm[x_norm < 1e-6] = 1e-6

x, is_active = problem.correct_x(pop.get('X'))
f_pop = pop.get('F')
x_optimized = []
for i in range(len(pop)):
# Only optimize active continuous variables
@@ -253,11 +264,11 @@
continue

# Run optimization
f_opt, con, xn_, xl_ = get_y_precision_funcs(x[[i], :], is_active[[i], :], i_optimize)
f_opt, con, xn_, xl_ = get_y_precision_funcs(x[[i], :], is_active[[i], :], f_pop[[i], :], i_optimize)
bounds = [(0., 1.) for _ in i_optimize]
x_start_norm = (x_ref_i[i_optimize]-xl_)/xn_
res = minimize(f_opt, x_start_norm, method='slsqp', bounds=bounds,
constraints=con, options={'maxiter': 20, 'eps': 1e-4, 'ftol': 1e-3})
constraints=con, options={'maxiter': 20, 'eps': 1e-5, 'ftol': 1e-4})
if res.success:
x_opt = x_ref_i.copy()
x_opt[i_optimize] = res.x*xn_ + xl_
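
As a side note on this local SLSQP step: the optimization runs in [0, 1] per active continuous variable and the result is mapped back to the original bounds. A small sketch, assuming `xn_` holds the bound range (upper minus lower) per variable:

```python
import numpy as np

xl_ = np.array([2.0])        # lower bound of one continuous variable
xn_ = np.array([4.0])        # assumed bound range (upper minus lower)
x_phys = np.array([3.0])

x_start_norm = (x_phys - xl_)/xn_  # 0.25, the SLSQP start point in [0, 1]
x_back = x_start_norm*xn_ + xl_    # 3.0, mapped back to the physical design space
```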
41 changes: 28 additions & 13 deletions sb_arch_opt/algo/arch_sbo/models.py
@@ -46,12 +46,12 @@
import smt.utils.design_space as ds

from smt import __version__
IS_SMT_21 = not __version__.startswith('2.0')
IS_SMT_22 = not __version__.startswith('2.0') and not __version__.startswith('2.1')

HAS_ARCH_SBO = True
except ImportError:
HAS_ARCH_SBO = False
IS_SMT_21 = False
IS_SMT_22 = False

class BaseDesignSpace:
pass
@@ -60,7 +60,7 @@ class SurrogateModel:
pass

__all__ = ['check_dependencies', 'HAS_ARCH_SBO', 'ModelFactory', 'MixedDiscreteNormalization', 'SBArchOptDesignSpace',
'MultiSurrogateModel', 'IS_SMT_21']
'MultiSurrogateModel', 'IS_SMT_22']


def check_dependencies():
@@ -166,11 +166,13 @@ def get_kriging_model(multi=True, kpls_n_comp: int = None, **kwargs):
surrogate = MultiSurrogateModel(surrogate)
return surrogate

def get_md_kriging_model(self, kpls_n_comp: int = None, multi=True, **kwargs_) -> Tuple['SurrogateModel', Normalization]:
def get_md_kriging_model(self, kpls_n_comp: int = None, multi=True, ignore_hierarchy=False,
**kwargs_) -> Tuple['SurrogateModel', Normalization]:
check_dependencies()
normalization = self.get_md_normalization()
design_space = self.problem.design_space
norm_ds_spec = self.create_smt_design_space_spec(design_space, md_normalize=True)
norm_ds_spec = self.create_smt_design_space_spec(
design_space, md_normalize=True, ignore_hierarchy=ignore_hierarchy)

kwargs = dict(
print_global=False,
@@ -185,16 +187,17 @@ def get_md_kriging_model(self, kpls_n_comp: int = None, multi=True, **kwargs_) -
n_dim_apply_pls = design_space.n_var

# PLS is not applied to categorical variables for EHH/HH kernels (see KrgBased._matrix_data_corr)
if IS_SMT_21 and kwargs['categorical_kernel'] not in [MixIntKernelType.CONT_RELAX, MixIntKernelType.GOWER]:
if IS_SMT_22 and kwargs['categorical_kernel'] not in [MixIntKernelType.CONT_RELAX, MixIntKernelType.GOWER]:
n_dim_apply_pls = design_space.n_var - np.sum(design_space.is_cat_mask)

if kpls_n_comp > n_dim_apply_pls:
kpls_n_comp = None

if kpls_n_comp is not None:
if not IS_SMT_21:
if not IS_SMT_22:
kwargs['categorical_kernel'] = MixIntKernelType.CONT_RELAX

# Ignore hierarchy in the design space as KPLS does not support this
non_hier_ds_spec = self.create_smt_design_space_spec(
self.problem.design_space, md_normalize=True, ignore_hierarchy=True)
kwargs['design_space'] = non_hier_ds_spec.design_space
@@ -203,6 +206,9 @@ def get_md_kriging_model(self, kpls_n_comp: int = None, multi=True, **kwargs_) -
else:
surrogate = KRG(**kwargs)

if ignore_hierarchy or kpls_n_comp is not None:
surrogate.supports['x_hierarchy'] = False

if multi:
surrogate = MultiSurrogateModel(surrogate)
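
A hedged usage sketch of the extended signature; that `ModelFactory` is constructed from the problem instance is an assumption, while the keyword arguments and return values are those shown above:

```python
from sb_arch_opt.algo.arch_sbo.models import ModelFactory

factory = ModelFactory(problem)  # `problem`: an ArchOptProblemBase instance (constructor assumed)

# When KPLS is applied (or ignore_hierarchy=True), the Kriging model is built on a
# non-hierarchical design space and advertises supports['x_hierarchy'] = False
surrogate, normalization = factory.get_md_kriging_model(kpls_n_comp=5, multi=True)
```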

@@ -254,7 +260,7 @@ def _override(theta):
class SBArchOptDesignSpace(BaseDesignSpace):
"""SMT design space implementation using SBArchOpt's design space logic"""

_global_disable_hierarchical_cat_fix = IS_SMT_21
_global_disable_hierarchical_cat_fix = IS_SMT_22

def __init__(self, arch_design_space: ArchDesignSpace, md_normalize=False, cont_relax=False,
ignore_hierarchy=False):
@@ -271,7 +277,7 @@ def arch_design_space(self) -> ArchDesignSpace:
def _get_design_variables(self) -> List['ds.DesignVariable']:
"""Return the design variables defined in this design space if not provided upon initialization of the class"""
smt_des_vars = []
is_conditional = self._ds.is_conditionally_active
is_dv_cond = ([False]*len(self._ds.des_vars)) if self._ignore_hierarchy else self._ds.is_conditionally_active
normalize = self.normalize is not None
cont_relax = self._cont_relax
for i, dv in enumerate(self._ds.des_vars):
@@ -297,7 +303,7 @@ def _get_design_variables(self) -> List['ds.DesignVariable']:
smt_des_vars.append(ds.FloatVariable(0, len(dv.options)-1))
else:
# Conditional categorical variables are currently not supported
if is_conditional[i] and not self._global_disable_hierarchical_cat_fix:
if is_dv_cond[i] and not self._global_disable_hierarchical_cat_fix:
smt_des_vars.append(ds.IntegerVariable(0, len(dv.options)-1))
else:
smt_des_vars.append(ds.CategoricalVariable(values=dv.options))
@@ -308,7 +314,7 @@

def _is_conditionally_acting(self) -> np.ndarray:
if self._ignore_hierarchy:
return np.zeros((len(self._ds.is_conditionally_active),), dtype=bool)
return np.zeros((len(self._ds.des_vars),), dtype=bool)

return self._ds.is_conditionally_active

@@ -325,8 +331,8 @@ def _correct_get_acting(self, x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
is_active = np.ones(is_active.shape, dtype=bool)
return x, is_active

def _sample_valid_x(self, n: int) -> Tuple[np.ndarray, np.ndarray]:
sampler = HierarchicalSampling()
def _sample_valid_x(self, n: int, random_state=None) -> Tuple[np.ndarray, np.ndarray]:
sampler = HierarchicalSampling(seed=random_state)
stub_problem = ArchOptProblemBase(self._ds)
x, is_active = sampler.sample_get_x(stub_problem, n)

@@ -356,6 +362,9 @@ def __init__(self, surrogate: 'SurrogateModel', **kwargs):
self.supports = self._surrogate.supports
self.options["print_global"] = False

self.xt = None
self.yt = None

@property
def name(self):
return f'Multi{self._surrogate.name}'
@@ -364,6 +373,9 @@ def _initialize(self):
self.supports["derivatives"] = False

def set_training_values(self, xt: np.ndarray, yt: np.ndarray, name=None, is_acting=None) -> None:
self.xt = xt
self.yt = yt

self._models = models = []
for iy in range(yt.shape[1]):
model: Union['KrgBased', 'SurrogateModel'] = self._copy_underlying()
@@ -398,6 +410,9 @@ def train(self) -> None:

model.train()

# rmse = np.linalg.norm(self.yt[:, i] - model.predict_values(self.xt)[:, 0], 2)
# print(f'TRAINED {i}: {rmse:.3g}')

if i == 0 and isinstance(model, KrgBased):
try:
theta0 = list(model.optimal_theta)
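
To summarize the `MultiSurrogateModel` behavior touched above: it copies the underlying model once per output column of `yt` and trains each copy separately. A usage sketch with synthetic training data, using SMT's `KRG` as the underlying model:

```python
import numpy as np
from smt.surrogate_models import KRG
from sb_arch_opt.algo.arch_sbo.models import MultiSurrogateModel

xt = np.random.rand(20, 3)                                # 20 training points, 3 inputs
yt = np.column_stack([xt.sum(axis=1), xt.prod(axis=1)])   # 2 outputs -> 2 underlying models

multi_model = MultiSurrogateModel(KRG(print_global=False))
multi_model.set_training_values(xt, yt)  # stores xt/yt and creates one model per output column
multi_model.train()                      # trains each per-output model in turn
```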
