
Commit

Merge pull request #9 from jbussemaker/feature/arch_sbo_precision
Feature/arch sbo precision
jbussemaker authored Nov 21, 2023
2 parents 0238477 + 74f6a21 commit 9590894
Showing 25 changed files with 856 additions and 161 deletions.
1 change: 1 addition & 0 deletions docs/api/problem.md
@@ -37,6 +37,7 @@
members:
- all_discrete_x
- correct_x
- corrector
- quick_sample_discrete_x
- des_vars
- imputation_ratio
13 changes: 11 additions & 2 deletions docs/index.md
@@ -19,6 +19,7 @@ This library hopes to support in doing this.
The library provides:

- A common interface for defining architecture optimization problems based on [pymoo](https://pymoo.org/)
- Sampling and correction algorithms for hierarchical design spaces
- Support in using Surrogate-Based Optimization (SBO) algorithms:
- Implementation of a basic SBO algorithm
- Connectors to various external SBO libraries
@@ -71,6 +72,8 @@ You then need to implement the following functionality:
- Design variable definition in the `__init__` function using `Variable` classes (in `pymoo.core.variable`)
- Evaluation of a design vector in `_arch_evaluate`
- Correction (imputation/repair) of a design vector in `_correct_x`
- Note that this function is only used if `all_discrete_x` (as returned by `_gen_all_discrete_x`) is not available
or `design_space.use_auto_correction` is set to `False`
- Return which variables are conditionally active in `_is_conditionally_active`
- An (optional) interface for implementing intermediate storage of problem-specific results (`store_results`), and
restarting an optimization from these previous results (`load_previous_results`)
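
To make the design-variable definition from the list above concrete, here is a minimal sketch of a problem class. The `sb_arch_opt.problem` import path and the exact constructor and `_arch_evaluate` signatures are assumptions for illustration, not taken from this diff:

```python
import numpy as np
from pymoo.core.variable import Real, Integer, Choice
from sb_arch_opt.problem import ArchOptProblemBase  # import path assumed


class MySubSystemProblem(ArchOptProblemBase):

    def __init__(self):
        # Mixed continuous / integer / categorical design variables via pymoo's Variable classes
        des_vars = [
            Real(bounds=(0., 1.)),
            Integer(bounds=(0, 10)),
            Choice(options=['A', 'B', 'C']),
        ]
        super().__init__(des_vars, n_obj=1)  # constructor arguments assumed

    def _arch_evaluate(self, x, is_active_out, f_out, g_out, h_out, *args, **kwargs):
        f_out[:, 0] = np.sum(x**2, axis=1)  # placeholder objective

    def _correct_x(self, x, is_active):
        pass  # no hierarchy in this sketch: all design variables are always active

    def _is_conditionally_active(self):
        return [False]*3  # no conditionally-active design variables in this sketch
```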
@@ -138,8 +141,14 @@ class DemoArchOptProblem(CachedParetoFrontMixin, ArchOptProblemBase):
f_out[:, 0] = np.sum(x ** 2, axis=1)

def _correct_x(self, x: np.ndarray, is_active: np.ndarray):
"""Fill the activeness matrix and (if needed) impute any design variables that are partially inactive.
Imputation of inactive design variables is always applied after this function."""
"""
Fill the activeness matrix and (if needed) correct any design variables that are partially inactive.
Imputation of inactive design variables is always applied after this function.
Only needed if no explicit design space model is given.
        Only used if the set of all discrete design vectors (`all_discrete_x`) is not available OR
`self.design_space.use_auto_corrector = False`.
"""

# Get categorical values associated to the third design variables (i_dv = 2)
categorical_values = self.get_categorical_values(x, i_dv=2)
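
The hunk above is cut off right after fetching the categorical values; a hypothetical continuation (illustrative, not part of this diff) could then set the activeness matrix based on those values:

```python
# Hypothetical continuation of _correct_x: deactivate the fourth design variable
# (i_dv = 3) for all design vectors where the categorical value is 'A'
is_active[categorical_values == 'A', 3] = False
```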
16 changes: 8 additions & 8 deletions docs/test_problems.md
@@ -60,14 +60,14 @@ An overview of available test problems in `sb_arch_opt.problems` (* = recommende
| `MOHierarchicalRosenbrock` | 5 | 8 | 2 | 2 | | Y | Y | Y | | 32 | 1.5 | | |
| ~~`MOHierarchicalTestProblem`~~ | 11 | 16 | 2 | 2 | | Y | Y | Y | | 64 | 72 | | |
| Module: `gnc` |
| `GNCNoActNrType` | 18 | | 1 | | Y | | | Y | | 265 | 989 | | |
| `GNCNoActType` | 20 | | 2 | | Y | | Y | Y | | 327 | 7.2e3 | | |
| `GNCNoActNr` | 24 | | 2 | | Y | | Y | Y | | 26500 | 7.2e3 | | |
| `GNCNoAct` | 26 | | 2 | | Y | | Y | Y | | 29857 | 57.6e3 | | |
| `GNCNoNrType` | 27 | | 1 | | Y | | | Y | | 70225 | 1911 | | |
| `GNCNoType` | 30 | | 2 | | Y | | Y | Y | | 85779 | 42.2e3 | | |
| `GNCNoNr` | 36 | | 2 | | Y | | Y | Y | | 70225000 | 37.6e3 | | |
| `GNC` | 39 | | 2 | | Y | | Y | Y | | 79091323 | 901e3 | | |
| `GNCNoActNrType` | 18 | | 1 | | Y | | | Y | | 265 | 1.9 | | |
| `GNCNoActType` | 20 | | 2 | | Y | | Y | Y | | 327 | 14.1 | | |
| `GNCNoActNr` | 24 | | 2 | | Y | | Y | Y | | 26500 | 14.1 | | |
| `GNCNoAct` | 26 | | 2 | | Y | | Y | Y | | 29857 | 113 | | |
| `GNCNoNrType` | 27 | | 1 | | Y | | | Y | | 70225 | 3.7 | | |
| `GNCNoType` | 30 | | 2 | | Y | | Y | Y | | 85779 | 83 | | |
| `GNCNoNr` | 36 | | 2 | | Y | | Y | Y | | 70225000 | 74 | | |
| `GNC` | 39 | | 2 | | Y | | Y | Y | | 79091323 | 1761 | | |
| Module: `hidden_constraints` |
| `Mueller01` | | 5 | 1 | | | | | | Y | | | | fail_rate: 67% |
| `Mueller02` | | 4 | 1 | | | | | | Y | | | | fail_rate: 40% |
2 changes: 1 addition & 1 deletion sb_arch_opt/__init__.py
@@ -1 +1 @@
__version__ = '1.2.1'
__version__ = '1.3.0'
12 changes: 6 additions & 6 deletions sb_arch_opt/algo/arch_sbo/hc_strategy.py
@@ -49,16 +49,16 @@
log = logging.getLogger('sb_arch_opt.sbo_hc')


def get_hc_strategy(kpls_n_dim: Optional[int] = 10, min_pov: float = .5):
def get_hc_strategy(kpls_n_dim: Optional[int] = 10, min_pov: float = .25):
"""
Get a hidden constraints strategy that works well for most problems.
    The minimum Probability of Viability (min_pov) can be used to determine how much the algorithm is pushed
    towards exploration over exploitation. Values between 10% and 50% have been shown to give good optimization results.
"""

# Get the predictor: RF works best but requires scikit-learn
try:
predictor = RandomForestClassifier(n=100, n_dim=10)
except ImportError:
predictor = MDGPRegressor(kpls_n_dim=kpls_n_dim)
# Get the predictor: MD-GP works best
predictor = MDGPRegressor(kpls_n_dim=kpls_n_dim)

    # Create the strategy: use the predicted Probability of Viability as an additional constraint (>= min_pov)
return PredictionHCStrategy(predictor, constraint=True, min_pov=min_pov)
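
For reference, a minimal usage sketch of the updated function; only the signature shown above is relied on, and how the returned strategy is plugged into an SBO algorithm is not shown here:

```python
from sb_arch_opt.algo.arch_sbo.hc_strategy import get_hc_strategy

# The new, lower default min_pov of 0.25 accepts candidate points with a lower predicted
# Probability of Viability, i.e. it pushes the search further towards exploration
hc_strategy = get_hc_strategy(kpls_n_dim=10, min_pov=.25)
```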
27 changes: 19 additions & 8 deletions sb_arch_opt/algo/arch_sbo/infill.py
@@ -188,6 +188,13 @@ def select_infill(self, population: Population, infill_problem: Problem, n_infil

# Improve selected points by local optimization
return self._increase_precision(sel_pop)
# print(f'SEL POP x = {sel_pop.get("X")}')
# print(f'SEL POP f_in = {sel_pop.get("F")}')
# improved_sel_pop = self._increase_precision(sel_pop)
# print(f'IMP POP x = {improved_sel_pop.get("X")}')
# x, is_active = self.problem.correct_x(improved_sel_pop.get("X"))
# print(f'IMP POP f_in = {self.evaluate(x, is_active)[0]}')
# return improved_sel_pop

def _increase_precision(self, pop: Population) -> Population:
"""Increase the precision of the continuous variables by running a local gradient-based optimization
@@ -201,7 +208,7 @@ def _increase_precision(self, pop: Population) -> Population:
if not np.any(is_cont_mask):
return pop

def get_y_precision_funcs(x_ref: np.ndarray, is_act_ref: np.ndarray, i_opt):
def get_y_precision_funcs(x_ref: np.ndarray, is_act_ref: np.ndarray, f_ref: np.ndarray, i_opt):
last_g: Optional[np.ndarray] = None
x_norm_opt = x_norm[i_opt]
xl_opt = xl[i_opt]
@@ -212,13 +219,16 @@ def y_precision(x_norm_):
x_eval[0, i_opt] = x_norm_*x_norm_opt + xl_opt

f, g = self.evaluate(x_eval, is_active=is_act_ref)
# print(f'EVAL {x_norm_}: {f}, {g}')

# Infill objectives are normalized so we can just add them to get a correctly-weighted single objective:
# - For function-based infills (prediction mean), the surrogates are trained on normalized y values
# - Expected Improvement and other probability-based infills are also normalized
f_so = np.sum(f)
# Objective is the improvement in the direction of the negative unit vector
f_diff = f-f_ref
f_abs_diff = np.sum(f_diff)

# Penalize deviation from unit vector to ensure that the design point stays in the same area of the PF
f_deviation = np.ptp(f_diff)
f_so = f_abs_diff + 100*f_deviation**2

# print(f'EVAL {x_norm_}: {f} --> {f_so}, {g}')
last_g = g[0, :] if g is not None else None
return f_so
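
To illustrate the new precision objective, a small standalone numerical sketch (values purely illustrative):

```python
import numpy as np

f_ref = np.array([[.40, .60]])  # infill objectives of the selected infill point
f = np.array([[.35, .50]])      # infill objectives after a candidate local step

f_diff = f - f_ref              # ideally negative and equal in all objectives
f_abs_diff = np.sum(f_diff)     # total improvement (more negative is better): -0.15
f_deviation = np.ptp(f_diff)    # deviation from the negative unit vector direction: 0.05
f_so = f_abs_diff + 100*f_deviation**2  # penalized single objective: -0.15 + 0.25 = 0.10
```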

@@ -242,6 +252,7 @@ def _g(_):
x_norm[x_norm < 1e-6] = 1e-6

x, is_active = problem.correct_x(pop.get('X'))
f_pop = pop.get('F')
x_optimized = []
for i in range(len(pop)):
# Only optimize active continuous variables
@@ -253,11 +264,11 @@
continue

# Run optimization
f_opt, con, xn_, xl_ = get_y_precision_funcs(x[[i], :], is_active[[i], :], i_optimize)
f_opt, con, xn_, xl_ = get_y_precision_funcs(x[[i], :], is_active[[i], :], f_pop[[i], :], i_optimize)
bounds = [(0., 1.) for _ in i_optimize]
x_start_norm = (x_ref_i[i_optimize]-xl_)/xn_
res = minimize(f_opt, x_start_norm, method='slsqp', bounds=bounds,
constraints=con, options={'maxiter': 20, 'eps': 1e-4, 'ftol': 1e-3})
constraints=con, options={'maxiter': 20, 'eps': 1e-5, 'ftol': 1e-4})
if res.success:
x_opt = x_ref_i.copy()
x_opt[i_optimize] = res.x*xn_ + xl_
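
As a side note on this local SLSQP step: the optimization runs in [0, 1] per active continuous variable and the result is mapped back to the original bounds. A small sketch, assuming `xn_` holds the bound range (upper minus lower) per variable:

```python
import numpy as np

xl_ = np.array([2.0])        # lower bound of one continuous variable
xn_ = np.array([4.0])        # assumed bound range (upper minus lower)
x_phys = np.array([3.0])

x_start_norm = (x_phys - xl_)/xn_  # 0.25, the SLSQP start point in [0, 1]
x_back = x_start_norm*xn_ + xl_    # 3.0, mapped back to the physical design space
```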
41 changes: 28 additions & 13 deletions sb_arch_opt/algo/arch_sbo/models.py
@@ -46,12 +46,12 @@
import smt.utils.design_space as ds

from smt import __version__
IS_SMT_21 = not __version__.startswith('2.0')
IS_SMT_22 = not __version__.startswith('2.0') and not __version__.startswith('2.1')

HAS_ARCH_SBO = True
except ImportError:
HAS_ARCH_SBO = False
IS_SMT_21 = False
IS_SMT_22 = False

class BaseDesignSpace:
pass
@@ -60,7 +60,7 @@ class SurrogateModel:
pass

__all__ = ['check_dependencies', 'HAS_ARCH_SBO', 'ModelFactory', 'MixedDiscreteNormalization', 'SBArchOptDesignSpace',
'MultiSurrogateModel', 'IS_SMT_21']
'MultiSurrogateModel', 'IS_SMT_22']


def check_dependencies():
@@ -166,11 +166,13 @@ def get_kriging_model(multi=True, kpls_n_comp: int = None, **kwargs):
surrogate = MultiSurrogateModel(surrogate)
return surrogate

def get_md_kriging_model(self, kpls_n_comp: int = None, multi=True, **kwargs_) -> Tuple['SurrogateModel', Normalization]:
def get_md_kriging_model(self, kpls_n_comp: int = None, multi=True, ignore_hierarchy=False,
**kwargs_) -> Tuple['SurrogateModel', Normalization]:
check_dependencies()
normalization = self.get_md_normalization()
design_space = self.problem.design_space
norm_ds_spec = self.create_smt_design_space_spec(design_space, md_normalize=True)
norm_ds_spec = self.create_smt_design_space_spec(
design_space, md_normalize=True, ignore_hierarchy=ignore_hierarchy)

kwargs = dict(
print_global=False,
@@ -185,16 +187,17 @@ def get_md_kriging_model(self, kpls_n_comp: int = None, multi=True, **kwargs_) -
n_dim_apply_pls = design_space.n_var

# PLS is not applied to categorical variables for EHH/HH kernels (see KrgBased._matrix_data_corr)
if IS_SMT_21 and kwargs['categorical_kernel'] not in [MixIntKernelType.CONT_RELAX, MixIntKernelType.GOWER]:
if IS_SMT_22 and kwargs['categorical_kernel'] not in [MixIntKernelType.CONT_RELAX, MixIntKernelType.GOWER]:
n_dim_apply_pls = design_space.n_var - np.sum(design_space.is_cat_mask)

if kpls_n_comp > n_dim_apply_pls:
kpls_n_comp = None

if kpls_n_comp is not None:
if not IS_SMT_21:
if not IS_SMT_22:
kwargs['categorical_kernel'] = MixIntKernelType.CONT_RELAX

# Ignore hierarchy in the design space as KPLS does not support this
non_hier_ds_spec = self.create_smt_design_space_spec(
self.problem.design_space, md_normalize=True, ignore_hierarchy=True)
kwargs['design_space'] = non_hier_ds_spec.design_space
@@ -203,6 +206,9 @@ def get_md_kriging_model(self, kpls_n_comp: int = None, multi=True, **kwargs_) -
else:
surrogate = KRG(**kwargs)

if ignore_hierarchy or kpls_n_comp is not None:
surrogate.supports['x_hierarchy'] = False

if multi:
surrogate = MultiSurrogateModel(surrogate)
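
A hedged usage sketch of the extended signature; that `ModelFactory` is constructed from the problem instance is an assumption, while the keyword arguments and return values are those shown above:

```python
from sb_arch_opt.algo.arch_sbo.models import ModelFactory

factory = ModelFactory(problem)  # `problem`: an ArchOptProblemBase instance (constructor assumed)

# When KPLS is applied (or ignore_hierarchy=True), the Kriging model is built on a
# non-hierarchical design space and advertises supports['x_hierarchy'] = False
surrogate, normalization = factory.get_md_kriging_model(kpls_n_comp=5, multi=True)
```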

@@ -254,7 +260,7 @@ def _override(theta):
class SBArchOptDesignSpace(BaseDesignSpace):
"""SMT design space implementation using SBArchOpt's design space logic"""

_global_disable_hierarchical_cat_fix = IS_SMT_21
_global_disable_hierarchical_cat_fix = IS_SMT_22

def __init__(self, arch_design_space: ArchDesignSpace, md_normalize=False, cont_relax=False,
ignore_hierarchy=False):
@@ -271,7 +277,7 @@ def arch_design_space(self) -> ArchDesignSpace:
def _get_design_variables(self) -> List['ds.DesignVariable']:
"""Return the design variables defined in this design space if not provided upon initialization of the class"""
smt_des_vars = []
is_conditional = self._ds.is_conditionally_active
is_dv_cond = ([False]*len(self._ds.des_vars)) if self._ignore_hierarchy else self._ds.is_conditionally_active
normalize = self.normalize is not None
cont_relax = self._cont_relax
for i, dv in enumerate(self._ds.des_vars):
@@ -297,7 +303,7 @@ def _get_design_variables(self) -> List['ds.DesignVariable']:
smt_des_vars.append(ds.FloatVariable(0, len(dv.options)-1))
else:
# Conditional categorical variables are currently not supported
if is_conditional[i] and not self._global_disable_hierarchical_cat_fix:
if is_dv_cond[i] and not self._global_disable_hierarchical_cat_fix:
smt_des_vars.append(ds.IntegerVariable(0, len(dv.options)-1))
else:
smt_des_vars.append(ds.CategoricalVariable(values=dv.options))
@@ -308,7 +314,7 @@

def _is_conditionally_acting(self) -> np.ndarray:
if self._ignore_hierarchy:
return np.zeros((len(self._ds.is_conditionally_active),), dtype=bool)
return np.zeros((len(self._ds.des_vars),), dtype=bool)

return self._ds.is_conditionally_active

@@ -325,8 +331,8 @@ def _correct_get_acting(self, x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
is_active = np.ones(is_active.shape, dtype=bool)
return x, is_active

def _sample_valid_x(self, n: int) -> Tuple[np.ndarray, np.ndarray]:
sampler = HierarchicalSampling()
def _sample_valid_x(self, n: int, random_state=None) -> Tuple[np.ndarray, np.ndarray]:
sampler = HierarchicalSampling(seed=random_state)
stub_problem = ArchOptProblemBase(self._ds)
x, is_active = sampler.sample_get_x(stub_problem, n)

@@ -356,6 +362,9 @@ def __init__(self, surrogate: 'SurrogateModel', **kwargs):
self.supports = self._surrogate.supports
self.options["print_global"] = False

self.xt = None
self.yt = None

@property
def name(self):
return f'Multi{self._surrogate.name}'
@@ -364,6 +373,9 @@ def _initialize(self):
self.supports["derivatives"] = False

def set_training_values(self, xt: np.ndarray, yt: np.ndarray, name=None, is_acting=None) -> None:
self.xt = xt
self.yt = yt

self._models = models = []
for iy in range(yt.shape[1]):
model: Union['KrgBased', 'SurrogateModel'] = self._copy_underlying()
@@ -398,6 +410,9 @@ def train(self) -> None:

model.train()

# rmse = np.linalg.norm(self.yt[:, i] - model.predict_values(self.xt)[:, 0], 2)
# print(f'TRAINED {i}: {rmse:.3g}')

if i == 0 and isinstance(model, KrgBased):
try:
theta0 = list(model.optimal_theta)
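
To summarize the `MultiSurrogateModel` behavior touched above: it copies the underlying model once per output column of `yt` and trains each copy separately. A usage sketch with synthetic training data, using SMT's `KRG` as the underlying model:

```python
import numpy as np
from smt.surrogate_models import KRG
from sb_arch_opt.algo.arch_sbo.models import MultiSurrogateModel

xt = np.random.rand(20, 3)                                # 20 training points, 3 inputs
yt = np.column_stack([xt.sum(axis=1), xt.prod(axis=1)])   # 2 outputs -> 2 underlying models

multi_model = MultiSurrogateModel(KRG(print_global=False))
multi_model.set_training_values(xt, yt)  # stores xt/yt and creates one model per output column
multi_model.train()                      # trains each per-output model in turn
```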
