diff --git a/piglot/bin/piglot.py b/piglot/bin/piglot.py
index 9f11d03..f088ede 100644
--- a/piglot/bin/piglot.py
+++ b/piglot/bin/piglot.py
@@ -62,7 +62,7 @@ def main(config_path: str = None):
         stop_criteria=stop,
     )
     # Re-run the best case
-    if 'skip_last_run' not in config:
+    if 'skip_last_run' not in config and best_params is not None:
         objective(parameters.normalise(best_params))
diff --git a/piglot/bin/piglot_plot.py b/piglot/bin/piglot_plot.py
index a6ea28b..70e93d9 100644
--- a/piglot/bin/piglot_plot.py
+++ b/piglot/bin/piglot_plot.py
@@ -5,8 +5,10 @@ import argparse
 from tempfile import TemporaryDirectory
 import numpy as np
+import pandas as pd
 import matplotlib.pyplot as plt
 from scipy.integrate import trapezoid
+from scipy.spatial import ConvexHull
 from PIL import Image
 import torch
 from piglot.parameter import read_parameters
@@ -321,6 +323,85 @@ def plot_gp(args):
     plt.close()
 
 
+def plot_pareto(args):
+    """Driver for plotting the Pareto front for a given multi-objective optimisation problem.
+
+    Parameters
+    ----------
+    args : dict
+        Passed arguments.
+    """
+    config = parse_config_file(args.config)
+    objective = read_objective(config["objective"], read_parameters(config), config["output"])
+    if objective.num_objectives != 2:
+        raise ValueError("Can only plot the Pareto front for a two-objective optimisation problem.")
+    data = objective.get_history()
+    fig, ax = plt.subplots()
+    # Read all the points and variances
+    total_points = np.array([entry['values'] for entry in data.values()]).T
+    variances = np.array([entry['variances'] for entry in data.values() if 'variances' in entry]).T
+    has_variance = variances.size > 0
+    # Separate the dominated points
+    dominated = []
+    nondominated = []
+    pareto = pd.read_table(os.path.join(config["output"], 'pareto_front')).to_numpy()
+    for i, point in enumerate(total_points):
+        if np.isclose(point, pareto).all(axis=1).any():
+            nondominated.append((point, variances[i, :] if has_variance else None))
+        else:
+            dominated.append((point, variances[i, :] if has_variance else None))
+    # Build the Pareto hull
+    hull = ConvexHull(pareto)
+    for simplex in hull.simplices:
+        # Hacky: filter the line between the two endpoints (assuming the list is sorted by x)
+        if abs(simplex[0] - simplex[1]) < pareto.shape[0] - 1:
+            ax.plot(pareto[simplex, 0], pareto[simplex, 1], 'r', ls='--')
+    # Plot the points
+    if has_variance:
+        ax.errorbar(
+            [point[0][0] for point in nondominated],
+            [point[0][1] for point in nondominated],
+            xerr=np.sqrt([point[1][0] for point in nondominated]),
+            yerr=np.sqrt([point[1][1] for point in nondominated]),
+            c='r',
+            fmt='o',
+            label='Pareto front',
+        )
+        if args.all:
+            ax.errorbar(
+                [point[0][0] for point in dominated],
+                [point[0][1] for point in dominated],
+                xerr=np.sqrt([point[1][0] for point in dominated]),
+                yerr=np.sqrt([point[1][1] for point in dominated]),
+                c='k',
+                fmt='o',
+                label='Dominated points',
+            )
+    else:
+        ax.scatter(
+            [point[0][0] for point in nondominated],
+            [point[0][1] for point in nondominated],
+            c='r',
+            label='Pareto front',
+        )
+        if args.all:
+            ax.scatter(
+                [point[0][0] for point in dominated],
+                [point[0][1] for point in dominated],
+                c='k',
+                label='Dominated points',
+            )
+    if args.log:
+        ax.set_xscale('log')
+        ax.set_yscale('log')
+    ax.set_xlabel('Objective 1')
+    ax.set_ylabel('Objective 2')
+    ax.legend()
+    ax.grid()
+    fig.tight_layout()
+    if args.save_fig:
+        fig.savefig(args.save_fig)
+        return
+    plt.show()
+
+
 def main(passed_args: List[str] = None):
     """Entry point for this script."""
     # Global argument parser settings
@@ -544,6 +625,36 @@ def main(passed_args: List[str] = None):
     )
     sp_gp.set_defaults(func=plot_gp)
 
+    # Pareto front plotting
+    sp_pareto = subparsers.add_parser(
+        'pareto',
+        help='plot the Pareto front for a given multi-objective optimisation problem',
+        description=("Plot the Pareto front for a given multi-objective optimisation problem. This "
+                     "must be executed in the same path as the running piglot instance."),
+    )
+    sp_pareto.add_argument(
+        'config',
+        type=str,
+        help="Path for the used or generated configuration file.",
+    )
+    sp_pareto.add_argument(
+        '--save_fig',
+        default=None,
+        type=str,
+        help=("Path to save the generated figure. If used, graphical output is skipped."),
+    )
+    sp_pareto.add_argument(
+        '--log',
+        action='store_true',
+        help="Plot in a log scale."
+    )
+    sp_pareto.add_argument(
+        '--all',
+        action='store_true',
+        help="Plot both the Pareto front and the dominated points."
+    )
+    sp_pareto.set_defaults(func=plot_pareto)
+
     args = parser.parse_args() if passed_args is None else parser.parse_args(passed_args)
     args.func(args)
diff --git a/piglot/objective.py b/piglot/objective.py
index eb3b0c5..5bd20b1 100644
--- a/piglot/objective.py
+++ b/piglot/objective.py
@@ -18,7 +18,7 @@ class Composition(ABC):
     """Abstract class for defining composition functionals with gradients"""
 
-    def composition(self, inner: np.ndarray, params: np.ndarray) -> float:
+    def composition(self, inner: np.ndarray, params: np.ndarray) -> np.ndarray:
         """Abstract method for computing the outer function of the composition
 
         Parameters
@@ -30,11 +30,11 @@ def composition(self, inner: np.ndarray, params: np.ndarray) -> float:
 
         Returns
         -------
-        float
-            Scalar composition result
+        np.ndarray
+            Composition result
         """
         result = self.composition_torch(torch.from_numpy(inner), torch.from_numpy(params))
-        return result.numpy(force=True).item()
+        return result.numpy(force=True)
 
     @abstractmethod
     def composition_torch(self, inner: torch.Tensor, params: torch.Tensor) -> torch.Tensor:
@@ -50,7 +50,7 @@ def composition_torch(self, inner: torch.Tensor, params: torch.Tensor) -> torch.
         Returns
         -------
         torch.Tensor
-            Scalar composition result
+            Composition result
         """
 
 
@@ -152,8 +152,36 @@ class ObjectiveResult:
     values: List[np.ndarray]
     variances: Optional[List[np.ndarray]] = None
 
+    def __mc_variance(
+        self,
+        composition: Composition,
+        num_samples: int = 1024,
+    ) -> float:
+        """Compute the objective variance using Monte Carlo (using fixed base samples).
+
+        Parameters
+        ----------
+        composition : Composition
+            Composition functional to use.
+        num_samples : int, optional
+            Number of Monte Carlo samples, by default 1024.
+
+        Returns
+        -------
+        float
+            Estimated variance of the objective.
+        """
+        biased = [norm.rvs(loc=0, scale=1) for _ in range(num_samples)]
+        mc_objectives = [
+            composition.composition(
+                self.values + bias * np.sqrt(self.variances),
+                self.params,
+            ) for bias in biased
+        ]
+        return np.var(mc_objectives, axis=0)
+
     def scalarise(self, composition: Composition = None) -> float:
-        """Scalarise the result.
+        """Scalarise the result under noise-free single-objective optimisation.
 
         Parameters
         ----------
@@ -165,14 +193,30 @@ def scalarise(self, composition: Composition = None) -> float:
         float
             Scalarised result.
""" - if composition is not None: - return composition.composition(self.values, self.params) - return np.mean(self.values) + if composition is None: + return np.mean(self.values) + return composition.composition(self.values, self.params).item() + + def scalarise_mo(self, composition: Composition = None) -> List[float]: + """Pseudo-scalarise the result under noise-free multi-objective optimisation. + + Parameters + ---------- + composition : Composition, optional + Composition functional to use, by default None. + + Returns + ------- + List[float] + Pseudo-scalarised result. + """ + if composition is None: + return [np.mean(vals) for vals in self.values] + return [val.item() for val in composition.composition(self.values, self.params)] def scalarise_stochastic( self, composition: Composition = None, - num_samples: int = 1024, ) -> Tuple[float, float]: """Scalarise the result. @@ -180,23 +224,40 @@ def scalarise_stochastic( ---------- composition : Composition, optional Composition functional to use, by default None. - num_samples : int, optional - Number of samples to use for Monte Carlo, by default 1024. Returns ------- Tuple[float, float] Scalarised mean and variance. """ - if composition is not None: - # Compute the objective variance using Monte Carlo (using fixed base samples) - biased = [norm.rvs(loc=0, scale=1) for _ in range(num_samples)] - mc_objectives = [ - composition.composition(self.values + bias * np.sqrt(self.variances), self.params) - for bias in biased + if composition is None: + return np.mean(self.values), np.sum(self.variances) + return ( + composition.composition(self.values, self.params).item(), + self.__mc_variance(composition).item() + ) + + def scalarise_mo_stochastic( + self, + composition: Composition = None, + ) -> Tuple[List[float], List[float]]: + """Pseudo-scalarise the result under stochastic multi-objective optimisation. + + Parameters + ---------- + composition : Composition, optional + Composition functional to use, by default None. + + Returns + ------- + List[Tuple[float, float]] + Pseudo-scalarised means and variances. 
+ """ + if composition is None: + return [ + (np.mean(vals), np.sum(vars)) for vals, vars in zip(self.values, self.variances) ] - return composition.composition(self.values, self.params), np.var(mc_objectives) - return np.mean(self.values), np.sum(self.variances) + return composition.composition(self.values, self.params), self.__mc_variance(composition) class GenericObjective(Objective): @@ -207,6 +268,7 @@ def __init__( parameters: ParameterSet, stochastic: bool = False, composition: Composition = None, + num_objectives: int = 1, output_dir: str = None, ) -> None: super().__init__() @@ -214,6 +276,8 @@ def __init__( self.output_dir = output_dir self.stochastic = stochastic self.composition = composition + self.num_objectives = num_objectives + self.multi_objective = num_objectives > 1 self.func_calls = 0 self.begin_time = time.perf_counter() self.__mutex = Lock() @@ -226,9 +290,15 @@ def prepare(self) -> None: # Build header for function calls file with open(os.path.join(self.func_calls_file), 'w', encoding='utf8') as file: file.write(f'{"Start Time /s":>15}\t{"Run Time /s":>15}') - file.write(f'\t{"Objective":>15}') - if self.stochastic: - file.write(f'\t{"Variance":>15}') + if self.multi_objective: + for i in range(self.num_objectives): + file.write(f'\t{"Objective_" + str(i + 1):>15}') + if self.stochastic: + file.write(f'\t{"Variance_" + str(i + 1):>15}') + else: + file.write(f'\t{"Objective":>15}') + if self.stochastic: + file.write(f'\t{"Variance":>15}') for param in self.parameters: file.write(f"\t{param.name:>15}") file.write(f'\t{"Hash":>64}\n') @@ -276,14 +346,23 @@ def __call__(self, values: np.ndarray, concurrent: bool = False) -> ObjectiveRes with open(os.path.join(self.func_calls_file), 'a', encoding='utf8') as file: file.write(f'{begin_time - self.begin_time:>15.8e}\t') file.write(f'{end_time - begin_time:>15.8e}\t') - if self.stochastic: - value, variance = objective_result.scalarise_stochastic(self.composition) - file.write(f'{value:>15.8e}\t{variance:>15.8e}') + if self.multi_objective: + if self.stochastic: + vals, vars = objective_result.scalarise_mo_stochastic(self.composition) + for value, var in zip(vals, vars): + file.write(f'{value:>15.8e}\t{var:>15.8e}\t') + else: + for val in objective_result.scalarise_mo(self.composition): + file.write(f'{val:>15.8e}\t') else: - file.write(f'{objective_result.scalarise(self.composition):>15.8e}') + if self.stochastic: + value, var = objective_result.scalarise_stochastic(self.composition) + file.write(f'{value:>15.8e}\t{var:>15.8e}\t') + else: + file.write(f'{objective_result.scalarise(self.composition):>15.8e}\t') for i, param in enumerate(self.parameters): - file.write(f"\t{param.denormalise(values[i]):>15.6f}") - file.write(f'\t{self.parameters.hash(values)}\n') + file.write(f"{param.denormalise(values[i]):>15.6f}\t") + file.write(f'{self.parameters.hash(values)}\n') return objective_result def plot_best(self) -> List[Figure]: @@ -294,18 +373,26 @@ def plot_best(self) -> List[Figure]: List[Figure] List of figures with the plot. 
""" - # Find hash associated with the best case - df = pd.read_table(self.func_calls_file) - df.columns = df.columns.str.strip() - min_series = df.iloc[df["Objective"].idxmin()] - call_hash = str(min_series["Hash"]) - # Use the single case plotting utility - figures = self.plot_case(call_hash) - # Also display the best case - print("Best run:") - print(min_series.drop(["Objective", "Hash"])) - print(f"Hash: {call_hash}") - print(f"Objective: {min_series['Objective']:15.8e}") + # Build the objective list + objective_list = ["Objective"] + if self.multi_objective: + objective_list = [f"Objective_{i + 1}" for i in range(self.num_objectives)] + # Plot the best case for each objective + figures = [] + for i, objective in enumerate(objective_list): + # Find hash associated with the best case + df = pd.read_table(self.func_calls_file) + df.columns = df.columns.str.strip() + min_series = df.iloc[df[objective].idxmin()] + call_hash = str(min_series["Hash"]) + # Use the single case plotting utility + options = {'append_title': objective} if self.multi_objective else None + figures += self.plot_case(call_hash, options=options) + # Also display the best case + print(f"Best run{' (' + objective + ')'}:") + print(min_series.drop(objective_list + ["Hash"])) + print(f"Hash: {call_hash}") + print(f"{objective}: {min_series[objective]:15.8e}\n") return figures def get_history(self) -> Dict[str, Dict[str, Any]]: @@ -321,6 +408,24 @@ def get_history(self) -> Dict[str, Dict[str, Any]]: x_axis = df["Start Time /s"] + df["Run Time /s"] params = df[[param.name for param in self.parameters]] param_hash = df["Hash"].to_list() + # Multi-objective case + if self.multi_objective: + values = [df[f"Objective_{i + 1}"] for i in range(self.num_objectives)] + if self.stochastic: + variances = [df[f"Variance_{i + 1}"] for i in range(self.num_objectives)] + return_dict = {} + for i in range(self.num_objectives): + result = { + "time": x_axis.to_numpy(), + "values": values[i], + "params": params.to_numpy(), + "hashes": param_hash, + } + if self.stochastic: + result["variances"] = variances[i] + return_dict[f"Objective_{i + 1}"] = result + return return_dict + # Single objective case result = { "time": x_axis.to_numpy(), "values": df["Objective"].to_numpy(), diff --git a/piglot/objectives/design.py b/piglot/objectives/design.py index c6fdd0e..3eba889 100644 --- a/piglot/objectives/design.py +++ b/piglot/objectives/design.py @@ -126,11 +126,15 @@ def __init__( output_dir: str, stochastic: bool = False, composite: bool = False, + multi_objective: bool = False, ) -> None: super().__init__( parameters, stochastic, - composition=self.__composition(True, stochastic, targets) if composite else None, + composition=( + self.__composition(not multi_objective, stochastic, targets) if composite else None + ), + num_objectives=len(targets) if multi_objective else 1, output_dir=output_dir, ) self.solver = solver @@ -358,4 +362,5 @@ def read( output_dir, stochastic=bool(config.get('stochastic', False)), composite=bool(config.get('composite', False)), + multi_objective=bool(config.get('multi_objective', False)), ) diff --git a/piglot/objectives/fitting.py b/piglot/objectives/fitting.py index ab3ec02..85c1fae 100644 --- a/piglot/objectives/fitting.py +++ b/piglot/objectives/fitting.py @@ -276,15 +276,15 @@ def prepare(self) -> None: def composition( self, - scalarise: bool, + multi_objective: bool, stochastic: bool, ) -> Composition: """Build the composition utility for the design objective. 
         Parameters
         ----------
-        scalarise : bool
-            Whether we should scalarise the objective.
+        multi_objective : bool
+            Whether this is a multi-objective problem.
         stochastic : bool
             Whether the objective is stochastic.
         targets : List[DesignTarget]
@@ -296,7 +296,7 @@ def composition(
             Composition utility for the design objective.
         """
         return ResponseComposition(
-            scalarise=scalarise,
+            scalarise=not multi_objective,
             stochastic=stochastic,
             weights=[t.weight for t in self.references],
             reductions=[t.reduction for t in self.references],
@@ -463,11 +463,13 @@ def __init__(
         output_dir: str,
         stochastic: bool = False,
         composite: bool = False,
+        multi_objective: bool = False,
     ) -> None:
         super().__init__(
             parameters,
             stochastic,
-            composition=solver.composition(True, stochastic) if composite else None,
+            composition=solver.composition(multi_objective, stochastic) if composite else None,
+            num_objectives=len(solver.references) if multi_objective else 1,
             output_dir=output_dir,
         )
         self.composite = composite
@@ -479,7 +481,7 @@ def prepare(self) -> None:
         self.solver.prepare()
         # Reset the composition utility
         if self.composite:
-            self.composition = self.solver.composition(True, self.stochastic)
+            self.composition = self.solver.composition(self.multi_objective, self.stochastic)
 
     def _objective(self, values: np.ndarray, concurrent: bool = False) -> ObjectiveResult:
         """Abstract method for objective computation.
@@ -592,4 +594,5 @@ def read(
         output_dir,
         stochastic=bool(config.get('stochastic', False)),
         composite=bool(config.get('composite', False)),
+        multi_objective=bool(config.get('multi_objective', False)),
     )
diff --git a/piglot/optimiser.py b/piglot/optimiser.py
index 06582ae..fe09fa9 100644
--- a/piglot/optimiser.py
+++ b/piglot/optimiser.py
@@ -278,18 +278,21 @@ def optimise(
         self.begin_time = time.perf_counter()
         self._optimise(n_dim, n_iter, bounds, init_shot)
         elapsed = time.perf_counter() - self.begin_time
-        # Denormalise best solution
-        new_solution = self.parameters.denormalise(self.best_solution)
+        # Denormalise best solution (if any)
+        new_solution = None
+        if self.best_solution is not None:
+            new_solution = np.array(self.parameters.denormalise(self.best_solution))
         if verbose:
             self.pbar.close()
             print(f'Completed {self.i_iter} iterations in {pretty_time(elapsed)}')
             print(f'Best loss: {self.best_value:15.8e}')
-            print('Best parameters')
-            max_width = max(len(par.name) for par in self.parameters)
-            for i, par in enumerate(self.parameters):
-                print(f'- {par.name.rjust(max_width)}: {new_solution[i]:>12.6f}')
+            if self.best_solution is not None:
+                print('Best parameters')
+                max_width = max(len(par.name) for par in self.parameters)
+                for i, par in enumerate(self.parameters):
+                    print(f'- {par.name.rjust(max_width)}: {new_solution[i]:>12.6f}')
         # Return the best value
-        return self.best_value, np.array(new_solution)
+        return self.best_value, new_solution
 
     @abstractmethod
     def _optimise(
@@ -324,8 +327,8 @@ def _optimise(
     def __update_progress_files(
         self,
         i_iter: int,
-        curr_solution: float,
-        curr_value: np.ndarray,
+        curr_solution: np.ndarray,
+        curr_value: float,
         extra_info: str,
     ) -> None:
         """Update progress on output files.
@@ -334,16 +337,18 @@
         ----------
         i_iter : int
            Current iteration number.
+        curr_solution : np.ndarray
+            Current objective minimiser.
         curr_value : float
            Current objective value.
-        curr_solution : float
-            Current objective minimiser.
         extra_info : str
            Additional information to pass to user.
""" elapsed = time.perf_counter() - self.begin_time - denorm_curr = self.parameters.denormalise(curr_solution) - denorm_best = self.parameters.denormalise(self.best_solution) + skip_pars = curr_solution is None + if not skip_pars: + denorm_curr = self.parameters.denormalise(curr_solution) + denorm_best = self.parameters.denormalise(self.best_solution) # Update progress file with open(os.path.join(self.output_dir, "progress"), 'w', encoding='utf8') as file: file.write(f'Iteration: {i_iter}\n') @@ -351,9 +356,10 @@ def __update_progress_files( file.write(f'Best loss: {self.best_value}\n') if extra_info is not None: file.write(f'Optimiser info: {extra_info}\n') - file.write('Best parameters:\n') - for i, par in enumerate(self.parameters): - file.write(f'\t{par.name}: {denorm_best[i]}\n') + if not skip_pars: + file.write('Best parameters:\n') + for i, par in enumerate(self.parameters): + file.write(f'\t{par.name}: {denorm_best[i]}\n') file.write(f'\nElapsed time: {pretty_time(elapsed)}\n') # Update history file with open(os.path.join(self.output_dir, "history"), 'a', encoding='utf8') as file: @@ -362,7 +368,7 @@ def __update_progress_files( file.write(f'{self.best_value:>15.8e}\t') file.write(f'{curr_value:>15.8e}\t') for i, par in enumerate(self.parameters): - file.write(f'{denorm_curr[i]:>15.8f}\t') + file.write('None\t'.rjust(16) if skip_pars else f'{denorm_curr[i]:>15.8f}\t') file.write(f"\t{'-' if extra_info is None else extra_info}") file.write('\n') @@ -382,7 +388,7 @@ def _progress_check( Current iteration number. curr_value : float Current objective value. - curr_solution : float + curr_solution : np.ndarray Current objective minimiser. extra_info : str Additional information to pass to user. diff --git a/piglot/optimisers/botorch/bayes.py b/piglot/optimisers/botorch/bayes.py index 1715543..8c4b7cf 100644 --- a/piglot/optimisers/botorch/bayes.py +++ b/piglot/optimisers/botorch/bayes.py @@ -1,7 +1,9 @@ """Main optimiser classes for using BoTorch with piglot""" -from typing import Tuple, List, Union +from typing import Tuple, List, Union, Type, Dict, Callable from multiprocessing.pool import ThreadPool as Pool +import os import warnings +import dataclasses import numpy as np import torch from scipy.stats import qmc @@ -9,18 +11,103 @@ import botorch from botorch.fit import fit_gpytorch_mll from botorch.optim import optimize_acqf -from botorch.models.model import Model from botorch.models import SingleTaskGP -from botorch.acquisition.acquisition import AcquisitionFunction -from botorch.acquisition import UpperConfidenceBound, qUpperConfidenceBound -from botorch.acquisition import ExpectedImprovement, qExpectedImprovement -from botorch.acquisition import ProbabilityOfImprovement, qProbabilityOfImprovement -from botorch.acquisition.objective import GenericMCObjective -from botorch.acquisition.knowledge_gradient import qKnowledgeGradient +from botorch.models.model import Model +from botorch.models.converter import batched_to_model_list +from botorch.acquisition import ( + AcquisitionFunction, + UpperConfidenceBound, + qUpperConfidenceBound, + ExpectedImprovement, + qExpectedImprovement, + ProbabilityOfImprovement, + qProbabilityOfImprovement, + LogExpectedImprovement, + qLogExpectedImprovement, + qNoisyExpectedImprovement, + qLogNoisyExpectedImprovement, + qKnowledgeGradient, +) +from botorch.acquisition.objective import GenericMCObjective, UnstandardizePosteriorTransform +from botorch.acquisition.multi_objective import ( + qExpectedHypervolumeImprovement, + 
+    qNoisyExpectedHypervolumeImprovement,
+    qHypervolumeKnowledgeGradient,
+)
+from botorch.utils.multi_objective.box_decompositions.non_dominated import (
+    FastNondominatedPartitioning,
+    NondominatedPartitioning,
+)
+from botorch.acquisition.multi_objective.logei import (
+    qLogExpectedHypervolumeImprovement,
+    qLogNoisyExpectedHypervolumeImprovement,
+)
+from botorch.acquisition.multi_objective.objective import GenericMCMultiOutputObjective
 from botorch.sampling import SobolQMCNormalSampler
-from piglot.objective import Objective, GenericObjective, ObjectiveResult
-from piglot.optimisers.botorch.dataset import BayesDataset
+from piglot.objective import (
+    Objective,
+    GenericObjective,
+    ObjectiveResult,
+)
 from piglot.optimiser import Optimiser
+from piglot.optimisers.botorch.dataset import BayesDataset
+
+
+AVAILABLE_ACQUISITIONS: Dict[str, Type[AcquisitionFunction]] = {
+    # Analytical acquisitions
+    'ucb': UpperConfidenceBound,
+    'ei': ExpectedImprovement,
+    'logei': LogExpectedImprovement,
+    'pi': ProbabilityOfImprovement,
+    # Quasi-Monte Carlo acquisitions
+    'qucb': qUpperConfidenceBound,
+    'qei': qExpectedImprovement,
+    'qlogei': qLogExpectedImprovement,
+    'qpi': qProbabilityOfImprovement,
+    'qkg': qKnowledgeGradient,
+    # Analytical and quasi-Monte Carlo acquisitions for noisy problems
+    'qnei': qNoisyExpectedImprovement,
+    'qlognei': qLogNoisyExpectedImprovement,
+    # Multi-objective acquisitions
+    'qehvi': qExpectedHypervolumeImprovement,
+    'qnehvi': qNoisyExpectedHypervolumeImprovement,
+    'qlogehvi': qLogExpectedHypervolumeImprovement,
+    'qlognehvi': qLogNoisyExpectedHypervolumeImprovement,
+    'qhvkg': qHypervolumeKnowledgeGradient
+}
+
+
+def default_acquisition(
+    composite: bool,
+    multi_objective: bool,
+    noisy: bool,
+    q: int,
+) -> str:
+    """Return the default acquisition function for the given optimisation problem.
+
+    Parameters
+    ----------
+    composite : bool
+        Whether the optimisation problem is a composition.
+    multi_objective : bool
+        Whether the optimisation problem is multi-objective.
+    noisy : bool
+        Whether the optimisation problem is noisy.
+    q : int
+        Number of candidates to generate.
+
+    Returns
+    -------
+    str
+        Name of the default acquisition function.
+ """ + if multi_objective: + return 'qlognehvi' if noisy else 'qlogehvi' + if noisy: + return 'qlognei' + if composite or q > 1: + return 'qlogei' + return 'logei' def fit_mll_pytorch_loop(mll: ExactMarginalLogLikelihood, n_iters: int = 100) -> None: @@ -54,7 +141,7 @@ def __init__( objective: Objective, n_initial: int = 8, n_test: int = 0, - acquisition: str = 'ucb', + acquisition: str = None, beta: float = 1.0, noisy: float = False, q: int = 1, @@ -65,19 +152,30 @@ def __init__( ) -> None: if not isinstance(objective, GenericObjective): raise RuntimeError("Bayesian optimiser requires a GenericObjective") + if bool(noisy) and objective.stochastic: + warnings.warn("Noisy setting with stochastic objective - ignoring objective variance") super().__init__('BoTorch', objective) self.objective = objective self.n_initial = n_initial self.acquisition = acquisition self.beta = beta - self.noisy = bool(noisy) + self.noisy = bool(noisy) or objective.stochastic self.q = q self.seed = seed self.load_file = load_file self.export = export self.n_test = n_test self.device = device - if self.acquisition not in ('ucb', 'ei', 'pi', 'kg', 'qucb', 'qei', 'qpi', 'qkg'): + self.partitioning: FastNondominatedPartitioning = None + self.ref_point: torch.Tensor = None + if acquisition is None: + self.acquisition = default_acquisition( + objective.composition, + objective.multi_objective, + self.noisy, + self.q, + ) + elif self.acquisition not in AVAILABLE_ACQUISITIONS: raise RuntimeError(f"Unkown acquisition function {self.acquisition}") if not self.acquisition.startswith('q') and self.q != 1: raise RuntimeError("Can only use q != 1 for quasi-Monte Carlo acquisitions") @@ -92,16 +190,6 @@ def _validate_problem(self, objective: Objective) -> None: Objective to optimise """ - def _acq_func( - self, - dataset: BayesDataset, - model: Model, - n_dim: int, - ) -> Tuple[AcquisitionFunction, int, int]: - if self.objective.composition: - return self._build_acquisition_composite(dataset, model, n_dim) - return self._build_acquisition_scalar(dataset, model, n_dim) - def _build_model(self, std_dataset: BayesDataset) -> Model: # Fetch data params = std_dataset.params @@ -118,7 +206,7 @@ def _build_model(self, std_dataset: BayesDataset) -> Model: except botorch.exceptions.ModelFittingError: warnings.warn('Optimisation of the MLL failed, falling back to PyTorch optimiser') fit_mll_pytorch_loop(mll) - return model + return batched_to_model_list(model) if self.objective.multi_objective else model def _get_candidates( self, @@ -184,7 +272,7 @@ def _get_random_points( return [point * (bound[:, 1] - bound[:, 0]) + bound[:, 0] for point in points] def _result_to_dataset(self, result: ObjectiveResult) -> Tuple[np.ndarray, np.ndarray]: - if self.objective.composition: + if self.objective.composition or self.objective.multi_objective: values = result.values variances = result.variances if self.objective.stochastic else np.zeros_like(values) else: @@ -200,48 +288,51 @@ def _value_to_scalar( value: Union[np.ndarray, torch.Tensor], params: Union[np.ndarray, torch.Tensor], ) -> float: + if self.objective.multi_objective: + raise RuntimeError("Cannot convert multi-objective value to scalar") if self.objective.composition: if isinstance(value, np.ndarray): return self.objective.composition.composition(value, params) return self.objective.composition.composition_torch(value, params).cpu().item() return value.item() - def _build_acquisition_scalar( + def _get_composition( self, dataset: BayesDataset, - model: Model, - n_dim: int, - ) -> 
-        # Default values for multi-restart optimisation
-        num_restarts = 12
-        raw_samples = max(256, 16 * n_dim * n_dim)
-        # Delegate acquisition building
-        best = torch.min(dataset.values).item()
-        mc_objective = GenericMCObjective(lambda vals, X: -vals.squeeze(-1))
-        sampler = SobolQMCNormalSampler(torch.Size([512]), seed=self.seed)
-        if self.acquisition == 'ucb':
-            acq = UpperConfidenceBound(model, self.beta, maximize=False)
-        elif self.acquisition == 'qucb':
-            acq = qUpperConfidenceBound(model, self.beta, sampler=sampler, objective=mc_objective)
-        elif self.acquisition == 'ei':
-            acq = ExpectedImprovement(model, best, maximize=False)
-        elif self.acquisition == 'qei':
-            acq = qExpectedImprovement(model, best, sampler=sampler, objective=mc_objective)
-        elif self.acquisition == 'pi':
-            acq = ProbabilityOfImprovement(model, best, maximize=False)
-        elif self.acquisition == 'qpi':
-            acq = qProbabilityOfImprovement(model, best, sampler=sampler, objective=mc_objective)
-        elif self.acquisition in ('kg', 'qkg'):
-            # Knowledge gradient is quite expensive: use less samples
-            num_restarts = 6
-            raw_samples = 128
-            sampler = SobolQMCNormalSampler(torch.Size([64]), seed=self.seed)
-            acq = qKnowledgeGradient(model, sampler=sampler, objective=mc_objective)
-        else:
-            raise RuntimeError(f"Unknown acquisition {self.acquisition}")
-        return acq, num_restarts, raw_samples
+    ) -> Callable[[torch.Tensor, torch.Tensor], torch.Tensor]:
+        _, y_avg, y_std = dataset.get_obervation_stats()
+        # Just de-standardise and negate the values if no composition is available
+        if not self.objective.composition:
+            return lambda vals, X: -dataset.expand_observations(vals * y_std + y_avg).squeeze(-1)
+        # Otherwise, use the composition function
+        return lambda vals, X: -self.objective.composition.composition_torch(
+            dataset.expand_observations(vals * y_std + y_avg),
+            dataset.lbounds + (dataset.ubounds - dataset.lbounds) * X,
+        )
+
+    def _update_mo_data(self, dataset: BayesDataset) -> float:
+        # Compute reference point
+        # TODO: check how to properly handle the ref_point and best_value
+        composition = self._get_composition(dataset)
+        std_dataset = dataset.standardised()
+        # TODO: check how to handle this in non-composite settings
+        y_points = composition(std_dataset.values, std_dataset.params)
+        self.ref_point = torch.min(y_points, dim=0).values
+        # Update partitioning and Pareto front
+        self.partitioning = FastNondominatedPartitioning(self.ref_point, Y=y_points)
+        hypervolume = self.partitioning.compute_hypervolume().item()
+        pareto = self.partitioning.pareto_Y
+        # Dump the Pareto front to a file
+        with open(os.path.join(self.output_dir, "pareto_front"), 'w', encoding='utf8') as file:
+            file.write('\t'.join(
+                [f'{"Objective_" + str(i + 1):>15}' for i in range(pareto.shape[1])]) + '\n'
+            )
+            for point in pareto:
+                file.write('\t'.join([f'{-x.item():>15.8f}' for x in point]) + '\n')
+        # TODO: after updating the parameter set, write the parameters and hash for each point
+        return -np.log(hypervolume)
 
-    def _build_acquisition_composite(
+    def _acq_func(
         self,
         dataset: BayesDataset,
         model: Model,
@@ -250,29 +341,89 @@
         # Default values for multi-restart optimisation
         num_restarts = 12
         raw_samples = max(256, 16 * n_dim * n_dim)
+        sampler = SobolQMCNormalSampler(torch.Size([512]), seed=self.seed)
+        # Build composite MC objective
+        composition = self._get_composition(dataset)
+        mc_objective = (
+            GenericMCMultiOutputObjective(composition)
+            if self.objective.multi_objective else GenericMCObjective(composition)
+        )
+
+        # Find best value for the acquisition
+        std_dataset = dataset.standardised()
         _, y_avg, y_std = dataset.get_obervation_stats()
-        mc_objective = GenericMCObjective(
-            lambda vals, X: -self.objective.composition.composition_torch(
-                dataset.expand_observations(vals * y_std + y_avg),
-                dataset.lbounds + (dataset.ubounds - dataset.lbounds) * X,
+        best = torch.max(composition(std_dataset.values, std_dataset.params)).item()
+
+        # Delegate to the correct acquisition function
+        # The arguments for each acquisition are different, so we group them into families
+        acq_class = AVAILABLE_ACQUISITIONS[self.acquisition]
+        # Analytical acquisitions
+        if self.acquisition == 'ucb':
+            acq = acq_class(
+                model,
+                self.beta,
+                maximize=False,
+                posterior_transform=UnstandardizePosteriorTransform(y_avg, y_std),
             )
-        )
-        # Delegate acquisition building
-        best = torch.max(dataset.values).item()
-        sampler = SobolQMCNormalSampler(torch.Size([512]), seed=self.seed)
-        if self.acquisition in ('ucb', 'qucb'):
-            acq = qUpperConfidenceBound(model, self.beta, sampler=sampler, objective=mc_objective)
-        elif self.acquisition in ('ei', 'qei'):
-            acq = qExpectedImprovement(model, best, sampler=sampler, objective=mc_objective)
-        elif self.acquisition in ('pi', 'qpi'):
-            acq = qProbabilityOfImprovement(model, best, sampler=sampler, objective=mc_objective)
-        elif self.acquisition in ('kg', 'qkg'):
+        elif self.acquisition in ('ei', 'logei', 'pi'):
+            acq = acq_class(
+                model,
+                best,
+                maximize=False,
+                posterior_transform=UnstandardizePosteriorTransform(y_avg, y_std),
+            )
+        # Quasi-Monte Carlo acquisitions
+        elif self.acquisition == 'qucb':
+            acq = acq_class(
+                model,
+                self.beta,
+                sampler=sampler,
+                objective=mc_objective,
+            )
+        elif self.acquisition in ('qei', 'qlogei', 'qpi'):
+            acq = acq_class(
+                model,
+                best,
+                sampler=sampler,
+                objective=mc_objective,
+            )
+        elif self.acquisition in ('qnei', 'qlognei'):
+            acq = acq_class(
+                model,
+                std_dataset.params,
+                sampler=sampler,
+                objective=mc_objective,
+            )
+        elif self.acquisition == 'qkg':
             # Knowledge gradient is quite expensive: use less samples
             num_restarts = 6
             raw_samples = 128
             sampler = SobolQMCNormalSampler(torch.Size([64]), seed=self.seed)
-            acq = qKnowledgeGradient(model, sampler=sampler, objective=mc_objective)
+            acq = acq_class(model, sampler=sampler, objective=mc_objective)
+        # Quasi-Monte Carlo multi-objective acquisitions
+        elif self.acquisition in ('qehvi', 'qlogehvi'):
+            acq = acq_class(
+                model,
+                self.ref_point,
+                self.partitioning,
+                objective=mc_objective,
+                sampler=sampler,
+            )
+        elif self.acquisition in ('qnehvi', 'qlognehvi'):
+            acq = acq_class(
+                model,
+                self.ref_point,
+                std_dataset.params,
+                objective=mc_objective,
+                sampler=sampler,
+            )
+        elif self.acquisition == 'qhvkg':
+            acq = acq_class(
+                model,
+                self.ref_point,
+                objective=mc_objective,
+            )
         else:
             raise RuntimeError(f"Unknown acquisition {self.acquisition}")
         return acq, num_restarts, raw_samples
@@ -337,8 +488,13 @@ def _optimise(
             test_dataset.push(test_points[i], values, variances)
 
         # Find current best point to return to the driver
-        best_params, best_result = dataset.min(self._value_to_scalar)
-        self._progress_check(0, self._value_to_scalar(best_result, best_params), best_params)
+        if self.objective.multi_objective:
+            best_value = self._update_mo_data(dataset)
+            best_params = None
+        else:
+            best_params, best_result = dataset.min(self._value_to_scalar)
+            best_value = self._value_to_scalar(best_result, best_params)
+        self._progress_check(0, best_value, best_params)
 
         # Optimisation loop
         for i_iter in range(n_iter):
@@ -356,22 +512,41 @@
             results = self._eval_candidates(candidates)
 
             # Update dataset
-            values_batch = []
+            results_batch = []
             for i, result in enumerate(results):
                 values, variances = self._result_to_dataset(result)
-                values_batch.append(self._value_to_scalar(values, candidates[i, :]))
+                results_batch.append(values)
                 dataset.push(candidates[i, :], values, variances)
 
             # Find best observation for this batch
-            best_idx = np.argmin(values_batch)
-            best_value = values_batch[best_idx]
-            best_params = candidates[best_idx, :]
-
-            # Update progress (with CV data if available)
-            extra = f'Val. {cv_error:6.4}' if cv_error else None
+            if self.objective.multi_objective:
+                best_value = self._update_mo_data(dataset)
+                best_params = None
+            else:
+                values_batch = [
+                    self._value_to_scalar(values, candidates[i, :])
+                    for i, values in enumerate(results_batch)
+                ]
+                best_idx = np.argmin(values_batch)
+                best_value = values_batch[best_idx]
+                best_params = candidates[best_idx, :]
+
+            # Update progress (with extra data if available)
+            extra = None
+            if cv_error:
+                extra = f'Val. {cv_error:6.4}'
+                if self.objective.multi_objective:
+                    extra += f' Num Pareto: {self.partitioning.pareto_Y.shape[0]}'
+            elif self.objective.multi_objective:
+                extra = f'Num Pareto: {self.partitioning.pareto_Y.shape[0]}'
             if self._progress_check(i_iter + 1, best_value, best_params, extra_info=extra):
                 break
 
         # Return optimisation result
-        best_params, best_result = dataset.min(self._value_to_scalar)
-        return best_params, self._value_to_scalar(best_result, best_params)
+        if self.objective.multi_objective:
+            best_result = self._update_mo_data(dataset)
+            best_params = None
+        else:
+            best_params, best_result = dataset.min(self._value_to_scalar)
+            best_result = self._value_to_scalar(best_result, best_params)
+        return best_params, best_result
diff --git a/piglot/optimisers/botorch/dataset.py b/piglot/optimisers/botorch/dataset.py
index 2336926..1126208 100644
--- a/piglot/optimisers/botorch/dataset.py
+++ b/piglot/optimisers/botorch/dataset.py
@@ -115,10 +115,12 @@ def get_obervation_stats(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]
     def expand_observations(self, reduced: torch.Tensor) -> torch.Tensor:
         """Expand reduced observations to full size.
 
+
         Parameters
         ----------
         reduced : torch.Tensor
             Reduced observations.
+
         Returns
         -------
         torch.Tensor
diff --git a/piglot/utils/composition/responses.py b/piglot/utils/composition/responses.py
index efa7c60..6a3a8a2 100644
--- a/piglot/utils/composition/responses.py
+++ b/piglot/utils/composition/responses.py
@@ -342,26 +342,6 @@ def transform(self, params: np.ndarray, responses: List[List[OutputResult]]) ->
             self.concat.concat(variances) if self.stochastic else None,
         )
 
-    def composition(self, inner: np.ndarray, params: np.ndarray) -> np.ndarray:
-        """Compute the outer function of the composition.
-
-        Parameters
-        ----------
-        inner : np.ndarray
-            Return value from the inner function.
-        params : np.ndarray
-            Parameters for the given responses.
-
-        Returns
-        -------
-        np.ndarray
-            Composition result.
-        """
-        return self.composition_torch(
-            torch.from_numpy(inner),
-            torch.from_numpy(params),
-        ).numpy(force=True)
-
     def composition_torch(self, inner: torch.Tensor, params: torch.Tensor) -> torch.Tensor:
         """Compute the outer function of the composition with gradients.
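Usage sketch (illustrative, not part of the patch): the snippet below only exercises the acquisition defaults introduced in bayes.py above, assuming the patched piglot package is importable; the expected names follow directly from default_acquisition.

# Minimal sketch: check the default acquisition choices added in this patch.
from piglot.optimisers.botorch.bayes import default_acquisition

# Plain single-objective problem: analytical log-EI.
assert default_acquisition(composite=False, multi_objective=False, noisy=False, q=1) == 'logei'
# Composite or batched (q > 1) problems: quasi-Monte Carlo log-EI.
assert default_acquisition(composite=True, multi_objective=False, noisy=False, q=1) == 'qlogei'
assert default_acquisition(composite=False, multi_objective=False, noisy=False, q=4) == 'qlogei'
# Noisy single-objective problems: noisy log-EI.
assert default_acquisition(composite=False, multi_objective=False, noisy=True, q=1) == 'qlognei'
# Multi-objective problems: (noisy) log expected hypervolume improvement.
assert default_acquisition(composite=False, multi_objective=True, noisy=False, q=1) == 'qlogehvi'
assert default_acquisition(composite=False, multi_objective=True, noisy=True, q=1) == 'qlognehvi'

With `multi_objective: true` set in a fitting or design objective block of the configuration, the optimiser writes a `pareto_front` file to the output directory, which the new plotting subcommand reads, e.g. `piglot-plot pareto config.yaml --all --log` (the `piglot-plot` entry-point name and `config.yaml` file name are assumptions for illustration).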