Merge pull request #155 from joezuntz/repeat-maxlike
Add repeats option to maxlike sampler
joezuntz authored Dec 5, 2024
2 parents b4a3973 + 711efc1 commit b99ed1b
Showing 6 changed files with 342 additions and 74 deletions.
3 changes: 3 additions & 0 deletions cosmosis/output/astropy_output.py
@@ -31,3 +31,6 @@ def _write_final(self, key, value, comment):
def _write_comment(self, comment):
self.table.meta["comments"] = self.table.meta.get("comments", []) + [comment]

def reset_to_chain_start(self):
self.table.remove_rows(slice(None))
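
A minimal standalone sketch (assuming only astropy is installed) of what the new reset_to_chain_start relies on: Table.remove_rows with a full slice empties every row while leaving the columns and metadata intact, so the output can be rewound and rewritten.

from astropy.table import Table

# Build a small table, then clear it the same way reset_to_chain_start does.
t = Table({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
t.meta["comments"] = ["kept after reset"]
t.remove_rows(slice(None))  # removes every row

assert len(t) == 0
assert t.colnames == ["a", "b"]                    # columns survive
assert t.meta["comments"] == ["kept after reset"]  # metadata survives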

3 changes: 3 additions & 0 deletions cosmosis/output/in_memory_output.py
@@ -38,5 +38,8 @@ def from_options(cls, options, resume=False):
@classmethod
def load_from_options(cls, options):
raise ValueError("No output was saved from this run")

def reset_to_chain_start(self):
self.rows = []


280 changes: 209 additions & 71 deletions cosmosis/samplers/maxlike/maxlike_sampler.py
@@ -1,59 +1,132 @@
from .. import Sampler
from .. import ParallelSampler
from ...runtime import logs
import numpy as np
import scipy.optimize
import warnings

# The likelihood function we wish to optimize.
# Note that we are minimizing the negative of the likelihood/posterior
def likefn(p_in):
global sampler
# Check the normalization
if (not np.all(p_in>=0)) or (not np.all(p_in<=1)):
return np.inf
p = sampler.pipeline.denormalize_vector(p_in)
r = sampler.pipeline.run_results(p)
if sampler.max_posterior:
return -r.post
else:
return -r.like

def run_optimizer(start_vector):
global sampler
return sampler.run_optimizer(start_vector)
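
A hedged aside on why likefn and run_optimizer are defined at module level with a global sampler rather than as methods: process pools generally pickle the callables they dispatch, and module-level functions pickle by name. A toy illustration, using multiprocessing as a stand-in for whatever pool the ParallelSampler provides:

import multiprocessing

def square(x):
    # Module-level functions pickle by reference, so worker processes
    # can import and call them; a lambda or closure here would fail.
    return x * x

if __name__ == "__main__":
    with multiprocessing.Pool(2) as pool:
        print(pool.map(square, [1, 2, 3]))  # [1, 4, 9]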

class MaxlikeSampler(ParallelSampler):
parallel_output = False
supports_resume = False # TODO: support resuming

class MaxlikeSampler(Sampler):
sampler_outputs = [("prior", float), ("like", float), ("post", float)]

def config(self):
global sampler
sampler = self
self.tolerance = self.read_ini("tolerance", float, 1e-3)
self.maxiter = self.read_ini("maxiter", int, 1000)
self.output_ini = self.read_ini("output_ini", str, "")
self.output_cov = self.read_ini("output_covmat", str, "")
self.method = self.read_ini("method",str,"Nelder-Mead")
self.max_posterior = self.read_ini("max_posterior", bool, False)

if self.max_posterior:
logs.overview("------------------------------------------------")
logs.overview("NOTE: Running optimizer in **max-posterior** mode:")
logs.overview("NOTE: Will maximize the combined likelihood and prior")
logs.overview("------------------------------------------------")
self.repeats = self.read_ini("repeats", int, 1)
if self.repeats == 1:
nsteps = 1
elif self.pool is not None:
nsteps = self.pool.size
else:
logs.overview("--------------------------------------------------")
logs.overview("NOTE: Running optimizer in **max-like** mode:")
logs.overview("NOTE: not including the prior, just the likelihood.")
logs.overview("NOTE: Set the parameter max_posterior=T to change this.")
logs.overview("NOTE: This won't matter unless you set some non-flat")
logs.overview("NOTE: priors in a separate priors file.")
logs.overview("--------------------------------------------------")
nsteps = 1
self.nsteps = self.read_ini("nsteps", int, nsteps)
start_method = self.read_ini("start_method", str, "")

self.converged = False
if self.repeats > 1:
start_method_is_random = start_method in ["prior", "cov", "chain"]
have_previous_peak = self.distribution_hints.has_peak()
have_previous_cov = self.distribution_hints.has_cov()

def execute(self):
import scipy.optimize

def likefn(p_in):
#Check the normalization
if (not np.all(p_in>=0)) or (not np.all(p_in<=1)):
return np.inf
p = self.pipeline.denormalize_vector(p_in)
r = self.pipeline.run_results(p)
if have_previous_peak and not have_previous_cov:
raise ValueError("You have set repeats>1 in maxlike, and have chained multiple samplers together. "
"But the previous sampler(s) did not save a covariance matrix. "
"So we have no way to generate multiple starting points. "
)
elif not start_method_is_random:
raise ValueError(f"You set the repeats parameter in the max-like sampler to {self.repeats}"
" but to you this you must also set a start method to choose a new starting point"
" each repeat. You can set start_method to 'prior' to sample from the prior,"
" or set it to 'cov' or 'chain' and also set start_input to a file to load one"
" either a covariance matrix or a chain of samples."
)
if self.is_master():
if self.max_posterior:
return -r.post
logs.overview("------------------------------------------------")
logs.overview("NOTE: Running optimizer in **max-posterior** mode:")
logs.overview("NOTE: Will maximize the combined likelihood and prior")
logs.overview("------------------------------------------------")
else:
return -r.like
logs.overview("--------------------------------------------------")
logs.overview("NOTE: Running optimizer in **max-like** mode:")
logs.overview("NOTE: not including the prior, just the likelihood.")
logs.overview("NOTE: Set the parameter max_posterior=T to change this.")
logs.overview("NOTE: This won't matter unless you set some non-flat")
logs.overview("NOTE: priors in a separate priors file.")
logs.overview("--------------------------------------------------")

# starting position in the normalized space. This will be taken from
# a previous sampler if available, or the values file if not.
start_vector = self.pipeline.normalize_vector(self.start_estimate())
bounds = [(0.0, 1.0) for p in self.pipeline.varied_params]
self.best_fit_results = []

def save_final_outputs(self, best_fit_results, final=False):

# This can happen if the user ctrl-c's the run before any results are saved
if not best_fit_results:
return

# check that the starting position is a valid point
start_like = likefn(start_vector)
if not np.isfinite(start_like):
raise RuntimeError('invalid starting point for maxlike')
# Sort the repeated best-fit estimates by increasing posterior or likelihood
if self.max_posterior:
best_fit_results.sort(key=lambda x: x.post)
else:
best_fit_results.sort(key=lambda x: x.like)

# Save all the results to the main chain output file.
# We will overwrite previous sets of results in the file here
self.output.reset_to_chain_start()
for results in best_fit_results:
self.output.parameters(results.vector, results.extra, results.prior, results.like, results.post)

# Get the overall best-fit results
results = best_fit_results[-1]

# Also if requested, approximate the covariance matrix with the
# inverse of the Hessian matrix.
# For a gaussian posterior this is exact.
if results.covmat is None:
if self.output_cov:
warnings.warn("Sorry - the optimization method you chose does not return a covariance (or Hessian) matrix")
else:
if self.output_cov:
np.savetxt(self.output_cov, results.covmat)

# We only want to update the distribution hints at the very end
if final:
# These values are used by subsequent samplers, if you chain
# some of them together
self.distribution_hints.set_peak(results.vector, results.post)
if results.covmat is not None:
self.distribution_hints.set_cov(results.covmat)
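
A toy sketch (plain Python, no CosmoSIS imports) of the sort-rewind-rewrite pattern above: the results are sorted ascending so the best fit lands in the last row, and the output is reset before each rewrite.

rows = []  # stand-in for the output file's rows

def reset_to_chain_start():
    rows.clear()

# Stand-in results: higher post is better
best_fit_results = [{"post": -12.0}, {"post": -9.5}, {"post": -11.1}]
best_fit_results.sort(key=lambda r: r["post"])  # ascending: best last

reset_to_chain_start()
rows.extend(best_fit_results)
assert rows[-1]["post"] == -9.5  # the overall best fit is the final row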


def run_optimizer(self, inputs):
rank = self.pool.rank if self.pool is not None else 0
start_vector, repeat_index = inputs
start_vector_denorm = self.pipeline.denormalize_vector(start_vector)
logs.overview(f"[Rank {rank}] Starting from point: {start_vector_denorm}")
bounds = [(0.0, 1.0) for p in self.pipeline.varied_params]

if self.method.lower() == "bobyqa":
# use the specific pybobyqa minimizer
@@ -69,58 +142,123 @@ def likefn(p_in):
"rhobeg": 0.1,
"rhoend": self.tolerance,
}
result = pybobyqa.solve(likefn, start_vector, **kw)
opt_norm = result.x
optimizer_result = pybobyqa.solve(likefn, start_vector, **kw)
opt_norm = optimizer_result.x
else:
# Use the scipy minimizer instead
result = scipy.optimize.minimize(likefn, start_vector, method=self.method,
optimizer_result = scipy.optimize.minimize(likefn, start_vector, method=self.method,
jac=False, tol=self.tolerance,
options={'maxiter':self.maxiter, 'disp':True})

opt_norm = result.x
opt_norm = optimizer_result.x

opt = self.pipeline.denormalize_vector(opt_norm)

#Some output - first log the parameters to the screen.
# Generate a results object containing the best-fit parameters,
# their likelihood, posterior, prior, and derived parameters
results = self.pipeline.run_results(opt)

# Log some info to the screen
par_text = ' '.join(str(x) for x in opt)
logs.overview(f"\n\n[Rank {rank}] Optimization run {repeat_index+1} of {self.repeats}")
if self.max_posterior:
logs.overview("Best fit (by posterior):\n%s"%' '.join(str(x) for x in opt))
logs.overview(f"[Rank {rank}] Best fit (by posterior): {par_text}\n%s")
else:
logs.overview("Best fit (by likelihood):\n%s"%' '.join(str(x) for x in opt))
logs.overview("Posterior: {}\n".format(results.post))
logs.overview("Likelihood: {}\n".format(results.like))
logs.overview(f"[Rank {rank}] Best fit (by likelihood):\n{par_text}")
logs.overview(f"[Rank {rank}] Posterior: {results.post}")
logs.overview(f"[Rank {rank}] Likelihood: {results.like}\n")

#Next save them to the proper table file
self.output.parameters(opt, results.extra, results.prior, results.like, results.post)
# Also attach any covariance matrix saved by the sampler to the results
# object, in an ad-hoc way
if hasattr(optimizer_result, 'hess_inv'):
if self.method == "L-BFGS-B":
results.covmat = self.pipeline.denormalize_matrix(optimizer_result.hess_inv.todense())
else:
results.covmat = self.pipeline.denormalize_matrix(optimizer_result.hess_inv)
elif hasattr(optimizer_result, 'hess'):
results.covmat = self.pipeline.denormalize_matrix(np.linalg.inv(optimizer_result.hess))
else:
results.covmat = None

#If requested, create a new ini file for the
#best fit.
if self.output_ini:
self.pipeline.create_ini(opt, self.output_ini)
return results
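
The hasattr branching above exists because scipy's optimizers disagree about how they report curvature. A small demo of the two hess_inv types (standard scipy behaviour, shown here on the Rosenbrock test function):

import numpy as np
import scipy.optimize

x0 = np.array([1.3, 0.7])
r_bfgs = scipy.optimize.minimize(scipy.optimize.rosen, x0, method="BFGS")
r_lbfgsb = scipy.optimize.minimize(scipy.optimize.rosen, x0, method="L-BFGS-B")

print(type(r_bfgs.hess_inv))    # numpy.ndarray: usable directly
print(type(r_lbfgsb.hess_inv))  # LbfgsInvHessProduct: needs .todense()
dense = r_lbfgsb.hess_inv.todense()
assert dense.shape == (2, 2)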

def choose_start(self, n):
if n == 1:
# we just want a single starting point.
# So we can use the basic one from the values file or
# previous sampler; we don't have to worry about making
# it randomized.

self.distribution_hints.set_peak(opt, results.post)
# It's also possible that this is just the last of our repeats
# to be done, but in that case it's also okay to just use the
# basic starting point, since we won't already have used it
start_vector_denorm = self.start_estimate()
start_vector = self.pipeline.normalize_vector(start_vector_denorm)

#Also if requested, approximate the covariance matrix with the
#inverse of the Hessian matrix.
#For a gaussian likelihood this is exact.
covmat = None
if hasattr(result, 'hess_inv'):
if self.method == "L-BFGS-B":
covmat = self.pipeline.denormalize_matrix(result.hess_inv.todense())
else:
covmat = self.pipeline.denormalize_matrix(result.hess_inv)
elif hasattr(result, 'hess'):
covmat = self.pipeline.denormalize_matrix(np.linalg.inv(result.hess_inv))
# If that is invalid we will raise an error
if not np.isfinite(likefn(start_vector)):
raise RuntimeError("Invalid starting point for maxlike")

start_vectors = np.array([start_vector]) # 1 x n array

if covmat is None:
if self.output_cov:
logs.error("Sorry - the optimization method you chose does not return a covariance (or Hessian) matrix")
else:
if self.output_cov:
np.savetxt(self.output_cov, covmat)
self.distribution_hints.set_cov(covmat)
start_vectors = np.zeros((n, self.pipeline.nvaried))
for i in range(n):
# If we are taking a random starting point then there is a chance it will randomly
# be invalid, so we try up to 20 times to get a valid one.
for _ in range(20):
start_vector_denorm = self.start_estimate(prefer_random=True)
start_vector = self.pipeline.normalize_vector(start_vector_denorm)
if np.isfinite(likefn(start_vector)):
break
else:
raise RuntimeError("Could not find a valid random starting point for maxlike in 20 tries")
start_vectors[i] = start_vector

return start_vectors
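
The retry loop above uses Python's for/else: the else clause fires only when the loop exhausts without hitting break. A self-contained mini-demo of the same pattern:

import random

random.seed(0)
for _ in range(20):
    x = random.random()
    if x > 0.5:  # stand-in for "likefn(start_vector) is finite"
        break
else:
    raise RuntimeError("no valid draw in 20 tries")
print("accepted", x)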

def execute(self):
# Figure out how many steps we need to do
ndone = len(self.best_fit_results)
if ndone + self.nsteps > self.repeats:
n = self.repeats - ndone
else:
n = self.nsteps


# Choose a starting point. If we are repeating our optimization we will need
# multiple starting points. Otherwise we use a fixed one
starting_points = self.choose_start(n)

if self.pool is None:
# Serial mode: just do everything on this process.
# This also lets us save partial results if the user
# ctrl-c's part way through the run.
collected_results = []
for i, start_vector in enumerate(starting_points):
try:
results = self.run_optimizer((start_vector, ndone + i))
collected_results.append(results)
except KeyboardInterrupt:
# If we get a ctrl-c we save the current best fit(s)
# and then exit, reporting what we are doing.
# The save should be near-instant, so it shouldn't annoy anyone.
logs.overview("Keyboard interrupt - saving current best fit, if any finished")
self.save_final_outputs(collected_results, final=True)
raise
else:
inputs = [(start_vector, ndone + i) for i, start_vector in enumerate(starting_points)]
collected_results = self.pool.map(run_optimizer, inputs)

# Add this to our list of best-fits
self.best_fit_results.extend(collected_results)

self.converged = True
# we re-save the final outputs each time, rewinding the file
# to overwrite the previous ones, so we can re-sort each time.
self.save_final_outputs(self.best_fit_results, final=self.is_converged() )

def is_converged(self):
return self.converged
return len(self.best_fit_results) >= self.repeats
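
A hedged note on execute's batching arithmetic: the two-branch computation of n is equivalent to clamping the batch size to the work remaining, and is_converged then fires once all repeats have been collected.

def batch_size(ndone, nsteps, repeats):
    # Equivalent to the if/else at the top of execute()
    return min(nsteps, repeats - ndone)

assert batch_size(0, 4, 10) == 4   # a full batch is available
assert batch_size(8, 4, 10) == 2   # clamped to the 2 remaining repeats
assert batch_size(10, 4, 10) == 0  # converged: nothing left to do
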
8 changes: 7 additions & 1 deletion cosmosis/samplers/maxlike/sampler.yaml
@@ -43,6 +43,8 @@ explanation: >
- trust-ncg
- bobyqa (requires pybobyqa)
Each has different (dis)advantages, and which works best will depend on your
particular application. The default in CosmoSIS is Nelder-Mead.
@@ -66,4 +68,8 @@ params:
maxiter: (integer; default=1000) Maximum number of iterations of the sampler
output_ini: "(string; default='') if present, save the resulting parameters to a new ini file with this name"
output_covmat: "(string; default='') if present and the sampler supports it, save the estimated covariance to this file"
max_posterior: "(bool; default=False) find the max a posteriori point instead of max like (includes the priors as well)"
max_posterior: "(bool; default=False) find the max a posteriori point instead of max like (includes the priors as well)"
repeats: "(integer; default=1) number of times to repeat the minimization from different starting points"
start_method: "(string; default='') method to use to generate starting points. Options are 'prior' and 'chain', 'cov', or blank for default start'"
start_input: "(string; default='') input file to use for starting points if start_method is 'chain' or 'cov'"
nstep: "(integer; default=1) number of steps to output at once"
