Add repeats option to maxlike sampler #155

Merged: 5 commits, Dec 5, 2024
3 changes: 3 additions & 0 deletions cosmosis/output/astropy_output.py
@@ -31,3 +31,6 @@
def _write_comment(self, comment):
self.table.meta["comments"] = self.table.meta.get("comments", []) + [comment]

def reset_to_chain_start(self):
self.table.remove_rows(slice(None))


3 changes: 3 additions & 0 deletions cosmosis/output/in_memory_output.py
@@ -38,5 +38,8 @@ def from_options(cls, options, resume=False):
@classmethod
def load_from_options(cls, options):
raise ValueError("No output was saved from this run")

def reset_to_chain_start(self):
self.rows = []
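
For context, `reset_to_chain_start` is the new hook that lets a sampler rewind its output and rewrite the chain from scratch; the maxlike sampler below uses it to re-sort its accumulated best fits each time it saves. A minimal sketch of the idea for an in-memory backend (the class name and `parameters` signature here are illustrative, not the actual CosmoSIS classes):

```python
# Illustrative sketch only: a toy output backend exposing the new hook.
class ToyInMemoryOutput:
    def __init__(self):
        self.rows = []

    def parameters(self, vector, extra, prior, like, post):
        # Append one chain row: sampled params, derived params, sampler outputs
        self.rows.append(list(vector) + list(extra) + [prior, like, post])

    def reset_to_chain_start(self):
        # Discard everything written so far so the chain can be rewritten,
        # e.g. after re-sorting repeated best-fit results
        self.rows = []
```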


280 changes: 209 additions & 71 deletions cosmosis/samplers/maxlike/maxlike_sampler.py
@@ -1,59 +1,132 @@
from .. import Sampler
from .. import ParallelSampler
from ...runtime import logs
import numpy as np
import scipy.optimize
import warnings

# The likelihood function we wish to optimize.
# Note that we are minimizing the negative of the likelihood/posterior
def likefn(p_in):
global sampler
# Check the normalization
if (not np.all(p_in>=0)) or (not np.all(p_in<=1)):
return np.inf

p = sampler.pipeline.denormalize_vector(p_in)
r = sampler.pipeline.run_results(p)
if sampler.max_posterior:
return -r.post
else:
return -r.like

def run_optimizer(start_vector):
global sampler
return sampler.run_optimizer(start_vector)


class MaxlikeSampler(ParallelSampler):
parallel_output = False
supports_resume = False # TODO: support resuming

class MaxlikeSampler(Sampler):
sampler_outputs = [("prior", float), ("like", float), ("post", float)]

def config(self):
global sampler
sampler = self
self.tolerance = self.read_ini("tolerance", float, 1e-3)
self.maxiter = self.read_ini("maxiter", int, 1000)
self.output_ini = self.read_ini("output_ini", str, "")
self.output_cov = self.read_ini("output_covmat", str, "")
self.method = self.read_ini("method",str,"Nelder-Mead")
self.max_posterior = self.read_ini("max_posterior", bool, False)

if self.max_posterior:
logs.overview("------------------------------------------------")
logs.overview("NOTE: Running optimizer in **max-posterior** mode:")
logs.overview("NOTE: Will maximize the combined likelihood and prior")
logs.overview("------------------------------------------------")
self.repeats = self.read_ini("repeats", int, 1)
if self.repeats == 1:
nsteps = 1
elif self.pool is not None:
nsteps = self.pool.size

else:
logs.overview("--------------------------------------------------")
logs.overview("NOTE: Running optimizer in **max-like** mode:")
logs.overview("NOTE: not including the prior, just the likelihood.")
logs.overview("NOTE: Set the parameter max_posterior=T to change this.")
logs.overview("NOTE: This won't matter unless you set some non-flat")
logs.overview("NOTE: priors in a separate priors file.")
logs.overview("--------------------------------------------------")
nsteps = 1
self.nsteps = self.read_ini("nsteps", int, nsteps)
start_method = self.read_ini("start_method", str, "")

self.converged = False
if self.repeats > 1:
start_method_is_random = start_method in ["prior", "cov", "chain"]
have_previous_peak = self.distribution_hints.has_peak()
have_previous_cov = self.distribution_hints.has_cov()

def execute(self):
import scipy.optimize

def likefn(p_in):
#Check the normalization
if (not np.all(p_in>=0)) or (not np.all(p_in<=1)):
return np.inf
p = self.pipeline.denormalize_vector(p_in)
r = self.pipeline.run_results(p)
if have_previous_peak and not have_previous_cov:
raise ValueError("You have set repeats>1 in maxlike, and have chained multiple samplers together. "

"But the previous sampler(s) did not save a covariance matrix. "
"So we have no way to generate multiple starting points. "
)
elif not start_method_is_random:
raise ValueError(f"You set the repeats parameter in the max-like sampler to {self.repeats}"
" but to you this you must also set a start method to choose a new starting point"
" each repeat. You can set start_method to 'prior' to sample from the prior,"
" or set it to 'cov' or 'chain' and also set start_input to a file to load one"
" either a covariance matrix or a chain of samples."
)
if self.is_master():
if self.max_posterior:
return -r.post
logs.overview("------------------------------------------------")
logs.overview("NOTE: Running optimizer in **max-posterior** mode:")
logs.overview("NOTE: Will maximize the combined likelihood and prior")
logs.overview("------------------------------------------------")
else:
return -r.like
return -like
logs.overview("--------------------------------------------------")
logs.overview("NOTE: Running optimizer in **max-like** mode:")
logs.overview("NOTE: not including the prior, just the likelihood.")
logs.overview("NOTE: Set the parameter max_posterior=T to change this.")
logs.overview("NOTE: This won't matter unless you set some non-flat")
logs.overview("NOTE: priors in a separate priors file.")
logs.overview("--------------------------------------------------")

# starting position in the normalized space. This will be taken from
# a previous sampler if available, or the values file if not.
start_vector = self.pipeline.normalize_vector(self.start_estimate())
bounds = [(0.0, 1.0) for p in self.pipeline.varied_params]
self.best_fit_results = []

def save_final_outputs(self, best_fit_results, final=False):

# This can happen if the user ctrl-c's the run before any results are saved
if not best_fit_results:
return


# check that the starting position is a valid point
start_like = likefn(start_vector)
if not np.isfinite(start_like):
raise RuntimeError('invalid starting point for maxlike')
# Sort the repeated best-fit estimates by increasing posterior or likelihood
if self.max_posterior:
best_fit_results.sort(key=lambda x: x.post)
else:
best_fit_results.sort(key=lambda x: x.like)

# Save all the results to the main chain output file.
# We will overwrite previous sets of results in the file here
self.output.reset_to_chain_start()
for results in best_fit_results:
self.output.parameters(results.vector, results.extra, results.prior, results.like, results.post)

# Get the overall best-fit results
results = best_fit_results[-1]

# Also if requested, approximate the covariance matrix with the
# inverse of the Hessian matrix.
# For a gaussian posterior this is exact.
if results.covmat is None:
if self.output_cov:
warnings.warn("Sorry - the optimization method you chose does not return a covariance (or Hessian) matrix")

else:
if self.output_cov:
np.savetxt(self.output_cov, results.covmat)


# We only want to update the distribution hints at the very end
if final:
# These values are used by subsequent samplers, if you chain
# some of them together
self.distribution_hints.set_peak(results.vector, results.post)
if results.covmat is not None:
self.distribution_hints.set_cov(results.covmat)


def run_optimizer(self, inputs):
rank = self.pool.rank if self.pool is not None else 0
start_vector, repeat_index = inputs
start_vector_denorm = self.pipeline.denormalize_vector(start_vector)
logs.overview(f"[Rank {rank}] Starting from point: {start_vector_denorm}")
bounds = [(0.0, 1.0) for p in self.pipeline.varied_params]

if self.method.lower() == "bobyqa":
# use the specific pybobyqa minimizer
@@ -69,58 +142,123 @@
"rhobeg": 0.1,
"rhoend": self.tolerance,
}
result = pybobyqa.solve(likefn, start_vector, **kw)
opt_norm = result.x
optimizer_result = pybobyqa.solve(likefn, start_vector, **kw)
opt_norm = optimizer_result.x
else:
# Use the scipy minimizer instead
result = scipy.optimize.minimize(likefn, start_vector, method=self.method,
optimizer_result = scipy.optimize.minimize(likefn, start_vector, method=self.method,
jac=False, tol=self.tolerance,
options={'maxiter':self.maxiter, 'disp':True})

opt_norm = result.x
opt_norm = optimizer_result.x

opt = self.pipeline.denormalize_vector(opt_norm)

#Some output - first log the parameters to the screen.
# Generate a results object containing the best-fit parameters,
# their likelihood, posterior, prior, and derived parameters
results = self.pipeline.run_results(opt)

# Log some info to the screen
par_text = ' '.join(str(x) for x in opt)
logs.overview(f"\n\n[Rank {rank}] Optimization run {repeat_index+1} of {self.repeats}")
if self.max_posterior:
logs.overview("Best fit (by posterior):\n%s"%' '.join(str(x) for x in opt))
logs.overview(f"[Rank {rank}] Best fit (by posterior): {par_text}\n%s")
else:
logs.overview("Best fit (by likelihood):\n%s"%' '.join(str(x) for x in opt))
logs.overview("Posterior: {}\n".format(results.post))
logs.overview("Likelihood: {}\n".format(results.like))
logs.overview(f"[Rank {rank}] Best fit (by likelihood):\n{par_text}")
logs.overview(f"[Rank {rank}] Posterior: {results.post}")
logs.overview(f"[Rank {rank}] Likelihood: {results.like}\n")

#Next save them to the proper table file
self.output.parameters(opt, results.extra, results.prior, results.like, results.post)
# Also attach any covariance matrix saved by the sampler to the results
# object, in an ad-hoc way
if hasattr(optimizer_result, 'hess_inv'):
if self.method == "L-BFGS-B":
results.covmat = self.pipeline.denormalize_matrix(optimizer_result.hess_inv.todense())
else:
results.covmat = self.pipeline.denormalize_matrix(optimizer_result.hess_inv)

elif hasattr(optimizer_result, 'hess'):
results.covmat = self.pipeline.denormalize_matrix(np.linalg.inv(optimizer_result.hess))

else:
results.covmat = None

#If requested, create a new ini file for the
#best fit.
if self.output_ini:
self.pipeline.create_ini(opt, self.output_ini)
return results

def choose_start(self, n):
if n == 1:
# we just want a single starting point.
# So we can use the basic one from the values file or
# previous sampler; we don't have to worry about making
# it randomized.

self.distribution_hints.set_peak(opt, results.post)
# It's also possible that this is just the last of our repeats
# to be done, but in that case it's also okay to just use the
# basic starting point, since we won't already have used it
start_vector_denorm = self.start_estimate()
start_vector = self.pipeline.normalize_vector(start_vector_denorm)

#Also if requested, approximate the covariance matrix with the
#inverse of the Hessian matrix.
#For a gaussian likelihood this is exact.
covmat = None
if hasattr(result, 'hess_inv'):
if self.method == "L-BFGS-B":
covmat = self.pipeline.denormalize_matrix(result.hess_inv.todense())
else:
covmat = self.pipeline.denormalize_matrix(result.hess_inv)
elif hasattr(result, 'hess'):
covmat = self.pipeline.denormalize_matrix(np.linalg.inv(result.hess_inv))
# If that is invalid we will raise an error
if not np.isfinite(likefn(start_vector)):
raise RuntimeError("Invalid starting point for maxlike")


start_vectors = np.array([start_vector]) # 1 x n array

if covmat is None:
if self.output_cov:
logs.error("Sorry - the optimization method you chose does not return a covariance (or Hessian) matrix")
else:
if self.output_cov:
np.savetxt(self.output_cov, covmat)
self.distribution_hints.set_cov(covmat)
start_vectors = np.zeros((n, self.pipeline.nvaried))
for i in range(n):

# If we are taking a random starting point then there is a chance it will randomly
# be invalid. So we should try 20 times to get a valid one.
for _ in range(20):
start_vector_denorm = self.start_estimate(prefer_random=True)
start_vector = self.pipeline.normalize_vector(start_vector_denorm)
if np.isfinite(likefn(start_vector)):
break

else:
raise RuntimeError("Could not find a valid random starting point for maxlike in 20 tries")
start_vectors[i] = start_vector


return start_vectors

def execute(self):
# Figure out how many steps we need to do
ndone = len(self.best_fit_results)
if ndone + self.nsteps > self.repeats:
n = self.repeats - ndone

else:
n = self.nsteps


# Choose a starting point. If we are repeating our optimization we will need
# multiple starting points. Otherwise we use a fixed one
starting_points = self.choose_start(n)

if self.pool is None:
# serial mode. Just do everything on this process.
# this allows us to also do a nice thing where if we
# get part way through the results and then the user
# ctrl-c's then we can still output partial results
collected_results = []
for i, start_vector in enumerate(starting_points):
try:
results = self.run_optimizer((start_vector, ndone + i))
collected_results.append(results)
except KeyboardInterrupt:

# If we get a ctrl-c we should save the current best fit
# and then exit.
# Otherwise report what we are doing. It should be basically instant
# so it shouldn't annoy anyone
logs.overview("Keyboard interrupt - saving current best fit, if any finished")
self.save_final_outputs(collected_results, final=True)
raise

else:
inputs = [(start_vector, ndone + i) for i, start_vector in enumerate(starting_points)]
collected_results = self.pool.map(run_optimizer, inputs)


# Add this to our list of best-fits
self.best_fit_results.extend(collected_results)

self.converged = True
# we re-save the final outputs each time, rewinding the file
# to overwrite the previous ones, so we can re-sort each time.
self.save_final_outputs(self.best_fit_results, final=self.is_converged() )

def is_converged(self):
return self.converged
return len(self.best_fit_results) >= self.repeats
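
Taken together, the new `choose_start`, `run_optimizer`, `execute`, and `save_final_outputs` pieces implement a repeat-and-keep-the-best loop. Below is a hedged, self-contained sketch of that control flow, stripped of the pipeline and MPI machinery; the toy objective and helper names are assumptions, not CosmoSIS code.

```python
# Sketch of the repeats pattern added above, using scipy.optimize directly.
import numpy as np
import scipy.optimize

def neg_log_post(p):
    # Toy objective: a Gaussian posterior on the unit cube, standing in
    # for the pipeline's negative log-posterior. Out-of-bounds points
    # return infinity, as in likefn above.
    if not (np.all(p >= 0.0) and np.all(p <= 1.0)):
        return np.inf
    return 0.5 * np.sum(((p - 0.3) / 0.1) ** 2)

def repeated_maxlike(repeats=8, seed=0):
    rng = np.random.default_rng(seed)
    results = []
    for _ in range(repeats):
        # Draw a fresh valid starting point (analogous to start_method='prior')
        start = rng.uniform(0.0, 1.0, size=2)
        res = scipy.optimize.minimize(neg_log_post, start, method="Nelder-Mead",
                                      tol=1e-3, options={"maxiter": 1000})
        results.append(res)
    # Sort by increasing posterior (decreasing fun), like save_final_outputs,
    # so the overall best fit ends up last.
    results.sort(key=lambda r: -r.fun)
    return results[-1]

best = repeated_maxlike()
print("best-fit point:", best.x, "posterior:", -best.fun)
```

In the real sampler the repeats are split across the MPI pool in batches of `nsteps`, and the sorted results are re-written to the output file after each batch so that partial results survive an interrupted run.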
8 changes: 7 additions & 1 deletion cosmosis/samplers/maxlike/sampler.yaml
@@ -43,6 +43,8 @@ explanation: >

- trust-ncg

- bobyqa (requires pybobyqa)


Each has different (dis)advantages, and which works best will depend on your
particular application. The default in CosmoSIS is Nelder-Mead.
@@ -66,4 +68,8 @@ params:
maxiter: (integer; default=1000) Maximum number of iterations of the sampler
output_ini: "(string; default='') if present, save the resulting parameters to a new ini file with this name"
output_covmat: "(string; default='') if present and the sampler supports it, save the estimated covariance to this file"
max_posterior: "(bool; default=False) find the max a posteriori point instead of max like (includes the priors as well)"
max_posterior: "(bool; default=False) find the max a posteriori point instead of max like (includes the priors as well)"
repeats: "(integer; default=1) number of times to repeat the minimization from different starting points"
start_method: "(string; default='') method used to generate starting points. Options are 'prior', 'chain', 'cov', or blank for the default start"
start_input: "(string; default='') input file to use for starting points if start_method is 'chain' or 'cov'"
nsteps: "(integer; default=1, or the pool size when repeats>1 and running in parallel) number of optimization runs to perform per batch"
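
For readers of the new options, here is a minimal example of how the parameters above might be combined in a run's parameter file. The section layout and values are illustrative; only the option names come from this PR.

```ini
[runtime]
sampler = maxlike

[maxlike]
method = Nelder-Mead
tolerance = 1e-3
maxiter = 1000
; repeat the optimization from 8 random prior draws and keep the best fit
repeats = 8
start_method = prior
max_posterior = T
output_covmat = maxlike_cov.txt
```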