Merge pull request #141 from joezuntz/save-hint-from-chains

Have the rest of the samplers save their best-fit as a hint
joezuntz · Sep 8, 2024 · bbcba42 · bbcba42
2 parents 750e74d + 6310fdc
commit bbcba42
Show file tree

Hide file tree

Showing 23 changed files with 125 additions and 26 deletions.
diff --git a/cosmosis/samplers/apriori/apriori_sampler.py b/cosmosis/samplers/apriori/apriori_sampler.py
@@ -75,6 +75,9 @@ def sample_from_prior():
                 #always save the usual text output
                 self.output.parameters(sample, extra, prior, post)
 
+                # this will test to see if the new point is the best so far
+                self.distribution_hints.set_peak(sample, post)
+
         #We only ever run this once, though that could 
         #change if we decide to split up the runs
         self.converged = True

diff --git a/cosmosis/samplers/dynesty/dynesty_sampler.py b/cosmosis/samplers/dynesty/dynesty_sampler.py
@@ -81,16 +81,18 @@ def execute(self):
         results = sampler.results
         results.summary()
 
+        posts = results['blob'][:, 0] + results['logl']
+        self.distribution_hints.set_peak_from_sample(results['samples'], posts)
+
         for sample, logwt, logl, derived in zip(results['samples'],results['logwt'], results['logl'], results['blob']):
             prior = derived[0]
             post = prior + logl
-            self.output.parameters(sample, logwt, prior, post, derived[1:])
+            self.output.parameters(sample, derived[1:], logwt, prior, post)
 
         self.output.final("efficiency", results['eff'])
         self.output.final("nsample", len(results['samples']))
         self.output.final("log_z", results['logz'][-1])
         self.output.final("log_z_error", results['logzerr'][-1])
-
         self.converged = True
 
 

diff --git a/cosmosis/samplers/emcee/emcee_sampler.py b/cosmosis/samplers/emcee/emcee_sampler.py
@@ -131,6 +131,7 @@ def load_covmat(self, covmat_file):
 
 
     def output_samples(self, pos, prob, extra_info):
+        self.distribution_hints.set_peak_from_sample(pos, prob)
         for params, post, extra in zip(pos,prob,extra_info):
             prior, extra = extra      
             self.output.parameters(params, extra, prior, post)

diff --git a/cosmosis/samplers/grid/grid_sampler.py b/cosmosis/samplers/grid/grid_sampler.py
@@ -117,6 +117,7 @@ def execute(self):
             (prob, prior, extra) = result
             #always save the usual text output
             self.output.parameters(sample, extra, prior, prob)
+            self.distribution_hints.set_peak(sample, prob)
 
     def is_converged(self):
         return self.converged
diff --git a/cosmosis/samplers/gridmax/gridmax_sampler.py b/cosmosis/samplers/gridmax/gridmax_sampler.py
@@ -64,6 +64,7 @@ def execute(self):
         #Log the results for posterity
         for p, (pr, po, e) in zip(points, results):
             self.output.parameters(p, e, pr, po)
+            self.distribution_hints.set_peak(p, po)
 
         #And now update our information.
         #We need to find the two points either side
@@ -104,6 +105,8 @@ def execute(self):
 
         logs.overview(f"New best fit L = {posteriors.max()} at {self.pipeline.varied_params[d].name} = {points[best][d]}")
 
+
+
         #and go on to the next dimension
         self.dimension = (d+1)%self.ndim
 

diff --git a/cosmosis/samplers/hints.py b/cosmosis/samplers/hints.py
@@ -1,18 +1,27 @@
+import numpy as np
 
 class Hints(object):
     def __init__(self):
         self._peak = None
         self._cov = None
+        self._peak_post = None
 
     def has_peak(self):
         return self._peak is not None
-    def set_peak(self, peak):
-        self._peak = peak
+    def set_peak(self, peak, post):
+        if self._peak_post is None or post > self._peak_post:
+            self._peak = peak
+            self._peak_post = post
+
+    def set_peak_from_sample(self, samples, posts):
+        assert len(samples) == len(posts)
+        idx = np.argmax(posts)
+        self.set_peak(samples[idx], posts[idx])
     def get_peak(self):
         return self._peak
     def del_peak(self):
         self._peak = None
-
+        self._peak_post = None
     def has_cov(self):
         return self._cov is not None
     def set_cov(self, cov):
@@ -24,6 +33,6 @@ def del_cov(self):
 
     def update(self, other):
         if other.has_peak():
-            self.set_peak(other.get_peak())
+            self.set_peak(other.get_peak(), other._peak_post)
         if other.has_cov():
             self.set_cov(other.get_cov())
diff --git a/cosmosis/samplers/importance/importance_sampler.py b/cosmosis/samplers/importance/importance_sampler.py
@@ -2,6 +2,7 @@
 import collections
 import numpy as np
 from ...runtime import logs
+from ...output import TextColumnOutput, FitsOutput
 
 from .. import ParallelSampler
 
@@ -10,8 +11,8 @@
 
 
 def task(p):
-    r = importance_pipeline.posterior(p)
-    return r
+    r = importance_pipeline.run_results(p)
+    return r.post, (r.prior, r.extra)
 
 
 class ImportanceSampler(ParallelSampler):
@@ -36,13 +37,20 @@ def config(self):
 
     def load_samples(self, filename):
         options = {"filename":filename}
-        col_names, cols, metadata, comments, final_metadata = self.output.__class__.load_from_options(options)
+        if filename.endswith(".txt"):
+            output_cls = TextColumnOutput
+        elif filename.endswith(".fits"):
+            output_cls = FitsOutput
+        else:
+            raise ValueError(f"Can only postprocess files in cosmosis text format (.txt) or fits format (.fits), not {filename}")
+
+        col_names, cols, metadata, comments, final_metadata = output_cls.load_from_options(options)
         # pull out the "post" column first
         col_names = [name.lower() for name in col_names]
-        likelihood_index = col_names.index('post')
-        if likelihood_index<0:
+        posterior_index = col_names.index('post')
+        if posterior_index<0:
             raise ValueError("I could not find a 'post' column in the chain %s"%filename)
-        self.original_likelihoods = cols[0].T[likelihood_index]
+        self.original_posteriors = cols[0].T[posterior_index]
 
         #We split the parameters into three groups:
         #   - ones that we have listed as varying
@@ -91,6 +99,7 @@ def load_samples(self, filename):
         #P' is the new likelihood.
         self.output.add_column("old_post", float) #This is the old likelihood, log(P)
         self.output.add_column("log_weight", float) #This is the log-weight, the ratio of the likelihoods
+        self.output.add_column("prior", float) #This is the new prior
         self.output.add_column("post", float) #This is the new likelihood, log(P')
 
 
@@ -137,21 +146,23 @@ def execute(self):
             results = list(map(task, samples_chunk))
 
         #Collect together and output the results
-        for i,(sample, (new_like, extra)) in enumerate(zip(samples_chunk, results)):
+        for i,(sample, (new_post, extra)) in enumerate(zip(samples_chunk, results)):
             #We already (may) have some extra values from the pipeline
             #as derived parameters.  Add to those any parameters used in the
             #old pipeline but not the new one
+            new_prior, extra = extra
             extra = list(extra)
             for col in list(self.original_extras.values()):
                 extra.append(col[start+i])
             #and then the old and new likelihoods
-            old_like = self.original_likelihoods[start+i]
+            old_post = self.original_posteriors[start+i]
             if self.add_to_likelihood:
-                new_like += old_like
-            weight = new_like-old_like
-            extra = list(extra) + [old_like,weight,new_like]
+                new_post += old_post
+            weight = new_post - old_post
+            extra = list(extra) + [old_post, weight, new_prior, new_post]
             #and save results
             self.output.parameters(sample, extra)
+            self.distribution_hints.set_peak(sample, new_post)
         #Update the current index
         self.current_index+=self.nstep
 

diff --git a/cosmosis/samplers/kombine/kombine_sampler.py b/cosmosis/samplers/kombine/kombine_sampler.py
@@ -108,6 +108,7 @@ def execute(self):
 
         for (pos, post, prop, extra_info) in outputs:
             self.output_samples(pos, post, extra_info)
+            self.distribution_hints.set_peak(pos, post)
 
         # Set the starting positions for the next chunk of samples
         # to the last ones for this chunk

diff --git a/cosmosis/samplers/list/list_sampler.py b/cosmosis/samplers/list/list_sampler.py
@@ -133,6 +133,7 @@ def execute(self):
                 (prob, (prior,extra)) = result
                 # always save the usual text output
                 self.output.parameters(sample, extra, prior, prob)
+                self.distribution_hints.set_peak(sample, prob)
 
         # We only ever run this once, though that could 
         # change if we decide to split up the runs

diff --git a/cosmosis/samplers/maxlike/maxlike_sampler.py b/cosmosis/samplers/maxlike/maxlike_sampler.py
@@ -81,7 +81,7 @@ def likefn(p_in):
         if self.output_ini:
           self.pipeline.create_ini(opt, self.output_ini)
 
-        self.distribution_hints.set_peak(opt)          
+        self.distribution_hints.set_peak(opt, results.post)
 
         #Also if requested, approximate the covariance matrix with the 
         #inverse of the Hessian matrix.

diff --git a/cosmosis/samplers/metropolis/metropolis_sampler.py b/cosmosis/samplers/metropolis/metropolis_sampler.py
@@ -169,6 +169,7 @@ def execute(self):
         if (self.num_samples_post_tuning > 0) or self.save_during_tuning:
             for i, result in enumerate(samples):
                 self.output.parameters(result.vector, result.extra, result.prior, result.post)
+                self.distribution_hints.set_peak(result.vector, result.post)
 
         if self.num_samples_post_tuning <= 0:
             logs.overview("Tuning ends at {} samples\n".format(self.tuning_end))

diff --git a/cosmosis/samplers/minuit/minuit_sampler.py b/cosmosis/samplers/minuit/minuit_sampler.py
@@ -111,7 +111,7 @@ def execute(self):
             logs.overview("SUCCESS: Minuit has converged!")
             self.save_results(param_vector, param_names, results)
             self.converged = True
-            self.distribution_hints.set_peak(param_vector)
+            self.distribution_hints.set_peak(param_vector, results.post)
 
             if made_cov:
                 cov_matrix = cov_vector.reshape((self.ndim, self.ndim))

diff --git a/cosmosis/samplers/multinest/multinest_sampler.py b/cosmosis/samplers/multinest/multinest_sampler.py
@@ -223,6 +223,7 @@ def output_params(self, n, live, posterior, log_z, ins_log_z, log_z_err):
         self.log_z = ins_log_z if self.importance else log_z
         self.log_z_err = log_z_err
         data = np.array([posterior[i] for i in range(n*(self.npar+2))]).reshape((self.npar+2, n))
+        self.distribution_hints.set_peak_from_sample(data[:self.ndim].T, data[self.npar - 1])
         for row in data.T:
             params = row[:self.ndim]
             extra_vals = row[self.ndim:self.npar-2]

diff --git a/cosmosis/samplers/nautilus/nautilus_sampler.py b/cosmosis/samplers/nautilus/nautilus_sampler.py
@@ -99,7 +99,11 @@ def execute(self):
                     discard_exploration=self.discard_exploration,
                     verbose=self.verbose)
 
-        for sample, logwt, logl, blob in zip(*sampler.posterior(return_blobs=True)):
+        results = sampler.posterior(return_blobs=True)
+        posts = results[2] + results[3][:, 0]
+        self.distribution_hints.set_peak_from_sample(results[0], posts)
+
+        for sample, logwt, logl, blob in zip(*results):
             blob = np.atleast_1d(blob)
             prior = blob[0]
             extra = blob[1:]

diff --git a/cosmosis/samplers/pmaxlike/pmaxlike_sampler.py b/cosmosis/samplers/pmaxlike/pmaxlike_sampler.py
@@ -94,7 +94,7 @@ def execute(self):
           self.pipeline.create_ini(opt, self.output_ini)
 
         # If we are coupling to later samplers they can use the peak we have found.
-        self.distribution_hints.set_peak(opt)
+        self.distribution_hints.set_peak(opt, results.post)
 
         #Also if requested, approximate the covariance matrix with the 
         #inverse of the Hessian matrix.

diff --git a/cosmosis/samplers/pmc/pmc_sampler.py b/cosmosis/samplers/pmc/pmc_sampler.py
@@ -73,6 +73,7 @@ def execute(self):
 
         for (vector, post, extra, component, weight) in zip(*results):
             prior, extra = extra
+            self.distribution_hints.set_peak(vector, post)
             self.output.parameters(vector, extra, (component, prior, post, weight))
 
         logs.overview("Done %d iterations, %d samples" % (self.iterations, self.samples))

diff --git a/cosmosis/samplers/poco/poco_sampler.py b/cosmosis/samplers/poco/poco_sampler.py
@@ -143,6 +143,7 @@ def execute(self):
             extra = np.atleast_1d(blob)
             prior = logp
             logpost = logl + prior
+            self.distribution_hints.set_peak(sample, logpost)
             self.output.parameters(sample, extra, logw, prior, logpost)
 
         self.output.final("log_z", logz)

diff --git a/cosmosis/samplers/polychord/polychord_sampler.py b/cosmosis/samplers/polychord/polychord_sampler.py
@@ -319,6 +319,11 @@ def output_params(self, ndead, nlive, npars, live, dead, logweights, log_z, log_
             importance = np.exp(w)
             post = like + prior
             self.output.parameters(params, extra_vals, prior, like, post, importance)
+
+        # priors + likes
+        posts = data[:, self.ndim+self.nderived-1] + data[:, self.ndim+self.nderived+1]
+        self.distribution_hints.set_peak_from_sample(data[:, :self.ndim], posts)
+
         self.output.final("nsample", ndead)
         self.output.flush()
 

diff --git a/cosmosis/samplers/pymc/pymc_sampler.py b/cosmosis/samplers/pymc/pymc_sampler.py
@@ -93,6 +93,8 @@ def sample(self):
                            for param in self.pipeline.varied_params]).T
         likes = -0.5 * self.mcmc.trace('deviance')[:]
 
+        self.distribution_hints.set_peak_from_sample(traces, likes)
+
         for trace, like in zip(traces, likes):
             self.output.parameters(trace, like)
 

diff --git a/cosmosis/samplers/snake/snake_sampler.py b/cosmosis/samplers/snake/snake_sampler.py
@@ -35,11 +35,11 @@ def execute(self):
             try:
                 x = self.pipeline.denormalize_vector(x)
                 self.output.parameters(x, extra, prior, post)
+                self.distribution_hints.set_peak(x, post)
             except ValueError:
                 logs.noisy("The snake is trying to escape its bounds!")
 
 
-
     def is_converged(self):
         if self.snake.converged():
             logs.overview("Snake has converged!")

diff --git a/cosmosis/samplers/star/star_sampler.py b/cosmosis/samplers/star/star_sampler.py
@@ -118,6 +118,7 @@ def execute(self):
             (post, prior, extra) = result
             #always save the usual text output
             self.output.parameters(sample, extra, prior, post)
+            self.distribution_hints.set_peak(sample, post)
 
     def is_converged(self):
         return self.converged
diff --git a/cosmosis/samplers/zeus/zeus_sampler.py b/cosmosis/samplers/zeus/zeus_sampler.py
@@ -243,6 +243,7 @@ def execute(self):
             for j in range(self.nwalkers):
                 prior, extra = blobs[i, j]
                 self.output.parameters(chain[i, j], extra, prior, post[i, j])
+                self.distribution_hints.set_peak(chain[i, j], post[i, j])
 
         #Set the starting positions for the next chunk of samples
         #to the last ones for this chunk