diff --git a/blueice/likelihood.py b/blueice/likelihood.py
index 7fe2aa1..b938364 100644
--- a/blueice/likelihood.py
+++ b/blueice/likelihood.py
@@ -73,6 +73,7 @@ def __init__(self, pdf_base_config, likelihood_config=None, **kwargs):
             likelihood_config = {}
         self.config = likelihood_config
         self.config.setdefault('morpher', 'GridInterpolator')
+        self.source_wise_interpolation = self.pdf_base_config.get('source_wise_interpolation', False)
 
         # Base model: no variations of any settings
         self.base_model = Model(self.pdf_base_config)
@@ -101,6 +102,7 @@ def __init__(self, pdf_base_config, likelihood_config=None, **kwargs):
         # If there are shape parameters:
         self.anchor_models = OrderedDict()  # dictionary mapping model zs -> actual model
+        self.anchor_sources = OrderedDict()  # dictionary mapping source_name -> zs -> source
 
         # Interpolators created by morphers. These map zs to...
         self.mus_interpolator = None    # rates for each source
         self.ps_interpolator = None     # (source, event) p-values (unbinned), or pmf grid (binned)
@@ -108,14 +110,68 @@ def __init__(self, pdf_base_config, likelihood_config=None, **kwargs):
         self.n_model_events_interpolator = lambda x: None
         self.n_model_events = None
 
+    @property
+    def source_shape_parameters(self):
+        """Dict of sources with shape parameters: source name -> dict of shape parameters."""
+        source_shape_parameters = OrderedDict()
+        for sn, source, apply_eff, eff_name in zip(
+                self.source_name_list,
+                self.base_model.sources,
+                self.source_apply_efficiency,
+                self.source_efficiency_names
+        ):
+            ignore_parameters = set(source.config['dont_hash_settings'])
+            # The efficiency parameter doesn't need to be hashed, but it does need to be passed to the morpher
+            if apply_eff:
+                ignore_parameters.discard(eff_name)
+            shape_parameters = OrderedDict({k: v for k, v in self.shape_parameters.items() if k not in ignore_parameters})
+            if shape_parameters:
+                source_shape_parameters[sn] = shape_parameters
+        return source_shape_parameters
+
+    def _get_shape_indices(self, source_name):
+        """Return the indices of the shape parameters used by the source."""
+        shape_keys = self.source_shape_parameters[source_name].keys()
+        return [i for i, k in enumerate(self.shape_parameters.keys()) if k in shape_keys]
+
+    def _get_model_anchor(self, anchor, source_name):
+        """Return the shape anchors of the full model, given the shape anchors of a single source.
+        All values of shape parameters not used by this source will be set to None.
+        """
+        shape_indices = self._get_shape_indices(source_name)
+        model_anchor = [None] * len(self.shape_parameters)
+        for i, idx in enumerate(shape_indices):
+            model_anchor[idx] = anchor[i]
+        return tuple(model_anchor)
+
     def prepare(self, n_cores=1, ipp_client=None):
         """Prepares a likelihood function with shape parameters for use.
         This will compute the models for each shape parameter anchor value combination.
""" if len(self.shape_parameters): - self.morpher = MORPHERS[self.config['morpher']](self.config.get('morpher_config', {}), - self.shape_parameters) - zs_list = self.morpher.get_anchor_points(bounds=self.get_bounds()) + if self.source_wise_interpolation: + # Create morphers for each source individually + self.source_morphers = OrderedDict() + + def create_source_morpher(shape_parameters): + return MORPHERS[self.config['morpher']]( + self.config.get('morpher_config', {}), + shape_parameters) + + for sn, shape_parameters in self.source_shape_parameters.items(): + self.source_morphers[sn] = create_source_morpher(shape_parameters) + zs_list = set() + for source_name, morpher in self.source_morphers.items(): + anchor_points = morpher.get_anchor_points(bounds=None) + for anchor in anchor_points: + zs = self._get_model_anchor(anchor, source_name) + zs_list.add(zs) + zs_list = list(zs_list) + + else: + self.morpher = MORPHERS[self.config['morpher']](self.config.get('morpher_config', {}), + self.shape_parameters) + zs_list = self.morpher.get_anchor_points(bounds=self.get_bounds()) # Create the configs for each new model configs = [] @@ -123,7 +179,8 @@ def prepare(self, n_cores=1, ipp_client=None): config = deepcopy(self.pdf_base_config) for i, (setting_name, (anchors, _, _)) in enumerate(self.shape_parameters.items()): # Translate from zs to settings using the anchors dict. Maybe not all settings are numerical. - config[setting_name] = anchors[zs[i]] + if zs[i] is not None: + config[setting_name] = anchors[zs[i]] if ipp_client is None and n_cores != 1: # We have to compute in parallel: must have delayed computation on config['delay_pdf_computation'] = True @@ -150,14 +207,47 @@ def prepare(self, n_cores=1, ipp_client=None): # Reload models so computation takes effect models = [Model(c) for c in tqdm(configs, desc="Loading computed models")] - # Add the new models to the anchor_models dict - for zs, model in zip(zs_list, models): - self.anchor_models[tuple(zs)] = model + if self.source_wise_interpolation: + for i, (source_name, morpher) in enumerate(self.source_morphers.items()): + anchors = morpher.get_anchor_points(bounds=None) + self.anchor_sources[source_name] = OrderedDict() + for anchor in anchors: + model_anchor = self._get_model_anchor(anchor, source_name) + model_index = zs_list.index(model_anchor) + self.anchor_sources[source_name][anchor] = models[model_index].sources[i] + mus_interpolators = OrderedDict() + for sn, base_source in zip(self.source_name_list, self.base_model.sources): + if sn in self.source_morphers: + mus_interpolators[sn] = self.source_morphers[sn].make_interpolator( + f=lambda s: s.expected_events, + extra_dims=[1], + anchor_models=self.anchor_sources[sn]) + else: + mus_interpolators[sn] = base_source.expected_events + + def mus_interpolator(*args): + # take zs, convert to values for each source's interpolator call the respective interpolator + mus = [] + for sn in self.source_name_list: + if sn in self.source_shape_parameters: + shape_indices = self._get_shape_indices(sn) + these_args = [args[0][i] for i in shape_indices] + mus.append(mus_interpolators[sn](np.asarray(these_args))[0]) + else: + mus.append(mus_interpolators[sn]) + return np.array(mus) + self.mus_interpolator = mus_interpolator - # Build the interpolator for the rates of each source. 
-        self.mus_interpolator = self.morpher.make_interpolator(f=lambda m: m.expected_events(),
-                                                               extra_dims=[len(self.source_name_list)],
-                                                               anchor_models=self.anchor_models)
+        else:
+            # Add the new models to the anchor_models dict
+            for zs, model in zip(zs_list, models):
+                self.anchor_models[tuple(zs)] = model
+
+            # Build the interpolator for the rates of each source.
+            self.mus_interpolator = self.morpher.make_interpolator(
+                f=lambda m: m.expected_events(),
+                extra_dims=[len(self.source_name_list)],
+                anchor_models=self.anchor_models)
 
         self.is_data_set = False
         self.is_prepared = True
@@ -440,9 +530,33 @@ class UnbinnedLogLikelihood(LogLikelihoodBase):
     def set_data(self, d):
         LogLikelihoodBase.set_data(self, d)
         if len(self.shape_parameters):
-            self.ps_interpolator = self.morpher.make_interpolator(f=lambda m: m.score_events(d),
-                                                                  extra_dims=[len(self.source_name_list), len(d)],
-                                                                  anchor_models=self.anchor_models)
+            if self.source_wise_interpolation:
+                self.ps_interpolators = OrderedDict()
+                for sn, base_source in zip(self.source_name_list, self.base_model.sources):
+                    if sn in self.source_morphers:
+                        self.ps_interpolators[sn] = self.source_morphers[sn].make_interpolator(
+                            f=lambda s: s.pdf(*self.base_model.to_analysis_dimensions(d)),
+                            extra_dims=[len(d)],
+                            anchor_models=self.anchor_sources[sn])
+                    else:
+                        self.ps_interpolators[sn] = base_source.pdf(*self.base_model.to_analysis_dimensions(d))
+
+                def ps_interpolator(*args):
+                    # Take zs, convert them to the values for each source's interpolator, and call the respective interpolator
+                    ps = np.zeros((len(self.source_name_list), len(d)))
+                    for i, (sn, interp) in enumerate(self.ps_interpolators.items()):
+                        if sn in self.source_shape_parameters:
+                            these_args = np.asarray([args[0][j] for j in self._get_shape_indices(sn)])
+                            ps[i] = interp(these_args)
+                        else:
+                            ps[i] = interp
+                    return ps
+                self.ps_interpolator = ps_interpolator
+            else:
+                self.ps_interpolator = self.morpher.make_interpolator(
+                    f=lambda m: m.score_events(d),
+                    extra_dims=[len(self.source_name_list), len(d)],
+                    anchor_models=self.anchor_models)
         else:
             self.ps = self.base_model.score_events(d)
 
@@ -472,14 +586,18 @@ def prepare(self, *args):
         self.ps, self.n_model_events = self.base_model.pmf_grids()
 
         if len(self.shape_parameters):
-            self.ps_interpolator = self.morpher.make_interpolator(f=lambda m: m.pmf_grids()[0],
-                                                                  extra_dims=list(self.ps.shape),
-                                                                  anchor_models=self.anchor_models)
+            if self.source_wise_interpolation:
+                raise NotImplementedError("Source-wise interpolation not implemented for binned likelihoods")
+
+            self.ps_interpolator = self.morpher.make_interpolator(
+                f=lambda m: m.pmf_grids()[0],
+                extra_dims=list(self.ps.shape),
+                anchor_models=self.anchor_models)
 
             if self.model_statistical_uncertainty_handling is not None:
                 self.n_model_events_interpolator = self.morpher.make_interpolator(f=lambda m: m.pmf_grids()[1],
-                                                                                  extra_dims=list(self.ps.shape),
-                                                                                  anchor_models=self.anchor_models)
+                                                                                  extra_dims=list(self.ps.shape),
+                                                                                  anchor_models=self.anchor_models)
 
     @inherit_docstring_from(LogLikelihoodBase)
     def set_data(self, d):
diff --git a/blueice/model.py b/blueice/model.py
index 82fdf4d..9e73a81 100644
--- a/blueice/model.py
+++ b/blueice/model.py
@@ -106,11 +106,10 @@ def pmf_grids(self):
     def expected_events(self, s=None):
         """Return the total number of events expected in the analysis range for the source s.
         If no source specified, return an array of results for all sources.
-        # TODO: Why is this not a method of source?
""" if s is None: return np.array([self.expected_events(s) for s in self.sources]) - return s.events_per_day * self.config['livetime_days'] * s.fraction_in_range * s.config['rate_multiplier'] + return s.expected_events def show(self, d, ax=None, dims=None, **kwargs): """Plot the events from dataset d in the analysis range diff --git a/blueice/source.py b/blueice/source.py index d31fccb..145a260 100644 --- a/blueice/source.py +++ b/blueice/source.py @@ -183,6 +183,11 @@ def simulate(self, n_events): """ raise NotImplementedError + @property + def expected_events(self): + """Return the default total number of events expected in the analysis range for the source.""" + return self.events_per_day * self.config['livetime_days'] * self.fraction_in_range * self.config['rate_multiplier'] + class HistogramPdfSource(Source): """A source which takes its PDF values from a multihist histogram. diff --git a/tests/test_likelihood.py b/tests/test_likelihood.py index 7377d72..d67d79c 100644 --- a/tests/test_likelihood.py +++ b/tests/test_likelihood.py @@ -92,6 +92,35 @@ def test_shape_uncertainty(): assert lf(strlen_multiplier=2) == -2 + np.log(2 * stats.norm.pdf(0)) + log_prior(2) +def test_source_wise_interpolation(): + data = np.zeros(5, dtype=[('x', float), ('source', int)]) + data['x'] = np.linspace(0, 1, 5) + + config = conf_for_test(events_per_day=1) + + lf = UnbinnedLogLikelihood(config) + lf.add_shape_parameter("mu", anchors={-2:-2, 0:0, 2:2}) + lf.prepare() + lf.set_data(data) + ret_0 = lf(full_output=True) + ret_1 = lf(full_output=True, mu=1) + + config["source_wise_interpolation"] = True + lf_source_wise = UnbinnedLogLikelihood(config) + lf_source_wise.add_shape_parameter("mu", anchors={-2:-2, 0:0, 2:2}) + lf_source_wise.prepare() + lf_source_wise.set_data(data) + ret_source_wise_0 = lf_source_wise(full_output=True) + ret_source_wise_1 = lf_source_wise(full_output=True, mu=1) + + assert ret_0[0] == ret_source_wise_0[0] + assert (ret_0[1] == ret_source_wise_0[1]).all() + assert (ret_0[2] == ret_source_wise_0[2]).all() + assert ret_1[0] == ret_source_wise_1[0] + assert (ret_1[1] == ret_source_wise_1[1]).all() + assert (ret_1[2] == ret_source_wise_1[2]).all() + + def test_multisource_likelihood(): lf = UnbinnedLogLikelihood(conf_for_test(n_sources=2)) diff --git a/tests/test_model.py b/tests/test_model.py index 04d25c3..a59cdf2 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -5,9 +5,11 @@ def test_rates(): m = Model(conf_for_test(n_sources=1)) np.testing.assert_array_equal(m.expected_events(), np.array([1000])) - m.config['livetime_days'] = 2 + for source in m.sources: + source.config['livetime_days'] = 2 np.testing.assert_array_equal(m.expected_events(), np.array([2000])) - m.config['livetime_days'] = 1 + for source in m.sources: + source.config['livetime_days'] = 1 m.sources[0].fraction_in_range = 0.5 np.testing.assert_array_equal(m.expected_events(), np.array([500]))