Skip to content

Commit

Permalink
Merge pull request #46 from JelleAalbers/source_wise_interpolation
Browse files Browse the repository at this point in the history
Source-wise interpolation
  • Loading branch information
hammannr authored Nov 8, 2024
2 parents f12c2ad + 17b9abd commit 0206fd9
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 23 deletions.
156 changes: 137 additions & 19 deletions blueice/likelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def __init__(self, pdf_base_config, likelihood_config=None, **kwargs):
likelihood_config = {}
self.config = likelihood_config
self.config.setdefault('morpher', 'GridInterpolator')
self.source_wise_interpolation = self.pdf_base_config.get('source_wise_interpolation', False)

# Base model: no variations of any settings
self.base_model = Model(self.pdf_base_config)
Expand Down Expand Up @@ -101,29 +102,85 @@ def __init__(self, pdf_base_config, likelihood_config=None, **kwargs):

# If there are shape parameters:
self.anchor_models = OrderedDict() # dictionary mapping model zs -> actual model
self.anchor_sources = OrderedDict() # dictionary mapping source_name -> zs -> source
# Interpolators created by morphers. These map zs to...
self.mus_interpolator = None # rates for each source
self.ps_interpolator = None # (source, event) p-values (unbinned), or pmf grid (binned)
# number of events per bin observed in Monte Carlo / calibration data that gave rise to the model.
self.n_model_events_interpolator = lambda x: None
self.n_model_events = None

@property
def source_shape_parameters(self):
    """Dict of sources with shape parameters: source name -> dict of shape parameters.

    A source is included only if at least one of the likelihood's shape
    parameters applies to it (i.e. is not listed in the source's
    ``dont_hash_settings``).
    """
    result = OrderedDict()
    sources_info = zip(
        self.source_name_list,
        self.base_model.sources,
        self.source_apply_efficiency,
        self.source_efficiency_names,
    )
    for name, source, applies_efficiency, efficiency_name in sources_info:
        ignored = set(source.config['dont_hash_settings'])
        # The efficiency parameter does not need to be hashed, but it still
        # has to be passed to the morpher, so keep it out of the ignore set.
        if applies_efficiency:
            ignored.discard(efficiency_name)
        relevant = OrderedDict(
            (key, value)
            for key, value in self.shape_parameters.items()
            if key not in ignored
        )
        if relevant:
            result[name] = relevant
    return result

def _get_shape_indices(self, source_name):
"""Return the indices of the shape parameters used by the source."""
shape_keys = self.source_shape_parameters[source_name].keys()
return [i for i, k in enumerate(self.shape_parameters.keys()) if k in shape_keys]

def _get_model_anchor(self, anchor, source_name):
"""Return the shape anchors of the full model, given the shape anchors of a signle source.
All values of shape parameters not used by this source will be set to None.
"""
shape_indices = self._get_shape_indices(source_name)
model_anchor = [None] * len(self.shape_parameters)
for i, idx in enumerate(shape_indices):
model_anchor[idx] = anchor[i]
return tuple(model_anchor)

def prepare(self, n_cores=1, ipp_client=None):
"""Prepares a likelihood function with shape parameters for use.
This will compute the models for each shape parameter anchor value combination.
"""
if len(self.shape_parameters):
self.morpher = MORPHERS[self.config['morpher']](self.config.get('morpher_config', {}),
self.shape_parameters)
zs_list = self.morpher.get_anchor_points(bounds=self.get_bounds())
if self.source_wise_interpolation:
# Create morphers for each source individually
self.source_morphers = OrderedDict()

def create_source_morpher(shape_parameters):
return MORPHERS[self.config['morpher']](
self.config.get('morpher_config', {}),
shape_parameters)

for sn, shape_parameters in self.source_shape_parameters.items():
self.source_morphers[sn] = create_source_morpher(shape_parameters)
zs_list = set()
for source_name, morpher in self.source_morphers.items():
anchor_points = morpher.get_anchor_points(bounds=None)
for anchor in anchor_points:
zs = self._get_model_anchor(anchor, source_name)
zs_list.add(zs)
zs_list = list(zs_list)

else:
self.morpher = MORPHERS[self.config['morpher']](self.config.get('morpher_config', {}),
self.shape_parameters)
zs_list = self.morpher.get_anchor_points(bounds=self.get_bounds())

# Create the configs for each new model
configs = []
for zs in zs_list:
config = deepcopy(self.pdf_base_config)
for i, (setting_name, (anchors, _, _)) in enumerate(self.shape_parameters.items()):
# Translate from zs to settings using the anchors dict. Maybe not all settings are numerical.
config[setting_name] = anchors[zs[i]]
if zs[i] is not None:
config[setting_name] = anchors[zs[i]]
if ipp_client is None and n_cores != 1:
# We have to compute in parallel: must have delayed computation on
config['delay_pdf_computation'] = True
Expand All @@ -150,14 +207,47 @@ def prepare(self, n_cores=1, ipp_client=None):
# Reload models so computation takes effect
models = [Model(c) for c in tqdm(configs, desc="Loading computed models")]

# Add the new models to the anchor_models dict
for zs, model in zip(zs_list, models):
self.anchor_models[tuple(zs)] = model
if self.source_wise_interpolation:
for i, (source_name, morpher) in enumerate(self.source_morphers.items()):
anchors = morpher.get_anchor_points(bounds=None)
self.anchor_sources[source_name] = OrderedDict()
for anchor in anchors:
model_anchor = self._get_model_anchor(anchor, source_name)
model_index = zs_list.index(model_anchor)
self.anchor_sources[source_name][anchor] = models[model_index].sources[i]
mus_interpolators = OrderedDict()
for sn, base_source in zip(self.source_name_list, self.base_model.sources):
if sn in self.source_morphers:
mus_interpolators[sn] = self.source_morphers[sn].make_interpolator(
f=lambda s: s.expected_events,
extra_dims=[1],
anchor_models=self.anchor_sources[sn])
else:
mus_interpolators[sn] = base_source.expected_events

def mus_interpolator(*args):
# take zs, convert to values for each source's interpolator call the respective interpolator
mus = []
for sn in self.source_name_list:
if sn in self.source_shape_parameters:
shape_indices = self._get_shape_indices(sn)
these_args = [args[0][i] for i in shape_indices]
mus.append(mus_interpolators[sn](np.asarray(these_args))[0])
else:
mus.append(mus_interpolators[sn])
return np.array(mus)
self.mus_interpolator = mus_interpolator

# Build the interpolator for the rates of each source.
self.mus_interpolator = self.morpher.make_interpolator(f=lambda m: m.expected_events(),
extra_dims=[len(self.source_name_list)],
anchor_models=self.anchor_models)
else:
# Add the new models to the anchor_models dict
for zs, model in zip(zs_list, models):
self.anchor_models[tuple(zs)] = model

# Build the interpolator for the rates of each source.
self.mus_interpolator = self.morpher.make_interpolator(
f=lambda m: m.expected_events(),
extra_dims=[len(self.source_name_list)],
anchor_models=self.anchor_models)

self.is_data_set = False
self.is_prepared = True
Expand Down Expand Up @@ -440,9 +530,33 @@ class UnbinnedLogLikelihood(LogLikelihoodBase):
def set_data(self, d):
    """Set the observed dataset d and build the p-value interpolator.

    With source-wise interpolation enabled, each source with shape parameters
    gets its own interpolator over its own (lower-dimensional) anchor grid;
    sources without shape parameters keep a fixed pdf evaluated once.
    Otherwise a single model-wise interpolator over all anchors is built.

    NOTE(review): the scraped diff interleaved the pre-refactor
    ``ps_interpolator`` assignment (old lines) with the new branch, leaving
    the first assignment's continuation lines orphaned; this body keeps only
    the post-commit logic.
    """
    LogLikelihoodBase.set_data(self, d)
    if len(self.shape_parameters):
        if self.source_wise_interpolation:
            self.ps_interpolators = OrderedDict()
            for sn, base_source in zip(self.source_name_list, self.base_model.sources):
                if sn in self.source_morphers:
                    self.ps_interpolators[sn] = self.source_morphers[sn].make_interpolator(
                        f=lambda s: s.pdf(*self.base_model.to_analysis_dimensions(d)),
                        extra_dims=[len(d)],
                        anchor_models=self.anchor_sources[sn])
                else:
                    # No shape parameters for this source: its p-values are fixed,
                    # so evaluate the pdf once and store the array.
                    self.ps_interpolators[sn] = base_source.pdf(*self.base_model.to_analysis_dimensions(d))

        def ps_interpolator(*args):
            # Take the full-model zs, slice out each source's own shape values,
            # and call that source's interpolator (or reuse its fixed p-values).
            ps = np.zeros((len(self.source_name_list), len(d)))
            for i, (sn, itp) in enumerate(self.ps_interpolators.items()):
                if sn in self.source_shape_parameters:
                    these_args = np.asarray([args[0][j] for j in self._get_shape_indices(sn)])
                    ps[i] = itp(these_args)
                else:
                    ps[i] = itp
            return ps

        if self.source_wise_interpolation:
            self.ps_interpolator = ps_interpolator
        else:
            self.ps_interpolator = self.morpher.make_interpolator(
                f=lambda m: m.score_events(d),
                extra_dims=[len(self.source_name_list), len(d)],
                anchor_models=self.anchor_models)
    else:
        # No shape parameters at all: score events against the base model once.
        self.ps = self.base_model.score_events(d)

Expand Down Expand Up @@ -472,14 +586,18 @@ def prepare(self, *args):
self.ps, self.n_model_events = self.base_model.pmf_grids()

if len(self.shape_parameters):
self.ps_interpolator = self.morpher.make_interpolator(f=lambda m: m.pmf_grids()[0],
extra_dims=list(self.ps.shape),
anchor_models=self.anchor_models)
if self.source_wise_interpolation:
raise NotImplementedError("Source-wise interpolation not implemented for binned likelihoods")

self.ps_interpolator = self.morpher.make_interpolator(
f=lambda m: m.pmf_grids()[0],
extra_dims=list(self.ps.shape),
anchor_models=self.anchor_models)

if self.model_statistical_uncertainty_handling is not None:
self.n_model_events_interpolator = self.morpher.make_interpolator(f=lambda m: m.pmf_grids()[1],
extra_dims=list(self.ps.shape),
anchor_models=self.anchor_models)
extra_dims=list(self.ps.shape),
anchor_models=self.anchor_models)

@inherit_docstring_from(LogLikelihoodBase)
def set_data(self, d):
Expand Down
3 changes: 1 addition & 2 deletions blueice/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,10 @@ def pmf_grids(self):
def expected_events(self, s=None):
    """Return the total number of events expected in the analysis range for the source s.

    If no source is specified, return an array of results for all sources.
    """
    if s is None:
        return np.array([self.expected_events(s) for s in self.sources])
    # The expectation is computed by the source itself (Source.expected_events
    # property); the diff left the old inline formula above the new delegating
    # return, making it unreachable — only the delegation is kept here.
    return s.expected_events

def show(self, d, ax=None, dims=None, **kwargs):
"""Plot the events from dataset d in the analysis range
Expand Down
5 changes: 5 additions & 0 deletions blueice/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ def simulate(self, n_events):
"""
raise NotImplementedError

@property
def expected_events(self):
    """Return the default total number of events expected in the analysis range for the source."""
    scale = self.config['livetime_days'] * self.config['rate_multiplier']
    return self.events_per_day * self.fraction_in_range * scale


class HistogramPdfSource(Source):
"""A source which takes its PDF values from a multihist histogram.
Expand Down
29 changes: 29 additions & 0 deletions tests/test_likelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,35 @@ def test_shape_uncertainty():
assert lf(strlen_multiplier=2) == -2 + np.log(2 * stats.norm.pdf(0)) + log_prior(2)


def test_source_wise_interpolation():
    """Source-wise interpolation must reproduce the default model-wise results."""
    data = np.zeros(5, dtype=[('x', float), ('source', int)])
    data['x'] = np.linspace(0, 1, 5)

    config = conf_for_test(events_per_day=1)

    def evaluate(cfg):
        # Build, prepare, and evaluate a likelihood at mu=0 (default) and mu=1.
        lf = UnbinnedLogLikelihood(cfg)
        lf.add_shape_parameter("mu", anchors={-2: -2, 0: 0, 2: 2})
        lf.prepare()
        lf.set_data(data)
        return lf(full_output=True), lf(full_output=True, mu=1)

    ret_0, ret_1 = evaluate(config)

    config["source_wise_interpolation"] = True
    ret_source_wise_0, ret_source_wise_1 = evaluate(config)

    for expected, got in ((ret_0, ret_source_wise_0), (ret_1, ret_source_wise_1)):
        assert expected[0] == got[0]
        assert (expected[1] == got[1]).all()
        assert (expected[2] == got[2]).all()


def test_multisource_likelihood():
lf = UnbinnedLogLikelihood(conf_for_test(n_sources=2))

Expand Down
6 changes: 4 additions & 2 deletions tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ def test_rates():
m = Model(conf_for_test(n_sources=1))
np.testing.assert_array_equal(m.expected_events(), np.array([1000]))

m.config['livetime_days'] = 2
for source in m.sources:
source.config['livetime_days'] = 2
np.testing.assert_array_equal(m.expected_events(), np.array([2000]))
m.config['livetime_days'] = 1
for source in m.sources:
source.config['livetime_days'] = 1

m.sources[0].fraction_in_range = 0.5
np.testing.assert_array_equal(m.expected_events(), np.array([500]))
Expand Down

0 comments on commit 0206fd9

Please sign in to comment.