Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Source-wise interpolation #46

Merged
merged 14 commits into from
Nov 8, 2024
156 changes: 137 additions & 19 deletions blueice/likelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def __init__(self, pdf_base_config, likelihood_config=None, **kwargs):
likelihood_config = {}
self.config = likelihood_config
self.config.setdefault('morpher', 'GridInterpolator')
self.source_wise_interpolation = self.pdf_base_config.get('source_wise_interpolation', False)

# Base model: no variations of any settings
self.base_model = Model(self.pdf_base_config)
Expand Down Expand Up @@ -101,29 +102,85 @@ def __init__(self, pdf_base_config, likelihood_config=None, **kwargs):

# If there are shape parameters:
self.anchor_models = OrderedDict() # dictionary mapping model zs -> actual model
self.anchor_sources = OrderedDict() # dictionary mapping source_name -> zs -> source
# Interpolators created by morphers. These map zs to...
self.mus_interpolator = None # rates for each source
self.ps_interpolator = None # (source, event) p-values (unbinned), or pmf grid (binned)
# number of events per bin observed in Monte Carlo / calibration data that gave rise to the model.
self.n_model_events_interpolator = lambda x: None
self.n_model_events = None

@property
def source_shape_parameters(self):
    """Map source name -> OrderedDict of the shape parameters that affect it.

    Sources that are insensitive to every shape parameter are omitted from
    the result entirely.
    """
    result = OrderedDict()
    per_source = zip(self.source_name_list,
                     self.base_model.sources,
                     self.source_apply_efficiency,
                     self.source_efficiency_names)
    for name, src, applies_efficiency, efficiency_name in per_source:
        ignored = set(src.config['dont_hash_settings'])
        # The efficiency parameter doesn't need to be hashed, but it still
        # has to be passed to the morpher, so keep it out of the ignore set.
        if applies_efficiency:
            ignored.discard(efficiency_name)
        relevant = OrderedDict((k, v) for k, v in self.shape_parameters.items()
                               if k not in ignored)
        if relevant:
            result[name] = relevant
    return result

def _get_shape_indices(self, source_name):
"""Return the indices of the shape parameters used by the source."""
shape_keys = self.source_shape_parameters[source_name].keys()
return [i for i, k in enumerate(self.shape_parameters.keys()) if k in shape_keys]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for arrays should we return it sorted? Or return a set?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I misunderstand your suggestion but I think the return should by this definition be a sorted list, or not? Also entries should be unique.


def _get_model_anchor(self, anchor, source_name):
"""Return the shape anchors of the full model, given the shape anchors of a signle source.
All values of shape parameters not used by this source will be set to None.
"""
shape_indices = self._get_shape_indices(source_name)
model_anchor = [None] * len(self.shape_parameters)
for i, idx in enumerate(shape_indices):
model_anchor[idx] = anchor[i]
return tuple(model_anchor)

def prepare(self, n_cores=1, ipp_client=None):
"""Prepares a likelihood function with shape parameters for use.
This will compute the models for each shape parameter anchor value combination.
"""
if len(self.shape_parameters):
self.morpher = MORPHERS[self.config['morpher']](self.config.get('morpher_config', {}),
self.shape_parameters)
zs_list = self.morpher.get_anchor_points(bounds=self.get_bounds())
if self.source_wise_interpolation:
# Create morphers for each source individually
self.source_morphers = OrderedDict()

def create_source_morpher(shape_parameters):
return MORPHERS[self.config['morpher']](
self.config.get('morpher_config', {}),
shape_parameters)

for sn, shape_parameters in self.source_shape_parameters.items():
self.source_morphers[sn] = create_source_morpher(shape_parameters)
zs_list = set()
for source_name, morpher in self.source_morphers.items():
anchor_points = morpher.get_anchor_points(bounds=None)
for anchor in anchor_points:
zs = self._get_model_anchor(anchor, source_name)
zs_list.add(zs)
zs_list = list(zs_list)

else:
self.morpher = MORPHERS[self.config['morpher']](self.config.get('morpher_config', {}),
self.shape_parameters)
zs_list = self.morpher.get_anchor_points(bounds=self.get_bounds())

# Create the configs for each new model
configs = []
for zs in zs_list:
config = deepcopy(self.pdf_base_config)
for i, (setting_name, (anchors, _, _)) in enumerate(self.shape_parameters.items()):
# Translate from zs to settings using the anchors dict. Maybe not all settings are numerical.
config[setting_name] = anchors[zs[i]]
if zs[i] is not None:
config[setting_name] = anchors[zs[i]]
if ipp_client is None and n_cores != 1:
# We have to compute in parallel: must have delayed computation on
config['delay_pdf_computation'] = True
Expand All @@ -150,14 +207,47 @@ def prepare(self, n_cores=1, ipp_client=None):
# Reload models so computation takes effect
models = [Model(c) for c in tqdm(configs, desc="Loading computed models")]

# Add the new models to the anchor_models dict
for zs, model in zip(zs_list, models):
self.anchor_models[tuple(zs)] = model
if self.source_wise_interpolation:
for i, (source_name, morpher) in enumerate(self.source_morphers.items()):
anchors = morpher.get_anchor_points(bounds=None)
self.anchor_sources[source_name] = OrderedDict()
for anchor in anchors:
model_anchor = self._get_model_anchor(anchor, source_name)
model_index = zs_list.index(model_anchor)
self.anchor_sources[source_name][anchor] = models[model_index].sources[i]
mus_interpolators = OrderedDict()
for sn, base_source in zip(self.source_name_list, self.base_model.sources):
if sn in self.source_morphers:
mus_interpolators[sn] = self.source_morphers[sn].make_interpolator(
f=lambda s: s.expected_events,
extra_dims=[1],
anchor_models=self.anchor_sources[sn])
else:
mus_interpolators[sn] = base_source.expected_events

def mus_interpolator(*args):
# take zs, convert to values for each source's interpolator call the respective interpolator
mus = []
for sn in self.source_name_list:
if sn in self.source_shape_parameters:
shape_indices = self._get_shape_indices(sn)
these_args = [args[0][i] for i in shape_indices]
mus.append(mus_interpolators[sn](np.asarray(these_args))[0])
else:
mus.append(mus_interpolators[sn])
return np.array(mus)
self.mus_interpolator = mus_interpolator

# Build the interpolator for the rates of each source.
self.mus_interpolator = self.morpher.make_interpolator(f=lambda m: m.expected_events(),
extra_dims=[len(self.source_name_list)],
anchor_models=self.anchor_models)
else:
# Add the new models to the anchor_models dict
for zs, model in zip(zs_list, models):
self.anchor_models[tuple(zs)] = model

# Build the interpolator for the rates of each source.
self.mus_interpolator = self.morpher.make_interpolator(
f=lambda m: m.expected_events(),
extra_dims=[len(self.source_name_list)],
anchor_models=self.anchor_models)

self.is_data_set = False
self.is_prepared = True
Expand Down Expand Up @@ -440,9 +530,33 @@ class UnbinnedLogLikelihood(LogLikelihoodBase):
def set_data(self, d):
    """Set the observed dataset and build the p-value interpolator.

    :param d: structured array of observed events.

    With shape parameters and source-wise interpolation enabled, one
    interpolator per morphed source is built over that source's own anchors
    (sources without shape parameters get a fixed pdf evaluation), and
    ``self.ps_interpolator`` dispatches each source's slice of the parameter
    vector to the right per-source interpolator. Without source-wise
    interpolation, a single model-wide interpolator over the event scores is
    used. With no shape parameters at all, the scores are computed once.

    Note: the scraped diff contained both the pre-change model-wide
    assignment and the new source-wise branch; this is the reconstructed
    post-merge implementation with the superseded lines removed.
    """
    LogLikelihoodBase.set_data(self, d)
    if len(self.shape_parameters):
        if self.source_wise_interpolation:
            self.ps_interpolators = OrderedDict()
            for sn, base_source in zip(self.source_name_list, self.base_model.sources):
                if sn in self.source_morphers:
                    # Interpolate this source's per-event pdf values over
                    # its own anchor grid.
                    self.ps_interpolators[sn] = self.source_morphers[sn].make_interpolator(
                        f=lambda s: s.pdf(*self.base_model.to_analysis_dimensions(d)),
                        extra_dims=[len(d)],
                        anchor_models=self.anchor_sources[sn])
                else:
                    # No shape parameters for this source: store the fixed
                    # per-event pdf values directly.
                    self.ps_interpolators[sn] = base_source.pdf(
                        *self.base_model.to_analysis_dimensions(d))

            def ps_interpolator(*args):
                # Take the full zs vector, select the components each source
                # uses, and call the respective per-source interpolator.
                ps = np.zeros((len(self.source_name_list), len(d)))
                for i, (sn, source_interp) in enumerate(self.ps_interpolators.items()):
                    if sn in self.source_shape_parameters:
                        these_args = np.asarray(
                            [args[0][j] for j in self._get_shape_indices(sn)])
                        ps[i] = source_interp(these_args)
                    else:
                        ps[i] = source_interp
                return ps
            self.ps_interpolator = ps_interpolator
        else:
            self.ps_interpolator = self.morpher.make_interpolator(
                f=lambda m: m.score_events(d),
                extra_dims=[len(self.source_name_list), len(d)],
                anchor_models=self.anchor_models)
    else:
        # No shape parameters: score the events once against the base model.
        self.ps = self.base_model.score_events(d)

Expand Down Expand Up @@ -472,14 +586,18 @@ def prepare(self, *args):
self.ps, self.n_model_events = self.base_model.pmf_grids()

if len(self.shape_parameters):
self.ps_interpolator = self.morpher.make_interpolator(f=lambda m: m.pmf_grids()[0],
extra_dims=list(self.ps.shape),
anchor_models=self.anchor_models)
if self.source_wise_interpolation:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For readability, might it be better to just have "if self.source_wise_interpolation: raise error" and then have the rest not in an else block but just normal indentation?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah yes -- I basically wanted to use this as a placeholder if someone is eager to implement this at one point but of course you're right that if the if clause raises an error we don't need the else -- removed! 👍

raise NotImplementedError("Source-wise interpolation not implemented for binned likelihoods")

self.ps_interpolator = self.morpher.make_interpolator(
f=lambda m: m.pmf_grids()[0],
extra_dims=list(self.ps.shape),
anchor_models=self.anchor_models)

if self.model_statistical_uncertainty_handling is not None:
self.n_model_events_interpolator = self.morpher.make_interpolator(f=lambda m: m.pmf_grids()[1],
extra_dims=list(self.ps.shape),
anchor_models=self.anchor_models)
extra_dims=list(self.ps.shape),
anchor_models=self.anchor_models)

@inherit_docstring_from(LogLikelihoodBase)
def set_data(self, d):
Expand Down
3 changes: 1 addition & 2 deletions blueice/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,10 @@ def pmf_grids(self):
def expected_events(self, s=None):
    """Return the total number of events expected in the analysis range.

    :param s: a single source. If None, return an array with the
        expectation of every source in ``self.sources``.

    The per-source computation now lives on the source itself
    (``Source.expected_events`` property); this scrape interleaved the
    deleted pre-change lines (old TODO and the inline rate formula) with
    the new delegation — this is the reconstructed post-merge body. The
    loop variable is also renamed so it no longer shadows the parameter.
    """
    if s is None:
        return np.array([self.expected_events(source) for source in self.sources])
    return s.expected_events
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we losing lifetime scaling here?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Answer: yes, @hammannr had told me but I overlooked it-- what changes is that the expected_events is now per-source and not adapted to change on-the-go (you would do that by changing livetime instead)
I approve of that solution.


def show(self, d, ax=None, dims=None, **kwargs):
"""Plot the events from dataset d in the analysis range
Expand Down
5 changes: 5 additions & 0 deletions blueice/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ def simulate(self, n_events):
"""
raise NotImplementedError

@property
def expected_events(self):
    """Default total number of events expected in the analysis range for this source."""
    scale = self.config['livetime_days'] * self.config['rate_multiplier']
    return self.events_per_day * self.fraction_in_range * scale


class HistogramPdfSource(Source):
"""A source which takes its PDF values from a multihist histogram.
Expand Down
29 changes: 29 additions & 0 deletions tests/test_likelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,35 @@ def test_shape_uncertainty():
assert lf(strlen_multiplier=2) == -2 + np.log(2 * stats.norm.pdf(0)) + log_prior(2)


def test_source_wise_interpolation():
    """Source-wise interpolation must reproduce the default (model-wise) results."""
    data = np.zeros(5, dtype=[('x', float), ('source', int)])
    data['x'] = np.linspace(0, 1, 5)

    config = conf_for_test(events_per_day=1)

    def build_and_evaluate(cfg):
        # Build a likelihood with a single shape parameter and evaluate it
        # at the default point and at mu=1 (off-anchor, so interpolated).
        lf = UnbinnedLogLikelihood(cfg)
        lf.add_shape_parameter("mu", anchors={-2: -2, 0: 0, 2: 2})
        lf.prepare()
        lf.set_data(data)
        return lf(full_output=True), lf(full_output=True, mu=1)

    ret_0, ret_1 = build_and_evaluate(config)

    config["source_wise_interpolation"] = True
    ret_source_wise_0, ret_source_wise_1 = build_and_evaluate(config)

    for reference, source_wise in ((ret_0, ret_source_wise_0),
                                   (ret_1, ret_source_wise_1)):
        assert reference[0] == source_wise[0]
        assert (reference[1] == source_wise[1]).all()
        assert (reference[2] == source_wise[2]).all()


def test_multisource_likelihood():
lf = UnbinnedLogLikelihood(conf_for_test(n_sources=2))

Expand Down
6 changes: 4 additions & 2 deletions tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ def test_rates():
m = Model(conf_for_test(n_sources=1))
np.testing.assert_array_equal(m.expected_events(), np.array([1000]))

m.config['livetime_days'] = 2
for source in m.sources:
source.config['livetime_days'] = 2
np.testing.assert_array_equal(m.expected_events(), np.array([2000]))
m.config['livetime_days'] = 1
for source in m.sources:
source.config['livetime_days'] = 1

m.sources[0].fraction_in_range = 0.5
np.testing.assert_array_equal(m.expected_events(), np.array([500]))
Expand Down
Loading