-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Source-wise interpolation #46
Changes from all commits
1324a30
d92246a
0fdb5b2
5baa97a
324b3e0
305fedc
cb20f4a
98a9a2b
7a5d797
ff49044
0e99fa2
47e3ba1
9b6cad4
17b9abd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,6 +73,7 @@ def __init__(self, pdf_base_config, likelihood_config=None, **kwargs): | |
likelihood_config = {} | ||
self.config = likelihood_config | ||
self.config.setdefault('morpher', 'GridInterpolator') | ||
self.source_wise_interpolation = self.pdf_base_config.get('source_wise_interpolation', False) | ||
|
||
# Base model: no variations of any settings | ||
self.base_model = Model(self.pdf_base_config) | ||
|
@@ -101,29 +102,85 @@ def __init__(self, pdf_base_config, likelihood_config=None, **kwargs): | |
|
||
# If there are shape parameters: | ||
self.anchor_models = OrderedDict() # dictionary mapping model zs -> actual model | ||
self.anchor_sources = OrderedDict() # dictionary mapping source_name -> zs -> source | ||
# Interpolators created by morphers. These map zs to... | ||
self.mus_interpolator = None # rates for each source | ||
self.ps_interpolator = None # (source, event) p-values (unbinned), or pmf grid (binned) | ||
# number of events per bin observed in Monte Carlo / calibration data that gave rise to the model. | ||
self.n_model_events_interpolator = lambda x: None | ||
self.n_model_events = None | ||
|
||
@property | ||
def source_shape_parameters(self): | ||
"""Dict of sources with shape parameters. source name -> dict of shape parameters.""" | ||
source_shape_parameters = OrderedDict() | ||
for sn, source, apply_eff, eff_name in zip( | ||
self.source_name_list, | ||
self.base_model.sources, | ||
self.source_apply_efficiency, | ||
self.source_efficiency_names | ||
): | ||
ignore_parameters = set(source.config['dont_hash_settings']) | ||
# The efficiecny parameter doesn't need to be hashed but it needs to be passed to the morpher | ||
if apply_eff: | ||
ignore_parameters.discard(eff_name) | ||
shape_parameters = OrderedDict({k: v for k, v in self.shape_parameters.items() if k not in ignore_parameters}) | ||
if shape_parameters: | ||
source_shape_parameters[sn] = shape_parameters | ||
return source_shape_parameters | ||
|
||
def _get_shape_indices(self, source_name): | ||
"""Return the indices of the shape parameters used by the source.""" | ||
shape_keys = self.source_shape_parameters[source_name].keys() | ||
return [i for i, k in enumerate(self.shape_parameters.keys()) if k in shape_keys] | ||
|
||
def _get_model_anchor(self, anchor, source_name): | ||
"""Return the shape anchors of the full model, given the shape anchors of a signle source. | ||
All values of shape parameters not used by this source will be set to None. | ||
""" | ||
shape_indices = self._get_shape_indices(source_name) | ||
model_anchor = [None] * len(self.shape_parameters) | ||
for i, idx in enumerate(shape_indices): | ||
model_anchor[idx] = anchor[i] | ||
return tuple(model_anchor) | ||
|
||
def prepare(self, n_cores=1, ipp_client=None): | ||
"""Prepares a likelihood function with shape parameters for use. | ||
This will compute the models for each shape parameter anchor value combination. | ||
""" | ||
if len(self.shape_parameters): | ||
self.morpher = MORPHERS[self.config['morpher']](self.config.get('morpher_config', {}), | ||
self.shape_parameters) | ||
zs_list = self.morpher.get_anchor_points(bounds=self.get_bounds()) | ||
if self.source_wise_interpolation: | ||
# Create morphers for each source individually | ||
self.source_morphers = OrderedDict() | ||
|
||
def create_source_morpher(shape_parameters): | ||
return MORPHERS[self.config['morpher']]( | ||
self.config.get('morpher_config', {}), | ||
shape_parameters) | ||
|
||
for sn, shape_parameters in self.source_shape_parameters.items(): | ||
self.source_morphers[sn] = create_source_morpher(shape_parameters) | ||
zs_list = set() | ||
for source_name, morpher in self.source_morphers.items(): | ||
anchor_points = morpher.get_anchor_points(bounds=None) | ||
for anchor in anchor_points: | ||
zs = self._get_model_anchor(anchor, source_name) | ||
zs_list.add(zs) | ||
zs_list = list(zs_list) | ||
|
||
else: | ||
self.morpher = MORPHERS[self.config['morpher']](self.config.get('morpher_config', {}), | ||
self.shape_parameters) | ||
zs_list = self.morpher.get_anchor_points(bounds=self.get_bounds()) | ||
|
||
# Create the configs for each new model | ||
configs = [] | ||
for zs in zs_list: | ||
config = deepcopy(self.pdf_base_config) | ||
for i, (setting_name, (anchors, _, _)) in enumerate(self.shape_parameters.items()): | ||
# Translate from zs to settings using the anchors dict. Maybe not all settings are numerical. | ||
config[setting_name] = anchors[zs[i]] | ||
if zs[i] is not None: | ||
config[setting_name] = anchors[zs[i]] | ||
if ipp_client is None and n_cores != 1: | ||
# We have to compute in parallel: must have delayed computation on | ||
config['delay_pdf_computation'] = True | ||
|
@@ -150,14 +207,47 @@ def prepare(self, n_cores=1, ipp_client=None): | |
# Reload models so computation takes effect | ||
models = [Model(c) for c in tqdm(configs, desc="Loading computed models")] | ||
|
||
# Add the new models to the anchor_models dict | ||
for zs, model in zip(zs_list, models): | ||
self.anchor_models[tuple(zs)] = model | ||
if self.source_wise_interpolation: | ||
for i, (source_name, morpher) in enumerate(self.source_morphers.items()): | ||
anchors = morpher.get_anchor_points(bounds=None) | ||
self.anchor_sources[source_name] = OrderedDict() | ||
for anchor in anchors: | ||
model_anchor = self._get_model_anchor(anchor, source_name) | ||
model_index = zs_list.index(model_anchor) | ||
self.anchor_sources[source_name][anchor] = models[model_index].sources[i] | ||
mus_interpolators = OrderedDict() | ||
for sn, base_source in zip(self.source_name_list, self.base_model.sources): | ||
if sn in self.source_morphers: | ||
mus_interpolators[sn] = self.source_morphers[sn].make_interpolator( | ||
f=lambda s: s.expected_events, | ||
extra_dims=[1], | ||
anchor_models=self.anchor_sources[sn]) | ||
else: | ||
mus_interpolators[sn] = base_source.expected_events | ||
|
||
def mus_interpolator(*args): | ||
# take zs, convert to values for each source's interpolator call the respective interpolator | ||
mus = [] | ||
for sn in self.source_name_list: | ||
if sn in self.source_shape_parameters: | ||
shape_indices = self._get_shape_indices(sn) | ||
these_args = [args[0][i] for i in shape_indices] | ||
mus.append(mus_interpolators[sn](np.asarray(these_args))[0]) | ||
else: | ||
mus.append(mus_interpolators[sn]) | ||
return np.array(mus) | ||
self.mus_interpolator = mus_interpolator | ||
|
||
# Build the interpolator for the rates of each source. | ||
self.mus_interpolator = self.morpher.make_interpolator(f=lambda m: m.expected_events(), | ||
extra_dims=[len(self.source_name_list)], | ||
anchor_models=self.anchor_models) | ||
else: | ||
# Add the new models to the anchor_models dict | ||
for zs, model in zip(zs_list, models): | ||
self.anchor_models[tuple(zs)] = model | ||
|
||
# Build the interpolator for the rates of each source. | ||
self.mus_interpolator = self.morpher.make_interpolator( | ||
f=lambda m: m.expected_events(), | ||
extra_dims=[len(self.source_name_list)], | ||
anchor_models=self.anchor_models) | ||
|
||
self.is_data_set = False | ||
self.is_prepared = True | ||
|
@@ -440,9 +530,33 @@ class UnbinnedLogLikelihood(LogLikelihoodBase): | |
def set_data(self, d): | ||
LogLikelihoodBase.set_data(self, d) | ||
if len(self.shape_parameters): | ||
self.ps_interpolator = self.morpher.make_interpolator(f=lambda m: m.score_events(d), | ||
extra_dims=[len(self.source_name_list), len(d)], | ||
anchor_models=self.anchor_models) | ||
if self.source_wise_interpolation: | ||
self.ps_interpolators = OrderedDict() | ||
for sn, base_source in zip(self.source_name_list, self.base_model.sources): | ||
if sn in self.source_morphers: | ||
self.ps_interpolators[sn] = self.source_morphers[sn].make_interpolator( | ||
f=lambda s: s.pdf(*self.base_model.to_analysis_dimensions(d)), | ||
extra_dims=[len(d)], | ||
anchor_models=self.anchor_sources[sn]) | ||
else: | ||
self.ps_interpolators[sn] = base_source.pdf(*self.base_model.to_analysis_dimensions(d)) | ||
|
||
def ps_interpolator(*args): | ||
# take zs, convert to values for each source's interpolator call the respective interpolator | ||
ps = np.zeros((len(self.source_name_list), len(d))) | ||
for i, (sn, ps_interpolator) in enumerate(self.ps_interpolators.items()): | ||
if sn in self.source_shape_parameters: | ||
these_args = np.asarray([args[0][j] for j in self._get_shape_indices(sn)]) | ||
ps[i] = ps_interpolator(these_args) | ||
else: | ||
ps[i] = ps_interpolator | ||
return ps | ||
self.ps_interpolator = ps_interpolator | ||
else: | ||
self.ps_interpolator = self.morpher.make_interpolator( | ||
f=lambda m: m.score_events(d), | ||
extra_dims=[len(self.source_name_list), len(d)], | ||
anchor_models=self.anchor_models) | ||
else: | ||
self.ps = self.base_model.score_events(d) | ||
|
||
|
@@ -472,14 +586,18 @@ def prepare(self, *args): | |
self.ps, self.n_model_events = self.base_model.pmf_grids() | ||
|
||
if len(self.shape_parameters): | ||
self.ps_interpolator = self.morpher.make_interpolator(f=lambda m: m.pmf_grids()[0], | ||
extra_dims=list(self.ps.shape), | ||
anchor_models=self.anchor_models) | ||
if self.source_wise_interpolation: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For readability, might it be better to just have "if self.source_wise_interpolation: raise error" and then have the rest not in an else block but just normal indentation? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah yes -- I basically wanted to use this as a placeholder if someone is eager to implement this at one point but of course you're right that if the if clause raises an error we don't need the else -- removed! 👍 |
||
raise NotImplementedError("Source-wise interpolation not implemented for binned likelihoods") | ||
|
||
self.ps_interpolator = self.morpher.make_interpolator( | ||
f=lambda m: m.pmf_grids()[0], | ||
extra_dims=list(self.ps.shape), | ||
anchor_models=self.anchor_models) | ||
|
||
if self.model_statistical_uncertainty_handling is not None: | ||
self.n_model_events_interpolator = self.morpher.make_interpolator(f=lambda m: m.pmf_grids()[1], | ||
extra_dims=list(self.ps.shape), | ||
anchor_models=self.anchor_models) | ||
extra_dims=list(self.ps.shape), | ||
anchor_models=self.anchor_models) | ||
|
||
@inherit_docstring_from(LogLikelihoodBase) | ||
def set_data(self, d): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -106,11 +106,10 @@ def pmf_grids(self): | |
def expected_events(self, s=None): | ||
"""Return the total number of events expected in the analysis range for the source s. | ||
If no source specified, return an array of results for all sources. | ||
# TODO: Why is this not a method of source? | ||
""" | ||
if s is None: | ||
return np.array([self.expected_events(s) for s in self.sources]) | ||
return s.events_per_day * self.config['livetime_days'] * s.fraction_in_range * s.config['rate_multiplier'] | ||
return s.expected_events | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we loosing lifetime scaling here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Answer: yes, @hammannr had told me but I overlooked it-- what changes is that the expected_events is now per-source and not adapted to change on-the-go (you would do that by changing livetime instead) |
||
|
||
def show(self, d, ax=None, dims=None, **kwargs): | ||
"""Plot the events from dataset d in the analysis range | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
for arrays should we return it sorted? Or return a set?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe I misunderstand your suggestion but I think the return should by this definition be a sorted list, or not? Also entries should be unique.