More artifacts that are needed to make the gghbb analysis work and ot…

…her cosmetic things (#30) * make Nick happy * egamma-pog scale factor converter * move kept histograms option to finalize * opportunistically use pre-calculated values * pass in a matching function to (arg)match * deal with TMath::* correctly * first implementation of factorized jet corrector * jet resolutions, uncertainties, sfs * photon id sfs * growing pains * all jetmet corrections now function correctly, speed still not what I would like * done trying to squeeze speed out, try again later * no need for the 1s array in the returned values for JUNCS * add jetmet_tools.JetTransformer to apply JECs, etc. in full and decorate outputs * clean up includes for jec related lookups * docstrings
scikit-hep · Jan 7, 2019 · 55229ad · 55229ad
1 parent 7fa71ca
commit 55229ad
Show file tree

Hide file tree

Showing 16 changed files with 1,190 additions and 119 deletions.
diff --git a/fnal_column_analysis_tools/__init__.py b/fnal_column_analysis_tools/__init__.py
@@ -33,5 +33,6 @@
 import fnal_column_analysis_tools.lookup_tools
 import fnal_column_analysis_tools.analysis_objects
 import fnal_column_analysis_tools.striped
+import fnal_column_analysis_tools.jetmet_tools
 
 from fnal_column_analysis_tools.version import __version__
diff --git a/fnal_column_analysis_tools/analysis_objects/JaggedCandidateArray.py b/fnal_column_analysis_tools/analysis_objects/JaggedCandidateArray.py
diff --git a/fnal_column_analysis_tools/jetmet_tools/FactorizedJetCorrector.py b/fnal_column_analysis_tools/jetmet_tools/FactorizedJetCorrector.py
@@ -0,0 +1,164 @@
+from ..lookup_tools.jme_standard_function import jme_standard_function
+import warnings
+import re
+import numpy as np
+from copy import deepcopy
+from awkward import JaggedArray
+
+def _checkConsistency(against,tocheck):
+    if against is None:
+        against = tocheck
+    else:
+        if against != tocheck:
+            raise Exception('Corrector for {} is mixed'/
+                            'with correctors for {}!'.format(tocheck,against))
+    return tocheck
+
+_levelre = re.compile('[L1-7]+')
+def _getLevel(levelName):
+    matches = _levelre.findall(levelName)
+    if len(matches) > 1:
+        raise Exception('Malformed JEC level name: {}'.format(levelName))
+    return matches[0]
+
# order in which the correction levels are applied
_level_order = ['L1','L2','L3','L2L3']

class FactorizedJetCorrector(object):
    """
        This class is a columnar implementation of the FactorizedJetCorrector tool in
        CMSSW and FWLite. It applies a series of JECs in ascending order as defined by
        '_level_order', and checks for the consistency of input corrections.
        You can use this class as follows:
        fjc = FactorizedJetCorrector(name1=corrL1,...)
        jetCorrs = fjc.getCorrection(JetParameter1=jet.parameter1,...)
    """
    def __init__(self,**kwargs):
        """
            You construct a FactorizedJetCorrector by passing in a dict of names and functions.
            Names must be formatted as '<campaign>_<dataera>_<datatype>_<level>_<jettype>'.
            Every function must be a jme_standard_function and all names must agree on
            campaign, data era, data type and jet type, otherwise an Exception is raised.
            An Exception is also raised when no corrections are passed in at all.
        """
        jettype = None
        levels = []
        funcs = []
        datatype = None
        campaign = None
        dataera = None
        for name,func in kwargs.items():
            if not isinstance(func,jme_standard_function):
                raise Exception('{} is a {} and not a jme_standard_function!'.format(name,
                                                                                     type(func)))
            info = name.split('_')
            if len(info) != 5:
                raise Exception('Corrector name is not properly formatted!')

            campaign = _checkConsistency(campaign,info[0])
            dataera  = _checkConsistency(dataera,info[1])
            datatype = _checkConsistency(datatype,info[2])
            levels.append(info[3])
            funcs.append(func)
            jettype  = _checkConsistency(jettype,info[4])

        if campaign is None:
            raise Exception('Unable to determine production campaign of JECs!')
        self._campaign = campaign

        if dataera is None:
            raise Exception('Unable to determine data era of JECs!')
        self._dataera = dataera

        if datatype is None:
            raise Exception('Unable to determine if JECs are for MC or Data!')
        self._datatype = datatype

        if len(levels) == 0:
            raise Exception('No levels provided?')

        if jettype is None:
            raise Exception('Unable to determine type of jet to correct!')
        self._jettype = jettype

        # Stable sort of the (level, function) pairs by the position of the level
        # token in _level_order. A level token that is not listed in _level_order
        # makes list.index raise ValueError.
        order = sorted(range(len(levels)),
                       key=lambda idx: _level_order.index(_getLevel(levels[idx])))
        self._levels = [levels[idx] for idx in order]
        self._funcs = [funcs[idx] for idx in order]

        #now we setup the call signature for this factorized JEC
        #(union of the inputs of all functions, first appearance wins)
        self._signature = []
        for func in self._funcs:
            for name in func.signature:
                if name not in self._signature:
                    self._signature.append(name)

    @property
    def signature(self):
        """ list the necessary jet properties that must be input to this function """
        return self._signature

    def __repr__(self):
        out  = 'campaign   : %s\n'%(self._campaign)
        out += 'data era   : %s\n'%(self._dataera)
        out += 'data type  : %s\n'%(self._datatype)
        out += 'jet type   : %s\n'%(self._jettype)
        out += 'levels     : %s\n'%(','.join(self._levels))
        out += 'signature  : (%s)\n'%(','.join(self._signature))
        return out

    def getCorrection(self,**kwargs):
        """
            Returns the set of corrections for all input jets at the highest available level
            use like:
            jecs = corrector.getCorrection(JetProperty1=jet.property1,...)
        """
        subCorrs = self.getSubCorrections(**kwargs)
        return subCorrs[-1]

    def getSubCorrections(self,**kwargs):
        """
            Returns the set of corrections for all input jets broken down by level
            use like:
            jecs = corrector.getSubCorrections(JetProperty1=jet.property1,...)
            'jecs' will be formatted like [[jec_jet1 jec_jet2 ...] ...]
            Entry i holds the cumulative product of the corrections of levels 0..i,
            so the last entry is the full correction. The input arrays are not modified.
            Raises Exception if neither 'JetPt' nor 'JetE' is among the inputs.
        """
        localargs = dict(kwargs)
        firstarg = localargs[self._signature[0]]
        offsets = None
        if isinstance(firstarg,JaggedArray):
            # work on the flat contents and restore the jagged structure at the end
            # NOTE(review): assumes every input is a JaggedArray sharing the offsets
            # of the first signature argument - confirm against callers
            offsets = firstarg.offsets
            cumulativeCorrection = firstarg.ones_like().content
            for key in localargs:
                localargs[key] = localargs[key].content
        else:
            cumulativeCorrection = np.ones_like(firstarg)
        corrVars = [var for var in ('JetPt','JetE') if var in localargs]
        if len(corrVars) == 0:
            raise Exception('No variable to correct, need JetPt or JetE in inputs!')
        corrections = []
        for func in self._funcs:
            args = tuple(localargs[name] for name in func.signature)
            corr = func(*args)
            # multiply out of place: each level is evaluated on the jets as corrected
            # by the previous levels, but the caller's arrays must stay untouched
            for var in corrVars:
                localargs[var] = localargs[var] * corr
            # a new array per level, so earlier entries of 'corrections' keep their values
            cumulativeCorrection = cumulativeCorrection * corr
            corrections.append(cumulativeCorrection)
        if offsets is not None:
            corrections = [JaggedArray.fromoffsets(offsets,corr) for corr in corrections]
        return corrections
diff --git a/fnal_column_analysis_tools/jetmet_tools/JetCorrectionUncertainty.py b/fnal_column_analysis_tools/jetmet_tools/JetCorrectionUncertainty.py
@@ -0,0 +1,130 @@
+from ..lookup_tools.jec_uncertainty_lookup import jec_uncertainty_lookup
+import warnings
+import re
+import numpy as np
+from copy import deepcopy
+from awkward import JaggedArray
+
+def _checkConsistency(against,tocheck):
+    if against is None:
+        against = tocheck
+    else:
+        if against != tocheck:
+            raise Exception('Corrector for {} is mixed'/
+                            'with correctors for {}!'.format(tocheck,against))
+    return tocheck
+
+_levelre = re.compile('Uncertainty')
+def _getLevel(levelName):
+    matches = _levelre.findall(levelName)
+    if len(matches) != 1:
+        raise Exception('Malformed JUNC level name: {}'.format(levelName))
+    return matches[0]
+
# the only level understood by this tool
_level_order = ['Uncertainty']

class JetCorrectionUncertainty(object):
    """
        This class is a columnar implementation of the JetCorrectionUncertainty tool in
        CMSSW and FWLite. It calculates the jet energy scale uncertainty for a corrected jet
        in a given binning.
        You can use this class as follows:
        jcu = JetCorrectionUncertainty(name1=corrL1,...)
        jetUncs = jcu.getUncertainty(JetParameter1=jet.parameter1,...)
    """
    def __init__(self,**kwargs):
        """
            You construct a JetCorrectionUncertainty by passing in a dict of names and functions.
            Names must be formatted as '<campaign>_<dataera>_<datatype>_<level>_<jettype>'.
            Every function must be a jec_uncertainty_lookup and all names must agree on
            campaign, data era, data type and jet type, otherwise an Exception is raised.
            An Exception is also raised when no functions are passed in at all.
        """
        jettype = None
        levels = []
        funcs = []
        datatype = None
        campaign = None
        dataera = None
        for name,func in kwargs.items():
            if not isinstance(func,jec_uncertainty_lookup):
                raise Exception('{} is a {} and not a jec_uncertainty_lookup!'.format(name,
                                                                                      type(func)))
            info = name.split('_')
            if len(info) != 5:
                raise Exception('Corrector name is not properly formatted!')

            campaign = _checkConsistency(campaign,info[0])
            dataera  = _checkConsistency(dataera,info[1])
            datatype = _checkConsistency(datatype,info[2])
            levels.append(info[3])
            funcs.append(func)
            jettype  = _checkConsistency(jettype,info[4])

        if campaign is None:
            raise Exception('Unable to determine production campaign of JECs!')
        self._campaign = campaign

        if dataera is None:
            raise Exception('Unable to determine data era of JECs!')
        self._dataera = dataera

        if datatype is None:
            raise Exception('Unable to determine if JECs are for MC or Data!')
        self._datatype = datatype

        if len(levels) == 0:
            raise Exception('No levels provided?')

        if jettype is None:
            raise Exception('Unable to determine type of jet to correct!')
        self._jettype = jettype

        # Stable sort of the (level, function) pairs by the position of the level
        # token in _level_order; _getLevel also validates every level name.
        # Entries of equal level keep the order in which they were passed in.
        order = sorted(range(len(levels)),
                       key=lambda idx: _level_order.index(_getLevel(levels[idx])))
        self._levels = [levels[idx] for idx in order]
        self._funcs = [funcs[idx] for idx in order]

        #now we setup the call signature for this uncertainty lookup
        #(union of the inputs of all functions, first appearance wins)
        self._signature = []
        for func in self._funcs:
            for name in func.signature:
                if name not in self._signature:
                    self._signature.append(name)

    @property
    def signature(self):
        """ list the necessary jet properties that must be input to this function """
        return self._signature

    def __repr__(self):
        out  = 'campaign   : %s\n'%(self._campaign)
        out += 'data era   : %s\n'%(self._dataera)
        out += 'data type  : %s\n'%(self._datatype)
        out += 'jet type   : %s\n'%(self._jettype)
        out += 'levels     : %s\n'%(','.join(self._levels))
        out += 'signature  : (%s)\n'%(','.join(self._signature))
        return out

    def getUncertainty(self,**kwargs):
        """
            Returns the set of uncertainties for all input jets at the highest available level
            use like:
            juncs = uncertainty.getUncertainty(JetProperty1=jet.property1,...)
            'juncs' will be formatted like [[[up_val down_val]_jet1 ... ] ...]
            Every stored function is evaluated with the inputs named in its own
            signature; the result of the last function is returned.
        """
        uncs = []
        for func in self._funcs:
            args = tuple(kwargs[name] for name in func.signature)
            uncs.append(func(*args))
        return uncs[-1]