Skip to content

Commit

Permalink
Merge pull request #257 from TopEFT/miscellaneous-updates
Browse files Browse the repository at this point in the history
Miscellaneous updates
  • Loading branch information
kmohrman authored May 7, 2022
2 parents c62f328 + f7c160c commit 012de99
Show file tree
Hide file tree
Showing 18 changed files with 160 additions and 100 deletions.
11 changes: 9 additions & 2 deletions analysis/topEFT/datacard_maker.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def read(self):
if 'ljptsum' in self.hists:
self.analysis_bins['ljptsum'] = [0, 400, 600, 1000, self.hists['ljptsum'].axis('ljptsum').edges()[-1]]
if 'ptz' in self.hists:
self.analysis_bins['ptz'] = [0, 80, 200, 320, 440, self.hists['ptz'].axis('ptz').edges()[-1]]
self.analysis_bins['ptz'] = [0, 200, 300, 400, 500, self.hists['ptz'].axis('ptz').edges()[-1]]
if 'o0pt' in self.hists:
self.analysis_bins['o0pt'] = [0, 100, 200, 400, self.hists['o0pt'].axis('o0pt').edges()[-1]]
if 'bl0pt' in self.hists:
Expand Down Expand Up @@ -635,6 +635,13 @@ def addYields(p, name, h_sm, allyields, iproc, signalcount, bkgcount, d_sigs, d_

for n,wc in enumerate(self.coeffs):
if self.do_sm: break

# NOTE: This is an ad hoc fix for the issue where ctlTi ends up in tttt for one category
# - It barely makes it over the tolerance threshold (with an integral of ~1.08e-05)
# - We don't know of any reason why ctlTi should affect tttt, so we think it is just noise
# - This causes problems because then the list of selected WCs is different per channel (and the model assumes they are the same for every channel, so this causes a mismatch)
# - So the current solution is to just hard code a check to enforce that this does not happen in this case
if wc == "ctlTi" and proc == "tttt": continue

# Check if linear terms are non null
name = '_'.join([pname[:-1],'lin',wc])
Expand Down Expand Up @@ -857,7 +864,7 @@ def condor_job(self, pklfile, njobs, wcs, do_nuisance, do_sm, var_lst):
condorFile.write('error = condor/log/$(ClusterID)_$(ProcId).err\n')
condorFile.write('log = condor/log/$(ClusterID).log\n')
condorFile.write('Rank = Memory >= 64\n')
condorFile.write('Request_Memory = 4 Gb\n')
condorFile.write('Request_Memory = 6 Gb\n')
condorFile.write('+JobFlavour = "workday"\n')
condorFile.write('getenv = True\n')
condorFile.write('Should_Transfer_Files = NO\n')
Expand Down
13 changes: 13 additions & 0 deletions analysis/topEFT/fullR2_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# This script runs the wq run script with all of the settings appropriate for making SR histos for the full R2 analysis

# Name the output
OUT_NAME="example_name"

# Build the run command
CFGS="../../topcoffea/cfg/mc_signal_samples_NDSkim.cfg,../../topcoffea/cfg/mc_background_samples_NDSkim.cfg,../../topcoffea/cfg/data_samples_NDSkim.cfg"
OPTIONS="--hist-list ana --skip-cr --do-systs -s 50000 --do-np -o $OUT_NAME"
RUN_COMMAND="time python work_queue_run.py $CFGS $OPTIONS"

# Run the processor over all Run2 samples
# Pass the command as a %s argument (never inside the format string) so any '%' in it prints literally
printf '\nRunning the following command:\n%s\n\n' "$RUN_COMMAND"
# Intentionally unquoted: word splitting expands RUN_COMMAND into the command and its arguments
$RUN_COMMAND
96 changes: 62 additions & 34 deletions analysis/topEFT/parse_datacard_templtes.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,20 @@ def draw_nom_up_do_overlay(h_n,h_u,h_d,save_path):
max_u = h_u.GetMaximum()
max_d = h_d.GetMaximum()
max_n = h_n.GetMaximum()
min_u = h_u.GetMinimum()
min_d = h_d.GetMinimum()
min_n = h_n.GetMinimum()
max_y = max(max_n, max(max_u,max_d))
h_u.GetYaxis().SetRangeUser(0.0,1.3*max_y)
min_y = min(min_n, min(min_u,min_d))
h_u.GetYaxis().SetRangeUser(min(1.3*min_y,0),1.3*max_y)

# Save
print("Saviang",save_path)
canvas.Print(save_path)




# Main function
def main():

Expand All @@ -100,49 +105,72 @@ def main():

out_basepath = args.out_path

# Very crude way of switching between run modes, maybe should put into the command line options
print_all_templates = 0
dump_negative = 0
make_plots = 1

# Get the list of template root files in the dc dir
files_all = os.listdir(args.datacards_dir_path)
template_files = dy.get_dc_file_names(files_all,ext=".root")

# Get list of all histos for a given category, just for ref
print_all_templates = False
### Get list of all histos for a given category, just for ref ###
if print_all_templates:
example_cat = "ttx_multileptons-2lss_p_2b.root"
all_histos = get_histo_names(ROOT.TFile.Open(os.path.join(args.datacards_dir_path,example_cat),"READ"),only_sm=True)
print(f"Printing all histos for cat {example_cat}:")
for name in all_histos: print(name)
print(f"({len(all_histos)} total)")
exit()

# Loop over templates
for template_name in template_files:

# Get root file and cat name
template_path_full = os.path.join(args.datacards_dir_path,template_name)
in_file = ROOT.TFile.Open(template_path_full,"READ")
cat_name = dy.get_cat_name_from_dc_name(template_name,".root")
print("Cat name:",cat_name)

# Get the dictionary of the variations
syst_name_dict = get_dict_of_nom_up_do_names(in_file)

# Make an output subdir for this category
out_basepath_forthiscat = os.path.join(out_basepath,cat_name)
os.mkdir(out_basepath_forthiscat)

# Make plot for each variation
ROOT.gROOT.SetBatch()
for proc_syst_var_name in syst_name_dict.keys():
print("proc_syst_var_name",proc_syst_var_name)
save_fpath = os.path.join(out_basepath_forthiscat,proc_syst_var_name+".png")
draw_nom_up_do_overlay(
h_n = in_file.Get(syst_name_dict[proc_syst_var_name]["nom"]),
h_u = in_file.Get(syst_name_dict[proc_syst_var_name]["up"]),
h_d = in_file.Get(syst_name_dict[proc_syst_var_name]["do"]),
save_path = save_fpath,
)

make_html(out_basepath_forthiscat)

### Get info about any negative bins ###
if dump_negative:
for template_name in template_files:
# Get root file and cat name
template_path_full = os.path.join(args.datacards_dir_path,template_name)
in_file = ROOT.TFile.Open(template_path_full,"READ")
cat_name = dy.get_cat_name_from_dc_name(template_name,".root")
print("Cat name:",cat_name)
all_histos = get_histo_names(in_file,only_sm=True)
for h_name in all_histos:
h = in_file.Get(h_name)
m = h.GetMinimum()
a = h.Integral()
if a < 0:
print(f"\t{h_name} sum val: {a}")
#if m < 0:
# print(f"\t{h_name} min val: {m}")

### Make plots for the nominal up and down ###
if make_plots:
# Loop over templates
for template_name in template_files:

# Get root file and cat name
template_path_full = os.path.join(args.datacards_dir_path,template_name)
in_file = ROOT.TFile.Open(template_path_full,"READ")
cat_name = dy.get_cat_name_from_dc_name(template_name,".root")
print("Cat name:",cat_name)

# Get the dictionary of the variations
syst_name_dict = get_dict_of_nom_up_do_names(in_file)

# Make an output subdir for this category
out_basepath_forthiscat = os.path.join(out_basepath,cat_name)
os.mkdir(out_basepath_forthiscat)

# Make plot for each variation
ROOT.gROOT.SetBatch()
for proc_syst_var_name in syst_name_dict.keys():
print("proc_syst_var_name",proc_syst_var_name)
save_fpath = os.path.join(out_basepath_forthiscat,proc_syst_var_name+".png")
n_dict = draw_nom_up_do_overlay(
h_n = in_file.Get(syst_name_dict[proc_syst_var_name]["nom"]),
h_u = in_file.Get(syst_name_dict[proc_syst_var_name]["up"]),
h_d = in_file.Get(syst_name_dict[proc_syst_var_name]["do"]),
save_path = save_fpath,
)

make_html(out_basepath_forthiscat)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion analysis/topEFT/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
# Figure out which hists to include
if args.hist_list == ["ana"]:
# Here we hardcode a list of hists used for the analysis
hist_lst = ["njets","ht","ptbl","ptz"]
hist_lst = ["njets","lj0pt","ptz"]
else:
# We want to specify a custom list
# If we don't specify this argument, it will be None, and the processor will fill all hists
Expand Down
33 changes: 16 additions & 17 deletions analysis/topEFT/topeft.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,22 +62,22 @@ def __init__(self, samples, wc_names_lst=[], hist_lst=None, ecut_threshold=None,
self._accumulator = processor.dict_accumulator({
"invmass" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("invmass", "$m_{\ell\ell}$ (GeV) ", 20, 0, 1000)),
"ptbl" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ptbl", "$p_{T}^{b\mathrm{-}jet+\ell_{min(dR)}}$ (GeV) ", 40, 0, 1000)),
"ptz" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ptz", "$p_{T}$ Z (GeV)", 40, 0, 1000)),
"ptz" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ptz", "$p_{T}$ Z (GeV)", 12, 0, 600)),
"njets" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("njets", "Jet multiplicity ", 10, 0, 10)),
"nbtagsl" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("nbtagsl", "Loose btag multiplicity ", 5, 0, 5)),
"l0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("l0pt", "Leading lep $p_{T}$ (GeV)", 10, 0, 100)),
"l0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("l0pt", "Leading lep $p_{T}$ (GeV)", 10, 0, 500)),
"l1pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("l1pt", "Subleading lep $p_{T}$ (GeV)", 10, 0, 100)),
"l1eta" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("l1eta", "Subleading $\eta$", 20, -2.5, 2.5)),
"j0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("j0pt", "Leading jet $p_{T}$ (GeV)", 100, 0, 1000)),
"b0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("b0pt", "Leading b jet $p_{T}$ (GeV)", 100, 0, 1000)),
"j0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("j0pt", "Leading jet $p_{T}$ (GeV)", 10, 0, 500)),
"b0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("b0pt", "Leading b jet $p_{T}$ (GeV)", 10, 0, 500)),
"l0eta" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("l0eta", "Leading lep $\eta$", 20, -2.5, 2.5)),
"j0eta" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("j0eta", "Leading jet $\eta$", 30, -3.0, 3.0)),
"ht" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ht", "H$_{T}$ (GeV)", 80, 0, 2000)),
"met" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("met", "MET (GeV)", 40, 0, 400)),
"ljptsum" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ljptsum", "S$_{T}$ (GeV)", 80, 0, 2000)),
"o0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("o0pt", "Leading l or b jet $p_{T}$ (GeV)", 40, 0, 1000)),
"bl0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("bl0pt", "Leading (b+l) $p_{T}$ (GeV)", 40, 0, 1000)),
"lj0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("lj0pt", "Leading pt of pair from l+j collection (GeV)", 40, 0, 1000)),
"ht" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ht", "H$_{T}$ (GeV)", 20, 0, 1000)),
"met" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("met", "MET (GeV)", 20, 0, 400)),
"ljptsum" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ljptsum", "S$_{T}$ (GeV)", 11, 0, 1100)),
"o0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("o0pt", "Leading l or b jet $p_{T}$ (GeV)", 10, 0, 500)),
"bl0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("bl0pt", "Leading (b+l) $p_{T}$ (GeV)", 10, 0, 500)),
"lj0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("lj0pt", "Leading pt of pair from l+j collection (GeV)", 12, 0, 600)),
})

# Set the list of hists to fill
Expand Down Expand Up @@ -151,12 +151,11 @@ def process(self, events):
if d in dataset: dataset = dataset.split('_')[0]

# Set the sampleType (used for MC matching requirement)
conversionDatasets=[x%y for x in ['UL%s_TTGJets'] for y in '16APV,16,17,18'.split(",")]
sampleType = 'prompt'
sampleType = "prompt"
if isData:
sampleType = 'data'
elif dataset in conversionDatasets:
sampleType = 'conversions'
sampleType = "data"
elif histAxisName in get_param("conv_samples"):
sampleType = "conversions"

# Initialize objects
met = events.MET
Expand Down Expand Up @@ -723,12 +722,12 @@ def process(self, events):
},
},
"3l_CR" : {
"atleast_1j" : {
"exactly_0j" : {
"lep_chan_lst" : ["3l_CR"],
"lep_flav_lst" : ["eee" , "eem" , "emm", "mmm"],
"appl_lst" : ["isSR_3l" , "isAR_3l"],
},
"atleast_0j" : {
"atleast_1j" : {
"lep_chan_lst" : ["3l_CR"],
"lep_flav_lst" : ["eee" , "eem" , "emm", "mmm"],
"appl_lst" : ["isSR_3l" , "isAR_3l"],
Expand Down
2 changes: 1 addition & 1 deletion analysis/topEFT/work_queue_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
# Figure out which hists to include
if args.hist_list == ["ana"]:
# Here we hardcode a list of hists used for the analysis
hist_lst = ["njets","ht","ptbl","ptz"]
hist_lst = ["njets","lj0pt","ptz"]
else:
# We want to specify a custom list
# If we don't specify this argument, it will be None, and the processor will fill all hists
Expand Down
Loading

0 comments on commit 012de99

Please sign in to comment.