Skip to content

Commit

Permalink
Merge pull request #257 from TopEFT/miscellaneous-updates
Browse files Browse the repository at this point in the history
Miscellaneous updates
  • Loading branch information
kmohrman authored May 7, 2022
2 parents c62f328 + f7c160c commit 012de99
Show file tree
Hide file tree
Showing 18 changed files with 160 additions and 100 deletions.
11 changes: 9 additions & 2 deletions analysis/topEFT/datacard_maker.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def read(self):
if 'ljptsum' in self.hists:
self.analysis_bins['ljptsum'] = [0, 400, 600, 1000, self.hists['ljptsum'].axis('ljptsum').edges()[-1]]
if 'ptz' in self.hists:
self.analysis_bins['ptz'] = [0, 80, 200, 320, 440, self.hists['ptz'].axis('ptz').edges()[-1]]
self.analysis_bins['ptz'] = [0, 200, 300, 400, 500, self.hists['ptz'].axis('ptz').edges()[-1]]
if 'o0pt' in self.hists:
self.analysis_bins['o0pt'] = [0, 100, 200, 400, self.hists['o0pt'].axis('o0pt').edges()[-1]]
if 'bl0pt' in self.hists:
Expand Down Expand Up @@ -635,6 +635,13 @@ def addYields(p, name, h_sm, allyields, iproc, signalcount, bkgcount, d_sigs, d_

for n,wc in enumerate(self.coeffs):
if self.do_sm: break

# NOTE: This is an ad hoc fix for the issue where ctlTi ends up in tttt for one category
# - It barely makes it over the tolerance threshold (with an integral of ~1.08e-05)
# - We don't know of any reason why ctlTi should affect tttt, so we think it is just noise
# - This causes problems because then the list of selected WCs is different per channel (and the model assumes they are the same for every channel, so this causes a mismatch)
# - So the current solution is to just hard code a check to enforce that this does not happen in this case
if wc == "ctlTi" and proc == "tttt": continue

# Check if linear terms are non null
name = '_'.join([pname[:-1],'lin',wc])
Expand Down Expand Up @@ -857,7 +864,7 @@ def condor_job(self, pklfile, njobs, wcs, do_nuisance, do_sm, var_lst):
condorFile.write('error = condor/log/$(ClusterID)_$(ProcId).err\n')
condorFile.write('log = condor/log/$(ClusterID).log\n')
condorFile.write('Rank = Memory >= 64\n')
condorFile.write('Request_Memory = 4 Gb\n')
condorFile.write('Request_Memory = 6 Gb\n')
condorFile.write('+JobFlavour = "workday"\n')
condorFile.write('getenv = True\n')
condorFile.write('Should_Transfer_Files = NO\n')
Expand Down
13 changes: 13 additions & 0 deletions analysis/topEFT/fullR2_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# This script runs the wq run script with all of the settings appropriate for making SR histos for the full R2 analysis

# Name the output
OUT_NAME="example_name"

# Build the run command
CFGS="../../topcoffea/cfg/mc_signal_samples_NDSkim.cfg,../../topcoffea/cfg/mc_background_samples_NDSkim.cfg,../../topcoffea/cfg/data_samples_NDSkim.cfg"
OPTIONS="--hist-list ana --skip-cr --do-systs -s 50000 --do-np -o $OUT_NAME"
RUN_COMMAND="time python work_queue_run.py $CFGS $OPTIONS"

# Run the processor over all Run2 samples
# Pass the command as a %s argument (never inside the format string) so any '%' in it prints literally
printf '\nRunning the following command:\n%s\n\n' "$RUN_COMMAND"
# Intentionally unquoted: word splitting expands RUN_COMMAND into the command and its arguments
$RUN_COMMAND
96 changes: 62 additions & 34 deletions analysis/topEFT/parse_datacard_templtes.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,20 @@ def draw_nom_up_do_overlay(h_n,h_u,h_d,save_path):
max_u = h_u.GetMaximum()
max_d = h_d.GetMaximum()
max_n = h_n.GetMaximum()
min_u = h_u.GetMinimum()
min_d = h_d.GetMinimum()
min_n = h_n.GetMinimum()
max_y = max(max_n, max(max_u,max_d))
h_u.GetYaxis().SetRangeUser(0.0,1.3*max_y)
min_y = min(min_n, min(min_u,min_d))
h_u.GetYaxis().SetRangeUser(min(1.3*min_y,0),1.3*max_y)

# Save
print("Saviang",save_path)
canvas.Print(save_path)




# Main function
def main():

Expand All @@ -100,49 +105,72 @@ def main():

out_basepath = args.out_path

# Very crude way of switching between run modes, maybe should put into the command line options
print_all_templates = 0
dump_negative = 0
make_plots = 1

# Get the list of template root files in the dc dir
files_all = os.listdir(args.datacards_dir_path)
template_files = dy.get_dc_file_names(files_all,ext=".root")

# Get list of all histos for a given category, just for ref
print_all_templates = False
### Get list of all histos for a given category, just for ref ###
if print_all_templates:
example_cat = "ttx_multileptons-2lss_p_2b.root"
all_histos = get_histo_names(ROOT.TFile.Open(os.path.join(args.datacards_dir_path,example_cat),"READ"),only_sm=True)
print(f"Printing all histos for cat {example_cat}:")
for name in all_histos: print(name)
print(f"({len(all_histos)} total)")
exit()

# Loop over templates
for template_name in template_files:

# Get root file and cat name
template_path_full = os.path.join(args.datacards_dir_path,template_name)
in_file = ROOT.TFile.Open(template_path_full,"READ")
cat_name = dy.get_cat_name_from_dc_name(template_name,".root")
print("Cat name:",cat_name)

# Get the dictionary of the variations
syst_name_dict = get_dict_of_nom_up_do_names(in_file)

# Make an output subdir for this category
out_basepath_forthiscat = os.path.join(out_basepath,cat_name)
os.mkdir(out_basepath_forthiscat)

# Make plot for each variation
ROOT.gROOT.SetBatch()
for proc_syst_var_name in syst_name_dict.keys():
print("proc_syst_var_name",proc_syst_var_name)
save_fpath = os.path.join(out_basepath_forthiscat,proc_syst_var_name+".png")
draw_nom_up_do_overlay(
h_n = in_file.Get(syst_name_dict[proc_syst_var_name]["nom"]),
h_u = in_file.Get(syst_name_dict[proc_syst_var_name]["up"]),
h_d = in_file.Get(syst_name_dict[proc_syst_var_name]["do"]),
save_path = save_fpath,
)

make_html(out_basepath_forthiscat)

### Get info about any negative bins ###
if dump_negative:
for template_name in template_files:
# Get root file and cat name
template_path_full = os.path.join(args.datacards_dir_path,template_name)
in_file = ROOT.TFile.Open(template_path_full,"READ")
cat_name = dy.get_cat_name_from_dc_name(template_name,".root")
print("Cat name:",cat_name)
all_histos = get_histo_names(in_file,only_sm=True)
for h_name in all_histos:
h = in_file.Get(h_name)
m = h.GetMinimum()
a = h.Integral()
if a < 0:
print(f"\t{h_name} sum val: {a}")
#if m < 0:
# print(f"\t{h_name} min val: {m}")

### Make plots for the nominal up and down ###
if make_plots:
# Loop over templates
for template_name in template_files:

# Get root file and cat name
template_path_full = os.path.join(args.datacards_dir_path,template_name)
in_file = ROOT.TFile.Open(template_path_full,"READ")
cat_name = dy.get_cat_name_from_dc_name(template_name,".root")
print("Cat name:",cat_name)

# Get the dictionary of the variations
syst_name_dict = get_dict_of_nom_up_do_names(in_file)

# Make an output subdir for this category
out_basepath_forthiscat = os.path.join(out_basepath,cat_name)
os.mkdir(out_basepath_forthiscat)

# Make plot for each variation
ROOT.gROOT.SetBatch()
for proc_syst_var_name in syst_name_dict.keys():
print("proc_syst_var_name",proc_syst_var_name)
save_fpath = os.path.join(out_basepath_forthiscat,proc_syst_var_name+".png")
n_dict = draw_nom_up_do_overlay(
h_n = in_file.Get(syst_name_dict[proc_syst_var_name]["nom"]),
h_u = in_file.Get(syst_name_dict[proc_syst_var_name]["up"]),
h_d = in_file.Get(syst_name_dict[proc_syst_var_name]["do"]),
save_path = save_fpath,
)

make_html(out_basepath_forthiscat)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion analysis/topEFT/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
# Figure out which hists to include
if args.hist_list == ["ana"]:
# Here we hardcode a list of hists used for the analysis
hist_lst = ["njets","ht","ptbl","ptz"]
hist_lst = ["njets","lj0pt","ptz"]
else:
# We want to specify a custom list
# If we don't specify this argument, it will be None, and the processor will fill all hists
Expand Down
33 changes: 16 additions & 17 deletions analysis/topEFT/topeft.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,22 +62,22 @@ def __init__(self, samples, wc_names_lst=[], hist_lst=None, ecut_threshold=None,
self._accumulator = processor.dict_accumulator({
"invmass" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("invmass", "$m_{\ell\ell}$ (GeV) ", 20, 0, 1000)),
"ptbl" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ptbl", "$p_{T}^{b\mathrm{-}jet+\ell_{min(dR)}}$ (GeV) ", 40, 0, 1000)),
"ptz" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ptz", "$p_{T}$ Z (GeV)", 40, 0, 1000)),
"ptz" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ptz", "$p_{T}$ Z (GeV)", 12, 0, 600)),
"njets" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("njets", "Jet multiplicity ", 10, 0, 10)),
"nbtagsl" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("nbtagsl", "Loose btag multiplicity ", 5, 0, 5)),
"l0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("l0pt", "Leading lep $p_{T}$ (GeV)", 10, 0, 100)),
"l0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("l0pt", "Leading lep $p_{T}$ (GeV)", 10, 0, 500)),
"l1pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("l1pt", "Subleading lep $p_{T}$ (GeV)", 10, 0, 100)),
"l1eta" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("l1eta", "Subleading $\eta$", 20, -2.5, 2.5)),
"j0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("j0pt", "Leading jet $p_{T}$ (GeV)", 100, 0, 1000)),
"b0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("b0pt", "Leading b jet $p_{T}$ (GeV)", 100, 0, 1000)),
"j0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("j0pt", "Leading jet $p_{T}$ (GeV)", 10, 0, 500)),
"b0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("b0pt", "Leading b jet $p_{T}$ (GeV)", 10, 0, 500)),
"l0eta" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("l0eta", "Leading lep $\eta$", 20, -2.5, 2.5)),
"j0eta" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("j0eta", "Leading jet $\eta$", 30, -3.0, 3.0)),
"ht" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ht", "H$_{T}$ (GeV)", 80, 0, 2000)),
"met" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("met", "MET (GeV)", 40, 0, 400)),
"ljptsum" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ljptsum", "S$_{T}$ (GeV)", 80, 0, 2000)),
"o0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("o0pt", "Leading l or b jet $p_{T}$ (GeV)", 40, 0, 1000)),
"bl0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("bl0pt", "Leading (b+l) $p_{T}$ (GeV)", 40, 0, 1000)),
"lj0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("lj0pt", "Leading pt of pair from l+j collection (GeV)", 40, 0, 1000)),
"ht" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ht", "H$_{T}$ (GeV)", 20, 0, 1000)),
"met" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("met", "MET (GeV)", 20, 0, 400)),
"ljptsum" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("ljptsum", "S$_{T}$ (GeV)", 11, 0, 1100)),
"o0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("o0pt", "Leading l or b jet $p_{T}$ (GeV)", 10, 0, 500)),
"bl0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("bl0pt", "Leading (b+l) $p_{T}$ (GeV)", 10, 0, 500)),
"lj0pt" : HistEFT("Events", wc_names_lst, hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("systematic", "Systematic Uncertainty"),hist.Cat("appl", "AR/SR"), hist.Bin("lj0pt", "Leading pt of pair from l+j collection (GeV)", 12, 0, 600)),
})

# Set the list of hists to fill
Expand Down Expand Up @@ -151,12 +151,11 @@ def process(self, events):
if d in dataset: dataset = dataset.split('_')[0]

# Set the sampleType (used for MC matching requirement)
conversionDatasets=[x%y for x in ['UL%s_TTGJets'] for y in '16APV,16,17,18'.split(",")]
sampleType = 'prompt'
sampleType = "prompt"
if isData:
sampleType = 'data'
elif dataset in conversionDatasets:
sampleType = 'conversions'
sampleType = "data"
elif histAxisName in get_param("conv_samples"):
sampleType = "conversions"

# Initialize objects
met = events.MET
Expand Down Expand Up @@ -723,12 +722,12 @@ def process(self, events):
},
},
"3l_CR" : {
"atleast_1j" : {
"exactly_0j" : {
"lep_chan_lst" : ["3l_CR"],
"lep_flav_lst" : ["eee" , "eem" , "emm", "mmm"],
"appl_lst" : ["isSR_3l" , "isAR_3l"],
},
"atleast_0j" : {
"atleast_1j" : {
"lep_chan_lst" : ["3l_CR"],
"lep_flav_lst" : ["eee" , "eem" , "emm", "mmm"],
"appl_lst" : ["isSR_3l" , "isAR_3l"],
Expand Down
2 changes: 1 addition & 1 deletion analysis/topEFT/work_queue_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
# Figure out which hists to include
if args.hist_list == ["ana"]:
# Here we hardcode a list of hists used for the analysis
hist_lst = ["njets","ht","ptbl","ptz"]
hist_lst = ["njets","lj0pt","ptz"]
else:
# We want to specify a custom list
# If we don't specify this argument, it will be None, and the processor will fill all hists
Expand Down
Loading

0 comments on commit 012de99

Please sign in to comment.