Merge branch 'master' into ch_update_histeft
anpicci authored Dec 19, 2024
2 parents 884b4cf + 656b5a6 commit 71d3485
Showing 86 changed files with 8,281 additions and 573 deletions.
8 changes: 4 additions & 4 deletions README.md
@@ -101,14 +101,14 @@ The [v0.5 tag](https://github.com/TopEFT/topcoffea/releases/tag/v0.5) was used t
time source fullR2_run.sh
```
- 2. Run the datacard maker to obtain the cards and templates (from the pickled histogram file produced in Step 1, be sure to use the version with the nonprompt estimation, i.e. the one with `_np` appended to the name you specified for the `OUT_NAME` in `fullR2_run.sh`).
+ 2. Run the datacard maker to obtain the SM cards and templates (from the pickled histogram file produced in Step 1, be sure to use the version with the nonprompt estimation, i.e. the one with `_np` appended to the name you specified for the `OUT_NAME` in `fullR2_run.sh`). This step also produces a `scalings-preselect.json` file, a later version of which is necessary for making the IM workspace. Note that the `--wc-scalings` option is not mandatory, but it enforces the ordering of the WCs in the scalings. Add the `-A` option to include all EFT templates in the datacards for the previous AAC model. Add the `-C` option to run on condor.
```
- time python make_cards.py /path/to/your/examplename_np.pkl.gz -C --do-nuisance --var-lst lj0pt ptz -d /scratch365/you/somedir --unblind --do-mc-stat
+ time python make_cards.py /path/to/your/examplename_np.pkl.gz --do-nuisance --var-lst lj0pt ptz -d /scratch365/you/somedir --unblind --do-mc-stat --wc-scalings cQQ1 cQei cQl3i cQlMi cQq11 cQq13 cQq81 cQq83 cQt1 cQt8 cbW cpQ3 cpQM cpt cptb ctG ctW ctZ ctei ctlSi ctlTi ctli ctp ctq1 ctq8 ctt1
```
- 3. Run the post-processing checks on the cards to look for any unexpected errors in the condor logs and to grab the right set of ptz and lj0pt templates and cards used in TOP-22-006. The script will copy the relevant cards/templates to a directory called `ptz-lj0pt_withSys` that it makes inside of the directory you pass that points to the cards and templates made in Step 2. This `ptz-lj0pt_withSys` is the directory that can be copied to wherever you plan to run the `combine` steps (e.g. PSI).
+ 3. Run the post-processing checks on the cards to look for any unexpected errors, to grab the right set of ptz and lj0pt templates/cards used in TOP-22-006, and to get the final version of the `scalings.json` file. The script will copy the relevant cards/templates, and write the JSON file, into a directory called `ptz-lj0pt_withSys` that it makes inside of the directory you pass that points to the cards and templates made in Step 2. This `ptz-lj0pt_withSys` is the directory that can be copied to wherever you plan to run the `combine` steps (e.g. PSI). This step can also be run on condor with `-c`.
```
- time python datacards_post_processing.py /scratch365/you/somedir -c -s
+ time python datacards_post_processing.py /scratch365/you/somedir -s
```
4. Check the yields with the `get_datacard_yields.py` script. This script will read the datacards in the directory produced in Step 3 and will dump the SM yields (summed over jet bins) to the screen (the text is formatted as a LaTeX table). Use the `--unblind` option if you want to also see the data numbers.
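A minimal invocation might look like the following sketch (the directory argument is an assumption, patterned on the `ptz-lj0pt_withSys` output of Step 3):
```
time python get_datacard_yields.py /scratch365/you/somedir/ptz-lj0pt_withSys --unblind
```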
135 changes: 78 additions & 57 deletions analysis/topeft_run2/analysis_processor.py
@@ -152,6 +152,15 @@ def process(self, events):
         xsec = self._samples[dataset]["xsec"]
         sow = self._samples[dataset]["nSumOfWeights"]

+        is_run3 = False
+        if year.startswith("202"):
+            is_run3 = True
+        is_run2 = not is_run3
+
+        run_era = None
+        if isData:
+            run_era = self._samples[dataset]["path"].split("/")[2].split("-")[0][-1]
+
         # Get up down weights from input dict
         if (self._do_systematics and not isData):
             if histAxisName in get_te_param("lo_xsec_samples"):
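As an aside, the new `run_era` extraction can be sanity-checked on a hypothetical sample path; the exact path layout is an assumption, not something shown in this diff:
```python
# Hypothetical data sample path; the real layout is an assumption.
path = "store/data/Run2022C-22Sep2023-v1/NANOAOD"
era = path.split("/")[2].split("-")[0][-1]  # "Run2022C-22Sep2023-v1" -> "Run2022C" -> "C"
print(era)  # prints: C
```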
@@ -194,9 +203,10 @@ def process(self, events):
                 sow_renormfactUp = -1
                 sow_renormfactDown = -1

-        datasets = ["SingleMuon", "SingleElectron", "EGamma", "MuonEG", "DoubleMuon", "DoubleElectron", "DoubleEG"]
+        datasets = ["Muon", "SingleMuon", "SingleElectron", "EGamma", "MuonEG", "DoubleMuon", "DoubleElectron", "DoubleEG"]
         for d in datasets:
-            if d in dataset: dataset = dataset.split('_')[0]
+            if dataset.startswith(d):
+                dataset = dataset.split('_')[0]

         # Set the sampleType (used for MC matching requirement)
         sampleType = "prompt"
@@ -215,13 +225,21 @@ def process(self, events):
         tau = events.Tau
         jets = events.Jet

+        if is_run3:
+            leptonSelection = te_os.run3leptonselection()
+            jetsRho = events.Rho["fixedGridRhoFastjetAll"]
+        elif is_run2:
+            leptonSelection = te_os.run2leptonselection()
+            jetsRho = events.fixedGridRhoFastjetAll
+
         # An array of length len(events) that is just 1 for each event
         # Probably there's a better way to do this, but we use this method elsewhere, so why not.
         events.nom = ak.ones_like(events.MET.pt)

         ele["idEmu"] = te_os.ttH_idEmu_cuts_E3(ele.hoe, ele.eta, ele.deltaEtaSC, ele.eInvMinusPInv, ele.sieie)
-        ele["conept"] = te_os.coneptElec(ele.pt, ele.mvaTTHUL, ele.jetRelIso)
-        mu["conept"] = te_os.coneptMuon(mu.pt, mu.mvaTTHUL, mu.jetRelIso, mu.mediumId)
+        ele["conept"] = leptonSelection.coneptElec(ele)
+        mu["conept"] = leptonSelection.coneptMuon(mu)
         ele["btagDeepFlavB"] = ak.fill_none(ele.matched_jet.btagDeepFlavB, -99)
         mu["btagDeepFlavB"] = ak.fill_none(mu.matched_jet.btagDeepFlavB, -99)
         if not isData:
@@ -235,6 +253,10 @@ def process(self, events):
golden_json_path = topcoffea_path("data/goldenJsons/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt")
elif year == "2018":
golden_json_path = topcoffea_path("data/goldenJsons/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt")
elif year == "2022":
golden_json_path = topcoffea_path("data/goldenJsons/Cert_Collisions2022_355100_362760_Golden.txt")
elif year == "2023":
golden_json_path = topcoffea_path("data/goldenJsons/Cert_Collisions2023_366442_370790_Golden.txt")
else:
raise ValueError(f"Error: Unknown year \"{year}\".")
lumi_mask = LumiMask(golden_json_path)(events.run,events.luminosityBlock)
@@ -254,18 +276,18 @@ def process(self, events):

         ################### Electron selection ####################

-        ele["isPres"] = te_os.isPresElec(ele.pt, ele.eta, ele.dxy, ele.dz, ele.miniPFRelIso_all, ele.sip3d, getattr(ele,"mvaFall17V2noIso_WPL"))
-        ele["isLooseE"] = te_os.isLooseElec(ele.miniPFRelIso_all,ele.sip3d,ele.lostHits)
-        ele["isFO"] = te_os.isFOElec(ele.pt, ele.conept, ele.btagDeepFlavB, ele.idEmu, ele.convVeto, ele.lostHits, ele.mvaTTHUL, ele.jetRelIso, ele.mvaFall17V2noIso_WP90, year)
-        ele["isTightLep"] = te_os.tightSelElec(ele.isFO, ele.mvaTTHUL)
+        ele["isPres"] = leptonSelection.isPresElec(ele)
+        ele["isLooseE"] = leptonSelection.isLooseElec(ele)
+        ele["isFO"] = leptonSelection.isFOElec(ele, year)
+        ele["isTightLep"] = leptonSelection.tightSelElec(ele)

         ################### Muon selection ####################

-        mu["pt"] = ApplyRochesterCorrections(year, mu, isData) # Need to apply corrections before doing muon selection
-        mu["isPres"] = te_os.isPresMuon(mu.dxy, mu.dz, mu.sip3d, mu.eta, mu.pt, mu.miniPFRelIso_all)
-        mu["isLooseM"] = te_os.isLooseMuon(mu.miniPFRelIso_all,mu.sip3d,mu.looseId)
-        mu["isFO"] = te_os.isFOMuon(mu.pt, mu.conept, mu.btagDeepFlavB, mu.mvaTTHUL, mu.jetRelIso, year)
-        mu["isTightLep"]= te_os.tightSelMuon(mu.isFO, mu.mediumId, mu.mvaTTHUL)
+        mu["pt"] = ApplyRochesterCorrections(year, mu, isData) # Run3 ready
+        mu["isPres"] = leptonSelection.isPresMuon(mu)
+        mu["isLooseM"] = leptonSelection.isLooseMuon(mu)
+        mu["isFO"] = leptonSelection.isFOMuon(mu, year)
+        mu["isTightLep"]= leptonSelection.tightSelMuon(mu)

         ################### Loose selection ####################

@@ -326,15 +348,22 @@ def process(self, events):
         ######### Systematics ###########

         # Define the lists of systematics we include
+        obj_jes_entries = []
+        with open(topeft_path('modules/jerc_dict.json'), 'r') as f:
+            jerc_dict = json.load(f)
+        for junc in jerc_dict[year]['junc']:
+            junc = junc.replace("Regrouped_", "")
+            obj_jes_entries.append(f'JES_{junc}Up')
+            obj_jes_entries.append(f'JES_{junc}Down')
         obj_correction_syst_lst = [
-            f'JER_{year}Up',f'JER_{year}Down', # Systs that affect the kinematics of objects
-            'JES_FlavorQCDUp', 'JES_AbsoluteUp', 'JES_RelativeBalUp', 'JES_BBEC1Up', 'JES_RelativeSampleUp', 'JES_FlavorQCDDown', 'JES_AbsoluteDown', 'JES_RelativeBalDown', 'JES_BBEC1Down', 'JES_RelativeSampleDown'
-        ]
+            f'JER_{year}Up', f'JER_{year}Down'
+        ] + obj_jes_entries
         if self.tau_h_analysis:
             obj_correction_syst_lst.append("TESUp")
             obj_correction_syst_lst.append("TESDown")
+            obj_correction_syst_lst.append("FESUp")
+            obj_correction_syst_lst.append("FESDown")

         wgt_correction_syst_lst = [
             "lepSF_muonUp","lepSF_muonDown","lepSF_elecUp","lepSF_elecDown",f"btagSFbc_{year}Up",f"btagSFbc_{year}Down","btagSFbc_corrUp","btagSFbc_corrDown",f"btagSFlight_{year}Up",f"btagSFlight_{year}Down","btagSFlight_corrUp","btagSFlight_corrDown","PUUp","PUDown","PreFiringUp","PreFiringDown",f"triggerSF_{year}Up",f"triggerSF_{year}Down", # Exp systs
             "FSRUp","FSRDown","ISRUp","ISRDown","renormUp","renormDown","factUp","factDown", # Theory systs
@@ -344,7 +373,7 @@ def process(self, events):
wgt_correction_syst_lst.append("lepSF_taus_realDown")
wgt_correction_syst_lst.append("lepSF_taus_fakeUp")
wgt_correction_syst_lst.append("lepSF_taus_fakeDown")

data_syst_lst = [
"FFUp","FFDown","FFptUp","FFptDown","FFetaUp","FFetaDown",f"FFcloseEl_{year}Up",f"FFcloseEl_{year}Down",f"FFcloseMu_{year}Up",f"FFcloseMu_{year}Down"
]
@@ -355,29 +384,35 @@ def process(self, events):
         # Note: Here we will add to the weights object the SFs that do not depend on any of the forthcoming loops
         weights_obj_base = coffea.analysis_tools.Weights(len(events),storeIndividual=True)
         if not isData:

             # If this is not an EFT sample, get the genWeight
-            if eft_coeffs is None: genw = events["genWeight"]
-            else: genw= np.ones_like(events["event"])
+            if eft_coeffs is None:
+                genw = events["genWeight"]
+            else:
+                genw = np.ones_like(events["event"])

             # Normalize by (xsec/sow)*genw where genw is 1 for EFT samples
             # Note that for theory systs, will need to multiply by sow/sow_wgtUp to get (xsec/sow_wgtUp)*genw and same for Down
             lumi = 1000.0*get_tc_param(f"lumi_{year}")
             weights_obj_base.add("norm",(xsec/sow)*genw*lumi)

+            if is_run2:
+                l1prefiring_args = [events.L1PreFiringWeight.Nom, events.L1PreFiringWeight.Up, events.L1PreFiringWeight.Dn]
+            elif is_run3:
+                l1prefiring_args = [ak.ones_like(events.nom), ak.ones_like(events.nom), ak.ones_like(events.nom)]

             # Attach PS weights (ISR/FSR) and scale weights (renormalization/factorization) and PDF weights
-            tc_cor.AttachPSWeights(events)
-            tc_cor.AttachScaleWeights(events)
-            #AttachPdfWeights(events) # TODO
-            # FSR/ISR weights
+            tc_cor.AttachPSWeights(events) #Run3 ready
+            tc_cor.AttachScaleWeights(events) #Run3 ready (with caveat on "nominal")
+            #AttachPdfWeights(events) #TODO
+            # FSR/ISR weights -- corrections come from AttachPSWeights
             weights_obj_base.add('ISR', events.nom, events.ISRUp*(sow/sow_ISRUp), events.ISRDown*(sow/sow_ISRDown))
             weights_obj_base.add('FSR', events.nom, events.FSRUp*(sow/sow_FSRUp), events.FSRDown*(sow/sow_FSRDown))
-            # renorm/fact scale
+            # renorm/fact scale -- corrections come from AttachScaleWeights
             weights_obj_base.add('renorm', events.nom, events.renormUp*(sow/sow_renormUp), events.renormDown*(sow/sow_renormDown))
             weights_obj_base.add('fact', events.nom, events.factUp*(sow/sow_factUp), events.factDown*(sow/sow_factDown))
-            # Prefiring and PU (note prefire weights only available in nanoAODv9)
-            weights_obj_base.add('PreFiring', events.L1PreFiringWeight.Nom, events.L1PreFiringWeight.Up, events.L1PreFiringWeight.Dn)
-            weights_obj_base.add('PU', tc_cor.GetPUSF((events.Pileup.nTrueInt), year), tc_cor.GetPUSF(events.Pileup.nTrueInt, year, 'up'), tc_cor.GetPUSF(events.Pileup.nTrueInt, year, 'down'))
+            # Prefiring and PU (note prefire weights only available in nanoAODv9 and for Run2)
+            weights_obj_base.add('PreFiring', *l1prefiring_args) #Run3 ready
+            weights_obj_base.add('PU', tc_cor.GetPUSF((events.Pileup.nTrueInt), year), tc_cor.GetPUSF(events.Pileup.nTrueInt, year, 'up'), tc_cor.GetPUSF(events.Pileup.nTrueInt, year, 'down')) #Run3 ready
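The `sow/sow_up` rescaling used for the ISR/FSR and renorm/fact variations can be checked with plain scalars: combined with the "norm" weight `(xsec/sow)*genw`, the Up variation ends up normalized to the varied sum of weights, as the comment in the code says. All numbers below are made up:
```python
# Scalar sanity check of the sum-of-weights rescaling:
xsec, genw, w_up = 10.0, 1.0, 1.2  # w_up plays the role of events.ISRUp
sow, sow_up = 5000.0, 5500.0
norm = (xsec / sow) * genw         # the "norm" weight added above
var = w_up * (sow / sow_up)        # the Up weight passed to weights_obj_base.add
assert abs(norm * var - (xsec / sow_up) * genw * w_up) < 1e-15
```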


######### The rest of the processor is inside this loop over systs that affect object kinematics ###########
@@ -408,20 +443,22 @@ def process(self, events):
             # Selecting jets and cleaning them
             jetptname = "pt_nom" if hasattr(cleanedJets, "pt_nom") else "pt"

+            cleanedJets["pt_raw"] = (1 - cleanedJets.rawFactor)*cleanedJets.pt
+            cleanedJets["mass_raw"] = (1 - cleanedJets.rawFactor)*cleanedJets.mass
+            cleanedJets["rho"] = ak.broadcast_arrays(jetsRho, cleanedJets.pt)[0]

             # Jet energy corrections
             if not isData:
-                cleanedJets["pt_raw"] = (1 - cleanedJets.rawFactor)*cleanedJets.pt
-                cleanedJets["mass_raw"] = (1 - cleanedJets.rawFactor)*cleanedJets.mass
-                cleanedJets["pt_gen"] =ak.values_astype(ak.fill_none(cleanedJets.matched_gen.pt, 0), np.float32)
-                cleanedJets["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll, cleanedJets.pt)[0]
-                events_cache = events.caches[0]
-                cleanedJets = ApplyJetCorrections(year, corr_type='jets').build(cleanedJets, lazy_cache=events_cache)
-                cleanedJets=ApplyJetSystematics(year,cleanedJets,syst_var)
-                met=ApplyJetCorrections(year, corr_type='met').build(met_raw, cleanedJets, lazy_cache=events_cache)
+                cleanedJets["pt_gen"] = ak.values_astype(ak.fill_none(cleanedJets.matched_gen.pt, 0), np.float32)
+            if self.tau_h_analysis:
+                tau["pt"], tau["mass"] = ApplyTESSystematic(year, tau, isData, syst_var)
+                tau["pt"], tau["mass"] = ApplyFESSystematic(year, tau, isData, syst_var)

+            events_cache = events.caches[0]
+            cleanedJets = ApplyJetCorrections(year, corr_type='jets', isData=isData, era=run_era).build(cleanedJets, lazy_cache=events_cache) #Run3 ready
+            cleanedJets = ApplyJetSystematics(year,cleanedJets,syst_var)
+            met = ApplyJetCorrections(year, corr_type='met', isData=isData, era=run_era).build(met_raw, cleanedJets, lazy_cache=events_cache)

             cleanedJets["isGood"] = tc_os.is_tight_jet(getattr(cleanedJets, jetptname), cleanedJets.eta, cleanedJets.jetId, pt_cut=30., eta_cut=get_te_param("eta_j_cut"), id_cut=get_te_param("jet_id_cut"))
             cleanedJets["isFwd"] = te_os.isFwdJet(getattr(cleanedJets, jetptname), cleanedJets.eta, cleanedJets.jetId, jetPtCut=40.)
             goodJets = cleanedJets[cleanedJets.isGood]
@@ -434,31 +471,15 @@ def process(self, events):
             j0 = goodJets[ak.argmax(goodJets.pt,axis=-1,keepdims=True)]

             # Loose DeepJet WP
-            if year == "2017":
-                btagwpl = get_tc_param("btag_wp_loose_UL17")
-            elif year == "2018":
-                btagwpl = get_tc_param("btag_wp_loose_UL18")
-            elif year=="2016":
-                btagwpl = get_tc_param("btag_wp_loose_UL16")
-            elif year=="2016APV":
-                btagwpl = get_tc_param("btag_wp_loose_UL16APV")
-            else:
-                raise ValueError(f"Error: Unknown year \"{year}\".")
+            loose_tag = "btag_wp_loose_" + year.replace("201", "UL1")
+            btagwpl = get_tc_param(loose_tag)
             isBtagJetsLoose = (goodJets.btagDeepFlavB > btagwpl)
             isNotBtagJetsLoose = np.invert(isBtagJetsLoose)
             nbtagsl = ak.num(goodJets[isBtagJetsLoose])

             # Medium DeepJet WP
-            if year == "2017":
-                btagwpm = get_tc_param("btag_wp_medium_UL17")
-            elif year == "2018":
-                btagwpm = get_tc_param("btag_wp_medium_UL18")
-            elif year=="2016":
-                btagwpm = get_tc_param("btag_wp_medium_UL16")
-            elif year=="2016APV":
-                btagwpm = get_tc_param("btag_wp_medium_UL16APV")
-            else:
-                raise ValueError(f"Error: Unknown year \"{year}\".")
+            medium_tag = "btag_wp_medium_" + year.replace("201", "UL1")
+            btagwpm = get_tc_param(medium_tag)
             isBtagJetsMedium = (goodJets.btagDeepFlavB > btagwpm)
             isNotBtagJetsMedium = np.invert(isBtagJetsMedium)
             nbtagsm = ak.num(goodJets[isBtagJetsMedium])
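The refactored working-point lookup leans on a small `str.replace` trick; a quick check of the mapping (the note on Run-3 years is an observation about the string logic, not about what keys `get_tc_param` actually defines):
```python
for y in ["2016", "2016APV", "2017", "2018"]:
    print("btag_wp_loose_" + y.replace("201", "UL1"))
# btag_wp_loose_UL16
# btag_wp_loose_UL16APV
# btag_wp_loose_UL17
# btag_wp_loose_UL18

# NB: a Run-3 year such as "2022" contains no "201", so it passes through
# unchanged ("btag_wp_loose_2022"); whether such a key exists is not shown here.
```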
7 changes: 4 additions & 3 deletions analysis/topeft_run2/datacards_post_processing.py
@@ -48,6 +48,9 @@ def main():

     ###### Print out general info ######

+    with open(os.path.join(args.datacards_path,'scalings-preselect.json'), 'r') as file:
+        scalings_content = json.load(file)
+
     # Count the number of text data cards and root templates
     n_text_cards = 0
     n_root_templates = 0
@@ -93,8 +96,6 @@ def main():
     for line in lines_from_condor_out_to_print:
         print(f"\t\t* In {line[0]}: {line[1]}")

     ####### Copy the TOP-22-006 relevant files to their own dir ######


@@ -143,6 +144,7 @@ def main():
     os.mkdir(ptzlj0pt_path)
     if args.set_up_top22006:
         print(f"\nCopying TOP-22-006 relevant files to {ptzlj0pt_path}...")
+
     if args.set_up_offZdivision:
         print(f"\nCopying 3l-offZ-division relevant files to {ptzlj0pt_path}...")
     for fname in datacard_files:
@@ -163,5 +165,4 @@ def main():
print("Done.\n")



main()