Merge pull request #207 from lgray/fix_deepjet_btag_parsing

Detect and deal with btag csv files that do not contain a tagger name
scikit-hep · Nov 19, 2019 · 26cc66d
2 parents c345b0f + 29d8fb5
commit 26cc66d
Show file tree

Hide file tree

Showing 4 changed files with 21 additions and 7 deletions.
diff --git a/coffea/lookup_tools/csv_converters.py b/coffea/lookup_tools/csv_converters.py
@@ -1,6 +1,7 @@
 from ..util import awkward
 from ..util import numpy as np
 import sys
+import warnings
 
 # pt except for reshaping, then discriminant
 btag_feval_dims = {0: [1], 1: [1], 2: [1], 3: [2]}
@@ -16,8 +17,15 @@ def convert_btag_csv_file(csvFilePath):
     btag_f = fopen(csvFilePath, fmode)
     nameandcols = btag_f.readline().split(';')
     btag_f.close()
-    name = nameandcols[0].strip()
-    columns = nameandcols[1].strip()
+    name = 'btagsf'
+    columns = None
+    if len(nameandcols) == 2:
+        name = nameandcols[0].strip()
+        columns = nameandcols[1].strip()
+    else:
+        warnings.warn('btagging SF file does not contain a name, using default!',
+                      RuntimeWarning)
+        columns = nameandcols[0].strip()
     columns = [column.strip() for column in columns.split(',')]
 
     corrections = np.genfromtxt(csvFilePath,
@@ -38,13 +46,13 @@ def convert_btag_csv_file(csvFilePath):
     for label in labels:
         etaMins = np.unique(corrections[np.where(all_names == label)][columns[4]])
         etaMaxs = np.unique(corrections[np.where(all_names == label)][columns[5]])
-        etaBins = np.union1d(etaMins, etaMaxs)
+        etaBins = np.union1d(etaMins, etaMaxs).astype(np.double)
         ptMins = np.unique(corrections[np.where(all_names == label)][columns[6]])
         ptMaxs = np.unique(corrections[np.where(all_names == label)][columns[7]])
-        ptBins = np.union1d(ptMins, ptMaxs)
+        ptBins = np.union1d(ptMins, ptMaxs).astype(np.double)
         discrMins = np.unique(corrections[np.where(all_names == label)][columns[8]])
         discrMaxs = np.unique(corrections[np.where(all_names == label)][columns[9]])
-        discrBins = np.union1d(discrMins, discrMaxs)
+        discrBins = np.union1d(discrMins, discrMaxs).astype(np.double)
         vals = np.zeros(shape=(len(discrBins) - 1, len(ptBins) - 1, len(etaBins) - 1),
                         dtype=corrections.dtype[10])
         for i, eta_bin in enumerate(etaBins[:-1]):

diff --git a/coffea/version.py b/coffea/version.py
@@ -30,7 +30,7 @@
 
 import re
 
-__version__ = "0.6.17"
+__version__ = "0.6.18"
 version = __version__
 version_info = tuple(re.split(r"[-\.]", __version__))
 

diff --git a/tests/samples/DeepJet_102XSF_WP_V1.btag.csv.gz b/tests/samples/DeepJet_102XSF_WP_V1.btag.csv.gz
diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py
@@ -106,7 +106,9 @@ def test_root_scalefactors():
 def test_btag_csv_scalefactors():
     extractor = lookup_tools.extractor()
     extractor.add_weight_sets(["testBTag * tests/samples/testBTagSF.btag.csv",
-                               "* * tests/samples/DeepCSV_102XSF_V1.btag.csv.gz"])
+                               "* * tests/samples/DeepCSV_102XSF_V1.btag.csv.gz",
+                               "* * tests/samples/DeepJet_102XSF_WP_V1.btag.csv.gz"
+                               ])
     extractor.finalize()
 
     evaluator = extractor.make_evaluator()
@@ -117,6 +119,10 @@ def test_btag_csv_scalefactors():
 
     print(evaluator['testBTagCSVv2_1_comb_up_0'])
 
+    print(evaluator['DeepCSV_1_comb_up_0'])
+
+    print(evaluator['btagsf_1_comb_up_0'])
+
     sf_out = evaluator['testBTagCSVv2_1_comb_up_0'](test_eta, test_pt, test_discr)