From e5b7b2bbfa8c3a91a68292ef1e4fd29ad36f0e0b Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Wed, 29 Jan 2020 18:06:55 +0100
Subject: [PATCH 01/28] Add option to pass variables on the command line for
 formatting

---
 fast_plotter/__main__.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index b7fadd8..19f4f53 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -2,6 +2,7 @@
 Turn them tables into plots
 """
 import os
+import six
 import logging
 import matplotlib
 matplotlib.use('Agg')
@@ -41,6 +42,10 @@ def arg_parser(args=None):
                         help="Scale the MC yields by this lumi")
     parser.add_argument("-y", "--yscale", default="log", choices=["log", "linear"],
                         help="Use this scale for the y-axis")
+    def split_equals(arg):
+        return arg.split("=")
+    parser.add_argument("-v", "--variable", dest="variables", action="append", default=[], type=split_equals,
+                        help="Define a variable to expand in the config file")
     parser.add_argument("--halt-errors", dest="continue_errors", default=True, action="store_false",
                         help="Stop at the first time an error occurs")
     return parser
@@ -64,6 +69,8 @@ def main(args=None):
 
 def process_cfg(cfg_file, args):
     import yaml
+    from argparse import Namespace
+    from string import Template
     with open(cfg_file, "r") as infile:
         cfg = yaml.load(infile)
     # Only way to neatly allow cmd-line args to override config and handle
@@ -71,6 +78,20 @@ def process_cfg(cfg_file, args):
     parser = arg_parser()
     parser.set_defaults(**cfg)
     args = parser.parse_args()
+    if args.variables:
+
+        def recursive_replace(value, replacements):
+            if isinstance(value, (tuple, list)):
+                return type(value)([recursive_replace(v, replacements) for v in value])
+            if isinstance(value, dict):
+                return {k: recursive_replace(v, replacements) for k, v in value.items()}
+            if isinstance(value, six.string_types):
+                return Template(value).safe_substitute(replacements)
+                #return value.format(**replacements)
+            return value
+
+        replacements = dict(args.variables)
+        args = Namespace(**recursive_replace(vars(args), replacements))
 
     return args
 

From 32e44665d464e93704fa70e673a8dd914666cec8 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Wed, 5 Feb 2020 16:42:17 +0100
Subject: [PATCH 02/28] Extend the unit tests for pad_zero

---
 tests/test_plotting.py | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index a7f2745..4fdc173 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -2,7 +2,7 @@
 from fast_plotter import plotting
 
 
-def test_pad_zero():
+def test_pad_zero_noYs():
     x = np.arange(5)
     padded = plotting.pad_zero(x)
     assert (padded == np.arange(-1, 6)).all()
@@ -18,3 +18,38 @@ def test_pad_zero():
     x = np.concatenate(([-np.inf], x), axis=0)
     padded = plotting.pad_zero(x)
     assert (padded == np.arange(1, 5)).all()
+
+def test_pad_zero_oneY():
+
+    x = np.arange(5)
+    y = np.arange(5, 0, -1)
+    pad_x, pad_y = plotting.pad_zero(x, [y])
+    assert (pad_x == np.arange(-1, 6)).all()
+    expected_y = np.concatenate(([0], y, [0]), axis=0)
+    assert np.array_equal(pad_y, expected_y)
+
+    x = np.concatenate(([-np.inf], x, [np.inf]), axis=0)
+    y = np.arange(len(x), 0, -1)
+    pad_x, pad_y = plotting.pad_zero(x, [y])
+    assert (pad_x == np.arange(-1, 6)).all()
+    assert np.array_equal(pad_y, y)
+
+    x = np.arange(2, 4)
+    y = np.arange(len(x), 0, -1)
+    pad_x, pad_y = plotting.pad_zero(x, y)
+    print(x, y)
+    print(pad_x, pad_y)
+    assert (pad_x == np.arange(1, 5)).all()
+    expected_y = np.concatenate(([0], y, [0]), axis=0)
+    assert np.array_equal(pad_y, expected_y)
+
+    x = np.concatenate(([-np.inf], x), axis=0)
+    y = np.arange(len(x), 0, -1)
+    pad_x, pad_y = plotting.pad_zero(x, y)
+    print(x, y)
+    print(pad_x, pad_y)
+    assert (pad_x == np.arange(1, 5)).all()
+    expected_y = np.concatenate((y, [0]), axis=0)
+    assert np.array_equal(pad_y, expected_y)
+
+    assert False

From 6ac3a96673ea363628b382b11f97254f08ec2ce7 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Wed, 5 Feb 2020 16:57:18 +0100
Subject: [PATCH 03/28] Rename pad_zero to normalize_values

---
 fast_plotter/plotting.py |  8 ++++++--
 tests/test_plotting.py   | 16 ++++++++--------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 5824b49..57ce7d8 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -185,7 +185,11 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
         raise RuntimeError("Unknown value for 'kind', '{}'".format(kind))
 
 
-def pad_zero(x, y_values=[], fill_val=0):
+def normalize_values(x, y_values=[], fill_val=0):
+    # if X has +/- inf at an end, replace this X value with +/- the previous/next value of X +/- the mean width in X
+    # if any requested X values are missing:
+        # insert dummy values into X and Y values at the right location
+    # insert a dummy entry to X and Y for all arrays
     if x.dtype.kind not in 'bifc':
         return (x,) + tuple(y_values)
     do_pad_left = not np.isneginf(x[0])
@@ -361,7 +365,7 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]):
 def draw(ax, method, x, ys, **kwargs):
     fill_val = kwargs.pop("fill_val", 0)
     if x.dtype.kind in 'biufc':
-        values = pad_zero(x, [kwargs[y] for y in ys], fill_val=fill_val)
+        values = normalize_values(x, [kwargs[y] for y in ys], fill_val=fill_val)
         x = values[0]
         new_ys = values[1:]
         kwargs.update(dict(zip(ys, new_ys)))
diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index 4fdc173..a75aebc 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -4,39 +4,39 @@
 
 def test_pad_zero_noYs():
     x = np.arange(5)
-    padded = plotting.pad_zero(x)
+    padded = plotting.normalize_values(x)
     assert (padded == np.arange(-1, 6)).all()
 
     x = np.concatenate(([-np.inf], x, [np.inf]), axis=0)
-    padded = plotting.pad_zero(x)
+    padded = plotting.normalize_values(x)
     assert (padded == np.arange(-1, 6)).all()
 
     x = np.arange(2, 4)
-    padded = plotting.pad_zero(x)
+    padded = plotting.normalize_values(x)
     assert (padded == np.arange(1, 5)).all()
 
     x = np.concatenate(([-np.inf], x), axis=0)
-    padded = plotting.pad_zero(x)
+    padded = plotting.normalize_values(x)
     assert (padded == np.arange(1, 5)).all()
 
 def test_pad_zero_oneY():
 
     x = np.arange(5)
     y = np.arange(5, 0, -1)
-    pad_x, pad_y = plotting.pad_zero(x, [y])
+    pad_x, pad_y = plotting.normalize_values(x, [y])
     assert (pad_x == np.arange(-1, 6)).all()
     expected_y = np.concatenate(([0], y, [0]), axis=0)
     assert np.array_equal(pad_y, expected_y)
 
     x = np.concatenate(([-np.inf], x, [np.inf]), axis=0)
     y = np.arange(len(x), 0, -1)
-    pad_x, pad_y = plotting.pad_zero(x, [y])
+    pad_x, pad_y = plotting.normalize_values(x, [y])
     assert (pad_x == np.arange(-1, 6)).all()
     assert np.array_equal(pad_y, y)
 
     x = np.arange(2, 4)
     y = np.arange(len(x), 0, -1)
-    pad_x, pad_y = plotting.pad_zero(x, y)
+    pad_x, pad_y = plotting.normalize_values(x, y)
     print(x, y)
     print(pad_x, pad_y)
     assert (pad_x == np.arange(1, 5)).all()
@@ -45,7 +45,7 @@ def test_pad_zero_oneY():
 
     x = np.concatenate(([-np.inf], x), axis=0)
     y = np.arange(len(x), 0, -1)
-    pad_x, pad_y = plotting.pad_zero(x, y)
+    pad_x, pad_y = plotting.normalize_values(x, y)
     print(x, y)
     print(pad_x, pad_y)
     assert (pad_x == np.arange(1, 5)).all()

From 9d63709fea8ba3218255fed5a8b570a1c7f05932 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Fri, 7 Feb 2020 15:30:28 +0100
Subject: [PATCH 04/28] Add option to control margins

---
 fast_plotter/__main__.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 19f4f53..5065c4b 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -142,12 +142,12 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
         main_ax.grid(True)
         main_ax.set_axisbelow(True)
         for axis, lims in limits.items():
-            lims = map(float, lims)
-            if axis.lower() == "x":
-                main_ax.set_xlim(*lims)
-            if axis.lower() == "y":
-                main_ax.set_ylim(*lims)
-
+            if isinstance(lims, (tuple, list)):
+                lims = map(float, lims)
+                if axis.lower() in "xy":
+                    getattr(main_ax, "set_%slim" % axis)(*lims)
+            elif lims.endswith("%"):
+                main_ax.margins(**{axis: float(lims[:-1])})
 
 def save_plots(infile, weight, plots, outdir, extensions):
     binning, name = decipher_filename(infile)

From 9a087d0915f2dacf518840c458ac1e9253cfccac Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Fri, 7 Feb 2020 15:30:52 +0100
Subject: [PATCH 05/28] Fix up padding functions and tests

---
 fast_plotter/plotting.py | 70 ++++++++++++++++++++++++++++------------
 tests/test_plotting.py   | 63 ++++++++++++++++++++++--------------
 2 files changed, 87 insertions(+), 46 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 57ce7d8..082281e 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -185,33 +185,61 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
         raise RuntimeError("Unknown value for 'kind', '{}'".format(kind))
 
 
-def normalize_values(x, y_values=[], fill_val=0):
-    # if X has +/- inf at an end, replace this X value with +/- the previous/next value of X +/- the mean width in X
+def standardize_values(x, y_values=[], fill_val=0, expected_xs=None):
+    """
+    Standardize a set of arrays so they're ready to be plotted directly for matplotlib
+    """
     # if any requested X values are missing:
         # insert dummy values into X and Y values at the right location
-    # insert a dummy entry to X and Y for all arrays
-    if x.dtype.kind not in 'bifc':
-        return (x,) + tuple(y_values)
-    do_pad_left = not np.isneginf(x[0])
-    do_pad_right = not np.isposinf(x[-1])
-    width_slice = x[None if do_pad_left else 1:None if do_pad_right else -1]
+    if expected_xs is not None:
+        x, y_values = add_missing_vals(x, expected_xs, y_values=y_values, fill_val=fill_val)
+
+    if x.dtype.kind in 'bifc':
+        x = replace_infs(x)
+
+        x, y_values = pad_ends(x, y_values=y_values, fill_val=fill_val)
+    return (x,) + tuple(y_values)
+
+
+def replace_infs(x):
+    """
+    Replace (pos or neg) infinities at the ends of an array of floats
+
+    Algorithm: X has +/- inf at an end, replace this X value with +/- the
+    previous/next value of X +/- the mean width in X
+    """
+    x = x[:] # Make a copy of the array
+    is_left_inf = np.isneginf(x[0])
+    is_right_inf = np.isposinf(x[-1])
+    width_slice = x[1 if is_left_inf else None:-1 if is_right_inf else None]
     mean_width = width_slice[0]
     if len(width_slice) > 1:
         mean_width = np.diff(width_slice).mean()
-    x_left_padding = [x[0] - mean_width, x[0]
-                      ] if do_pad_left else [x[1] - mean_width]
-    x_right_padding = [x[-1], x[-1] + mean_width] if do_pad_right else [x[-2] + mean_width]
+    if is_left_inf:
+        x[0] = x[1] - mean_width
+    if is_right_inf:
+        x[-1] = x[-2] + mean_width
+    return x
+
+
+def add_missing_vals(x, expected_xs, y_values=[], fill_val=0):
+    """
+    Check from a list of expected x values, if all occur in x.  If any are missing 
+    """
+    raise NotImplementedError()
+
 
-    x = np.concatenate((x_left_padding, x[1:-1], x_right_padding))
-    new_values = []
-    for y in y_values:
-        y_left_padding = [fill_val, y[1]] if do_pad_left else [fill_val]
-        y_right_padding = [y[-2], fill_val] if do_pad_right else [fill_val]
-        y[np.isnan(y)] = fill_val
-        y = np.concatenate((y_left_padding, y[1:-1], y_right_padding))
-        new_values.append(y)
+def pad_ends(x, y_values=[], fill_val=0):
+    """
+    Insert a dummy entry to X and Y for all arrays
+    """
+    mean_width = x[0]
+    if len(x) > 1:
+        mean_width = np.diff(x).mean()
 
-    return (x,) + tuple(new_values)
+    x = np.concatenate((x[0:1] - mean_width, x, x[-1:] + mean_width), axis=0)
+    new_values = [np.concatenate(([fill_val], y, [fill_val]), axis=0) for y in y_values]
+    return x, tuple(new_values)
 
 
 def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
@@ -365,7 +393,7 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]):
 def draw(ax, method, x, ys, **kwargs):
     fill_val = kwargs.pop("fill_val", 0)
     if x.dtype.kind in 'biufc':
-        values = normalize_values(x, [kwargs[y] for y in ys], fill_val=fill_val)
+        values = standardize_values(x, [kwargs[y] for y in ys], fill_val=fill_val)
         x = values[0]
         new_ys = values[1:]
         kwargs.update(dict(zip(ys, new_ys)))
diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index a75aebc..e6b118a 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -2,54 +2,67 @@
 from fast_plotter import plotting
 
 
+def test_replace_inf():
+    x = np.arange(5)
+    replaced = plotting.replace_infs(x)
+    assert np.array_equal(replaced, x)
+
+    x = np.concatenate(([-np.inf], x, [np.inf]), axis=0)
+    replaced = plotting.replace_infs(x)
+    assert np.array_equal(replaced, np.arange(-1, 6))
+
+    x = np.arange(2, 4)
+    replaced = plotting.replace_infs(x)
+    assert np.array_equal(replaced, np.arange(2, 4))
+
+    x = np.concatenate(([-np.inf], x), axis=0)
+    replaced = plotting.replace_infs(x)
+    assert np.array_equal(replaced, np.arange(1, 4))
+
+
 def test_pad_zero_noYs():
     x = np.arange(5)
-    padded = plotting.normalize_values(x)
-    assert (padded == np.arange(-1, 6)).all()
+    padded, = plotting.standardize_values(x)
+    assert np.array_equal(padded, np.arange(-1, 6))
 
     x = np.concatenate(([-np.inf], x, [np.inf]), axis=0)
-    padded = plotting.normalize_values(x)
-    assert (padded == np.arange(-1, 6)).all()
+    padded, = plotting.standardize_values(x)
+    assert np.array_equal(padded, np.arange(-2, 7))
 
     x = np.arange(2, 4)
-    padded = plotting.normalize_values(x)
-    assert (padded == np.arange(1, 5)).all()
+    padded, = plotting.standardize_values(x)
+    assert np.array_equal(padded, np.arange(1, 5))
 
     x = np.concatenate(([-np.inf], x), axis=0)
-    padded = plotting.normalize_values(x)
-    assert (padded == np.arange(1, 5)).all()
+    padded, = plotting.standardize_values(x)
+    assert np.array_equal(padded, np.arange(0, 5, dtype=float))
 
-def test_pad_zero_oneY():
 
+def test_pad_zero_oneY():
     x = np.arange(5)
     y = np.arange(5, 0, -1)
-    pad_x, pad_y = plotting.normalize_values(x, [y])
-    assert (pad_x == np.arange(-1, 6)).all()
     expected_y = np.concatenate(([0], y, [0]), axis=0)
+    pad_x, pad_y = plotting.standardize_values(x, [y])
+    assert np.array_equal(pad_x, np.arange(-1, 6))
     assert np.array_equal(pad_y, expected_y)
 
     x = np.concatenate(([-np.inf], x, [np.inf]), axis=0)
     y = np.arange(len(x), 0, -1)
-    pad_x, pad_y = plotting.normalize_values(x, [y])
-    assert (pad_x == np.arange(-1, 6)).all()
-    assert np.array_equal(pad_y, y)
+    expected_y = np.concatenate(([0], y, [0]), axis=0)
+    pad_x, pad_y = plotting.standardize_values(x, [y])
+    assert np.array_equal(pad_x, np.arange(-2, 7))
+    assert np.array_equal(pad_y, expected_y)
 
     x = np.arange(2, 4)
     y = np.arange(len(x), 0, -1)
-    pad_x, pad_y = plotting.normalize_values(x, y)
-    print(x, y)
-    print(pad_x, pad_y)
-    assert (pad_x == np.arange(1, 5)).all()
     expected_y = np.concatenate(([0], y, [0]), axis=0)
+    pad_x, pad_y = plotting.standardize_values(x, [y])
+    assert np.array_equal(pad_x, np.arange(1, 5))
     assert np.array_equal(pad_y, expected_y)
 
     x = np.concatenate(([-np.inf], x), axis=0)
     y = np.arange(len(x), 0, -1)
-    pad_x, pad_y = plotting.normalize_values(x, y)
-    print(x, y)
-    print(pad_x, pad_y)
-    assert (pad_x == np.arange(1, 5)).all()
-    expected_y = np.concatenate((y, [0]), axis=0)
+    expected_y = np.concatenate(([0], y, [0]), axis=0)
+    pad_x, pad_y = plotting.standardize_values(x, [y])
+    assert np.array_equal(pad_x, np.arange(0, 5))
     assert np.array_equal(pad_y, expected_y)
-
-    assert False

From 956beb993c95e39cde8dc6f9ae33c0b89132b648 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Fri, 7 Feb 2020 15:49:44 +0100
Subject: [PATCH 06/28] Fix pep8 issues

---
 fast_plotter/__main__.py |  3 ++-
 fast_plotter/plotting.py | 10 ++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index a9f152e..e552ff7 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -42,6 +42,7 @@ def arg_parser(args=None):
                         help="Scale the MC yields by this lumi")
     parser.add_argument("-y", "--yscale", default="log", choices=["log", "linear"],
                         help="Use this scale for the y-axis")
+
     def split_equals(arg):
         return arg.split("=")
     parser.add_argument("-v", "--variable", dest="variables", action="append", default=[], type=split_equals,
@@ -87,7 +88,6 @@ def recursive_replace(value, replacements):
                 return {k: recursive_replace(v, replacements) for k, v in value.items()}
             if isinstance(value, six.string_types):
                 return Template(value).safe_substitute(replacements)
-                #return value.format(**replacements)
             return value
 
         replacements = dict(args.variables)
@@ -149,6 +149,7 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
             elif lims.endswith("%"):
                 main_ax.margins(**{axis: float(lims[:-1])})
 
+
 def save_plots(infile, weight, plots, outdir, extensions):
     binning, name = decipher_filename(infile)
     kernel = "plot_" + ".".join(binning)
diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index b59aa47..5912c2b 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -188,9 +188,11 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
 def standardize_values(x, y_values=[], fill_val=0, expected_xs=None):
     """
     Standardize a set of arrays so they're ready to be plotted directly for matplotlib
+
+    Algorithm:
+    if any requested X values are missing:
+        insert dummy values into X and Y values at the right location
     """
-    # if any requested X values are missing:
-        # insert dummy values into X and Y values at the right location
     if expected_xs is not None:
         x, y_values = add_missing_vals(x, expected_xs, y_values=y_values, fill_val=fill_val)
 
@@ -208,7 +210,7 @@ def replace_infs(x):
     Algorithm: X has +/- inf at an end, replace this X value with +/- the
     previous/next value of X +/- the mean width in X
     """
-    x = x[:] # Make a copy of the array
+    x = x[:]  # Make a copy of the array
     is_left_inf = np.isneginf(x[0])
     is_right_inf = np.isposinf(x[-1])
     width_slice = x[1 if is_left_inf else None:-1 if is_right_inf else None]
@@ -224,7 +226,7 @@ def replace_infs(x):
 
 def add_missing_vals(x, expected_xs, y_values=[], fill_val=0):
     """
-    Check from a list of expected x values, if all occur in x.  If any are missing 
+    Check from a list of expected x values, if all occur in x.  If any are missing
     """
     raise NotImplementedError()
 

From b2852f3c2ebcf8efb6f129c0edb4d3594761f153 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sat, 8 Feb 2020 00:54:29 +0100
Subject: [PATCH 07/28] Resolve SettingWithCopyWarning warnings from pandas

---
 fast_plotter/__main__.py | 4 +++-
 fast_plotter/utils.py    | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index e552ff7..9647ee4 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -115,7 +115,9 @@ def process_one_file(infile, args):
                                       regex=args.data,
                                       level=args.dataset_col)
                 for col in df_filtered.columns:
-                    df_filtered[col][data_rows] = df["n"][data_rows]
+                    if col == "n":
+                        continue
+                    df_filtered.loc[data_rows, col] = df["n"][data_rows]
             df_filtered.columns = [
                 n.replace(weight + ":", "") for n in df_filtered.columns]
         if hasattr(args, "value_replacements"):
diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py
index db1e717..21186b1 100644
--- a/fast_plotter/utils.py
+++ b/fast_plotter/utils.py
@@ -97,8 +97,9 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r
     for column in df:
         if do_rel_err and column.endswith("sumw"):
             err_name = column.replace("sumw", err_label)
-            df[err_name] = np.true_divide(df[column], root_n)
-            df[err_name][~np.isfinite(df[err_name])] = np.nan
+            errs = np.true_divide(df[column], root_n)
+            errs = np.nan_to_num(errs, copy=False, nan=np.nan, posinf=np.nan, neginf=np.nan)
+            df[err_name] = errs
         elif not do_rel_err and sumw2_label in column:
             err_name = column.replace(sumw2_label, err_label)
             df[err_name] = np.sqrt(df[column])

From dc4311211ec0b66151a8c002aa9b835cfd383a8a Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sat, 8 Feb 2020 00:56:25 +0100
Subject: [PATCH 08/28] Resolve warning from YAML about safe_load

---
 fast_plotter/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 9647ee4..5cc467f 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -73,7 +73,7 @@ def process_cfg(cfg_file, args):
     from argparse import Namespace
     from string import Template
     with open(cfg_file, "r") as infile:
-        cfg = yaml.load(infile)
+        cfg = yaml.safe_load(infile)
     # Only way to neatly allow cmd-line args to override config and handle
     # defaults seems to be:
     parser = arg_parser()

From a1a35d5e5f20a446b13cc9849dcbd0395a2e11fc Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sat, 8 Feb 2020 01:14:27 +0100
Subject: [PATCH 09/28] Don't use nan_to_num: nan/posinf/neginf keywords
 require numpy >= 1.17

---
 fast_plotter/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py
index 21186b1..223ba68 100644
--- a/fast_plotter/utils.py
+++ b/fast_plotter/utils.py
@@ -98,7 +98,7 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r
         if do_rel_err and column.endswith("sumw"):
             err_name = column.replace("sumw", err_label)
             errs = np.true_divide(df[column], root_n)
-            errs = np.nan_to_num(errs, copy=False, nan=np.nan, posinf=np.nan, neginf=np.nan)
+            errs[~np.isfinite(errs)] = np.nan
             df[err_name] = errs
         elif not do_rel_err and sumw2_label in column:
             err_name = column.replace(sumw2_label, err_label)

From c11814c20bdff12f3cca9787a5cffad25625d9b9 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sat, 8 Feb 2020 01:31:53 +0100
Subject: [PATCH 10/28] Add function to insert missing values

---
 fast_plotter/plotting.py |  8 +++++++-
 tests/test_plotting.py   | 12 ++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 5912c2b..ce3639f 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -228,7 +228,13 @@ def add_missing_vals(x, expected_xs, y_values=[], fill_val=0):
     """
     Check from a list of expected x values, if all occur in x.  If any are missing
     """
-    raise NotImplementedError()
+    insert = np.isin(expected_xs, x)
+    new_ys = []
+    for y in y_values:
+        new = np.full_like(expected_xs, fill_val)
+        new[insert] = y
+        new_ys.append(new)
+    return expected_xs[:], new_ys
 
 
 def pad_ends(x, y_values=[], fill_val=0):
diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index e6b118a..ce51805 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -66,3 +66,15 @@ def test_pad_zero_oneY():
     pad_x, pad_y = plotting.standardize_values(x, [y])
     assert np.array_equal(pad_x, np.arange(0, 5))
     assert np.array_equal(pad_y, expected_y)
+
+
+def test_add_missing_vals():
+    x = np.arange(3)*2
+    expected = np.arange(7)
+    outx, _ = plotting.add_missing_vals(x, expected)
+    assert np.array_equal(outx, expected)
+
+    y = np.arange(3)[::-1] + 1
+    outx, outy = plotting.add_missing_vals(x, expected, y_values=[y])
+    assert np.array_equal(outx, expected)
+    assert np.array_equal(outy[0], [3, 0, 2, 0, 1, 0, 0])

From c6b9a14e66503dc40a3091d238183a031e99b429 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sat, 8 Feb 2020 02:02:05 +0100
Subject: [PATCH 11/28] Fix issue with error band on sims not completing

---
 fast_plotter/plotting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index ce3639f..84952ab 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -173,7 +173,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
     elif kind == "fill-error-last":
         actually_plot(df, x_axis, y, yerr, "fill", label, ax,
                       dataset_col=dataset_col, colourmap=colourmap, dataset_order=dataset_order)
-        summed = df.unstack(dataset_col)
+        summed = df.unstack(dataset_col).fillna(method="ffill", axis="columns")
         last_dataset = summed.columns.get_level_values(1)[n_datasets - 1]
         summed = summed.xs(last_dataset, level=1, axis="columns")
         x = summed.index.values

From 35fa2366c76a8de6ff0a16ea943fedc119d32871 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sat, 8 Feb 2020 02:16:48 +0100
Subject: [PATCH 12/28] Fix partially filled steps

---
 fast_plotter/plotting.py | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 84952ab..fed3f3a 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -71,8 +71,9 @@ def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dat
 
 class FillColl(object):
     def __init__(self, n_colors=10, ax=None, fill=True, line=True,
-                 colourmap="nipy_spectral", dataset_order=None, linewidth=0.5):
+                 colourmap="nipy_spectral", dataset_order=None, linewidth=0.5, expected_xs=None):
         self.calls = 0
+        self.expected_xs = expected_xs
 
         self.dataset_order = {}
         if dataset_order is not None:
@@ -132,7 +133,7 @@ def __call__(self, col, **kwargs):
                 label = col.name
                 width = 2
                 style = "--"
-            draw(ax, "step", x=x, ys=["y"], y=y,
+            draw(ax, "step", x=x, ys=["y"], y=y, expected_xs=self.expected_xs,
                  color=color, linewidth=width, where="mid", label=label, linestyle=style)
         self.calls += 1
 
@@ -155,21 +156,27 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
         df.reset_index().plot.scatter(x=x_axis, y=y, yerr=yerr,
                                       color="k", label=label, ax=ax, s=13)
         return
+    expected_xs = df.index.unique(x_axis)
     if dataset_order is not None:
         input_datasets = df.index.unique(dataset_col)
         dataset_order = dataset_order + [d for d in input_datasets if d not in dataset_order]
     n_datasets = df.groupby(level=dataset_col).count()
     n_datasets = len(n_datasets[n_datasets != 0])
+
+    vals = df[y].unstack(dataset_col).fillna(method="ffill", axis="columns")
     if kind == "line":
-        filler = FillColl(n_datasets, ax=ax, fill=False, colourmap=colourmap, dataset_order=dataset_order)
-        df[y].unstack(dataset_col).iloc[:, ::-1].apply(filler, axis=0, step="mid")
+        filler = FillColl(n_datasets, ax=ax, fill=False, colourmap=colourmap,
+                          dataset_order=dataset_order, expected_xs=expected_xs)
+        vals.apply(filler, axis=0, step="mid")
         return
     elif kind == "bar":
-        filler = BarColl(n_datasets, ax=ax, colourmap=colourmap, dataset_order=dataset_order)
-        df[y].unstack(dataset_col).iloc[:, ::-1].apply(filler, axis=0, step="mid")
+        filler = BarColl(n_datasets, ax=ax, colourmap=colourmap,
+                         dataset_order=dataset_order, expected_xs=expected_xs)
+        vals.apply(filler, axis=0, step="mid")
     elif kind == "fill":
-        filler = FillColl(n_datasets, ax=ax, colourmap=colourmap, dataset_order=dataset_order, line=False)
-        df[y].unstack(dataset_col).iloc[:, ::-1].apply(filler, axis=0, step="mid")
+        filler = FillColl(n_datasets, ax=ax, colourmap=colourmap, dataset_order=dataset_order,
+                          line=False, expected_xs=expected_xs)
+        vals.iloc[:, ::-1].apply(filler, axis=0, step="mid")
     elif kind == "fill-error-last":
         actually_plot(df, x_axis, y, yerr, "fill", label, ax,
                       dataset_col=dataset_col, colourmap=colourmap, dataset_order=dataset_order)
@@ -180,7 +187,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
         y_down = (summed[y] - summed[yerr]).values
         y_up = (summed[y] + summed[yerr]).values
         draw(ax, "fill_between", x, ys=["y1", "y2"], y2=y_down, y1=y_up,
-             color="gray", step="mid", alpha=0.7)
+             color="gray", step="mid", alpha=0.7, expected_xs=expected_xs)
     else:
         raise RuntimeError("Unknown value for 'kind', '{}'".format(kind))
 
@@ -400,8 +407,10 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]):
 
 def draw(ax, method, x, ys, **kwargs):
     fill_val = kwargs.pop("fill_val", 0)
+    expected_xs = kwargs.pop("expected_xs", None)
     if x.dtype.kind in 'biufc':
-        values = standardize_values(x, [kwargs[y] for y in ys], fill_val=fill_val)
+        values = standardize_values(x, [kwargs[y] for y in ys],
+                                    fill_val=fill_val, expected_xs=expected_xs)
         x = values[0]
         new_ys = values[1:]
         kwargs.update(dict(zip(ys, new_ys)))

From 6a80e6b6a8967b4b20e95c1b9bc64faf6f30770b Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sat, 8 Feb 2020 02:17:20 +0100
Subject: [PATCH 13/28] pep8

---
 tests/test_plotting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index ce51805..586723d 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -69,7 +69,7 @@ def test_pad_zero_oneY():
 
 
 def test_add_missing_vals():
-    x = np.arange(3)*2
+    x = np.arange(3) * 2
     expected = np.arange(7)
     outx, _ = plotting.add_missing_vals(x, expected)
     assert np.array_equal(outx, expected)

From 08604dc51ffdcdbb67284d2df2c34d86856dcdb3 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sat, 8 Feb 2020 20:34:09 +0100
Subject: [PATCH 14/28] Add test to catch bad dtype error

---
 tests/test_plotting.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index 586723d..1d599e5 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -78,3 +78,14 @@ def test_add_missing_vals():
     outx, outy = plotting.add_missing_vals(x, expected, y_values=[y])
     assert np.array_equal(outx, expected)
     assert np.array_equal(outy[0], [3, 0, 2, 0, 1, 0, 0])
+    assert outy[0].dtype == y.dtype
+
+    x = np.logspace(0, 10, 11)
+    expected = np.logspace(0, 10, 21)
+    y = np.linspace(1, 100, 11)
+    outx, outy = plotting.add_missing_vals(x, expected, y_values=[y])
+    assert np.array_equal(outx, expected)
+    assert np.array_equal(outy[0][::2], y)
+    assert all(outy[0][1::2] == 0)
+    assert outy[0].dtype == y.dtype
+    assert outx.dtype == expected.dtype

From 55ef1bce558869e1510d23545db004fefad8dc99 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sun, 9 Feb 2020 10:53:28 +0100
Subject: [PATCH 15/28] Improve test so it actually catches the issue

---
 tests/test_plotting.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index 1d599e5..23b0849 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -80,12 +80,14 @@ def test_add_missing_vals():
     assert np.array_equal(outy[0], [3, 0, 2, 0, 1, 0, 0])
     assert outy[0].dtype == y.dtype
 
-    x = np.logspace(0, 10, 11)
-    expected = np.logspace(0, 10, 21)
-    y = np.linspace(1, 100, 11)
-    outx, outy = plotting.add_missing_vals(x, expected, y_values=[y])
+    x = np.logspace(0, 10, 11, dtype=int)
+    expected = np.zeros(22, dtype=int)
+    expected[0::2] = x
+    expected[1::2] = x / 2
+    y = np.linspace(1, 3, 11)
+    outx, (outy,) = plotting.add_missing_vals(x, expected, y_values=[y], fill_val=0)
     assert np.array_equal(outx, expected)
-    assert np.array_equal(outy[0][::2], y)
-    assert all(outy[0][1::2] == 0)
-    assert outy[0].dtype == y.dtype
+    assert np.array_equal(outy[::2], y)
+    assert all(outy[1::2] == 0)
+    assert outy.dtype == y.dtype
     assert outx.dtype == expected.dtype

From 7b0a8b62de1137f4209626c01cc67232463b0ee5 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sun, 9 Feb 2020 10:53:47 +0100
Subject: [PATCH 16/28] Fix bug with ints being created instead of matching the
 input dtype

---
 fast_plotter/plotting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index fed3f3a..3f73a23 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -238,7 +238,7 @@ def add_missing_vals(x, expected_xs, y_values=[], fill_val=0):
     insert = np.isin(expected_xs, x)
     new_ys = []
     for y in y_values:
-        new = np.full_like(expected_xs, fill_val)
+        new = np.full_like(expected_xs, fill_val, dtype=y.dtype)
         new[insert] = y
         new_ys.append(new)
     return expected_xs[:], new_ys

From eed29ecefaa351c773adc0a48bb42ca11cdb6773 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sun, 9 Feb 2020 13:13:02 +0100
Subject: [PATCH 17/28] Add code and functions to remove under/overflow bins

---
 fast_plotter/plotting.py | 4 +++-
 fast_plotter/utils.py    | 7 +++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 3f73a23..07c77d3 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -261,7 +261,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
                  plot_sims="stack", plot_data="sum", plot_signal=None,
                  kind_data="scatter", kind_sims="fill-error-last", kind_signal="line",
                  scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral",
-                 dataset_order=None, figsize=(5, 6), **kwargs):
+                 dataset_order=None, figsize=(5, 6), no_over_underflow=True, **kwargs):
     y = "sumw"
     yvar = "sumw2"
     yerr = "err"
@@ -271,6 +271,8 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
         yerr = prefix + ":" + yerr
 
     df = utils.convert_intervals(df, to="mid")
+    if no_over_underflow:
+        df = utils.drop_over_underflow(df)
     in_df_data, in_df_sims = utils.split_data_sims(
         df, data_labels=data, dataset_level=dataset_col)
     if scale_sims is not None:
diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py
index 223ba68..9d8103d 100644
--- a/fast_plotter/utils.py
+++ b/fast_plotter/utils.py
@@ -166,3 +166,10 @@ def rename_index(df, name_replacements):
         return df
     df.index.names = [name_replacements.get(n, n) for n in df.index.names]
     return df
+
+
+def drop_over_underflow(df):
+    index = df.index.to_frame()
+    index = index.select_dtypes(exclude=['object'])
+    good_rows = np.isfinite(index).all(axis=1)
+    return df.loc[good_rows]

From cf58a09e18df552d3f6dd56bed810d2eae243cab Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sun, 9 Feb 2020 13:40:42 +0100
Subject: [PATCH 18/28] Add unit test for new drop_over_underflow method

---
 tests/test_utils.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 14721d2..08afe80 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -53,3 +53,36 @@ def test_read_binned_df():
     assert df.index.names[1] == "njet"
     assert set(df.index.unique(level='dataset')) == set(("data", "mc_1", "mc_2"))
     assert len(df) == 12
+
+
+def test_drop_over_underflow():
+    x1 = np.concatenate(([-np.inf], np.linspace(0, 100, 3), [np.inf]), axis=0)
+    x2 = ["one", "TWO", "3"]
+    x3 = [10, 11, 20]
+
+    def build_df(*indices):
+        index = pd.MultiIndex.from_product(indices)
+        df = pd.DataFrame({"A": np.arange(len(index))}, index=index)
+        return df
+
+    df = build_df(x1)
+    cleaned = utils.drop_over_underflow(df)
+    assert len(cleaned) == 3
+    assert np.array_equal(cleaned.A, np.arange(1, 4))
+
+    df = build_df(x2)
+    cleaned = utils.drop_over_underflow(df)
+    assert len(cleaned) == 3
+    assert np.array_equal(cleaned.A, np.arange(0, 3))
+
+    df = build_df(x3)
+    cleaned = utils.drop_over_underflow(df)
+    assert len(cleaned) == 3
+    assert np.array_equal(cleaned.A, np.arange(0, 3))
+
+    df = build_df(x2, x3, x1)
+    cleaned = utils.drop_over_underflow(df)
+    assert len(cleaned) == 27
+    expected = [(i + 1, i + 4) for i in range(0, 5 * 3 * 3, 5)]
+    expected = np.concatenate([np.arange(i, j) for i, j in expected], axis=0)
+    assert np.array_equal(cleaned.A, expected)

From 484106929c9a5cadac372ce7572c7b025d52c956 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sun, 9 Feb 2020 14:00:15 +0100
Subject: [PATCH 19/28] Fix issue with pandas index being passed in instead of
 numpy arrays

---
 fast_plotter/plotting.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 07c77d3..39c165d 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -2,6 +2,7 @@
 from . import statistics as stats
 import traceback
 import numpy as np
+import pandas as pd
 import matplotlib.pyplot as plt
 import matplotlib.colors as mc
 import logging
@@ -241,7 +242,11 @@ def add_missing_vals(x, expected_xs, y_values=[], fill_val=0):
         new = np.full_like(expected_xs, fill_val, dtype=y.dtype)
         new[insert] = y
         new_ys.append(new)
-    return expected_xs[:], new_ys
+    if isinstance(expected_xs, (pd.Index, pd.MultiIndex)):
+        new_x = expected_xs.values
+    else:
+        new_x = expected_xs[:]
+    return new_x, new_ys
 
 
 def pad_ends(x, y_values=[], fill_val=0):

From 51baa596852e5120d26e8e119432417c7e01337e Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Wed, 12 Feb 2020 11:43:18 +0100
Subject: [PATCH 20/28] Add ability to control colours for lines directly

---
 fast_plotter/plotting.py | 87 +++++++++++++++++++++++-----------------
 1 file changed, 51 insertions(+), 36 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 39c165d..137694b 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -70,51 +70,61 @@ def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dat
     return figures, ran_ok
 
 
+class ColorDict():
+    def __init__(self, order=None, named=None, n_colors=10, cmap="nipy_spectral", cmap_start=0.96, cmap_stop=0.2):
+        self.order = {}
+        if order is not None:
+            self.order = {n: i for i, n in enumerate(order)}
+            n_colors = max(n_colors, len(order))
+
+        if isinstance(cmap, str):
+            colmap_def = plt.get_cmap(cmap)
+            n_colors = max(colmap_def.N, n_colors) if colmap_def.N < 256 else n_colors
+        elif isinstance(cmap, dict):
+            colmap_def = plt.get_cmap(cmap.get("map"))
+            n_colors = cmap.get("n_colors", n_colors)
+            cmap_start = cmap.get("colour_start", cmap_start)
+            cmap_stop = cmap.get("colour_stop", cmap_stop)
+
+        self.defaults = [colmap_def(i) for i in np.linspace(cmap_start, cmap_stop, n_colors)]
+        self.named = named if named is not None else {}
+
+    def get_colour(self, index=None, name=None):
+        if index is None and name is None:
+            raise RuntimeError("'Index' and 'name' cannot both be None")
+
+        if name in self.named:
+            return self.named[name]
+
+        if name in self.order:
+            return self.defaults[self.order[name]]
+
+        if index is None:
+            raise RuntimeError("'index' was not provided and we got an unknown named object '%s'" % name)
+
+        return self.defaults[index]
+
+
 class FillColl(object):
-    def __init__(self, n_colors=10, ax=None, fill=True, line=True,
+    def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=None,
                  colourmap="nipy_spectral", dataset_order=None, linewidth=0.5, expected_xs=None):
         self.calls = 0
         self.expected_xs = expected_xs
-
-        self.dataset_order = {}
-        if dataset_order is not None:
-            self.dataset_order = {n: i for i, n in enumerate(dataset_order)}
-            n_colors = max(n_colors, len(dataset_order))
-
-        colour_start = 0.96
-        colour_stop = 0.2
-        # darken = None
-        if isinstance(colourmap, str):
-            colmap_def = plt.get_cmap(colourmap)
-            n_colors = max(colmap_def.N, n_colors) if colmap_def.N < 256 else n_colors
-        elif isinstance(colourmap, dict):
-            colmap_def = plt.get_cmap(colourmap.get("map"))
-            n_colors = colourmap.get("n_colors", n_colors)
-            colour_start = colourmap.get("colour_start", colour_start)
-            colour_stop = colourmap.get("colour_stop", colour_stop)
-        if not fill:
-            # colmap_def = plt.get_cmap("Pastel1")
-            # darken = 0.02
-            pass
-
-        self.colors = [colmap_def(i)
-                       for i in np.linspace(colour_start, colour_stop, n_colors)]
+        self.colors = ColorDict(n_colors=n_colors, order=dataset_order,
+                                named=dataset_colours, cmap=colourmap)
 
         self.ax = ax
         self.fill = fill
         self.line = line
         self.linewidth = linewidth
 
-    def pre_call(self, col):
+    def pre_call(self, column):
         ax = self.ax
         if not ax:
             ax = plt.gca()
-        index = self.calls
-        if self.dataset_order:
-            index = self.dataset_order.get(col.name, index)
-        color = self.colors[index]
-        x = col.index.values
-        y = col.values
+        color = self.colors.get_colour(index=self.calls, name=column.name)
+        x = column.index.values
+        y = column.values
         return ax, x, y, color
 
     def __call__(self, col, **kwargs):
@@ -152,7 +162,7 @@ def __call__(self, col, **kwargs):
 
 
 def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
-                  colourmap="nipy_spectral", dataset_order=None):
+                  dataset_colours=None, colourmap="nipy_spectral", dataset_order=None):
     if kind == "scatter":
         df.reset_index().plot.scatter(x=x_axis, y=y, yerr=yerr,
                                       color="k", label=label, ax=ax, s=13)
@@ -167,6 +177,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
     vals = df[y].unstack(dataset_col).fillna(method="ffill", axis="columns")
     if kind == "line":
         filler = FillColl(n_datasets, ax=ax, fill=False, colourmap=colourmap,
+                          dataset_colours=dataset_colours,
                           dataset_order=dataset_order, expected_xs=expected_xs)
         vals.apply(filler, axis=0, step="mid")
         return
@@ -175,11 +186,13 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
                          dataset_order=dataset_order, expected_xs=expected_xs)
         vals.apply(filler, axis=0, step="mid")
     elif kind == "fill":
-        filler = FillColl(n_datasets, ax=ax, colourmap=colourmap, dataset_order=dataset_order,
+        filler = FillColl(n_datasets, ax=ax, colourmap=colourmap,
+                          dataset_colours=dataset_colours,
+                          dataset_order=dataset_order,
                           line=False, expected_xs=expected_xs)
         vals.iloc[:, ::-1].apply(filler, axis=0, step="mid")
     elif kind == "fill-error-last":
-        actually_plot(df, x_axis, y, yerr, "fill", label, ax,
+        actually_plot(df, x_axis, y, yerr, "fill", label, ax, dataset_colours=dataset_colours,
                       dataset_col=dataset_col, colourmap=colourmap, dataset_order=dataset_order)
         summed = df.unstack(dataset_col).fillna(method="ffill", axis="columns")
         last_dataset = summed.columns.get_level_values(1)[n_datasets - 1]
@@ -266,7 +279,8 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
                  plot_sims="stack", plot_data="sum", plot_signal=None,
                  kind_data="scatter", kind_sims="fill-error-last", kind_signal="line",
                  scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral",
-                 dataset_order=None, figsize=(5, 6), no_over_underflow=True, **kwargs):
+                 dataset_order=None, figsize=(5, 6), no_over_underflow=True,
+                 dataset_colours=None, **kwargs):
     y = "sumw"
     yvar = "sumw2"
     yerr = "err"
@@ -317,6 +331,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
         merged = _merge_datasets(df, combine, dataset_col, param_name=var_name)
         actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style,
                       label=label, ax=main_ax, dataset_col=dataset_col,
+                      dataset_colours=dataset_colours,
                       colourmap=colourmap, dataset_order=dataset_order)
     main_ax.set_xlabel(x_axis)
 

From 5231cbed952ed9c50f9dd2276b7eebb894b972bd Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sat, 15 Feb 2020 00:43:22 +0100
Subject: [PATCH 21/28] Fix issue where expected_xs was also modified

---
 fast_plotter/plotting.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 137694b..efd4e3f 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -108,7 +108,7 @@ def get_colour(self, index=None, name=None):
 class FillColl(object):
     def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=None,
                  colourmap="nipy_spectral", dataset_order=None, linewidth=0.5, expected_xs=None):
-        self.calls = 0
+        self.calls = -1
         self.expected_xs = expected_xs
         self.colors = ColorDict(n_colors=n_colors, order=dataset_order,
                                 named=dataset_colours, cmap=colourmap)
@@ -131,7 +131,7 @@ def __call__(self, col, **kwargs):
         ax, x, y, color = self.pre_call(col)
         if self.fill:
             draw(ax, "fill_between", x=x, ys=["y1"],
-                 y1=y, label=col.name,
+                 y1=y, label=col.name, expected_xs=self.expected_xs,
                  linewidth=0, color=color, **kwargs)
         if self.line:
             if self.fill:
@@ -167,7 +167,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
         df.reset_index().plot.scatter(x=x_axis, y=y, yerr=yerr,
                                       color="k", label=label, ax=ax, s=13)
         return
-    expected_xs = df.index.unique(x_axis)
+    expected_xs = df.index.unique(x_axis).values
     if dataset_order is not None:
         input_datasets = df.index.unique(dataset_col)
         dataset_order = dataset_order + [d for d in input_datasets if d not in dataset_order]
@@ -258,7 +258,7 @@ def add_missing_vals(x, expected_xs, y_values=[], fill_val=0):
     if isinstance(expected_xs, (pd.Index, pd.MultiIndex)):
         new_x = expected_xs.values
     else:
-        new_x = expected_xs[:]
+        new_x = expected_xs.copy()
     return new_x, new_ys
 
 

From 3e6d2321eec037b1c49bb1f43e6b3910e8aae4fd Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sat, 15 Feb 2020 00:48:20 +0100
Subject: [PATCH 22/28] Rename option for hiding overflows

---
 fast_plotter/plotting.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index efd4e3f..ddfa9ec 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -279,7 +279,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
                  plot_sims="stack", plot_data="sum", plot_signal=None,
                  kind_data="scatter", kind_sims="fill-error-last", kind_signal="line",
                  scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral",
-                 dataset_order=None, figsize=(5, 6), no_over_underflow=True,
+                 dataset_order=None, figsize=(5, 6), show_over_underflow=False,
                  dataset_colours=None, **kwargs):
     y = "sumw"
     yvar = "sumw2"
@@ -290,7 +290,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
         yerr = prefix + ":" + yerr
 
     df = utils.convert_intervals(df, to="mid")
-    if no_over_underflow:
+    if not show_over_underflow:
         df = utils.drop_over_underflow(df)
     in_df_data, in_df_sims = utils.split_data_sims(
         df, data_labels=data, dataset_level=dataset_col)

From 73431868b0e2c73b434452ec0ebd0e034eb68fdb Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sat, 15 Feb 2020 00:58:01 +0100
Subject: [PATCH 23/28] Add option to control errors

---
 fast_plotter/plotting.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index ddfa9ec..d97a606 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -280,7 +280,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
                  kind_data="scatter", kind_sims="fill-error-last", kind_signal="line",
                  scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral",
                  dataset_order=None, figsize=(5, 6), show_over_underflow=False,
-                 dataset_colours=None, **kwargs):
+                 dataset_colours=None, err_from_sumw2=False, **kwargs):
     y = "sumw"
     yvar = "sumw2"
     yerr = "err"
@@ -328,7 +328,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
     for df, combine, style, label, var_name in config:
         if df is None or len(df) == 0:
             continue
-        merged = _merge_datasets(df, combine, dataset_col, param_name=var_name)
+        merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2)
         actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style,
                       label=label, ax=main_ax, dataset_col=dataset_col,
                       dataset_colours=dataset_colours,
@@ -343,9 +343,9 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
     if summary.startswith("ratio"):
         main_ax.set_xlabel("")
         summed_data = _merge_datasets(
-            in_df_data, "sum", dataset_col=dataset_col)
+            in_df_data, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2)
         summed_sims = _merge_datasets(
-            in_df_sims, "sum", dataset_col=dataset_col)
+            in_df_sims, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2)
         if summary == "ratio-error-both":
             error = "both"
         elif summary == "ratio-error-markers":
@@ -361,13 +361,13 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset",
     return main_ax, summary_ax
 
 
-def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets"):
+def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_from_sumw2=False):
     if style == "stack":
-        utils.calculate_error(df)
+        utils.calculate_error(df, do_rel_err=not err_from_sumw2)
         df = utils.stack_datasets(df, dataset_level=dataset_col)
     elif style == "sum":
         df = utils.sum_over_datasets(df, dataset_level=dataset_col)
-        utils.calculate_error(df)
+        utils.calculate_error(df, do_rel_err=not err_from_sumw2)
     elif style:
         msg = "'{}' must be either 'sum', 'stack' or None. Got {}"
         raise RuntimeError(msg.format(param_name, style))

From 4fbe32d537635a4f0f5d9f14513c7444b9e18a19 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Wed, 26 Feb 2020 09:50:03 +0100
Subject: [PATCH 24/28] Add fixes for pandas 1.0.0

---
 fast_plotter/utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py
index 9d8103d..f8c4775 100644
--- a/fast_plotter/utils.py
+++ b/fast_plotter/utils.py
@@ -75,6 +75,8 @@ def split_df(df, first_values, level=0):
     if isinstance(first_values, six.string_types):
         regex = re.compile(first_values)
         first_values = [val for val in df.index.unique(level) if regex.match(val)]
+    if not first_values:
+        return None, df
     second = df.drop(first_values, level=level)
     second_values = second.index.unique(level=level)
     first = df.drop(second_values, level=level)
@@ -162,7 +164,7 @@ def order_datasets(df, dataset_order, dataset_level="dataset", values="sumw"):
 
 
 def rename_index(df, name_replacements):
-    if not isinstance(df.index, pd.core.index.MultiIndex):
+    if not isinstance(df.index, pd.MultiIndex):
         return df
     df.index.names = [name_replacements.get(n, n) for n in df.index.names]
     return df

From 81735d0ea568b4ae112020e37cfc2a45f0f59872 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Wed, 26 Feb 2020 09:59:40 +0100
Subject: [PATCH 25/28] Plot data point for under/overflow bins

---
 fast_plotter/plotting.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index d97a606..cbe2e4d 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -163,11 +163,11 @@ def __call__(self, col, **kwargs):
 
 def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
                   dataset_colours=None, colourmap="nipy_spectral", dataset_order=None):
+    expected_xs = df.index.unique(x_axis).values
     if kind == "scatter":
-        df.reset_index().plot.scatter(x=x_axis, y=y, yerr=yerr,
-                                      color="k", label=label, ax=ax, s=13)
+        draw(ax, "errorbar", x=df.reset_index()[x_axis], ys=["y", "yerr"], y=df[y], yerr=df[yerr],
+             color="k", ms=3.5, fmt="o", label=label, expected_xs=expected_xs)
         return
-    expected_xs = df.index.unique(x_axis).values
     if dataset_order is not None:
         input_datasets = df.index.unique(dataset_col)
         dataset_order = dataset_order + [d for d in input_datasets if d not in dataset_order]

From bbd8ad0874f6f3f1070c95950e8269d39ed75682 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Wed, 26 Feb 2020 10:20:04 +0100
Subject: [PATCH 26/28] Fix ratio plots to include under/overflow

---
 fast_plotter/plotting.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index cbe2e4d..697714e 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -166,7 +166,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
     expected_xs = df.index.unique(x_axis).values
     if kind == "scatter":
         draw(ax, "errorbar", x=df.reset_index()[x_axis], ys=["y", "yerr"], y=df[y], yerr=df[yerr],
-             color="k", ms=3.5, fmt="o", label=label, expected_xs=expected_xs)
+             color="k", ms=3.5, fmt="o", label=label, expected_xs=expected_xs, add_ends=False)
         return
     if dataset_order is not None:
         input_datasets = df.index.unique(dataset_col)
@@ -206,7 +206,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset",
         raise RuntimeError("Unknown value for 'kind', '{}'".format(kind))
 
 
-def standardize_values(x, y_values=[], fill_val=0, expected_xs=None):
+def standardize_values(x, y_values=[], fill_val=0, expected_xs=None, add_ends=True):
     """
     Standardize a set of arrays so they're ready to be plotted directly for matplotlib
 
@@ -220,7 +220,8 @@ def standardize_values(x, y_values=[], fill_val=0, expected_xs=None):
     if x.dtype.kind in 'bifc':
         x = replace_infs(x)
 
-        x, y_values = pad_ends(x, y_values=y_values, fill_val=fill_val)
+        if add_ends:
+            x, y_values = pad_ends(x, y_values=y_values, fill_val=fill_val)
     return (x,) + tuple(y_values)
 
 
@@ -393,9 +394,8 @@ def plot_1d(df, kind="line", yscale="lin"):
 
 
 def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]):
-    # make sure both sides agree with the binning and drop all infinities
+    # make sure both sides agree with the binning
     merged = data.join(sims, how="left", lsuffix="data", rsuffix="sims")
-    merged.drop([np.inf, -np.inf], inplace=True, errors="ignore")
     data = merged.filter(like="data", axis="columns").fillna(0)
     data.columns = [col.replace("data", "") for col in data.columns]
     sims = merged.filter(like="sims", axis="columns")
@@ -407,17 +407,21 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]):
 
     if error == "markers":
         central, lower, upper = stats.try_root_ratio_plot(d, d_err, s, s_err)
+        x_axis, central, lower, upper = standardize_values(x_axis, y_values=(central, lower, upper), add_ends=False)
         mask = (central != 0) & (lower != 0)
         ax.errorbar(x=x_axis[mask], y=central[mask], yerr=(lower[mask], upper[mask]),
                     fmt="o", markersize=4, color="k")
 
     elif error == "both":
-        rel_d_err = (d_err / d)
+        ratio = d / s
+        rel_d_err = (d_err / s)
         rel_s_err = (s_err / s)
 
-        ax.errorbar(x=x_axis.values, y=d / s, yerr=rel_d_err, fmt="o", markersize=4, color="k")
-        draw(ax, "fill_between", x_axis.values, ys=["y1", "y2"],
-             y2=1 + rel_s_err.values, y1=1 - rel_s_err.values, fill_val=1,
+        vals = standardize_values(x_axis.values, y_values=[ratio, rel_s_err, rel_d_err], add_ends=False)
+        x_axis, ratio, rel_s_err, rel_d_err = vals
+        ax.errorbar(x=x_axis, y=ratio, yerr=rel_d_err, fmt="o", markersize=4, color="k")
+        draw(ax, "fill_between", x_axis, ys=["y1", "y2"],
+             y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1,
              color="gray", step="mid", alpha=0.7)
 
     ax.set_ylim(ylim)
@@ -430,9 +434,12 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]):
 def draw(ax, method, x, ys, **kwargs):
     fill_val = kwargs.pop("fill_val", 0)
     expected_xs = kwargs.pop("expected_xs", None)
+    add_ends = kwargs.pop("add_ends", True)
     if x.dtype.kind in 'biufc':
         values = standardize_values(x, [kwargs[y] for y in ys],
-                                    fill_val=fill_val, expected_xs=expected_xs)
+                                    fill_val=fill_val,
+                                    add_ends=add_ends,
+                                    expected_xs=expected_xs)
         x = values[0]
         new_ys = values[1:]
         kwargs.update(dict(zip(ys, new_ys)))

From 676ced917d50658cb612f4d7ccf1518dbe90dabc Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Wed, 26 Feb 2020 10:35:53 +0100
Subject: [PATCH 27/28] Update CHANGELOG

---
 CHANGELOG.md             | 14 ++++++++++++++
 fast_plotter/plotting.py |  1 +
 2 files changed, 15 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 205e811..29389a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 ### Changed
 
+## [0.4.0] - 2020-02-26
+- Many changes from PR #26 [@benkrikler](https://github.com/benkrikler)
+
+### Fixed
+- Bugs in the way overflow bins were handled and step-lines were drawn by padding. These affected error bars and produced weird plotting artefacts.
+
+### Added 
+- Extend unit tests
+- Variable interpolation within the config files and using variables which can be passed from the command-line
+- Y-limits based on plot-margins: pass a float with a percent sign after to limit configs
+- Control over the display of under and overflow bins from the config file
+- Ability to give specific colours for individual bands in the plot
+- Option to control how errors are calculated: sqrt of sumw2 or sumw / sqrt(n)
+
 ## [0.3.0] - 2019-11-1
 - Many changes from PR #13 [@benkrikler](github.com/benkrikler)
 ### Added 
diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py
index 697714e..27c9810 100644
--- a/fast_plotter/plotting.py
+++ b/fast_plotter/plotting.py
@@ -419,6 +419,7 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]):
 
         vals = standardize_values(x_axis.values, y_values=[ratio, rel_s_err, rel_d_err], add_ends=False)
         x_axis, ratio, rel_s_err, rel_d_err = vals
+
         ax.errorbar(x=x_axis, y=ratio, yerr=rel_d_err, fmt="o", markersize=4, color="k")
         draw(ax, "fill_between", x_axis, ys=["y1", "y2"],
              y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1,

From d909f917ecca5a33879be2c76751577d704aeec6 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Wed, 26 Feb 2020 10:36:15 +0100
Subject: [PATCH 28/28] =?UTF-8?q?Bump=20version:=200.3.0=20=E2=86=92=200.4?=
 =?UTF-8?q?.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fast_plotter/version.py | 2 +-
 setup.cfg               | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/fast_plotter/version.py b/fast_plotter/version.py
index ca74866..6e06bfc 100644
--- a/fast_plotter/version.py
+++ b/fast_plotter/version.py
@@ -12,5 +12,5 @@ def split_version(version):
     return tuple(result)
 
 
-__version__ = '0.3.0'
+__version__ = '0.4.0'
 version_info = split_version(__version__) # noqa
diff --git a/setup.cfg b/setup.cfg
index d604e91..51367d7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.0
+current_version = 0.4.0
 commit = True
 tag = False
 
@@ -18,4 +18,3 @@ test = pytest
 
 [tool:pytest]
 collect_ignore = ['setup.py']
-