From e5b7b2bbfa8c3a91a68292ef1e4fd29ad36f0e0b Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Wed, 29 Jan 2020 18:06:55 +0100 Subject: [PATCH 01/28] Add option to pass variables on the command line for formatting --- fast_plotter/__main__.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index b7fadd8..19f4f53 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -2,6 +2,7 @@ Turn them tables into plots """ import os +import six import logging import matplotlib matplotlib.use('Agg') @@ -41,6 +42,10 @@ def arg_parser(args=None): help="Scale the MC yields by this lumi") parser.add_argument("-y", "--yscale", default="log", choices=["log", "linear"], help="Use this scale for the y-axis") + def split_equals(arg): + return arg.split("=") + parser.add_argument("-v", "--variable", dest="variables", action="append", default=[], type=split_equals, + help="Define a variable to expand in the config file") parser.add_argument("--halt-errors", dest="continue_errors", default=True, action="store_false", help="Stop at the first time an error occurs") return parser @@ -64,6 +69,8 @@ def main(args=None): def process_cfg(cfg_file, args): import yaml + from argparse import Namespace + from string import Template with open(cfg_file, "r") as infile: cfg = yaml.load(infile) # Only way to neatly allow cmd-line args to override config and handle @@ -71,6 +78,20 @@ def process_cfg(cfg_file, args): parser = arg_parser() parser.set_defaults(**cfg) args = parser.parse_args() + if args.variables: + + def recursive_replace(value, replacements): + if isinstance(value, (tuple, list)): + return type(value)([recursive_replace(v, replacements) for v in value]) + if isinstance(value, dict): + return {k: recursive_replace(v, replacements) for k, v in value.items()} + if isinstance(value, six.string_types): + return Template(value).safe_substitute(replacements) + #return value.format(**replacements) + return value + + replacements = dict(args.variables) + args = Namespace(**recursive_replace(vars(args), replacements)) return args From 32e44665d464e93704fa70e673a8dd914666cec8 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Wed, 5 Feb 2020 16:42:17 +0100 Subject: [PATCH 02/28] Extend the unit tests for pad_zero --- tests/test_plotting.py | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index a7f2745..4fdc173 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -2,7 +2,7 @@ from fast_plotter import plotting -def test_pad_zero(): +def test_pad_zero_noYs(): x = np.arange(5) padded = plotting.pad_zero(x) assert (padded == np.arange(-1, 6)).all() @@ -18,3 +18,38 @@ def test_pad_zero(): x = np.concatenate(([-np.inf], x), axis=0) padded = plotting.pad_zero(x) assert (padded == np.arange(1, 5)).all() + +def test_pad_zero_oneY(): + + x = np.arange(5) + y = np.arange(5, 0, -1) + pad_x, pad_y = plotting.pad_zero(x, [y]) + assert (pad_x == np.arange(-1, 6)).all() + expected_y = np.concatenate(([0], y, [0]), axis=0) + assert np.array_equal(pad_y, expected_y) + + x = np.concatenate(([-np.inf], x, [np.inf]), axis=0) + y = np.arange(len(x), 0, -1) + pad_x, pad_y = plotting.pad_zero(x, [y]) + assert (pad_x == np.arange(-1, 6)).all() + assert np.array_equal(pad_y, y) + + x = np.arange(2, 4) + y = np.arange(len(x), 0, -1) + pad_x, pad_y = plotting.pad_zero(x, y) + print(x, y) + print(pad_x, pad_y) + assert (pad_x == np.arange(1, 5)).all() + expected_y = np.concatenate(([0], y, [0]), axis=0) + assert np.array_equal(pad_y, expected_y) + + x = np.concatenate(([-np.inf], x), axis=0) + y = np.arange(len(x), 0, -1) + pad_x, pad_y = plotting.pad_zero(x, y) + print(x, y) + print(pad_x, pad_y) + assert (pad_x == np.arange(1, 5)).all() + expected_y = np.concatenate((y, [0]), axis=0) + assert np.array_equal(pad_y, expected_y) + + assert False From 6ac3a96673ea363628b382b11f97254f08ec2ce7 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Wed, 5 Feb 2020 16:57:18 +0100 Subject: [PATCH 03/28] Rename method --- fast_plotter/plotting.py | 8 ++++++-- tests/test_plotting.py | 16 ++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 5824b49..57ce7d8 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -185,7 +185,11 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", raise RuntimeError("Unknown value for 'kind', '{}'".format(kind)) -def pad_zero(x, y_values=[], fill_val=0): +def normalize_values(x, y_values=[], fill_val=0): + # if X has +/- inf at an end, replace this X value with +/- the previous/next value of X +/- the mean width in X + # if any requested X values are missing: + # insert dummy values into X and Y values at the right location + # insert a dummy entry to X and Y for all arrays if x.dtype.kind not in 'bifc': return (x,) + tuple(y_values) do_pad_left = not np.isneginf(x[0]) @@ -361,7 +365,7 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]): def draw(ax, method, x, ys, **kwargs): fill_val = kwargs.pop("fill_val", 0) if x.dtype.kind in 'biufc': - values = pad_zero(x, [kwargs[y] for y in ys], fill_val=fill_val) + values = normalize_values(x, [kwargs[y] for y in ys], fill_val=fill_val) x = values[0] new_ys = values[1:] kwargs.update(dict(zip(ys, new_ys))) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 4fdc173..a75aebc 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -4,39 +4,39 @@ def test_pad_zero_noYs(): x = np.arange(5) - padded = plotting.pad_zero(x) + padded = plotting.normalize_values(x) assert (padded == np.arange(-1, 6)).all() x = np.concatenate(([-np.inf], x, [np.inf]), axis=0) - padded = plotting.pad_zero(x) + padded = plotting.normalize_values(x) assert (padded == np.arange(-1, 6)).all() x = np.arange(2, 4) - padded = plotting.pad_zero(x) + padded = plotting.normalize_values(x) assert (padded == np.arange(1, 5)).all() x = np.concatenate(([-np.inf], x), axis=0) - padded = plotting.pad_zero(x) + padded = plotting.normalize_values(x) assert (padded == np.arange(1, 5)).all() def test_pad_zero_oneY(): x = np.arange(5) y = np.arange(5, 0, -1) - pad_x, pad_y = plotting.pad_zero(x, [y]) + pad_x, pad_y = plotting.normalize_values(x, [y]) assert (pad_x == np.arange(-1, 6)).all() expected_y = np.concatenate(([0], y, [0]), axis=0) assert np.array_equal(pad_y, expected_y) x = np.concatenate(([-np.inf], x, [np.inf]), axis=0) y = np.arange(len(x), 0, -1) - pad_x, pad_y = plotting.pad_zero(x, [y]) + pad_x, pad_y = plotting.normalize_values(x, [y]) assert (pad_x == np.arange(-1, 6)).all() assert np.array_equal(pad_y, y) x = np.arange(2, 4) y = np.arange(len(x), 0, -1) - pad_x, pad_y = plotting.pad_zero(x, y) + pad_x, pad_y = plotting.normalize_values(x, y) print(x, y) print(pad_x, pad_y) assert (pad_x == np.arange(1, 5)).all() @@ -45,7 +45,7 @@ def test_pad_zero_oneY(): x = np.concatenate(([-np.inf], x), axis=0) y = np.arange(len(x), 0, -1) - pad_x, pad_y = plotting.pad_zero(x, y) + pad_x, pad_y = plotting.normalize_values(x, y) print(x, y) print(pad_x, pad_y) assert (pad_x == np.arange(1, 5)).all() From 9d63709fea8ba3218255fed5a8b570a1c7f05932 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Fri, 7 Feb 2020 15:30:28 +0100 Subject: [PATCH 04/28] Add option to control margins --- fast_plotter/__main__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 19f4f53..5065c4b 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -142,12 +142,12 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, main_ax.grid(True) main_ax.set_axisbelow(True) for axis, lims in limits.items(): - lims = map(float, lims) - if axis.lower() == "x": - main_ax.set_xlim(*lims) - if axis.lower() == "y": - main_ax.set_ylim(*lims) - + if isinstance(lims, (tuple, list)): + lims = map(float, lims) + if axis.lower() in "xy": + getattr(main_ax, "set_%slim" % axis)(*lims) + elif lims.endswith("%"): + main_ax.margins(**{axis: float(lims[:-1])}) def save_plots(infile, weight, plots, outdir, extensions): binning, name = decipher_filename(infile) From 9a087d0915f2dacf518840c458ac1e9253cfccac Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Fri, 7 Feb 2020 15:30:52 +0100 Subject: [PATCH 05/28] Fix up padding functions and tests --- fast_plotter/plotting.py | 70 ++++++++++++++++++++++++++++------------ tests/test_plotting.py | 63 ++++++++++++++++++++++-------------- 2 files changed, 87 insertions(+), 46 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 57ce7d8..082281e 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -185,33 +185,61 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", raise RuntimeError("Unknown value for 'kind', '{}'".format(kind)) -def normalize_values(x, y_values=[], fill_val=0): - # if X has +/- inf at an end, replace this X value with +/- the previous/next value of X +/- the mean width in X +def standardize_values(x, y_values=[], fill_val=0, expected_xs=None): + """ + Standardize a set of arrays so they're ready to be plotted directly for matplotlib + """ # if any requested X values are missing: # insert dummy values into X and Y values at the right location - # insert a dummy entry to X and Y for all arrays - if x.dtype.kind not in 'bifc': - return (x,) + tuple(y_values) - do_pad_left = not np.isneginf(x[0]) - do_pad_right = not np.isposinf(x[-1]) - width_slice = x[None if do_pad_left else 1:None if do_pad_right else -1] + if expected_xs is not None: + x, y_values = add_missing_vals(x, expected_xs, y_values=y_values, fill_val=fill_val) + + if x.dtype.kind in 'bifc': + x = replace_infs(x) + + x, y_values = pad_ends(x, y_values=y_values, fill_val=fill_val) + return (x,) + tuple(y_values) + + +def replace_infs(x): + """ + Replace (pos or neg) infinities at the ends of an array of floats + + Algorithm: X has +/- inf at an end, replace this X value with +/- the + previous/next value of X +/- the mean width in X + """ + x = x[:] # Make a copy of the array + is_left_inf = np.isneginf(x[0]) + is_right_inf = np.isposinf(x[-1]) + width_slice = x[1 if is_left_inf else None:-1 if is_right_inf else None] mean_width = width_slice[0] if len(width_slice) > 1: mean_width = np.diff(width_slice).mean() - x_left_padding = [x[0] - mean_width, x[0] - ] if do_pad_left else [x[1] - mean_width] - x_right_padding = [x[-1], x[-1] + mean_width] if do_pad_right else [x[-2] + mean_width] + if is_left_inf: + x[0] = x[1] - mean_width + if is_right_inf: + x[-1] = x[-2] + mean_width + return x + + +def add_missing_vals(x, expected_xs, y_values=[], fill_val=0): + """ + Check from a list of expected x values, if all occur in x. If any are missing + """ + raise NotImplementedError() + - x = np.concatenate((x_left_padding, x[1:-1], x_right_padding)) - new_values = [] - for y in y_values: - y_left_padding = [fill_val, y[1]] if do_pad_left else [fill_val] - y_right_padding = [y[-2], fill_val] if do_pad_right else [fill_val] - y[np.isnan(y)] = fill_val - y = np.concatenate((y_left_padding, y[1:-1], y_right_padding)) - new_values.append(y) +def pad_ends(x, y_values=[], fill_val=0): + """ + Insert a dummy entry to X and Y for all arrays + """ + mean_width = x[0] + if len(x) > 1: + mean_width = np.diff(x).mean() - return (x,) + tuple(new_values) + x = np.concatenate((x[0:1] - mean_width, x, x[-1:] + mean_width), axis=0) + new_values = [np.concatenate(([fill_val], y, [fill_val]), axis=0) for y in y_values] + return x, tuple(new_values) def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", @@ -365,7 +393,7 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]): def draw(ax, method, x, ys, **kwargs): fill_val = kwargs.pop("fill_val", 0) if x.dtype.kind in 'biufc': - values = normalize_values(x, [kwargs[y] for y in ys], fill_val=fill_val) + values = standardize_values(x, [kwargs[y] for y in ys], fill_val=fill_val) x = values[0] new_ys = values[1:] kwargs.update(dict(zip(ys, new_ys))) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index a75aebc..e6b118a 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -2,54 +2,67 @@ from fast_plotter import plotting +def test_replace_inf(): + x = np.arange(5) + replaced = plotting.replace_infs(x) + assert np.array_equal(replaced, x) + + x = np.concatenate(([-np.inf], x, [np.inf]), axis=0) + replaced = plotting.replace_infs(x) + assert np.array_equal(replaced, np.arange(-1, 6)) + + x = np.arange(2, 4) + replaced = plotting.replace_infs(x) + assert np.array_equal(replaced, np.arange(2, 4)) + + x = np.concatenate(([-np.inf], x), axis=0) + replaced = plotting.replace_infs(x) + assert np.array_equal(replaced, np.arange(1, 4)) + + def test_pad_zero_noYs(): x = np.arange(5) - padded = plotting.normalize_values(x) - assert (padded == np.arange(-1, 6)).all() + padded, = plotting.standardize_values(x) + assert np.array_equal(padded, np.arange(-1, 6)) x = np.concatenate(([-np.inf], x, [np.inf]), axis=0) - padded = plotting.normalize_values(x) - assert (padded == np.arange(-1, 6)).all() + padded, = plotting.standardize_values(x) + assert np.array_equal(padded, np.arange(-2, 7)) x = np.arange(2, 4) - padded = plotting.normalize_values(x) - assert (padded == np.arange(1, 5)).all() + padded, = plotting.standardize_values(x) + assert np.array_equal(padded, np.arange(1, 5)) x = np.concatenate(([-np.inf], x), axis=0) - padded = plotting.normalize_values(x) - assert (padded == np.arange(1, 5)).all() + padded, = plotting.standardize_values(x) + assert np.array_equal(padded, np.arange(0, 5, dtype=float)) -def test_pad_zero_oneY(): +def test_pad_zero_oneY(): x = np.arange(5) y = np.arange(5, 0, -1) - pad_x, pad_y = plotting.normalize_values(x, [y]) - assert (pad_x == np.arange(-1, 6)).all() expected_y = np.concatenate(([0], y, [0]), axis=0) + pad_x, pad_y = plotting.standardize_values(x, [y]) + assert np.array_equal(pad_x, np.arange(-1, 6)) assert np.array_equal(pad_y, expected_y) x = np.concatenate(([-np.inf], x, [np.inf]), axis=0) y = np.arange(len(x), 0, -1) - pad_x, pad_y = plotting.normalize_values(x, [y]) - assert (pad_x == np.arange(-1, 6)).all() - assert np.array_equal(pad_y, y) + expected_y = np.concatenate(([0], y, [0]), axis=0) + pad_x, pad_y = plotting.standardize_values(x, [y]) + assert np.array_equal(pad_x, np.arange(-2, 7)) + assert np.array_equal(pad_y, expected_y) x = np.arange(2, 4) y = np.arange(len(x), 0, -1) - pad_x, pad_y = plotting.normalize_values(x, y) - print(x, y) - print(pad_x, pad_y) - assert (pad_x == np.arange(1, 5)).all() expected_y = np.concatenate(([0], y, [0]), axis=0) + pad_x, pad_y = plotting.standardize_values(x, [y]) + assert np.array_equal(pad_x, np.arange(1, 5)) assert np.array_equal(pad_y, expected_y) x = np.concatenate(([-np.inf], x), axis=0) y = np.arange(len(x), 0, -1) - pad_x, pad_y = plotting.normalize_values(x, y) - print(x, y) - print(pad_x, pad_y) - assert (pad_x == np.arange(1, 5)).all() - expected_y = np.concatenate((y, [0]), axis=0) + expected_y = np.concatenate(([0], y, [0]), axis=0) + pad_x, pad_y = plotting.standardize_values(x, [y]) + assert np.array_equal(pad_x, np.arange(0, 5)) assert np.array_equal(pad_y, expected_y) - - assert False From 956beb993c95e39cde8dc6f9ae33c0b89132b648 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Fri, 7 Feb 2020 15:49:44 +0100 Subject: [PATCH 06/28] Fix pep8 issues --- fast_plotter/__main__.py | 3 ++- fast_plotter/plotting.py | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index a9f152e..e552ff7 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -42,6 +42,7 @@ def arg_parser(args=None): help="Scale the MC yields by this lumi") parser.add_argument("-y", "--yscale", default="log", choices=["log", "linear"], help="Use this scale for the y-axis") + def split_equals(arg): return arg.split("=") parser.add_argument("-v", "--variable", dest="variables", action="append", default=[], type=split_equals, @@ -87,7 +88,6 @@ def recursive_replace(value, replacements): return {k: recursive_replace(v, replacements) for k, v in value.items()} if isinstance(value, six.string_types): return Template(value).safe_substitute(replacements) - #return value.format(**replacements) return value replacements = dict(args.variables) @@ -149,6 +149,7 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, elif lims.endswith("%"): main_ax.margins(**{axis: float(lims[:-1])}) + def save_plots(infile, weight, plots, outdir, extensions): binning, name = decipher_filename(infile) kernel = "plot_" + ".".join(binning) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index b59aa47..5912c2b 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -188,9 +188,11 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", def standardize_values(x, y_values=[], fill_val=0, expected_xs=None): """ Standardize a set of arrays so they're ready to be plotted directly for matplotlib + + Algorithm: + if any requested X values are missing: + insert dummy values into X and Y values at the right location """ - # if any requested X values are missing: - # insert dummy values into X and Y values at the right location if expected_xs is not None: x, y_values = add_missing_vals(x, expected_xs, y_values=y_values, fill_val=fill_val) @@ -208,7 +210,7 @@ def replace_infs(x): Algorithm: X has +/- inf at an end, replace this X value with +/- the previous/next value of X +/- the mean width in X """ - x = x[:] # Make a copy of the array + x = x[:] # Make a copy of the array is_left_inf = np.isneginf(x[0]) is_right_inf = np.isposinf(x[-1]) width_slice = x[1 if is_left_inf else None:-1 if is_right_inf else None] @@ -224,7 +226,7 @@ def replace_infs(x): def add_missing_vals(x, expected_xs, y_values=[], fill_val=0): """ - Check from a list of expected x values, if all occur in x. If any are missing + Check from a list of expected x values, if all occur in x. If any are missing """ raise NotImplementedError() From b2852f3c2ebcf8efb6f129c0edb4d3594761f153 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sat, 8 Feb 2020 00:54:29 +0100 Subject: [PATCH 07/28] Resolve warnings about SettingWithCopyWarning from pandas --- fast_plotter/__main__.py | 4 +++- fast_plotter/utils.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index e552ff7..9647ee4 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -115,7 +115,9 @@ def process_one_file(infile, args): regex=args.data, level=args.dataset_col) for col in df_filtered.columns: - df_filtered[col][data_rows] = df["n"][data_rows] + if col == "n": + continue + df_filtered.loc[data_rows, col] = df["n"][data_rows] df_filtered.columns = [ n.replace(weight + ":", "") for n in df_filtered.columns] if hasattr(args, "value_replacements"): diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py index db1e717..21186b1 100644 --- a/fast_plotter/utils.py +++ b/fast_plotter/utils.py @@ -97,8 +97,9 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r for column in df: if do_rel_err and column.endswith("sumw"): err_name = column.replace("sumw", err_label) - df[err_name] = np.true_divide(df[column], root_n) - df[err_name][~np.isfinite(df[err_name])] = np.nan + errs = np.true_divide(df[column], root_n) + errs = np.nan_to_num(errs, copy=False, nan=np.nan, posinf=np.nan, neginf=np.nan) + df[err_name] = errs elif not do_rel_err and sumw2_label in column: err_name = column.replace(sumw2_label, err_label) df[err_name] = np.sqrt(df[column]) From dc4311211ec0b66151a8c002aa9b835cfd383a8a Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sat, 8 Feb 2020 00:56:25 +0100 Subject: [PATCH 08/28] Resolve warning from YAML about safe_load --- fast_plotter/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 9647ee4..5cc467f 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -73,7 +73,7 @@ def process_cfg(cfg_file, args): from argparse import Namespace from string import Template with open(cfg_file, "r") as infile: - cfg = yaml.load(infile) + cfg = yaml.safe_load(infile) # Only way to neatly allow cmd-line args to override config and handle # defaults seems to be: parser = arg_parser() From a1a35d5e5f20a446b13cc9849dcbd0395a2e11fc Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sat, 8 Feb 2020 01:14:27 +0100 Subject: [PATCH 09/28] Don't use nan_to_num --> signature only valid in numpy 1.17 --- fast_plotter/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py index 21186b1..223ba68 100644 --- a/fast_plotter/utils.py +++ b/fast_plotter/utils.py @@ -98,7 +98,7 @@ def calculate_error(df, sumw2_label="sumw2", err_label="err", inplace=True, do_r if do_rel_err and column.endswith("sumw"): err_name = column.replace("sumw", err_label) errs = np.true_divide(df[column], root_n) - errs = np.nan_to_num(errs, copy=False, nan=np.nan, posinf=np.nan, neginf=np.nan) + errs[~np.isfinite(errs)] = np.nan df[err_name] = errs elif not do_rel_err and sumw2_label in column: err_name = column.replace(sumw2_label, err_label) From c11814c20bdff12f3cca9787a5cffad25625d9b9 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sat, 8 Feb 2020 01:31:53 +0100 Subject: [PATCH 10/28] Add function to insert missing values --- fast_plotter/plotting.py | 8 +++++++- tests/test_plotting.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 5912c2b..ce3639f 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -228,7 +228,13 @@ def add_missing_vals(x, expected_xs, y_values=[], fill_val=0): """ Check from a list of expected x values, if all occur in x. If any are missing """ - raise NotImplementedError() + insert = np.isin(expected_xs, x) + new_ys = [] + for y in y_values: + new = np.full_like(expected_xs, fill_val) + new[insert] = y + new_ys.append(new) + return expected_xs[:], new_ys def pad_ends(x, y_values=[], fill_val=0): diff --git a/tests/test_plotting.py b/tests/test_plotting.py index e6b118a..ce51805 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -66,3 +66,15 @@ def test_pad_zero_oneY(): pad_x, pad_y = plotting.standardize_values(x, [y]) assert np.array_equal(pad_x, np.arange(0, 5)) assert np.array_equal(pad_y, expected_y) + + +def test_add_missing_vals(): + x = np.arange(3)*2 + expected = np.arange(7) + outx, _ = plotting.add_missing_vals(x, expected) + assert np.array_equal(outx, expected) + + y = np.arange(3)[::-1] + 1 + outx, outy = plotting.add_missing_vals(x, expected, y_values=[y]) + assert np.array_equal(outx, expected) + assert np.array_equal(outy[0], [3, 0, 2, 0, 1, 0, 0]) From c6b9a14e66503dc40a3091d238183a031e99b429 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sat, 8 Feb 2020 02:02:05 +0100 Subject: [PATCH 11/28] Fix issue with error band on sims not completing --- fast_plotter/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index ce3639f..84952ab 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -173,7 +173,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", elif kind == "fill-error-last": actually_plot(df, x_axis, y, yerr, "fill", label, ax, dataset_col=dataset_col, colourmap=colourmap, dataset_order=dataset_order) - summed = df.unstack(dataset_col) + summed = df.unstack(dataset_col).fillna(method="ffill", axis="columns") last_dataset = summed.columns.get_level_values(1)[n_datasets - 1] summed = summed.xs(last_dataset, level=1, axis="columns") x = summed.index.values From 35fa2366c76a8de6ff0a16ea943fedc119d32871 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sat, 8 Feb 2020 02:16:48 +0100 Subject: [PATCH 12/28] Fix partially filled steps --- fast_plotter/plotting.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 84952ab..fed3f3a 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -71,8 +71,9 @@ def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dat class FillColl(object): def __init__(self, n_colors=10, ax=None, fill=True, line=True, - colourmap="nipy_spectral", dataset_order=None, linewidth=0.5): + colourmap="nipy_spectral", dataset_order=None, linewidth=0.5, expected_xs=None): self.calls = 0 + self.expected_xs = expected_xs self.dataset_order = {} if dataset_order is not None: @@ -132,7 +133,7 @@ def __call__(self, col, **kwargs): label = col.name width = 2 style = "--" - draw(ax, "step", x=x, ys=["y"], y=y, + draw(ax, "step", x=x, ys=["y"], y=y, expected_xs=self.expected_xs, color=color, linewidth=width, where="mid", label=label, linestyle=style) self.calls += 1 @@ -155,21 +156,27 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", df.reset_index().plot.scatter(x=x_axis, y=y, yerr=yerr, color="k", label=label, ax=ax, s=13) return + expected_xs = df.index.unique(x_axis) if dataset_order is not None: input_datasets = df.index.unique(dataset_col) dataset_order = dataset_order + [d for d in input_datasets if d not in dataset_order] n_datasets = df.groupby(level=dataset_col).count() n_datasets = len(n_datasets[n_datasets != 0]) + + vals = df[y].unstack(dataset_col).fillna(method="ffill", axis="columns") if kind == "line": - filler = FillColl(n_datasets, ax=ax, fill=False, colourmap=colourmap, dataset_order=dataset_order) - df[y].unstack(dataset_col).iloc[:, ::-1].apply(filler, axis=0, step="mid") + filler = FillColl(n_datasets, ax=ax, fill=False, colourmap=colourmap, + dataset_order=dataset_order, expected_xs=expected_xs) + vals.apply(filler, axis=0, step="mid") return elif kind == "bar": - filler = BarColl(n_datasets, ax=ax, colourmap=colourmap, dataset_order=dataset_order) - df[y].unstack(dataset_col).iloc[:, ::-1].apply(filler, axis=0, step="mid") + filler = BarColl(n_datasets, ax=ax, colourmap=colourmap, + dataset_order=dataset_order, expected_xs=expected_xs) + vals.apply(filler, axis=0, step="mid") elif kind == "fill": - filler = FillColl(n_datasets, ax=ax, colourmap=colourmap, dataset_order=dataset_order, line=False) - df[y].unstack(dataset_col).iloc[:, ::-1].apply(filler, axis=0, step="mid") + filler = FillColl(n_datasets, ax=ax, colourmap=colourmap, dataset_order=dataset_order, + line=False, expected_xs=expected_xs) + vals.iloc[:, ::-1].apply(filler, axis=0, step="mid") elif kind == "fill-error-last": actually_plot(df, x_axis, y, yerr, "fill", label, ax, dataset_col=dataset_col, colourmap=colourmap, dataset_order=dataset_order) @@ -180,7 +187,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", y_down = (summed[y] - summed[yerr]).values y_up = (summed[y] + summed[yerr]).values draw(ax, "fill_between", x, ys=["y1", "y2"], y2=y_down, y1=y_up, - color="gray", step="mid", alpha=0.7) + color="gray", step="mid", alpha=0.7, expected_xs=expected_xs) else: raise RuntimeError("Unknown value for 'kind', '{}'".format(kind)) @@ -400,8 +407,10 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]): def draw(ax, method, x, ys, **kwargs): fill_val = kwargs.pop("fill_val", 0) + expected_xs = kwargs.pop("expected_xs", None) if x.dtype.kind in 'biufc': - values = standardize_values(x, [kwargs[y] for y in ys], fill_val=fill_val) + values = standardize_values(x, [kwargs[y] for y in ys], + fill_val=fill_val, expected_xs=expected_xs) x = values[0] new_ys = values[1:] kwargs.update(dict(zip(ys, new_ys))) From 6a80e6b6a8967b4b20e95c1b9bc64faf6f30770b Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sat, 8 Feb 2020 02:17:20 +0100 Subject: [PATCH 13/28] pep8 --- tests/test_plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index ce51805..586723d 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -69,7 +69,7 @@ def test_pad_zero_oneY(): def test_add_missing_vals(): - x = np.arange(3)*2 + x = np.arange(3) * 2 expected = np.arange(7) outx, _ = plotting.add_missing_vals(x, expected) assert np.array_equal(outx, expected) From 08604dc51ffdcdbb67284d2df2c34d86856dcdb3 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sat, 8 Feb 2020 20:34:09 +0100 Subject: [PATCH 14/28] Add test to catch bad dtype error --- tests/test_plotting.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 586723d..1d599e5 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -78,3 +78,14 @@ def test_add_missing_vals(): outx, outy = plotting.add_missing_vals(x, expected, y_values=[y]) assert np.array_equal(outx, expected) assert np.array_equal(outy[0], [3, 0, 2, 0, 1, 0, 0]) + assert outy[0].dtype == y.dtype + + x = np.logspace(0, 10, 11) + expected = np.logspace(0, 10, 21) + y = np.linspace(1, 100, 11) + outx, outy = plotting.add_missing_vals(x, expected, y_values=[y]) + assert np.array_equal(outx, expected) + assert np.array_equal(outy[0][::2], y) + assert all(outy[0][1::2] == 0) + assert outy[0].dtype == y.dtype + assert outx.dtype == expected.dtype From 55ef1bce558869e1510d23545db004fefad8dc99 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sun, 9 Feb 2020 10:53:28 +0100 Subject: [PATCH 15/28] Improve test so it actually catches the issue --- tests/test_plotting.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 1d599e5..23b0849 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -80,12 +80,14 @@ def test_add_missing_vals(): assert np.array_equal(outy[0], [3, 0, 2, 0, 1, 0, 0]) assert outy[0].dtype == y.dtype - x = np.logspace(0, 10, 11) - expected = np.logspace(0, 10, 21) - y = np.linspace(1, 100, 11) - outx, outy = plotting.add_missing_vals(x, expected, y_values=[y]) + x = np.logspace(0, 10, 11, dtype=int) + expected = np.zeros(22, dtype=int) + expected[0::2] = x + expected[1::2] = x / 2 + y = np.linspace(1, 3, 11) + outx, (outy,) = plotting.add_missing_vals(x, expected, y_values=[y], fill_val=0) assert np.array_equal(outx, expected) - assert np.array_equal(outy[0][::2], y) - assert all(outy[0][1::2] == 0) - assert outy[0].dtype == y.dtype + assert np.array_equal(outy[::2], y) + assert all(outy[1::2] == 0) + assert outy.dtype == y.dtype assert outx.dtype == expected.dtype From 7b0a8b62de1137f4209626c01cc67232463b0ee5 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sun, 9 Feb 2020 10:53:47 +0100 Subject: [PATCH 16/28] Fix bug with ints being created instead of matching the input dtype --- fast_plotter/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index fed3f3a..3f73a23 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -238,7 +238,7 @@ def add_missing_vals(x, expected_xs, y_values=[], fill_val=0): insert = np.isin(expected_xs, x) new_ys = [] for y in y_values: - new = np.full_like(expected_xs, fill_val) + new = np.full_like(expected_xs, fill_val, dtype=y.dtype) new[insert] = y new_ys.append(new) return expected_xs[:], new_ys From eed29ecefaa351c773adc0a48bb42ca11cdb6773 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sun, 9 Feb 2020 13:13:02 +0100 Subject: [PATCH 17/28] Add code and functions to remove under/overflow bins --- fast_plotter/plotting.py | 4 +++- fast_plotter/utils.py | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 3f73a23..07c77d3 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -261,7 +261,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", plot_sims="stack", plot_data="sum", plot_signal=None, kind_data="scatter", kind_sims="fill-error-last", kind_signal="line", scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral", - dataset_order=None, figsize=(5, 6), **kwargs): + dataset_order=None, figsize=(5, 6), no_over_underflow=True, **kwargs): y = "sumw" yvar = "sumw2" yerr = "err" @@ -271,6 +271,8 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", yerr = prefix + ":" + yerr df = utils.convert_intervals(df, to="mid") + if no_over_underflow: + df = utils.drop_over_underflow(df) in_df_data, in_df_sims = utils.split_data_sims( df, data_labels=data, dataset_level=dataset_col) if scale_sims is not None: diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py index 223ba68..9d8103d 100644 --- a/fast_plotter/utils.py +++ b/fast_plotter/utils.py @@ -166,3 +166,10 @@ def rename_index(df, name_replacements): return df df.index.names = [name_replacements.get(n, n) for n in df.index.names] return df + + +def drop_over_underflow(df): + index = df.index.to_frame() + index = index.select_dtypes(exclude=['object']) + good_rows = np.isfinite(index).all(axis=1) + return df.loc[good_rows] From cf58a09e18df552d3f6dd56bed810d2eae243cab Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sun, 9 Feb 2020 13:40:42 +0100 Subject: [PATCH 18/28] Add unit test for new drop_over_underflow method --- tests/test_utils.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index 14721d2..08afe80 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -53,3 +53,36 @@ def test_read_binned_df(): assert df.index.names[1] == "njet" assert set(df.index.unique(level='dataset')) == set(("data", "mc_1", "mc_2")) assert len(df) == 12 + + +def test_drop_over_underflow(): + x1 = np.concatenate(([-np.inf], np.linspace(0, 100, 3), [np.inf]), axis=0) + x2 = ["one", "TWO", "3"] + x3 = [10, 11, 20] + + def build_df(*indices): + index = pd.MultiIndex.from_product(indices) + df = pd.DataFrame({"A": np.arange(len(index))}, index=index) + return df + + df = build_df(x1) + cleaned = utils.drop_over_underflow(df) + assert len(cleaned) == 3 + assert np.array_equal(cleaned.A, np.arange(1, 4)) + + df = build_df(x2) + cleaned = utils.drop_over_underflow(df) + assert len(cleaned) == 3 + assert np.array_equal(cleaned.A, np.arange(0, 3)) + + df = build_df(x3) + cleaned = utils.drop_over_underflow(df) + assert len(cleaned) == 3 + assert np.array_equal(cleaned.A, np.arange(0, 3)) + + df = build_df(x2, x3, x1) + cleaned = utils.drop_over_underflow(df) + assert len(cleaned) == 27 + expected = [(i + 1, i + 4) for i in range(0, 5 * 3 * 3, 5)] + expected = np.concatenate([np.arange(i, j) for i, j in expected], axis=0) + assert np.array_equal(cleaned.A, expected) From 484106929c9a5cadac372ce7572c7b025d52c956 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sun, 9 Feb 2020 14:00:15 +0100 Subject: [PATCH 19/28] Fix issue with pandas index being passed in instead of numpy arrays --- fast_plotter/plotting.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 07c77d3..39c165d 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -2,6 +2,7 @@ from . import statistics as stats import traceback import numpy as np +import pandas as pd import matplotlib.pyplot as plt import matplotlib.colors as mc import logging @@ -241,7 +242,11 @@ def add_missing_vals(x, expected_xs, y_values=[], fill_val=0): new = np.full_like(expected_xs, fill_val, dtype=y.dtype) new[insert] = y new_ys.append(new) - return expected_xs[:], new_ys + if isinstance(expected_xs, (pd.Index, pd.MultiIndex)): + new_x = expected_xs.values + else: + new_x = expected_xs[:] + return new_x, new_ys def pad_ends(x, y_values=[], fill_val=0): From 51baa596852e5120d26e8e119432417c7e01337e Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Wed, 12 Feb 2020 11:43:18 +0100 Subject: [PATCH 20/28] Add ability to control colours for lines directly --- fast_plotter/plotting.py | 87 +++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 36 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 39c165d..137694b 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -70,51 +70,61 @@ def plot_all(df, project_1d=True, project_2d=True, data="data", signal=None, dat return figures, ran_ok +class ColorDict(): + def __init__(self, order=None, named=None, n_colors=10, cmap="nipy_spectral", cmap_start=0.96, cmap_stop=0.2): + self.order = {} + if order is not None: + self.order = {n: i for i, n in enumerate(order)} + n_colors = max(n_colors, len(order)) + + if isinstance(cmap, str): + colmap_def = plt.get_cmap(cmap) + n_colors = max(colmap_def.N, n_colors) if colmap_def.N < 256 else n_colors + elif isinstance(cmap, dict): + colmap_def = plt.get_cmap(cmap.get("map")) + n_colors = cmap.get("n_colors", n_colors) + cmap_start = cmap.get("colour_start", cmap_start) + cmap_stop = cmap.get("colour_stop", cmap_stop) + + self.defaults = [colmap_def(i) for i in np.linspace(cmap_start, cmap_stop, n_colors)] + self.named = named if named is not None else {} + + def get_colour(self, index=None, name=None): + if index is None and name is None: + raise RuntimeError("'Index' and 'name' cannot both be None") + + if name in self.named: + return self.named[name] + + if name in self.order: + return self.defaults[self.order[name]] + + if index is None: + raise RuntimeError("'index' was not provided and we got an unknown named object '%s'" % name) + + return self.defaults[index] + + class FillColl(object): - def __init__(self, n_colors=10, ax=None, fill=True, line=True, + def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=None, colourmap="nipy_spectral", dataset_order=None, linewidth=0.5, expected_xs=None): self.calls = 0 self.expected_xs = expected_xs - - self.dataset_order = {} - if dataset_order is not None: - self.dataset_order = {n: i for i, n in enumerate(dataset_order)} - n_colors = max(n_colors, len(dataset_order)) - - colour_start = 0.96 - colour_stop = 0.2 - # darken = None - if isinstance(colourmap, str): - colmap_def = plt.get_cmap(colourmap) - n_colors = max(colmap_def.N, n_colors) if colmap_def.N < 256 else n_colors - elif isinstance(colourmap, dict): - colmap_def = plt.get_cmap(colourmap.get("map")) - n_colors = colourmap.get("n_colors", n_colors) - colour_start = colourmap.get("colour_start", colour_start) - colour_stop = colourmap.get("colour_stop", colour_stop) - if not fill: - # colmap_def = plt.get_cmap("Pastel1") - # darken = 0.02 - pass - - self.colors = [colmap_def(i) - for i in np.linspace(colour_start, colour_stop, n_colors)] + self.colors = ColorDict(n_colors=n_colors, order=dataset_order, + named=dataset_colours, cmap=colourmap) self.ax = ax self.fill = fill self.line = line self.linewidth = linewidth - def pre_call(self, col): + def pre_call(self, column): ax = self.ax if not ax: ax = plt.gca() - index = self.calls - if self.dataset_order: - index = self.dataset_order.get(col.name, index) - color = self.colors[index] - x = col.index.values - y = col.values + color = self.colors.get_colour(index=self.calls, name=column.name) + x = column.index.values + y = column.values return ax, x, y, color def __call__(self, col, **kwargs): @@ -152,7 +162,7 @@ def __call__(self, col, **kwargs): def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", - colourmap="nipy_spectral", dataset_order=None): + dataset_colours=None, colourmap="nipy_spectral", dataset_order=None): if kind == "scatter": df.reset_index().plot.scatter(x=x_axis, y=y, yerr=yerr, color="k", label=label, ax=ax, s=13) @@ -167,6 +177,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", vals = df[y].unstack(dataset_col).fillna(method="ffill", axis="columns") if kind == "line": filler = FillColl(n_datasets, ax=ax, fill=False, colourmap=colourmap, + dataset_colours=dataset_colours, dataset_order=dataset_order, expected_xs=expected_xs) vals.apply(filler, axis=0, step="mid") return @@ -175,11 +186,13 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", dataset_order=dataset_order, expected_xs=expected_xs) vals.apply(filler, axis=0, step="mid") elif kind == "fill": - filler = FillColl(n_datasets, ax=ax, colourmap=colourmap, dataset_order=dataset_order, + filler = FillColl(n_datasets, ax=ax, colourmap=colourmap, + dataset_colours=dataset_colours, + dataset_order=dataset_order, line=False, expected_xs=expected_xs) vals.iloc[:, ::-1].apply(filler, axis=0, step="mid") elif kind == "fill-error-last": - actually_plot(df, x_axis, y, yerr, "fill", label, ax, + actually_plot(df, x_axis, y, yerr, "fill", label, ax, dataset_colours=dataset_colours, dataset_col=dataset_col, colourmap=colourmap, dataset_order=dataset_order) summed = df.unstack(dataset_col).fillna(method="ffill", axis="columns") last_dataset = summed.columns.get_level_values(1)[n_datasets - 1] @@ -266,7 +279,8 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", plot_sims="stack", plot_data="sum", plot_signal=None, kind_data="scatter", kind_sims="fill-error-last", kind_signal="line", scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral", - dataset_order=None, figsize=(5, 6), no_over_underflow=True, **kwargs): + dataset_order=None, figsize=(5, 6), no_over_underflow=True, + dataset_colours=None, **kwargs): y = "sumw" yvar = "sumw2" yerr = "err" @@ -317,6 +331,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", merged = _merge_datasets(df, combine, dataset_col, param_name=var_name) actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style, label=label, ax=main_ax, dataset_col=dataset_col, + dataset_colours=dataset_colours, colourmap=colourmap, dataset_order=dataset_order) main_ax.set_xlabel(x_axis) From 5231cbed952ed9c50f9dd2276b7eebb894b972bd Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sat, 15 Feb 2020 00:43:22 +0100 Subject: [PATCH 21/28] Fix issue where expected_xs was also modified --- fast_plotter/plotting.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 137694b..efd4e3f 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -108,7 +108,7 @@ def get_colour(self, index=None, name=None): class FillColl(object): def __init__(self, n_colors=10, ax=None, fill=True, line=True, dataset_colours=None, colourmap="nipy_spectral", dataset_order=None, linewidth=0.5, expected_xs=None): - self.calls = 0 + self.calls = -1 self.expected_xs = expected_xs self.colors = ColorDict(n_colors=n_colors, order=dataset_order, named=dataset_colours, cmap=colourmap) @@ -131,7 +131,7 @@ def __call__(self, col, **kwargs): ax, x, y, color = self.pre_call(col) if self.fill: draw(ax, "fill_between", x=x, ys=["y1"], - y1=y, label=col.name, + y1=y, label=col.name, expected_xs=self.expected_xs, linewidth=0, color=color, **kwargs) if self.line: if self.fill: @@ -167,7 +167,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", df.reset_index().plot.scatter(x=x_axis, y=y, yerr=yerr, color="k", label=label, ax=ax, s=13) return - expected_xs = df.index.unique(x_axis) + expected_xs = df.index.unique(x_axis).values if dataset_order is not None: input_datasets = df.index.unique(dataset_col) dataset_order = dataset_order + [d for d in input_datasets if d not in dataset_order] @@ -258,7 +258,7 @@ def add_missing_vals(x, expected_xs, y_values=[], fill_val=0): if isinstance(expected_xs, (pd.Index, pd.MultiIndex)): new_x = expected_xs.values else: - new_x = expected_xs[:] + new_x = expected_xs.copy() return new_x, new_ys From 3e6d2321eec037b1c49bb1f43e6b3910e8aae4fd Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sat, 15 Feb 2020 00:48:20 +0100 Subject: [PATCH 22/28] Rename option for hiding overflows --- fast_plotter/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index efd4e3f..ddfa9ec 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -279,7 +279,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", plot_sims="stack", plot_data="sum", plot_signal=None, kind_data="scatter", kind_sims="fill-error-last", kind_signal="line", scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral", - dataset_order=None, figsize=(5, 6), no_over_underflow=True, + dataset_order=None, figsize=(5, 6), show_over_underflow=False, dataset_colours=None, **kwargs): y = "sumw" yvar = "sumw2" @@ -290,7 +290,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", yerr = prefix + ":" + yerr df = utils.convert_intervals(df, to="mid") - if no_over_underflow: + if not show_over_underflow: df = utils.drop_over_underflow(df) in_df_data, in_df_sims = utils.split_data_sims( df, data_labels=data, dataset_level=dataset_col) From 73431868b0e2c73b434452ec0ebd0e034eb68fdb Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Sat, 15 Feb 2020 00:58:01 +0100 Subject: [PATCH 23/28] Add option to control errors --- fast_plotter/plotting.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index ddfa9ec..d97a606 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -280,7 +280,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", kind_data="scatter", kind_sims="fill-error-last", kind_signal="line", scale_sims=None, summary="ratio-error-both", colourmap="nipy_spectral", dataset_order=None, figsize=(5, 6), show_over_underflow=False, - dataset_colours=None, **kwargs): + dataset_colours=None, err_from_sumw2=False, **kwargs): y = "sumw" yvar = "sumw2" yerr = "err" @@ -328,7 +328,7 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", for df, combine, style, label, var_name in config: if df is None or len(df) == 0: continue - merged = _merge_datasets(df, combine, dataset_col, param_name=var_name) + merged = _merge_datasets(df, combine, dataset_col, param_name=var_name, err_from_sumw2=err_from_sumw2) actually_plot(merged, x_axis=x_axis, y=y, yerr=yerr, kind=style, label=label, ax=main_ax, dataset_col=dataset_col, dataset_colours=dataset_colours, @@ -343,9 +343,9 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", if summary.startswith("ratio"): main_ax.set_xlabel("") summed_data = _merge_datasets( - in_df_data, "sum", dataset_col=dataset_col) + in_df_data, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2) summed_sims = _merge_datasets( - in_df_sims, "sum", dataset_col=dataset_col) + in_df_sims, "sum", dataset_col=dataset_col, err_from_sumw2=err_from_sumw2) if summary == "ratio-error-both": error = "both" elif summary == "ratio-error-markers": @@ -361,13 +361,13 @@ def plot_1d_many(df, prefix="", data="data", signal=None, dataset_col="dataset", return main_ax, summary_ax -def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets"): +def _merge_datasets(df, style, dataset_col, param_name="_merge_datasets", err_from_sumw2=False): if style == "stack": - utils.calculate_error(df) + utils.calculate_error(df, do_rel_err=not err_from_sumw2) df = utils.stack_datasets(df, dataset_level=dataset_col) elif style == "sum": df = utils.sum_over_datasets(df, dataset_level=dataset_col) - utils.calculate_error(df) + utils.calculate_error(df, do_rel_err=not err_from_sumw2) elif style: msg = "'{}' must be either 'sum', 'stack' or None. Got {}" raise RuntimeError(msg.format(param_name, style)) From 4fbe32d537635a4f0f5d9f14513c7444b9e18a19 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Wed, 26 Feb 2020 09:50:03 +0100 Subject: [PATCH 24/28] Add fixes for pandas 1.0.0 --- fast_plotter/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fast_plotter/utils.py b/fast_plotter/utils.py index 9d8103d..f8c4775 100644 --- a/fast_plotter/utils.py +++ b/fast_plotter/utils.py @@ -75,6 +75,8 @@ def split_df(df, first_values, level=0): if isinstance(first_values, six.string_types): regex = re.compile(first_values) first_values = [val for val in df.index.unique(level) if regex.match(val)] + if not first_values: + return None, df second = df.drop(first_values, level=level) second_values = second.index.unique(level=level) first = df.drop(second_values, level=level) @@ -162,7 +164,7 @@ def order_datasets(df, dataset_order, dataset_level="dataset", values="sumw"): def rename_index(df, name_replacements): - if not isinstance(df.index, pd.core.index.MultiIndex): + if not isinstance(df.index, pd.MultiIndex): return df df.index.names = [name_replacements.get(n, n) for n in df.index.names] return df From 81735d0ea568b4ae112020e37cfc2a45f0f59872 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Wed, 26 Feb 2020 09:59:40 +0100 Subject: [PATCH 25/28] Plot data point for under/overflow bins --- fast_plotter/plotting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index d97a606..cbe2e4d 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -163,11 +163,11 @@ def __call__(self, col, **kwargs): def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", dataset_colours=None, colourmap="nipy_spectral", dataset_order=None): + expected_xs = df.index.unique(x_axis).values if kind == "scatter": - df.reset_index().plot.scatter(x=x_axis, y=y, yerr=yerr, - color="k", label=label, ax=ax, s=13) + draw(ax, "errorbar", x=df.reset_index()[x_axis], ys=["y", "yerr"], y=df[y], yerr=df[yerr], + color="k", ms=3.5, fmt="o", label=label, expected_xs=expected_xs) return - expected_xs = df.index.unique(x_axis).values if dataset_order is not None: input_datasets = df.index.unique(dataset_col) dataset_order = dataset_order + [d for d in input_datasets if d not in dataset_order] From bbd8ad0874f6f3f1070c95950e8269d39ed75682 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Wed, 26 Feb 2020 10:20:04 +0100 Subject: [PATCH 26/28] Fix ratio plots to include under/overflow --- fast_plotter/plotting.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index cbe2e4d..697714e 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -166,7 +166,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", expected_xs = df.index.unique(x_axis).values if kind == "scatter": draw(ax, "errorbar", x=df.reset_index()[x_axis], ys=["y", "yerr"], y=df[y], yerr=df[yerr], - color="k", ms=3.5, fmt="o", label=label, expected_xs=expected_xs) + color="k", ms=3.5, fmt="o", label=label, expected_xs=expected_xs, add_ends=False) return if dataset_order is not None: input_datasets = df.index.unique(dataset_col) @@ -206,7 +206,7 @@ def actually_plot(df, x_axis, y, yerr, kind, label, ax, dataset_col="dataset", raise RuntimeError("Unknown value for 'kind', '{}'".format(kind)) -def standardize_values(x, y_values=[], fill_val=0, expected_xs=None): +def standardize_values(x, y_values=[], fill_val=0, expected_xs=None, add_ends=True): """ Standardize a set of arrays so they're ready to be plotted directly for matplotlib @@ -220,7 +220,8 @@ def standardize_values(x, y_values=[], fill_val=0, expected_xs=None): if x.dtype.kind in 'bifc': x = replace_infs(x) - x, y_values = pad_ends(x, y_values=y_values, fill_val=fill_val) + if add_ends: + x, y_values = pad_ends(x, y_values=y_values, fill_val=fill_val) return (x,) + tuple(y_values) @@ -393,9 +394,8 @@ def plot_1d(df, kind="line", yscale="lin"): def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]): - # make sure both sides agree with the binning and drop all infinities + # make sure both sides agree with the binning merged = data.join(sims, how="left", lsuffix="data", rsuffix="sims") - merged.drop([np.inf, -np.inf], inplace=True, errors="ignore") data = merged.filter(like="data", axis="columns").fillna(0) data.columns = [col.replace("data", "") for col in data.columns] sims = merged.filter(like="sims", axis="columns") @@ -407,17 +407,21 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]): if error == "markers": central, lower, upper = stats.try_root_ratio_plot(d, d_err, s, s_err) + x_axis, central, lower, upper = standardize_values(x_axis, y_values=(central, lower, upper), add_ends=False) mask = (central != 0) & (lower != 0) ax.errorbar(x=x_axis[mask], y=central[mask], yerr=(lower[mask], upper[mask]), fmt="o", markersize=4, color="k") elif error == "both": - rel_d_err = (d_err / d) + ratio = d / s + rel_d_err = (d_err / s) rel_s_err = (s_err / s) - ax.errorbar(x=x_axis.values, y=d / s, yerr=rel_d_err, fmt="o", markersize=4, color="k") - draw(ax, "fill_between", x_axis.values, ys=["y1", "y2"], - y2=1 + rel_s_err.values, y1=1 - rel_s_err.values, fill_val=1, + vals = standardize_values(x_axis.values, y_values=[ratio, rel_s_err, rel_d_err], add_ends=False) + x_axis, ratio, rel_s_err, rel_d_err = vals + ax.errorbar(x=x_axis, y=ratio, yerr=rel_d_err, fmt="o", markersize=4, color="k") + draw(ax, "fill_between", x_axis, ys=["y1", "y2"], + y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1, color="gray", step="mid", alpha=0.7) ax.set_ylim(ylim) @@ -430,9 +434,12 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]): def draw(ax, method, x, ys, **kwargs): fill_val = kwargs.pop("fill_val", 0) expected_xs = kwargs.pop("expected_xs", None) + add_ends = kwargs.pop("add_ends", True) if x.dtype.kind in 'biufc': values = standardize_values(x, [kwargs[y] for y in ys], - fill_val=fill_val, expected_xs=expected_xs) + fill_val=fill_val, + add_ends=add_ends, + expected_xs=expected_xs) x = values[0] new_ys = values[1:] kwargs.update(dict(zip(ys, new_ys))) From 676ced917d50658cb612f4d7ccf1518dbe90dabc Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Wed, 26 Feb 2020 10:35:53 +0100 Subject: [PATCH 27/28] Update CHANGELOG --- CHANGELOG.md | 14 ++++++++++++++ fast_plotter/plotting.py | 1 + 2 files changed, 15 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 205e811..29389a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Changed +## [0.4.0] - 2020-02-26 +- Many changes from PR #26 [@benkrikler](github.com/benkrikler) + +### Fixed +- Bugs in the way overflow bins were handled and step-lines were drawn by padding. Impacted error bars as well as produced weird plotting artefacts. + +### Added +- Extend unit tests +- Variable interpolation within the config files and using variables which can be passed from the command-line +- Y-limits based on plot-margins: pass a float with a percent sign after to limit configs +- Control over the display of under and overflow bins from the config file +- Ability to give specific colours for individual bands in the plot +- Option to control how errors are calculated: sqrt of sumw2 or sumw / sqrt(n) + ## [0.3.0] - 2019-11-1 - Many changes from PR #13 [@benkrikler](github.com/benkrikler) ### Added diff --git a/fast_plotter/plotting.py b/fast_plotter/plotting.py index 697714e..27c9810 100644 --- a/fast_plotter/plotting.py +++ b/fast_plotter/plotting.py @@ -419,6 +419,7 @@ def plot_ratio(data, sims, x, y, yerr, ax, error="both", ylim=[0., 2]): vals = standardize_values(x_axis.values, y_values=[ratio, rel_s_err, rel_d_err], add_ends=False) x_axis, ratio, rel_s_err, rel_d_err = vals + ax.errorbar(x=x_axis, y=ratio, yerr=rel_d_err, fmt="o", markersize=4, color="k") draw(ax, "fill_between", x_axis, ys=["y1", "y2"], y2=1 + rel_s_err, y1=1 - rel_s_err, fill_val=1, From d909f917ecca5a33879be2c76751577d704aeec6 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Wed, 26 Feb 2020 10:36:15 +0100 Subject: [PATCH 28/28] =?UTF-8?q?Bump=20version:=200.3.0=20=E2=86=92=200.4?= =?UTF-8?q?.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fast_plotter/version.py | 2 +- setup.cfg | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fast_plotter/version.py b/fast_plotter/version.py index ca74866..6e06bfc 100644 --- a/fast_plotter/version.py +++ b/fast_plotter/version.py @@ -12,5 +12,5 @@ def split_version(version): return tuple(result) -__version__ = '0.3.0' +__version__ = '0.4.0' version_info = split_version(__version__) # noqa diff --git a/setup.cfg b/setup.cfg index d604e91..51367d7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.0 +current_version = 0.4.0 commit = True tag = False @@ -18,4 +18,3 @@ test = pytest [tool:pytest] collect_ignore = ['setup.py'] -