diff --git a/setup.py b/setup.py index 1de27c0d..ec5bc6d7 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ "uproot", "uproot4", "boost_histogram", + "hist", "scikit-hep-testdata", ], "dev": ["flake8", "jupyter", "bumpversion", "twine", "black", "pre-commit"], diff --git a/src/mplhep/plot.py b/src/mplhep/plot.py index 9a74ef0a..56b82c74 100644 --- a/src/mplhep/plot.py +++ b/src/mplhep/plot.py @@ -1,6 +1,7 @@ from __future__ import annotations import collections.abc +import inspect from collections import OrderedDict, namedtuple from typing import TYPE_CHECKING, Any, Union @@ -55,7 +56,7 @@ def soft_update_kwargs(kwargs, mods, rc=True): # Histogram plotter def histplot( H, # Histogram object, tuple or array - bins=None, # Bins to be supplied when h is a value array or iterable of arrays + bins=None, # Bins to be supplied when h is a value array or iterable of array *, yerr: ArrayLike | bool | None = None, w2=None, @@ -70,6 +71,7 @@ def histplot( edges=True, binticks=False, ax=None, + flow="hint", **kwargs, ): """ @@ -130,6 +132,8 @@ def histplot( Attempts to draw x-axis ticks coinciding with bin boundaries if feasible. ax : matplotlib.axes.Axes, optional Axes object (if None, last one is fetched or one is created) + flow : str, optional { "show", "sum", "hint", None} + Whether plot the under/overflow bin. If "show", add additional under/overflow bin. If "sum", add the under/overflow bin content to first/last bin. **kwargs : Keyword arguments passed to underlying matplotlib functions - {'step', 'fill_between', 'errorbar'}. @@ -138,6 +142,7 @@ def histplot( List[Hist1DArtists] """ + # ax check if ax is None: ax = plt.gca() @@ -167,11 +172,104 @@ def histplot( else get_histogram_axes_title(hists[0].axes[0]) ) - # Cast to plottables - plottables = [ - Plottable(h.values(), edges=final_bins, variances=h.variances()) for h in hists - ] + # Show under/overflow bins + # check underflow/overflow bin exist + + underflow, overflow = 0.0, 0.0 + for h in hists: + if ( + hasattr(h, "values") + and "flow" not in inspect.getfullargspec(h.values).args + and flow is not None + ): + continue + elif flow is None: + continue + elif ( + hasattr(h, "axes") + and hasattr(h.axes[0], "traits") + and hasattr(h.axes[0].traits, "underflow") + ): + if h.axes[0].traits.underflow: + underflow = underflow + h.values(flow=True)[0] + if h.axes[0].traits.overflow: + overflow = overflow + h.values(flow=True)[-1] + else: + underflow = underflow + h.values(flow=True)[0] + overflow = overflow + h.values(flow=True)[-1] + + # "show": Add additional bin with 5 times bin width + plottables = [] + flow_bins = final_bins + for i, h in enumerate(hists): + value, variance = h.values(), h.variances() + if ( + hasattr(h, "values") + and "flow" not in inspect.getfullargspec(h.values).args + and flow is not None + ): + if flow == "sum" or flow == "show": + print(f"Warning: {type(h)} is not allowed to get flow bins") + flow = None + plottables.append(Plottable(value, edges=final_bins, variances=variance)) + # check the original hist as flow bins + elif ( + hasattr(h, "axes") + and hasattr(h.axes[0], "traits") + and hasattr(h.axes[0].traits, "underflow") + and not h.axes[0].traits.underflow + and not h.axes[0].traits.overflow + ): + print(f"Warning: you don't have flow bins stored in {h}") + flow = None + plottables.append(Plottable(value, edges=final_bins, variances=variance)) + elif flow == "hint": + plottables.append(Plottable(value, edges=final_bins, variances=variance)) + elif flow == "show": + if underflow > 0: + if i == 0: + flow_bins = np.insert( + final_bins, + 0, + [ + final_bins[0] - (final_bins[-1] - final_bins[0]) * 0.08, + final_bins[0] - (final_bins[-1] - final_bins[0]) * 0.03, + ], + ) + value, variance = np.insert(value, 0, np.nan), np.insert( + variance, 0, np.nan + ) + value, variance = np.insert( + value, 0, h.values(flow=True)[0] + ), np.insert(value, 0, h.variances(flow=True)[0]) + if overflow > 0: + if i == 0: + flow_bins = np.append( + flow_bins, + [ + final_bins[-1] + (final_bins[-1] - final_bins[0]) * 0.03, + final_bins[-1] + (final_bins[-1] - final_bins[0]) * 0.08, + ], + ) + value, variance = np.append(value, np.nan), np.append(variance, np.nan) + value, variance = np.append(value, h.values(flow=True)[-1]), np.append( + variance, h.variances(flow=True)[-1] + ) + plottables.append(Plottable(value, edges=flow_bins, variances=variance)) + elif flow == "sum": + value, variance = h.values().copy(), h.variances().copy() + value[0], value[-1] = ( + value[0] + h.values(flow=True)[0], + value[-1] + h.values(flow=True)[-1], + ) + variance[0], variance[-1] = ( + variance[0] + h.variances(flow=True)[0], + variance[-1] + h.variances(flow=True)[-1], + ) + plottables.append(Plottable(value, edges=final_bins, variances=variance)) + else: + plottables.append(Plottable(value, edges=final_bins, variances=variance)) if w2 is not None: for _w2, _plottable in zip( w2.reshape(len(plottables), len(final_bins) - 1), plottables @@ -272,7 +370,7 @@ def iterable_not_string(arg): elif isinstance(sort, list) or isinstance(sort, np.ndarray): if len(sort) != len(plottables): raise ValueError( - f"Sort indexing arrays is of the wrong size - {len(sort)}, {len(plottables)} expected." + f"Sort indexing array is of the wrong size - {len(sort)}, {len(plottables)} expected." ) order = np.asarray(sort) else: @@ -397,12 +495,70 @@ def iterable_not_string(arg): if binticks: _slice = int(round(float(len(final_bins)) / len(ax.get_xticks()))) + 1 ax.set_xticks(final_bins[::_slice]) + elif flow == "show": + if binticks: + _slice = int(round(float(len(final_bins)) / len(ax.get_xticks()))) + 1 + ax.set_xticks(final_bins[::_slice]) else: ax.set_xticks(_bin_centers) ax.set_xticklabels(xtick_labels) if x_axes_label: ax.set_xlabel(x_axes_label) + if flow == "hint" or flow == "show": + d = 0.9 # proportion of vertical to horizontal extent of the slanted line + trans = mpl.transforms.blended_transform_factory(ax.transData, ax.transAxes) + ax_h = ax.bbox.height + kwargs = dict( + marker=[(-0.5, -d), (0.5, d)], + markersize=ax_h * 0.05, + linestyle="none", + color="k", + mec="k", + mew=1, + clip_on=False, + transform=trans, + ) + xticks = ax.get_xticks().tolist() + if underflow > 0.0: + if flow == "hint": + ax.plot( + [ + final_bins[0] - (final_bins[-3] - final_bins[2]) * 0.03, + final_bins[0], + ], + [0, 0], + **kwargs, + ) + if flow == "show": + ax.plot( + [flow_bins[1], flow_bins[2]], + [0, 0], + **kwargs, + ) + xticks[0] = "" + xticks[1] = f"<{flow_bins[2]}" + + ax.set_xticklabels(xticks) + if overflow > 0.0: + if flow == "hint": + ax.plot( + [ + final_bins[-1], + final_bins[-1] + (final_bins[-3] - final_bins[2]) * 0.03, + ], + [0, 0], + **kwargs, + ) + if flow == "show": + ax.plot( + [flow_bins[-3], flow_bins[-2]], + [0, 0], + **kwargs, + ) + xticks[-1] = "" + xticks[-2] = f">{flow_bins[-3]}" + ax.set_xticklabels(xticks) return return_artists @@ -420,6 +576,7 @@ def hist2dplot( cmin=None, cmax=None, ax=None, + flow="hint", **kwargs, ): """ @@ -460,6 +617,8 @@ def hist2dplot( Colorbar maximum. ax : matplotlib.axes.Axes, optional Axes object (if None, last one is fetched or one is created) + flow : str, optional {"show", "sum","hint", None} + Whether plot the under/overflow bin. If "show", add additional under/overflow bin. If "sum", add the under/overflow bin content to first/last bin. "hint" would highlight the bins with under/overflow contents **kwargs : Keyword arguments passed to underlying matplotlib function - pcolormesh. @@ -476,22 +635,104 @@ def hist2dplot( if not isinstance(ax, plt.Axes): raise ValueError("ax must be a matplotlib Axes object") - hist = hist_object_handler(H, xbins, ybins) + h = hist_object_handler(H, xbins, ybins) # TODO: use Histogram everywhere - H = hist.values() - xbins, xtick_labels = get_plottable_protocol_bins(hist.axes[0]) - ybins, ytick_labels = get_plottable_protocol_bins(hist.axes[1]) + + H = h.values() + xbins, xtick_labels = get_plottable_protocol_bins(h.axes[0]) + ybins, ytick_labels = get_plottable_protocol_bins(h.axes[1]) + # Show under/overflow bins + # "show": Add additional bin with 2 times bin width + if ( + hasattr(h, "values") + and "flow" not in inspect.getfullargspec(h.values).args + and flow is not None + ): + print( + f"Warning: {type(h)} is not allowed to get flow bins, flow bin option set to None" + ) + flow = None + elif ( + hasattr(h, "axes") + and hasattr(h.axes[0], "traits") + and hasattr(h.axes[0].traits, "underflow") + and not h.axes[0].traits.underflow + and not h.axes[0].traits.overflow + ): + flow = None + print(f"Warning: you don't have flow bins stored in {h}") + elif flow == "show": + H = h.values(flow=True) + if any(h.values(flow=True)[0] > 0): + xbins = np.array( + [ + xbins[0] - (xbins[-1] - xbins[0]) * 0.08, + xbins[0] - (xbins[-1] - xbins[0]) * 0.03, + *xbins, + ] + ) + if any(h.values(flow=True)[-1] > 0): + xbins = np.array( + [ + *xbins, + xbins[-1] + (xbins[-1] - xbins[0]) * 0.03, + xbins[-1] + (xbins[-1] - xbins[0]) * 0.08, + ] + ) + if any(h.values(flow=True)[:, 0] > 0): + ybins = np.array( + [ + ybins[0] - (ybins[-1] - ybins[0]) * 0.08, + ybins[0] - (ybins[-1] - ybins[0]) * 0.03, + *ybins, + ] + ) + if any(h.values(flow=True)[:, -1] > 0): + ybins = np.array( + [ + *ybins, + ybins[-1] + (ybins[-1] - ybins[0]) * 0.03, + ybins[-1] + (ybins[-1] - ybins[0]) * 0.08, + ] + ) + + if any(h.values(flow=True)[0] > 0.0): + H = np.insert(H, (1), np.nan, axis=-1) + if any(h.values(flow=True)[-1] > 0.0): + H = np.insert(H, (-1), np.nan, axis=-1) + if any(h.values(flow=True)[:, 0] > 0): + H = np.insert(H, (1), np.full(np.shape(H)[1], np.nan), axis=0) + if any(h.values(flow=True)[:, -1] > 0): + H = np.insert(H, (-1), np.full(np.shape(H)[1], np.nan), axis=0) + elif flow == "sum": + H = h.values().copy() + # Sum borders + H[0], H[-1] = ( + H[0] + h.values(flow=True)[0, 1:-1], + H[-1] + h.values(flow=True)[-1, 1:-1], + ) + H[:, 0], H[:, -1] = ( + H[:, 0] + h.values(flow=True)[1:-1, 0], + H[:, -1] + h.values(flow=True)[1:-1, -1], + ) + # Sum corners to corners + H[0, 0], H[-1, -1], H[0, -1], H[-1, 0] = ( + h.values(flow=True)[0, 0] + H[0, 0], + h.values(flow=True)[-1, -1] + H[-1, -1], + h.values(flow=True)[0, -1] + H[0, -1], + h.values(flow=True)[-1, 0] + H[-1, 0], + ) xbin_centers = xbins[1:] - np.diff(xbins) / float(2) ybin_centers = ybins[1:] - np.diff(ybins) / float(2) _x_axes_label = ax.get_xlabel() x_axes_label = ( - _x_axes_label if _x_axes_label != "" else get_histogram_axes_title(hist.axes[0]) + _x_axes_label if _x_axes_label != "" else get_histogram_axes_title(h.axes[0]) ) _y_axes_label = ax.get_ylabel() y_axes_label = ( - _y_axes_label if _y_axes_label != "" else get_histogram_axes_title(hist.axes[1]) + _y_axes_label if _y_axes_label != "" else get_histogram_axes_title(h.axes[1]) ) H = H.T @@ -504,7 +745,7 @@ def hist2dplot( X, Y = np.meshgrid(xbins, ybins) kwargs.setdefault("shading", "flat") - pc = ax.pcolormesh(X, Y, H, **kwargs) + pc = ax.pcolormesh(X, Y, H, vmin=cmin, vmax=cmax, **kwargs) if x_axes_label: ax.set_xlabel(x_axes_label) @@ -536,6 +777,64 @@ def hist2dplot( cb_obj = None plt.sca(ax) + if flow == "hint" or flow == "show": + d = 0.9 # proportion of vertical to horizontal extent of the slanted line + trans = mpl.transforms.blended_transform_factory(ax.transData, ax.transAxes) + ax_h = ax.bbox.height + kwargs = dict( + marker=[(-0.5, -d), (0.5, d)], + markersize=ax_h * 0.05, + linestyle="none", + color="k", + mec="k", + mew=1, + clip_on=False, + ) + if any(h.values(flow=True)[0] > 0): + if flow == "hint": + ax.plot( + [xbins[0] - (xbins[-3] - xbins[2]) * 0.03, xbins[0]], + [0, 0], + transform=trans, + **kwargs, + ) + if flow == "show": + ax.plot([xbins[1], xbins[2]], [0, 0], transform=trans, **kwargs) + ax.plot([xbins[0], xbins[0]], [ybins[1], ybins[2]], **kwargs) + if any(h.values(flow=True)[:, 0] > 0): + if flow == "hint": + ax.plot( + [xbins[-1] + (xbins[-3] - xbins[2]) * 0.03, xbins[-1]], + [0, 0], + transform=trans, + **kwargs, + ) + if flow == "show": + ax.plot([xbins[-3], xbins[-2]], [0, 0], transform=trans, **kwargs) + ax.plot([xbins[-1], xbins[-1]], [ybins[1], ybins[2]], **kwargs) + if any(h.values(flow=True)[-1] > 0): + if flow == "hint": + ax.plot( + [xbins[0], xbins[0] - (xbins[-3] - xbins[2]) * 0.03], + [1, 1], + transform=trans, + **kwargs, + ) + if flow == "show": + ax.plot([xbins[1], xbins[2]], [1, 1], transform=trans, **kwargs) + ax.plot([xbins[0], xbins[0]], [ybins[-3], ybins[-2]], **kwargs) + + if any(h.values(flow=True)[:, -1] > 0): + if flow == "hint": + ax.plot( + [xbins[-1] + (xbins[-3] - xbins[2]) * 0.03, xbins[-1]], + [1, 1], + transform=trans, + **kwargs, + ) + if flow == "show": + ax.plot([xbins[-3], xbins[-2]], [1, 1], transform=trans, **kwargs) + ax.plot([xbins[-1], xbins[-1]], [ybins[-3], ybins[-2]], **kwargs) _labels: np.ndarray | None = None if isinstance(labels, bool): diff --git a/tests/baseline/test_hist2dplot_flow.png b/tests/baseline/test_hist2dplot_flow.png new file mode 100644 index 00000000..e0f8133b Binary files /dev/null and b/tests/baseline/test_hist2dplot_flow.png differ diff --git a/tests/baseline/test_histplot_flow.png b/tests/baseline/test_histplot_flow.png new file mode 100644 index 00000000..9b9c41a4 Binary files /dev/null and b/tests/baseline/test_histplot_flow.png differ diff --git a/tests/baseline/test_histplot_hist_flow.png b/tests/baseline/test_histplot_hist_flow.png new file mode 100644 index 00000000..9f3fa884 Binary files /dev/null and b/tests/baseline/test_histplot_hist_flow.png differ diff --git a/tests/baseline/test_histplot_type_flow.png b/tests/baseline/test_histplot_type_flow.png new file mode 100644 index 00000000..c78bd98f Binary files /dev/null and b/tests/baseline/test_histplot_type_flow.png differ diff --git a/tests/baseline/test_histplot_uproot_flow.png b/tests/baseline/test_histplot_uproot_flow.png new file mode 100644 index 00000000..3ee706bd Binary files /dev/null and b/tests/baseline/test_histplot_uproot_flow.png differ diff --git a/tests/baseline/test_inputs_bh.png b/tests/baseline/test_inputs_bh.png index f71cb724..df5b7ca3 100644 Binary files a/tests/baseline/test_inputs_bh.png and b/tests/baseline/test_inputs_bh.png differ diff --git a/tests/baseline/test_inputs_bh_cat.png b/tests/baseline/test_inputs_bh_cat.png index 000494c9..bf55cf64 100644 Binary files a/tests/baseline/test_inputs_bh_cat.png and b/tests/baseline/test_inputs_bh_cat.png differ diff --git a/tests/baseline/test_inputs_uproot.png b/tests/baseline/test_inputs_uproot.png index 5b544d84..05dac78b 100644 Binary files a/tests/baseline/test_inputs_uproot.png and b/tests/baseline/test_inputs_uproot.png differ diff --git a/tests/test_basic.py b/tests/test_basic.py index 1b9423a0..17df619c 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -2,6 +2,7 @@ import os +import hist import matplotlib.pyplot as plt import numpy as np import pytest @@ -110,6 +111,120 @@ def test_histplot_density(): return fig +@pytest.mark.mpl_image_compare(style="default") +def test_histplot_flow(): + np.random.seed(0) + h = hist.new.Reg(20, 5, 15, name="x").Weight() + h.fill(np.random.normal(10, 3, 400)) + fig, axs = plt.subplots(2, 2, sharey=True, figsize=(10, 10)) + axs = axs.flatten() + + hep.histplot(h, ax=axs[0], flow="hint") + hep.histplot(h, ax=axs[1], flow="show") + hep.histplot(h, ax=axs[2], flow="sum") + hep.histplot(h, ax=axs[3], flow=None) + + axs[0].set_title("Default(hint)", fontsize=18) + axs[1].set_title("Show", fontsize=18) + axs[2].set_title("Sum", fontsize=18) + axs[3].set_title("None", fontsize=18) + return fig + + +@pytest.mark.mpl_image_compare(style="default") +def test_histplot_hist_flow(): + np.random.seed(0) + entries = np.random.normal(10, 3, 400) + h = hist.new.Reg(20, 5, 15, name="x", flow=True).Weight() + h2 = hist.new.Reg(20, 5, 15, name="x", underflow=True, overflow=False).Weight() + h3 = hist.new.Reg(20, 5, 15, name="x", underflow=False, overflow=True).Weight() + h4 = hist.new.Reg(20, 5, 15, name="x", flow=False).Weight() + + h.fill(entries) + h2.fill(entries) + h3.fill(entries) + h4.fill(entries) + fig, axs = plt.subplots(2, 2, sharey=True, figsize=(10, 10)) + axs = axs.flatten() + + hep.histplot(h, ax=axs[0], flow="show") + hep.histplot(h2, ax=axs[1], flow="show") + hep.histplot(h3, ax=axs[2], flow="show") + hep.histplot(h4, ax=axs[3], flow="show") + + axs[0].set_title("Two-side overflow", fontsize=18) + axs[1].set_title("Left-side overflow", fontsize=18) + axs[2].set_title("Right-side overflow", fontsize=18) + axs[3].set_title("No overflow", fontsize=18) + fig.subplots_adjust(hspace=0.2, wspace=0.2) + axs[0].legend() + return fig + + +@pytest.mark.mpl_image_compare(style="default") +def test_histplot_uproot_flow(): + np.random.seed(0) + entries = np.random.normal(10, 3, 400) + h = hist.new.Reg(20, 5, 15, name="x", flow=True).Weight() + h2 = hist.new.Reg(20, 5, 15, name="x", flow=True).Weight() + h3 = hist.new.Reg(20, 5, 15, name="x", flow=True).Weight() + h4 = hist.new.Reg(20, 5, 15, name="x", flow=True).Weight() + + h.fill(entries) + h2.fill(entries[entries < 15]) + h3.fill(entries[entries > 5]) + h4.fill(entries[(entries > 5) & (entries < 15)]) + import uproot + + f = uproot.recreate("flow_th1.root") + f["h"] = h + f["h2"] = h2 + f["h3"] = h3 + f["h4"] = h4 + + f = uproot.open("flow_th1.root") + h = f["h"] + h2 = f["h2"] + h3 = f["h3"] + h4 = f["h4"] + + fig, axs = plt.subplots(2, 2, sharey=True, figsize=(10, 10)) + axs = axs.flatten() + + hep.histplot(h, ax=axs[0], flow="show") + hep.histplot(h2, ax=axs[1], flow="show") + hep.histplot(h3, ax=axs[2], flow="show") + hep.histplot(h4, ax=axs[3], flow="show") + + axs[0].set_title("Two-side overflow", fontsize=18) + axs[1].set_title("Left-side overflow", fontsize=18) + axs[2].set_title("Right-side overflow", fontsize=18) + axs[3].set_title("No overflow", fontsize=18) + fig.subplots_adjust(hspace=0.2, wspace=0.2) + axs[0].legend() + return fig + + +@pytest.mark.mpl_image_compare(style="default") +def test_histplot_type_flow(): + np.random.seed(0) + entries = np.random.normal(10, 3, 400) + + histh = hist.new.Reg(20, 5, 15, name="x", flow=False).Weight() + nph, bins = np.histogram(entries, bins=20, range=(5, 15)) + histh.fill(entries) + + fig, axs = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(10, 5)) + axs = axs.flatten() + + hep.histplot(histh, ax=axs[0], flow="hint", yerr=False) + hep.histplot(nph, bins, ax=axs[1], flow="hint") + + axs[0].set_title("hist, noflow bin", fontsize=18) + axs[1].set_title("numpy hist", fontsize=18) + return fig + + @pytest.mark.mpl_image_compare(style="default", remove_text=True) def test_histplot_multiple(): np.random.seed(0) @@ -175,6 +290,32 @@ def test_hist2dplot(): return fig +@pytest.mark.mpl_image_compare(style="default") +def test_hist2dplot_flow(): + np.random.seed(0) + h = hist.Hist( + hist.axis.Regular(20, 5, 15, name="x"), + hist.axis.Regular(20, -5, 5, name="y"), + hist.storage.Weight(), + ) + h.fill(np.random.normal(10, 3, 400), np.random.normal(0, 4, 400)) + fig, axs = plt.subplots(2, 2, figsize=(10, 10)) + axs = axs.flatten() + + hep.hist2dplot(h, ax=axs[0], flow="hint", cmin=0, cmax=10) + hep.hist2dplot(h, ax=axs[1], flow="show", cmin=0, cmax=10) + hep.hist2dplot(h, ax=axs[2], flow="sum", cmin=0, cmax=10) + hep.hist2dplot(h, ax=axs[3], flow=None, cmin=0, cmax=10) + + axs[0].set_title("Default(hint)", fontsize=18) + axs[1].set_title("Show", fontsize=18) + axs[2].set_title("Sum", fontsize=18) + axs[3].set_title("None", fontsize=18) + fig.subplots_adjust(hspace=0.1, wspace=0.1) + + return fig + + @pytest.mark.mpl_image_compare(style="default", remove_text=True) def test_hist2dplot_inputs_nobin(): np.random.seed(0)