Merge pull request #177 from CoffeaTeam/plotupdates

Plotting updates to address some issues
scikit-hep · Oct 16, 2019 · 9c8c2b7 · 9c8c2b7
2 parents 6a24e59 + 26a28d0
commit 9c8c2b7
Show file tree

Hide file tree

Showing 2 changed files with 63 additions and 21 deletions.
diff --git a/coffea/hist/hist_tools.py b/coffea/hist/hist_tools.py
@@ -134,6 +134,11 @@ def hi(self):
         """Upper boundary of this bin, exclusive"""
         return self._hi
 
+    @property
+    def mid(self):
+        """Midpoint of this bin"""
+        return (self._hi + self._lo) / 2
+
     @property
     def label(self):
         """Label of this bin, mutable"""
@@ -272,7 +277,7 @@ class Cat(SparseAxis):
             is used as a keyword in histogram filling, immutable
         label : str
             describes the meaning of the axis, can be changed
-        sorting : {'identifier', 'placement'}, optional
+        sorting : {'identifier', 'placement', 'integral'}, optional
             Axis sorting when listing identifiers.  Default 'placement'
             Changing this setting can effect the order of stack plotting
             in `hist.plot1d`.
@@ -358,6 +363,28 @@ def size(self):
         """Number of bins"""
         return len(self._bins)
 
+    @property
+    def sorting(self):
+        """Sorting definition to adhere to
+
+        See `Cat` constructor for possible values
+        """
+        return self._sorting
+
+    @sorting.setter
+    def sorting(self, newsorting):
+        if newsorting == 'placement':
+            # not much we can do about already inserted values
+            pass
+        elif newsorting == 'identifier':
+            self._sorted.sort()
+        elif newsorting == 'integral':
+            # this will be checked in any Hist.identifiers() call accessing this axis
+            pass
+        else:
+            raise AttributeError("Invalid axis sorting type: %s" % newsorting)
+        self._sorting = newsorting
+
     def identifiers(self):
         """List of `StringBin` identifiers"""
         return [self._bins[k] for k in self._sorted]
@@ -632,16 +659,16 @@ class Hist(AccumulatorABC):
 
     Creating a histogram with a sparse axis, and two dense axes::
 
-        h = coffea.hist.Hist("Events",
-                             coffea.hist.Cat("sample", "Sample name"),
-                             coffea.hist.Bin("x", "x coordinate [cm]", 20, -5, 5),
-                             coffea.hist.Bin("y", "y coordinate [cm]", 20, -5, 5),
+        h = coffea.hist.Hist("Observed bird count",
+                             coffea.hist.Cat("species", "Bird species"),
+                             coffea.hist.Bin("x", "x coordinate [m]", 20, -5, 5),
+                             coffea.hist.Bin("y", "y coordinate [m]", 20, -5, 5),
                              )
 
     which produces:
 
     >>> h
-    <Hist (sample,x,y) instance at 0x10d84b550>
+    <Hist (species,x,y) instance at 0x10d84b550>
 
     """
 
@@ -857,7 +884,7 @@ def fill(self, **values):
 
         Filling the histogram from the `Hist` example:
 
-        >>> h.fill(sample='ducks', x=np.random.normal(size=10), y=np.random.normal(size=10), weight=np.ones(size=10) * 3)
+        >>> h.fill(species='ducks', x=np.random.normal(size=10), y=np.random.normal(size=10), weight=np.ones(size=10) * 3)
 
         """
         if not all(d.name in values for d in self._axes):
@@ -942,13 +969,22 @@ def dense_op(array):
                     out._sumw2[new_key] = dense_op(self._sumw2[key]).copy()
         return out
 
-    def project(self, axis_name, the_slice=slice(None), overflow='none'):
-        """This function has been renamed to Hist.integrate()
+    def project(self, *axes, **kwargs):
+        """Project histogram onto a subset of its axes
 
-        In the future, this will integrate all axes other than the ones specified.
+        Parameters
+        ----------
+            ``*axes`` : str or Axis
+                Positional list of axes to project on to
+            overflow : str
+                Controls behavior of integration over remaining axes.
+                See `sum` description for meaning of allowed values
+                Default is to *not include* overflow bins
         """
-        warnings.warn("Hist.project() has been renamed to Hist.integrate().  In the future, Hist.project() will provide different functionality", FutureWarning)
-        return self.integrate(axis_name, the_slice, overflow)
+        overflow = kwargs.pop('overflow', 'none')
+        axes = [self.axis(ax) for ax in axes]
+        toremove = [ax for ax in self.axes() if ax not in axes]
+        return self.sum(*toremove, overflow=overflow)
 
     def integrate(self, axis_name, int_range=slice(None), overflow='none'):
         """Integrates current histogram along one dimension
@@ -977,18 +1013,14 @@ def integrate(self, axis_name, int_range=slice(None), overflow='none'):
                 overflow = 'over'
         return self[full_slice].sum(axis.name, overflow=overflow)  # slice may make new axis, use name
 
-    def profile(self, axis_name):
-        """Not yet implemented"""
-        raise NotImplementedError("Profiling along an axis")
-
     def remove(self, bins, axis):
         """Remove bins from a sparse axis
 
         Parameters
         ----------
             bins : iterable
                 A list of bin identifiers to remove
-            axis
+            axis : str or Axis
                 Axis name or SparseAxis instance
 
         Returns a *copy* of the histogram with specified bins removed, not an in-place operation
@@ -1146,10 +1178,10 @@ def scale(self, factor, axis=None):
         Examples
         --------
         This function is useful to quickly reweight according to some
-        weight mapping along a sparse axis, such as the ``sample`` axis
+        weight mapping along a sparse axis, such as the ``species`` axis
         in the `Hist` example:
 
-        >>> h.scale({'ducks': 0.3, 'geese': 1.2}, axis='sample')
+        >>> h.scale({'ducks': 0.3, 'geese': 1.2}, axis='species')
         """
         if self._sumw2 is None:
             self._init_sumw2()
@@ -1188,6 +1220,9 @@ def identifiers(self, axis, overflow='none'):
             for identifier in axis.identifiers():
                 if any(k[isparse] == axis.index(identifier) for k in self._sumw.keys()):
                     out.append(identifier)
+            if axis.sorting == 'integral':
+                hproj = {key[0]: integral for key, integral in self.project(axis).values().items()}
+                out.sort(key=lambda k: hproj[k.name], reverse=True)
             return out
         elif isinstance(axis, DenseAxis):
             return axis.identifiers(overflow=overflow)
diff --git a/coffea/hist/plot.py b/coffea/hist/plot.py
@@ -79,7 +79,7 @@ def clopper_pearson_interval(num, denom, coverage=_coverage1sd):
 
 
 def plot1d(hist, ax=None, clear=True, overlay=None, stack=False, overflow='none', line_opts=None,
-           fill_opts=None, error_opts=None, overlay_overflow='none', density=False, binwnorm=None):
+           fill_opts=None, error_opts=None, legend_opts={}, overlay_overflow='none', density=False, binwnorm=None):
     """Create a 1D plot from a 1D or 2D `Hist` object
 
     Parameters
@@ -112,6 +112,10 @@ def plot1d(hist, ax=None, clear=True, overlay=None, stack=False, overflow='none'
             internal to this function.  Leave blank for defaults.  Some special options are interpreted by
             this function and not passed to matplotlib: 'emarker' (default: '') specifies the marker type
             to place at cap of the errorbar.
+        legend_opts : dict, optional
+            A dictionary of options  to pass to the matplotlib
+            `ax.legend <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.legend.html>`_ call
+            internal to this fuction.  Leave blank for defaults.
         overlay_overflow : str, optional
             If overflow behavior is not 'none', extra bins in the overlay axis will be overlayed or stacked,
             to represent the contents of the overflow bins.  See `Hist.sum` documentation for a description of the options.
@@ -254,7 +258,10 @@ def plot1d(hist, ax=None, clear=True, overlay=None, stack=False, overflow='none'
             for identifier, handlelist in primitives.items():
                 handles.append(tuple(h for h in handlelist if h.get_label()[0] != '_'))
                 labels.append(str(identifier))
-            primitives['legend'] = ax.legend(title=overlay.label, handles=handles, labels=labels)
+            opts = {'title': overlay.label}
+            if legend_opts is not None:
+                opts.update(legend_opts)
+            primitives['legend'] = ax.legend(handles=handles, labels=labels, **opts)
         ax.autoscale(axis='x', tight=True)
         ax.set_ylim(0, None)