Skip to content
This repository has been archived by the owner on Jan 27, 2023. It is now read-only.

Commit

Permalink
working on Pandas -> ROOT histogram
Browse files (browse the repository at this point in the history)
  • Loading branch information
jpivarski committed Sep 25, 2018
1 parent 4bba49c commit 3ed2fb5
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 14 deletions.
40 changes: 26 additions & 14 deletions uproot_methods/classes/TH1.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,9 @@ def numpy(self):
edges = numpy.linspace(self._fXaxis._fXmin, self._fXaxis._fXmax, self._fXaxis._fNbins + 1)
return freq, edges

def pandas(self, underflow=True, overflow=True):
def pandas(self, underflow=True, overflow=True, variance=True):
import pandas
freq = numpy.array(self.allvalues, dtype=self._dtype.newbyteorder("="))
print("freq", len(freq))

if not underflow and not overflow:
freq = freq[1:-1]
Expand Down Expand Up @@ -229,19 +228,23 @@ def pandas(self, underflow=True, overflow=True):
index = pandas.IntervalIndex.from_arrays(lefts[nonzero], rights[nonzero], closed="left")

data = {"count": freq[nonzero]}
if getattr(self, "_fSumw2", None):
sumw2 = self._fSumw2
if not underflow and not overflow:
sumw2 = sumw2[1:-1]
elif not underflow:
sumw2 = sumw2[1:]
elif not overflow:
sumw2 = sumw2[:-1]
data["variance"] = numpy.array(sumw2)[nonzero]
else:
data["variance"] = data["count"]
columns = ["count"]

if variance:
if getattr(self, "_fSumw2", None):
sumw2 = self._fSumw2
if not underflow and not overflow:
sumw2 = sumw2[1:-1]
elif not underflow:
sumw2 = sumw2[1:]
elif not overflow:
sumw2 = sumw2[:-1]
data["variance"] = numpy.array(sumw2)[nonzero]
else:
data["variance"] = data["count"]
columns.append("variance")

return pandas.DataFrame(index=index, data=data, columns=["count", "variance"])
return pandas.DataFrame(index=index, data=data, columns=columns)

def physt(self):
import physt.binnings
Expand Down Expand Up @@ -354,6 +357,15 @@ def __init__(self, fNbins, fXmin, fXmax, fXbins):

return out

# def from_pandas(histogram):
# import pandas

# sparse = histogram.index[numpy.isfinite(histogram.index.left) & numpy.isfinite(histogram.index.right)]





def from_physt(histogram):
import physt.binnings
import physt.histogram1d
Expand Down
5 changes: 5 additions & 0 deletions uproot_methods/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def resolve(obj):
def types(cls, obj):
if cls is numpy.ndarray:
yield ("numpy", "ndarray", len(obj.shape), str(obj.dtype))
elif cls.__module__ == "pandas.core.frame" and cls.__name__ == "DataFrame":
yield ("pandas.core.frame", "DataFrame", obj.index.__class__.__name__, set(obj.columns))
else:
yield (cls.__module__, cls.__name__)
for x in cls.__bases__:
Expand All @@ -52,6 +54,9 @@ def types(cls, obj):
elif isinstance(obj, tuple) and any(x[:2] == ("numpy", "ndarray") for x in types(obj[0].__class__, obj[0])) and any(x[:2] == ("numpy", "ndarray") for x in types(obj[1].__class__, obj[1])) and len(obj[0]) + 1 == len(obj[1]):
return ("uproot_methods.classes.TH1", "from_numpy", "uproot.write.objects.TH1", "TH1")

elif any(x[:3] == ("pandas.core.frame", "DataFrame", "IntervalIndex") and "count" in x[3] for x in types(obj.__class__, obj)):
return ("uproot_methods.classes.TH1", "from_pandas", "uproot.write.objects.TH1", "TH1")

elif any(x == ("physt.histogram1d", "Histogram1D") for x in types(obj.__class__, obj)):
return ("uproot_methods.classes.TH1", "from_physt", "uproot.write.objects.TH1", "TH1")

Expand Down

0 comments on commit 3ed2fb5

Please sign in to comment.