From eb606323555a567440b20e38ab4847ddb38c2f89 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sun, 29 Mar 2020 19:32:10 +0200
Subject: [PATCH 01/15] Test and fix rename_dim

---
 fast_plotter/postproc/functions.py | 2 +-
 tests/postproc/test_functions.py   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fast_plotter/postproc/functions.py b/fast_plotter/postproc/functions.py
index 4d63e2e..aa15a55 100644
--- a/fast_plotter/postproc/functions.py
+++ b/fast_plotter/postproc/functions.py
@@ -190,7 +190,7 @@ def rename_dim(df, mapping):
     """
     Rename one or more dimensions
     """
-    df.index.names = [mapping.get(n, n) for n in df.df.index.names]
+    df.index.names = [mapping.get(n, n) for n in df.index.names]
     return df
 
 
diff --git a/tests/postproc/test_functions.py b/tests/postproc/test_functions.py
index 08b5a74..0fd8071 100644
--- a/tests/postproc/test_functions.py
+++ b/tests/postproc/test_functions.py
@@ -70,9 +70,9 @@ def test_combine_cols_AND_split_dimension(binned_df):
 #     #def rename_cols(df, mapping):
 #     pass
 
-# def test_rename_dim():
-#     #def rename_dim(df, mapping):
-#     pass
+def test_rename_dim(binned_df):
+    result = funcs.rename_dim(binned_df, {"int": "integers", "cat": "CATEGORICALS"})
+    assert result.index.names == ["integers", "CATEGORICALS", "interval"]
 
 # def test_split():
 #     #def split(df, axis, keep_split_dim, return_meta=True):

From ddba0b71237a307cf5802c55e9508c4636ef6daf Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sun, 29 Mar 2020 19:34:27 +0200
Subject: [PATCH 02/15] Update CHANGELOG

---
 CHANGELOG.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4967fdb..4838e9e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,9 +4,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.5.0] - 2020-03-30
+## [0.5.1] - 2020-03-30
+### Fixed
+- Bugs in post-processing modules, PR #29 [@benkrikler](github.com/benkrikler)
+
+## [0.5.0] - 2020-03-29
 ### Added
-- New post-processing command to reshape outputs of fast-carpenter
+- New post-processing command to reshape outputs of fast-carpenter from PR #28 [@benkrikler](github.com/benkrikler)
 
 ## [0.4.0] - 2020-02-26
 - Many changes from PR #26 [@benkrikler](github.com/benkrikler)

From e12d39eb542f4d7b0a108c172cec0eaa579c9ff6 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sun, 29 Mar 2020 19:34:34 +0200
Subject: [PATCH 03/15] =?UTF-8?q?Bump=20version:=200.5.0=20=E2=86=92=200.5?=
 =?UTF-8?q?.1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fast_plotter/version.py | 2 +-
 setup.cfg               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fast_plotter/version.py b/fast_plotter/version.py
index d1947dd..cd7cc73 100644
--- a/fast_plotter/version.py
+++ b/fast_plotter/version.py
@@ -12,5 +12,5 @@ def split_version(version):
     return tuple(result)
 
 
-__version__ = '0.5.0'
+__version__ = '0.5.1'
 version_info = split_version(__version__) # noqa
diff --git a/setup.cfg b/setup.cfg
index 99f64d1..644ec11 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.5.0
+current_version = 0.5.1
 commit = True
 tag = False
 

From cfec0c5ff9fe6e0bc6e3cdd7720cd4175d3440d7 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sun, 29 Mar 2020 19:35:13 +0200
Subject: [PATCH 04/15] Fix pep8

---
 tests/postproc/test_functions.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/postproc/test_functions.py b/tests/postproc/test_functions.py
index 0fd8071..ace720c 100644
--- a/tests/postproc/test_functions.py
+++ b/tests/postproc/test_functions.py
@@ -70,10 +70,12 @@ def test_combine_cols_AND_split_dimension(binned_df):
 #     #def rename_cols(df, mapping):
 #     pass
 
+
 def test_rename_dim(binned_df):
     result = funcs.rename_dim(binned_df, {"int": "integers", "cat": "CATEGORICALS"})
     assert result.index.names == ["integers", "CATEGORICALS", "interval"]
 
+
 # def test_split():
 #     #def split(df, axis, keep_split_dim, return_meta=True):
 #     pass

From 8e81027327aaa8f9dd91e6af95251bd11dcdbb89 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sun, 29 Mar 2020 20:37:52 +0200
Subject: [PATCH 05/15] Handle when no bins need to be dropped

---
 fast_plotter/postproc/functions.py |  2 ++
 tests/postproc/test_functions.py   | 15 ++++++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/fast_plotter/postproc/functions.py b/fast_plotter/postproc/functions.py
index aa15a55..b942b19 100644
--- a/fast_plotter/postproc/functions.py
+++ b/fast_plotter/postproc/functions.py
@@ -119,6 +119,8 @@ def split_dimension(df, axis, delimeter=";"):
 def keep_bins(df, axis, keep):
     """Keep bins on the single dimension, dropping others"""
     others = {val for val in df.index.unique(axis) if val not in keep}
+    if not others:
+        return df
     logger.info("Dropping values for '%s': %s", axis, str(others))
     out = df.drop(others, level=axis, axis="index")
     return out
diff --git a/tests/postproc/test_functions.py b/tests/postproc/test_functions.py
index ace720c..a176ca6 100644
--- a/tests/postproc/test_functions.py
+++ b/tests/postproc/test_functions.py
@@ -33,9 +33,9 @@ def test_query(binned_df):
 
 
 def test_rebin(binned_df):
-    result = funcs.rebin(binned_df.copy(), axis="int", mapping=dict(zip(range(4), [0, 2] * 2)))
+    result = funcs.rebin(binned_df.copy(), rename="hruff", axis="int", mapping=dict(zip(range(4), [0, 2] * 2)))
     assert len(result) == 20
-    assert list(result.index.unique("int")) == [0, 2]
+    assert list(result.index.unique("hruff")) == [0, 2]
 
     mapping = {0: dict(bar="foo"), 2: dict(foo="bar"), 3: dict(foo="BAZ", bar="BAZ")}
     result = funcs.rebin(binned_df.copy(), axis=["int", 'cat'], mapping=mapping)
@@ -43,9 +43,14 @@ def test_rebin(binned_df):
     assert set(result.index.unique("cat")) == {"bar", "BAZ", "foo"}
 
 
-# def test_keep_bins():
-#     #def keep_bins(df, axis, keep):
-#     pass
+def test_keep_bins(binned_df):
+    result = funcs.keep_bins(binned_df.copy(), "int", keep=[0, 2])
+    assert len(result) == 20
+
+    result = funcs.keep_bins(binned_df.copy(), "int", keep=binned_df.index.unique("int"))
+    assert len(result) == 40
+
+
 
 # def test_keep_specific_bins():
 #     #def keep_specific_bins(df, axis, keep, expansions={}):

From 05a44bbbe9a229230706079f6924d9ccac64a9b8 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Sun, 29 Mar 2020 20:38:20 +0200
Subject: [PATCH 06/15] Fix whitespace

---
 tests/postproc/test_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/postproc/test_functions.py b/tests/postproc/test_functions.py
index a176ca6..d345c58 100644
--- a/tests/postproc/test_functions.py
+++ b/tests/postproc/test_functions.py
@@ -51,11 +51,11 @@ def test_keep_bins(binned_df):
     assert len(result) == 40
 
 
-
 # def test_keep_specific_bins():
 #     #def keep_specific_bins(df, axis, keep, expansions={}):
 #     pass
 
+
 def test_combine_cols_AND_split_dimension(binned_df):
     result = funcs.combine_cols(binned_df, {"a;b": "{a};{b}"})
     assert len(result.columns) == 3

From 1ff9fa7d5eb791e8cccf43f2f69440ae2fcb3d68 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Tue, 31 Mar 2020 10:13:51 +0200
Subject: [PATCH 07/15] Fix things for dataframes with only unweighted counts

---
 fast_plotter/__main__.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py
index 3c9e625..c0e173a 100644
--- a/fast_plotter/__main__.py
+++ b/fast_plotter/__main__.py
@@ -104,12 +104,11 @@ def process_one_file(infile, args):
     for weight in weights:
         if args.weights and weight not in args.weights:
             continue
+        df_filtered = df.copy()
         if weight == "n":
-            df_filtered = df.filter(weight, axis="columns").copy()
-            df_filtered.rename({weight: "sumw"}, axis="columns", inplace=True)
-            df_filtered["sumw2"] = df_filtered.sumw
+            df_filtered["sumw"] = df_filtered.n
+            df_filtered["sumw2"] = df_filtered.n
         else:
-            df_filtered = df.copy()
             if "n" in df.columns:
                 data_rows = mask_rows(df_filtered,
                                       regex=args.data,

From 8f01420676f3a967425680df6f708220252f9bdc Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Tue, 31 Mar 2020 10:28:16 +0200
Subject: [PATCH 08/15] Add option to change output file format

---
 fast_plotter/postproc/functions.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fast_plotter/postproc/functions.py b/fast_plotter/postproc/functions.py
index b942b19..89d8f28 100644
--- a/fast_plotter/postproc/functions.py
+++ b/fast_plotter/postproc/functions.py
@@ -376,7 +376,7 @@ def open_many(file_list, return_meta=True):
     return dfs
 
 
-def write_out(df, meta, filename="tbl_{dims}--{name}.csv", out_dir=None):
+def write_out(df, meta, filename="tbl_{dims}--{name}", out_dir=None, filetype="csv"):
     """ Write a dataframe to disk
     """
     meta = meta.copy()
@@ -387,5 +387,10 @@ def write_out(df, meta, filename="tbl_{dims}--{name}.csv", out_dir=None):
         complete_file = os.path.join(out_dir, complete_file)
     os.makedirs(os.path.dirname(complete_file), exist_ok=True)
     logger.info("Writing out file '%s'", complete_file)
-    df.to_csv(complete_file)
+    if not complete_file.endswith(filetype):
+        complete_file += "." + filetype
+    if filetype == "csv":
+        df.to_csv(complete_file)
+    elif filetype == "hd5":
+        df.to_hdf(complete_file)
     return df

From 5a8eaa79464366df2341813a9fdfe0805f20a00e Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Tue, 31 Mar 2020 10:50:49 +0200
Subject: [PATCH 09/15] Pass the required key when writing out with hd5

---
 fast_plotter/postproc/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fast_plotter/postproc/functions.py b/fast_plotter/postproc/functions.py
index 89d8f28..c6bb0b3 100644
--- a/fast_plotter/postproc/functions.py
+++ b/fast_plotter/postproc/functions.py
@@ -392,5 +392,5 @@ def write_out(df, meta, filename="tbl_{dims}--{name}", out_dir=None, filetype="c
     if filetype == "csv":
         df.to_csv(complete_file)
     elif filetype == "hd5":
-        df.to_hdf(complete_file)
+        df.to_hdf(complete_file, key="df")
     return df

From 71c31f372e7263a28a9576a2c99cdd2a929d4416 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Tue, 31 Mar 2020 10:59:12 +0200
Subject: [PATCH 10/15] Add option to control the number of debugging lines to
 show

---
 fast_plotter/postproc/__main__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fast_plotter/postproc/__main__.py b/fast_plotter/postproc/__main__.py
index 023352e..3248de8 100644
--- a/fast_plotter/postproc/__main__.py
+++ b/fast_plotter/postproc/__main__.py
@@ -13,6 +13,8 @@ def make_parser():
                         help="Print a dataframe after each step")
     parser.add_argument("--debug-dfs-query", default=None,
                         help="Provide a query to select rows from the debugged dataframe")
+    parser.add_argument("--debug-num", default=5,
+                        help="Number of rows to dump from debugging dataframe")
     parser.add_argument("-p", "--post-process", default=None, required=True,
                         help="A yaml to configure the post-processing step")
     parser.add_argument("-o", "--outdir", default=".",
@@ -57,7 +59,7 @@ def main(args=None):
         if args.debug_dfs:
             debug_df = dump_debug_df(dfs, args.debug_dfs_query)
             if debug_df is not None:
-                logger.debug(debug_df.head().to_string())
+                logger.debug(debug_df.head(args.debug_num).to_string())
 
 
 if __name__ == "__main__":

From 4fd6435c08b036ea97223774b470432373c88031 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Tue, 31 Mar 2020 11:00:29 +0200
Subject: [PATCH 11/15] Fix debug-num --> debug-rows command

---
 fast_plotter/postproc/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fast_plotter/postproc/__main__.py b/fast_plotter/postproc/__main__.py
index 3248de8..ff1e33d 100644
--- a/fast_plotter/postproc/__main__.py
+++ b/fast_plotter/postproc/__main__.py
@@ -13,7 +13,7 @@ def make_parser():
                         help="Print a dataframe after each step")
     parser.add_argument("--debug-dfs-query", default=None,
                         help="Provide a query to select rows from the debugged dataframe")
-    parser.add_argument("--debug-num", default=5,
+    parser.add_argument("--debug-rows", default=5, type=int,
                         help="Number of rows to dump from debugging dataframe")
     parser.add_argument("-p", "--post-process", default=None, required=True,
                         help="A yaml to configure the post-processing step")

From 0ed39069b7ab653330b70e916d8092530d9a951b Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Tue, 31 Mar 2020 11:02:27 +0200
Subject: [PATCH 12/15] Fix up the debug rows option (again)

---
 fast_plotter/postproc/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fast_plotter/postproc/__main__.py b/fast_plotter/postproc/__main__.py
index ff1e33d..41b0123 100644
--- a/fast_plotter/postproc/__main__.py
+++ b/fast_plotter/postproc/__main__.py
@@ -59,7 +59,7 @@ def main(args=None):
         if args.debug_dfs:
             debug_df = dump_debug_df(dfs, args.debug_dfs_query)
             if debug_df is not None:
-                logger.debug(debug_df.head(args.debug_num).to_string())
+                logger.debug(debug_df.head(args.debug_rows).to_string())
 
 
 if __name__ == "__main__":

From bcc2204a52e6a3362195171c3056f79712f53c7c Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Tue, 7 Apr 2020 11:03:32 +0200
Subject: [PATCH 13/15] Extend split method to work with multiple split
 dimensions

---
 fast_plotter/postproc/functions.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fast_plotter/postproc/functions.py b/fast_plotter/postproc/functions.py
index c6bb0b3..9666b73 100644
--- a/fast_plotter/postproc/functions.py
+++ b/fast_plotter/postproc/functions.py
@@ -201,6 +201,10 @@ def split(df, axis, keep_split_dim, return_meta=True):
     split the dataframe into a list of dataframes using a given binning
     dimensions
     """
+    if isinstance(axis, (list, tuple)):
+        axis = tuple(axis)
+    else:
+        axis = (axis, )
     logger.info("Splitting on axis: '%s'", axis)
     out_dfs = []
     groups = df.groupby(level=axis, group_keys=keep_split_dim)
@@ -209,8 +213,9 @@ def split(df, axis, keep_split_dim, return_meta=True):
             group.index = group.index.droplevel(axis)
         result = group.copy()
         if return_meta:
-            meta = {"split_name": "%s_%s" % (axis, split_val),
-                    axis: split_val}
+            meta = dict(zip(axis, split_val))
+            split_name = "--".join(map("_".join, meta.items()))
+            meta["split_name"] = split_name
             result = (result, meta)
         out_dfs.append(result)
     return out_dfs

From 3de2d824c8d31615d574d6898c5e4d2c31f6c24c Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Tue, 7 Apr 2020 11:06:28 +0200
Subject: [PATCH 14/15] Also convert splitval to tuple

---
 fast_plotter/postproc/functions.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fast_plotter/postproc/functions.py b/fast_plotter/postproc/functions.py
index 9666b73..4388a3c 100644
--- a/fast_plotter/postproc/functions.py
+++ b/fast_plotter/postproc/functions.py
@@ -201,14 +201,17 @@ def split(df, axis, keep_split_dim, return_meta=True):
     split the dataframe into a list of dataframes using a given binning
     dimensions
     """
-    if isinstance(axis, (list, tuple)):
-        axis = tuple(axis)
-    else:
-        axis = (axis, )
+    def to_tuple(obj):
+        if isinstance(obj, (list, tuple)):
+            return tuple(obj)
+        else:
+            return (obj, )
+    axis = to_tuple(axis)
     logger.info("Splitting on axis: '%s'", axis)
     out_dfs = []
     groups = df.groupby(level=axis, group_keys=keep_split_dim)
     for split_val, group in groups:
+        split_val = to_tuple(split_val)
         if not keep_split_dim:
             group.index = group.index.droplevel(axis)
         result = group.copy()

From 2f966ad20e7c640f3cf6dda39ebf8973362846e4 Mon Sep 17 00:00:00 2001
From: Ben Krikler <bek07@ic.ac.uk>
Date: Tue, 7 Apr 2020 11:27:27 +0200
Subject: [PATCH 15/15] Update CHANGELOG

---
 CHANGELOG.md                       | 2 +-
 fast_plotter/postproc/functions.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4838e9e..3ed6d6c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.5.1] - 2020-03-30
+## [0.5.1] - 2020-04-07
 ### Fixed
 - Bugs in post-processing modules, PR #29 [@benkrikler](github.com/benkrikler)
 
diff --git a/fast_plotter/postproc/functions.py b/fast_plotter/postproc/functions.py
index 4388a3c..5592739 100644
--- a/fast_plotter/postproc/functions.py
+++ b/fast_plotter/postproc/functions.py
@@ -206,6 +206,7 @@ def to_tuple(obj):
             return tuple(obj)
         else:
             return (obj, )
+
     axis = to_tuple(axis)
     logger.info("Splitting on axis: '%s'", axis)
     out_dfs = []