From 0ab4a6d7f6a1667f75d5b51edb2c749bb60f7a4f Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Thu, 7 May 2020 13:01:43 +0200 Subject: [PATCH 1/6] Add option to rotate xtick labels --- fast_plotter/__main__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 8bfe4c1..5a3c9cd 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -6,6 +6,7 @@ import logging import matplotlib matplotlib.use('Agg') +matplotlib.rcParams.update({'figure.autolayout': True}) from .version import __version__ # noqa from .utils import read_binned_df, weighting_vars # noqa from .utils import decipher_filename, mask_rows # noqa @@ -134,8 +135,9 @@ def process_one_file(infile, args): return ran_ok -def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, limits={}, **kwargs): - for main_ax, _ in plots.values(): +def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, + limits={}, xtickrotation=None, **kwargs): + for main_ax, summary_ax in plots.values(): add_annotations(annotations, main_ax) if yscale: main_ax.set_yscale(yscale) @@ -151,6 +153,8 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, getattr(main_ax, "set_%slim" % axis)(*lims) elif lims.endswith("%"): main_ax.margins(**{axis: float(lims[:-1])}) + if xtickrotation: + matplotlib.pyplot.xticks(rotation=xtickrotation) def save_plots(infile, weight, plots, outdir, extensions): From ea27a69fdb2455224fb671f7f10912b598c940ab Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Thu, 7 May 2020 13:02:50 +0200 Subject: [PATCH 2/6] Add option to set output of assign_cols to be an index --- fast_plotter/postproc/functions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fast_plotter/postproc/functions.py b/fast_plotter/postproc/functions.py index 53de7ae..a0a9e9d 100644 --- a/fast_plotter/postproc/functions.py +++ 
b/fast_plotter/postproc/functions.py @@ -152,7 +152,7 @@ def keep_specific_bins(df, axis, keep, expansions={}): return out_df -def combine_cols(df, format_strings): +def combine_cols(df, format_strings, as_index=[]): """Combine columns together using format strings""" logger.info("Combining columns based on: %s", str(format_strings)) result_names = list(format_strings.keys()) @@ -166,6 +166,8 @@ def apply_fmt(row): results.columns = result_names new_df = new_df.assign(**results) new_df.set_index(index, inplace=True, drop=True) + if as_index: + new_df.set_index(as_index, inplace=True, append=True) return new_df From 9e279bc56ad950cef358b144840f1fa436f964f7 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Thu, 7 May 2020 13:03:15 +0200 Subject: [PATCH 3/6] Fix up to_datacard_inputs --- fast_plotter/postproc/functions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fast_plotter/postproc/functions.py b/fast_plotter/postproc/functions.py index a0a9e9d..66f0129 100644 --- a/fast_plotter/postproc/functions.py +++ b/fast_plotter/postproc/functions.py @@ -290,6 +290,8 @@ def to_datacard_inputs(df, select_data, rename_syst_vars=False): df["content"] = df.n df["content"][~data_mask] = df.sumw df["error"] = df.content / np.sqrt(df.n) + df.drop(["n", "sumw", "sumw2"], inplace=True, axis="columns") + return df def generic_pandas(df, func, *args, **kwargs): From 3816f28081f9a1dee7439d2752ab48c6c7a7765e Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Thu, 7 May 2020 13:03:33 +0200 Subject: [PATCH 4/6] Add new stage to remove columns in a more convenient way --- fast_plotter/postproc/functions.py | 24 ++++++++++++++++++++++++ fast_plotter/postproc/stages.py | 5 +++++ tests/postproc/test_functions.py | 24 ++++++++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/fast_plotter/postproc/functions.py b/fast_plotter/postproc/functions.py index 66f0129..cc8e1ce 100644 --- a/fast_plotter/postproc/functions.py +++ b/fast_plotter/postproc/functions.py @@ -152,6 +152,30 
@@ def keep_specific_bins(df, axis, keep, expansions={}): return out_df +def filter_cols(df, items=None, like=None, regex=None, drop_not_keep=False): + """Filter out columns you want to keep. + + Parameters: + items (list-like): A list of column names to filter with + like (str, list[string]): A string or list of strings which will filter + columns where they are found in the column name + regex (str): A regular expression to match column names to + drop_not_keep (bool): Inverts the selection if true so that matched columns are dropped + """ + if not like or not isinstance(like, (tuple, list)): + df_filtered = df.filter(items=items, like=like, regex=regex) + elif like: + if items and like: + raise RuntimeError("Can only use one of 'items', 'like', or 'regex'") + filtered = [set(col for col in df.columns if i in col) for i in like] + filtered = set.union(*filtered) + df_filtered = df.filter(items=filtered, regex=regex) + + if drop_not_keep: + return df.drop(df_filtered.columns) + return df_filtered + + def combine_cols(df, format_strings, as_index=[]): """Combine columns together using format strings""" logger.info("Combining columns based on: %s", str(format_strings)) diff --git a/fast_plotter/postproc/stages.py b/fast_plotter/postproc/stages.py index d56f6c6..d97bc7c 100644 --- a/fast_plotter/postproc/stages.py +++ b/fast_plotter/postproc/stages.py @@ -111,6 +111,11 @@ class AssignCol(BaseManipulator): func = "assign_col" +class FilterCols(BaseManipulator): + cardinality = "one-to-one" + func = "filter_cols" + + class AssignDim(BaseManipulator): cardinality = "one-to-one" func = "assign_dim" diff --git a/tests/postproc/test_functions.py b/tests/postproc/test_functions.py index 89dd05a..a2fc89c 100644 --- a/tests/postproc/test_functions.py +++ b/tests/postproc/test_functions.py @@ -91,6 +91,30 @@ def test_split(binned_df): assert all([r[0].index.nlevels == 3 for r in results]) +def test_filter_cols(binned_df): + df = binned_df.index.to_frame() + + result = 
funcs.filter_cols(df, items=["int"]) + assert len(result.columns) == 1 + assert result.columns[0] == "int" + + result = funcs.filter_cols(df, items=["int", "cat"]) + assert len(result.columns) == 2 + assert set(result.columns) == set(("int", "cat")) + + result = funcs.filter_cols(df, like="int") + assert len(result.columns) == 2 + assert set(result.columns) == set(("int", "interval")) + + result = funcs.filter_cols(df, like=["int", "cat"]) + assert len(result.columns) == 3 + assert set(result.columns) == set(("int", "cat", "interval")) + + result = funcs.filter_cols(df, regex="^int.*") + assert len(result.columns) == 2 + assert set(result.columns) == set(("int", "interval")) + + # def test_reorder_dimensions(): # #def reorder_dimensions(df, order): # pass From 76bdfb2b7e7dda412f380038686013f1b84d9ab4 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Thu, 7 May 2020 13:07:50 +0200 Subject: [PATCH 5/6] Update CHANGELOG --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bdab811..a9463e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [0.6.4] - 2020-05-07 +### Added +- New postprocessing stage to filter columns, PR #34 [@benkrikler](https://github.com/benkrikler) +- New option to AssignCols stage to make assignment the index, PR #34 + +### Fixed +- ToDatacardInputs had been broken in a previous update, PR #34 + ## [0.6.3] - 2020-04-29 ### Added - Add GenericPandas and UnstackWeights stages, PR #33 [@benkrikler](https://github.com/benkrikler) From 98fda7876868e80c66acb4749227b2344764b9a4 Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Thu, 7 May 2020 13:22:57 +0200 Subject: [PATCH 6/6] =?UTF-8?q?Bump=20version:=200.6.3=20=E2=86=92=200.6.4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fast_plotter/version.py | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fast_plotter/version.py b/fast_plotter/version.py index f8e8cef..af940b7 100644 --- a/fast_plotter/version.py +++ b/fast_plotter/version.py @@ -12,5 +12,5 @@ def split_version(version): return tuple(result) -__version__ = '0.6.3' +__version__ = '0.6.4' version_info = split_version(__version__) # noqa diff --git a/setup.cfg b/setup.cfg index eb63e1d..6a4046c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.3 +current_version = 0.6.4 commit = True tag = False