diff --git a/CHANGELOG.md b/CHANGELOG.md index bdab811..a9463e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.6.4] - 2020-05-07 +### Added +- New postprocessing stage to filter columns, PR #34 [@benkrikler](https://github.com/benkrikler) +- New option to AssignCols stage to make assignment the index, PR #34 + +### Fixed +- ToDatacardInputs had been broken in a previous update, PR #34 + ## [0.6.3] - 2020-04-29 ### Added - Add GenericPandas and UnstackWeights stages, PR #33 [@benkrikler](https://github.com/benkrikler) diff --git a/fast_plotter/__main__.py b/fast_plotter/__main__.py index 8bfe4c1..5a3c9cd 100644 --- a/fast_plotter/__main__.py +++ b/fast_plotter/__main__.py @@ -6,6 +6,7 @@ import logging import matplotlib matplotlib.use('Agg') +matplotlib.rcParams.update({'figure.autolayout': True}) from .version import __version__ # noqa from .utils import read_binned_df, weighting_vars # noqa from .utils import decipher_filename, mask_rows # noqa @@ -134,8 +135,9 @@ def process_one_file(infile, args): return ran_ok -def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, limits={}, **kwargs): - for main_ax, _ in plots.values(): +def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, + limits={}, xtickrotation=None, **kwargs): + for main_ax, summary_ax in plots.values(): add_annotations(annotations, main_ax) if yscale: main_ax.set_yscale(yscale) @@ -151,6 +153,8 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, getattr(main_ax, "set_%slim" % axis)(*lims) elif lims.endswith("%"): main_ax.margins(**{axis: float(lims[:-1])}) + if xtickrotation: + matplotlib.pyplot.xticks(rotation=xtickrotation) def save_plots(infile, weight, plots,
def filter_cols(df, items=None, like=None, regex=None, drop_not_keep=False):
    """Select, or drop, columns of a dataframe by name.

    Parameters:
        df (pandas.DataFrame): The dataframe whose columns are filtered
        items (list-like): A list of column names to filter with
        like (str, list[str]): A string, or a list/tuple of strings; a column
            is matched when any of the strings is found in its name
        regex (str): A regular expression to match column names to
        drop_not_keep (bool): Inverts the selection if true so that matched
            columns are dropped and all other columns are returned

    Returns:
        pandas.DataFrame: the matched columns (or, with ``drop_not_keep``, the
        unmatched ones), in the same order as they appear in ``df``.

    Raises:
        RuntimeError: if ``items`` is given together with a list of ``like``
        TypeError: raised by ``DataFrame.filter`` when its mutually exclusive
            ``items`` / ``like`` / ``regex`` arguments are combined, or when
            none of them is provided
    """
    if not like or not isinstance(like, (tuple, list)):
        # Zero or one pattern: pandas implements this directly and checks that
        # only one of the selection arguments has been given.
        df_filtered = df.filter(items=items, like=like, regex=regex)
    else:
        if items:
            raise RuntimeError("Can only use one of 'items', 'like', or 'regex'")
        # pandas only accepts a single `like` string, so expand the patterns
        # into explicit column names.  Walking df.columns (rather than building
        # a set) keeps the output column order deterministic.
        matched = [col for col in df.columns
                   if any(pattern in col for pattern in like)]
        df_filtered = df.filter(items=matched, regex=regex)

    if drop_not_keep:
        # DataFrame.drop defaults to the row axis; the labels here are columns.
        return df.drop(columns=df_filtered.columns)
    return df_filtered
def test_filter_cols(binned_df):
    """Check that filter_cols keeps the expected columns for each selection mode."""
    frame = binned_df.index.to_frame()

    # (keyword arguments passed to filter_cols, column names expected back)
    cases = [
        ({"items": ["int"]}, ("int",)),
        ({"items": ["int", "cat"]}, ("int", "cat")),
        ({"like": "int"}, ("int", "interval")),
        ({"like": ["int", "cat"]}, ("int", "cat", "interval")),
        ({"regex": "^int.*"}, ("int", "interval")),
    ]
    for kwargs, expected in cases:
        selected = funcs.filter_cols(frame, **kwargs)
        # Same length plus same set of names means no extra or duplicated columns
        assert len(selected.columns) == len(expected)
        assert set(selected.columns) == set(expected)