Skip to content

Commit

Permalink
Merge pull request #34 from FAST-HEP/BK_small_improvements
Browse files Browse the repository at this point in the history
Several small improvements
  • Loading branch information
benkrikler authored May 7, 2020
2 parents 21e45e5 + 98fda78 commit 11ce33b
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 5 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.6.4] - 2020-05-07
### Added
- New postprocessing stage to filter columns, PR #34 [@benkrikler](https://github.com/benkrikler)
- New `as_index` option for `combine_cols` to append the newly combined columns to the index, PR #34

### Fixed
- ToDatacardInputs had been broken in a previous update, PR #34

## [0.6.3] - 2020-04-29
### Added
- Add GenericPandas and UnstackWeights stages, PR #33 [@benkrikler](https://github.com/benkrikler)
Expand Down
8 changes: 6 additions & 2 deletions fast_plotter/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
import matplotlib
matplotlib.use('Agg')
matplotlib.rcParams.update({'figure.autolayout': True})
from .version import __version__ # noqa
from .utils import read_binned_df, weighting_vars # noqa
from .utils import decipher_filename, mask_rows # noqa
Expand Down Expand Up @@ -134,8 +135,9 @@ def process_one_file(infile, args):
return ran_ok


def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={}, limits={}, **kwargs):
for main_ax, _ in plots.values():
def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
limits={}, xtickrotation=None, **kwargs):
for main_ax, summary_ax in plots.values():
add_annotations(annotations, main_ax)
if yscale:
main_ax.set_yscale(yscale)
Expand All @@ -151,6 +153,8 @@ def dress_main_plots(plots, annotations=[], yscale=None, ylabel=None, legend={},
getattr(main_ax, "set_%slim" % axis)(*lims)
elif lims.endswith("%"):
main_ax.margins(**{axis: float(lims[:-1])})
if xtickrotation:
matplotlib.pyplot.xticks(rotation=xtickrotation)


def save_plots(infile, weight, plots, outdir, extensions):
Expand Down
30 changes: 29 additions & 1 deletion fast_plotter/postproc/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,31 @@ def keep_specific_bins(df, axis, keep, expansions={}):
return out_df


def combine_cols(df, format_strings):
def filter_cols(df, items=None, like=None, regex=None, drop_not_keep=False):
    """Select (or drop) columns of a dataframe by name.

    Thin wrapper around ``pandas.DataFrame.filter`` that additionally accepts
    a list of substrings for ``like``.

    Parameters:
        df (pandas.DataFrame): The dataframe whose columns are filtered
        items (list-like): A list of column names to filter with
        like (str, list[string]): A string or list of strings which will filter
            columns where they are found in the column name
        regex (str): A regular expression to match column names to
        drop_not_keep (bool): Inverts the selection if true so that matched
            columns are dropped

    Returns:
        pandas.DataFrame: ``df`` restricted to the matched columns, or with the
        matched columns removed if ``drop_not_keep`` is true.

    Raises:
        RuntimeError: if a list of ``like`` strings is combined with ``items``.
        TypeError: raised by pandas when its mutually exclusive selectors are
            combined (this includes ``regex`` with a list of ``like`` strings)
            or when no selector is given.
    """
    if like and isinstance(like, (tuple, list)):
        if items:
            raise RuntimeError("Can only use one of 'items', 'like', or 'regex'")
        # Walk the columns in their existing order so the result is
        # deterministic (a set union would shuffle the output columns).
        # str() mirrors pandas' own ``like`` handling of non-string labels.
        matched = [col for col in df.columns
                   if any(pattern in str(col) for pattern in like)]
        df_filtered = df.filter(items=matched, regex=regex)
    else:
        df_filtered = df.filter(items=items, like=like, regex=regex)

    if drop_not_keep:
        # The axis must be explicit: DataFrame.drop removes *row* labels by
        # default, which is not what inverting a column selection means.
        return df.drop(df_filtered.columns, axis="columns")
    return df_filtered


def combine_cols(df, format_strings, as_index=[]):
"""Combine columns together using format strings"""
logger.info("Combining columns based on: %s", str(format_strings))
result_names = list(format_strings.keys())
Expand All @@ -166,6 +190,8 @@ def apply_fmt(row):
results.columns = result_names
new_df = new_df.assign(**results)
new_df.set_index(index, inplace=True, drop=True)
if as_index:
new_df.set_index(as_index, inplace=True, append=True)
return new_df


Expand Down Expand Up @@ -288,6 +314,8 @@ def to_datacard_inputs(df, select_data, rename_syst_vars=False):
df["content"] = df.n
df["content"][~data_mask] = df.sumw
df["error"] = df.content / np.sqrt(df.n)
df.drop(["n", "sumw", "sumw2"], inplace=True, axis="columns")
return df


def generic_pandas(df, func, *args, **kwargs):
Expand Down
5 changes: 5 additions & 0 deletions fast_plotter/postproc/stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@ class AssignCol(BaseManipulator):
func = "assign_col"


class FilterCols(BaseManipulator):
    """Postprocessing stage that filters the columns of a dataframe.

    NOTE(review): presumably BaseManipulator resolves ``func`` by name to the
    ``filter_cols`` function in the postproc functions module and forwards the
    stage's configuration to it -- confirm against BaseManipulator.
    """
    # One dataframe in, one dataframe out (same value as AssignDim).
    cardinality = "one-to-one"
    # Name of the function this stage applies.
    func = "filter_cols"


class AssignDim(BaseManipulator):
cardinality = "one-to-one"
func = "assign_dim"
Expand Down
2 changes: 1 addition & 1 deletion fast_plotter/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ def split_version(version):
return tuple(result)


__version__ = '0.6.3'
__version__ = '0.6.4'
version_info = split_version(__version__) # noqa
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.3
current_version = 0.6.4
commit = True
tag = False

Expand Down
24 changes: 24 additions & 0 deletions tests/postproc/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,30 @@ def test_split(binned_df):
assert all([r[0].index.nlevels == 3 for r in results])


def test_filter_cols(binned_df):
    """Check filter_cols keeps the expected columns for each selection mode."""
    frame = binned_df.index.to_frame()

    # Each case pairs the keyword arguments passed to filter_cols with the
    # column names that should survive the filter.
    cases = (
        ({"items": ["int"]}, ("int",)),
        ({"items": ["int", "cat"]}, ("int", "cat")),
        ({"like": "int"}, ("int", "interval")),
        ({"like": ["int", "cat"]}, ("int", "cat", "interval")),
        ({"regex": "^int.*"}, ("int", "interval")),
    )
    for selection, expected in cases:
        kept = funcs.filter_cols(frame, **selection).columns
        assert len(kept) == len(expected)
        assert set(kept) == set(expected)


# def test_reorder_dimensions():
# #def reorder_dimensions(df, order):
# pass
Expand Down

0 comments on commit 11ce33b

Please sign in to comment.