Skip to content

Commit

Permalink
updating pivot_table
Browse files Browse the repository at this point in the history
  • Loading branch information
mahaalbashir committed Oct 31, 2023
1 parent d5c70b5 commit 4602179
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 0 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
## Development

Changes:
* Update table suppression when totals are true for pivot table ([#165](https://github.com/AI-SDC/ACRO/pull/165))
* Fix the problem of shape mismatch when there are two columns and the aggfunc is count or sum ([#167](https://github.com/AI-SDC/ACRO/pull/167))
* Remove all files and folders created during testing ([#168](https://github.com/AI-SDC/ACRO/pull/168))
* Create an example notebook with simple examples of acro ([#170](https://github.com/AI-SDC/ACRO/pull/170))
* Add support for histogram ([#176](https://github.com/AI-SDC/ACRO/pull/176))
* Add inherited members from acro_tables and acro_regression to the sphinx docs ([#177](https://github.com/AI-SDC/ACRO/pull/177))
* Update the R help function ([#178](https://github.com/AI-SDC/ACRO/pull/178))
* Update the finalise function to check the provided folder name and ask for a new one if it already exists ([#179](https://github.com/AI-SDC/ACRO/pull/179))
* Add histogram and survival analysis to R ([#182](https://github.com/AI-SDC/ACRO/pull/182))

## Version 0.4.3 (Sep 22, 2023)

Expand Down
27 changes: 27 additions & 0 deletions acro/acro_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals
(hierarchical indexes) on the index and columns of the result
DataFrame.
To provide consistent behaviour with different aggregation functions,
'empty' rows or columns -i.e. that are all NaN or 0 (count,sum) are removed.
Parameters
----------
data : DataFrame
Expand Down Expand Up @@ -307,6 +310,29 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals
sort,
)

# delete empty rows and columns from table
deleted_rows = []
deleted_cols = []
# define empty columns and rows using boolean masks
empty_cols_mask = table.sum(axis=0) == 0
empty_rows_mask = table.sum(axis=1) == 0

deleted_cols = list(table.columns[empty_cols_mask])
table = table.loc[:, ~empty_cols_mask]
deleted_rows = list(table.index[empty_rows_mask])
table = table.loc[~empty_rows_mask, :]

# create a message with the deleted column's names
comments = []
if deleted_cols:
msg_cols = ", ".join(str(col) for col in deleted_cols)
comments.append(f"Empty columns: {msg_cols} were deleted.")
if deleted_rows:
msg_rows = ", ".join(str(row) for row in deleted_rows)
comments.append(f"Empty rows: {msg_rows} were deleted.")
if comments:
logger.info(" ".join(comments))

# suppression masks to apply based on the following checks
masks: dict[str, DataFrame] = {}

Expand Down Expand Up @@ -387,6 +413,7 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals
summary=summary,
outcome=outcome,
output=[table],
comments=comments,
)
return table

Expand Down
33 changes: 33 additions & 0 deletions test/test_initial.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,39 @@ def test_pivot_table_cols(data, acro):
shutil.rmtree(PATH)


def test_pivot_table_with_aggfunc_sum(data, acro):
    """Test the pivot table with two columns and aggfunc sum.

    Two pivot tables of ``inc_grants`` are built with ``aggfunc="sum"``:
    one with ``["grant_type", "survivor"]`` as the columns and ``year`` as
    the index, and one with the roles swapped. The test then checks that
    each finalised output carries exactly one comment naming the empty
    columns (first table) or empty rows (second table) that were deleted.
    """
    # A fresh instance with suppression disabled replaces the fixture value.
    acro = ACRO(suppress=False)
    _ = acro.pivot_table(
        data,
        index="year",
        columns=["grant_type", "survivor"],
        values="inc_grants",
        aggfunc="sum",
    )
    _ = acro.pivot_table(
        data,
        index=["grant_type", "survivor"],
        columns="year",
        values="inc_grants",
        aggfunc="sum",
    )
    acro.add_exception("output_0", "Let me have it")
    acro.add_exception("output_1", "I need this output")
    try:
        results: Records = acro.finalise(PATH)
        output_0 = results.get_index(0)
        output_1 = results.get_index(1)
        comment_0 = (
            "Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted."
        )
        comment_1 = (
            "Empty rows: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted."
        )
        assert output_0.comments == [comment_0]
        assert output_1.comments == [comment_1]
    finally:
        # Always remove PATH, even when finalise or an assertion fails, so a
        # failed run does not leave the folder behind (presumably it could
        # interfere with other tests that use the same PATH -- verify).
        # ignore_errors avoids masking the real failure if PATH was never
        # created.
        shutil.rmtree(PATH, ignore_errors=True)


def test_ols(data, acro):
"""Ordinary Least Squares test."""
new_df = data[["inc_activity", "inc_grants", "inc_donations", "total_costs"]]
Expand Down

0 comments on commit 4602179

Please sign in to comment.