diff --git a/CHANGELOG.md b/CHANGELOG.md index 231a96d..2d819e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,15 @@ ## Development Changes: +* Update table suppression when totals are true for pivot table ([#165](https://github.com/AI-SDC/ACRO/pull/165)) +* Fix the problem of shape mismatch when there are two columns and the aggfunc is count or sum ([#167](https://github.com/AI-SDC/ACRO/pull/167)) +* Remove all files and folders created during testing ([#168](https://github.com/AI-SDC/ACRO/pull/168)) +* Create an example notebook with simple examples of acro ([#170](https://github.com/AI-SDC/ACRO/pull/170)) +* Add support for histogram ([#176](https://github.com/AI-SDC/ACRO/pull/176)) +* Add inherited members from acro_tables and acro_regression to the sphinx docs ([#177](https://github.com/AI-SDC/ACRO/pull/177)) +* Update the R help function ([#178](https://github.com/AI-SDC/ACRO/pull/178)) +* Update the finalise function to check the provided folder name and ask for a new one if it already exists ([#179](https://github.com/AI-SDC/ACRO/pull/179)) +* Add histogram and survival analysis to R ([#182](https://github.com/AI-SDC/ACRO/pull/182)) ## Version 0.4.3 (Sep 22, 2023) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index 0318f36..7988e0f 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -244,6 +244,9 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals (hierarchical indexes) on the index and columns of the result DataFrame. + To provide consistent behaviour across aggregation functions, 'empty' rows + or columns, i.e. those that are all NaN or 0 (count, sum), are removed.
+ Parameters ---------- data : DataFrame @@ -307,6 +310,29 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals sort, ) + # delete empty rows and columns from table: a row/column is empty when + # every cell is NaN or 0. Test the cells rather than the row/column sum so + # that non-zero values which cancel out (e.g. 5 and -5) are not deleted. + empty_cells = table.isna() | (table == 0) + empty_cols_mask = empty_cells.all(axis=0) + empty_rows_mask = empty_cells.all(axis=1) + + deleted_cols = list(table.columns[empty_cols_mask]) + table = table.loc[:, ~empty_cols_mask] + deleted_rows = list(table.index[empty_rows_mask]) + table = table.loc[~empty_rows_mask, :] + + # create a message naming the deleted columns and rows + comments = [] + if deleted_cols: + msg_cols = ", ".join(str(col) for col in deleted_cols) + comments.append(f"Empty columns: {msg_cols} were deleted.") + if deleted_rows: + msg_rows = ", ".join(str(row) for row in deleted_rows) + comments.append(f"Empty rows: {msg_rows} were deleted.") + if comments: + logger.info(" ".join(comments)) + # suppression masks to apply based on the following checks masks: dict[str, DataFrame] = {} @@ -387,6 +413,7 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals summary=summary, outcome=outcome, output=[table], + comments=comments, ) return table diff --git a/test/test_initial.py b/test/test_initial.py index aecfe63..66a5051 100644 --- a/test/test_initial.py +++ b/test/test_initial.py @@ -169,6 +169,39 @@ def test_pivot_table_cols(data, acro): shutil.rmtree(PATH) +def test_pivot_table_with_aggfunc_sum(data, acro): + """Test the pivot table with two columns and aggfunc sum.""" + acro = ACRO(suppress=False) + _ = acro.pivot_table( + data, + index="year", + columns=["grant_type", "survivor"], + values="inc_grants", + aggfunc="sum", + ) + _ = acro.pivot_table( + data, + index=["grant_type", "survivor"], + columns="year", + values="inc_grants", + aggfunc="sum", + ) + acro.add_exception("output_0", "Let me have it") + acro.add_exception("output_1", "I need this output") + results: Records = acro.finalise(PATH)
+ output_0 = results.get_index(0) + output_1 = results.get_index(1) + comment_0 = ( + "Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted." + ) + comment_1 = ( + "Empty rows: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted." + ) + assert output_0.comments == [comment_0] + assert output_1.comments == [comment_1] + shutil.rmtree(PATH) + + def test_ols(data, acro): """Ordinary Least Squares test.""" new_df = data[["inc_activity", "inc_grants", "inc_donations", "total_costs"]]