Skip to content

Commit

Permalink
Merge branch 'main' into update_test.ipynb
Browse files Browse the repository at this point in the history
Signed-off-by: Jim-smith <jim-smith@users.noreply.github.com>
  • Loading branch information
jim-smith authored Oct 11, 2023
2 parents 03ff299 + 193754e commit cba0c16
Show file tree
Hide file tree
Showing 6 changed files with 496 additions and 243 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ repos:

# Standard hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: check-merge-conflict
- id: end-of-file-fixer
Expand All @@ -26,7 +26,7 @@ repos:

# Check for spelling
- repo: https://github.com/codespell-project/codespell
rev: v2.2.5
rev: v2.2.6
hooks:
- id: codespell
args: ["-L", "tre"]
Expand All @@ -39,7 +39,7 @@ repos:
# Upgrade old Python syntax
- repo: https://github.com/asottile/pyupgrade
rev: v3.13.0
rev: v3.15.0
hooks:
- id: pyupgrade
args: [--py310-plus]
Expand Down
36 changes: 33 additions & 3 deletions acro/acro_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals
default, computes a frequency table of the factors unless an array of
values and an aggregation function are passed.
To provide consistent behaviour with different aggregation functions,
'empty' rows or columns, i.e. those that are all NaN or 0 (count, sum), are removed.
Parameters
----------
index : array-like, Series, or list of arrays/Series
Expand Down Expand Up @@ -133,6 +136,28 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals
dropna,
normalize,
)
# delete empty rows and columns from table
deleted_rows = []
deleted_cols = []
# define empty columns and rows using boolean masks
empty_cols_mask = table.sum(axis=0) == 0
empty_rows_mask = table.sum(axis=1) == 0

deleted_cols = list(table.columns[empty_cols_mask])
table = table.loc[:, ~empty_cols_mask]
deleted_rows = list(table.index[empty_rows_mask])
table = table.loc[~empty_rows_mask, :]

# create a message listing the names of the deleted columns and rows
comments = []
if deleted_cols:
msg_cols = ", ".join(str(col) for col in deleted_cols)
comments.append(f"Empty columns: {msg_cols} were deleted.")
if deleted_rows:
msg_rows = ", ".join(str(row) for row in deleted_rows)
comments.append(f"Empty rows: {msg_rows} were deleted.")
if comments:
logger.info(" ".join(comments))

masks = create_crosstab_masks(
index,
Expand Down Expand Up @@ -195,6 +220,7 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals
summary=summary,
outcome=outcome,
output=[table],
comments=comments,
)
return table

Expand Down Expand Up @@ -548,10 +574,14 @@ def create_crosstab_masks( # pylint: disable=too-many-arguments,too-many-locals
normalize=normalize,
)

# drop empty columns and rows
if dropna or margins:
for col in t_values.columns:
if t_values[col].sum() == 0:
t_values = t_values.drop(col, axis=1)
empty_cols_mask = t_values.sum(axis=0) == 0
empty_rows_mask = t_values.sum(axis=1) == 0

t_values = t_values.loc[:, ~empty_cols_mask]
t_values = t_values.loc[~empty_rows_mask, :]

t_values = t_values < THRESHOLD
masks["threshold"] = t_values
# check for negative values -- currently unsupported
Expand Down
2 changes: 1 addition & 1 deletion docs/ACRO_For_Researchers.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ The finalise function will:

## Frequently Asked Questions
### What if I want to run my code many times before I decide exactly what to send for approval?
ACRO naturally suppors this way of working. It will not produce the output folder until you are satisfied and add acro.finalise() to the end of your script.
ACRO naturally supports this way of working. It will not produce the output folder until you are satisfied and add acro.finalise() to the end of your script.
### Why is my data exported as unformatted .csv files?
The outputs are saved in row format (as csv files) for the output checkers to check and make decisions. Although you can change the format if you like, the csv files should still be there for checking.
### Why is ACRO Python-based ‘under-the-hood’?
Expand Down
Loading

0 comments on commit cba0c16

Please sign in to comment.