diff --git a/acro/acro_tables.py b/acro/acro_tables.py
index cd50361..07aa229 100644
--- a/acro/acro_tables.py
+++ b/acro/acro_tables.py
@@ -70,6 +70,9 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals
default, computes a frequency table of the factors unless an array of
values and an aggregation function are passed.
+ To provide consistent behaviour with different aggregation functions,
+ 'empty' rows or columns, i.e. those that are all NaN or 0 (count, sum), are removed.
+
Parameters
----------
index : array-like, Series, or list of arrays/Series
@@ -133,6 +136,28 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals
dropna,
normalize,
)
+ # delete empty rows and columns from table
+ deleted_rows = []
+ deleted_cols = []
+ # define empty columns and rows using boolean masks
+ empty_cols_mask = table.sum(axis=0) == 0
+ empty_rows_mask = table.sum(axis=1) == 0
+
+ deleted_cols = list(table.columns[empty_cols_mask])
+ table = table.loc[:, ~empty_cols_mask]
+ deleted_rows = list(table.index[empty_rows_mask])
+ table = table.loc[~empty_rows_mask, :]
+
+ # build messages listing the names of the deleted columns and rows
+ comments = []
+ if deleted_cols:
+ msg_cols = ", ".join(str(col) for col in deleted_cols)
+ comments.append(f"Empty columns: {msg_cols} were deleted.")
+ if deleted_rows:
+ msg_rows = ", ".join(str(row) for row in deleted_rows)
+ comments.append(f"Empty rows: {msg_rows} were deleted.")
+ if comments:
+ logger.info(" ".join(comments))
masks = create_crosstab_masks(
index,
@@ -195,6 +220,7 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals
summary=summary,
outcome=outcome,
output=[table],
+ comments=comments,
)
return table
@@ -548,10 +574,14 @@ def create_crosstab_masks( # pylint: disable=too-many-arguments,too-many-locals
normalize=normalize,
)
+ # drop empty columns and rows
if dropna or margins:
- for col in t_values.columns:
- if t_values[col].sum() == 0:
- t_values = t_values.drop(col, axis=1)
+ empty_cols_mask = t_values.sum(axis=0) == 0
+ empty_rows_mask = t_values.sum(axis=1) == 0
+
+ t_values = t_values.loc[:, ~empty_cols_mask]
+ t_values = t_values.loc[~empty_rows_mask, :]
+
t_values = t_values < THRESHOLD
masks["threshold"] = t_values
# check for negative values -- currently unsupported
diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb
index feeda2b..b37af78 100644
--- a/notebooks/test.ipynb
+++ b/notebooks/test.ipynb
@@ -569,7 +569,7 @@
"id": "6d4730c4",
"metadata": {},
"source": [
- "### ACRO crosstab with supression"
+ "### ACRO crosstab with suppression"
]
},
{
@@ -708,15 +708,67 @@
"id": "0c695e09",
"metadata": {},
"source": [
- "### ACRO crosstab with supression and totals"
+ "### ACRO crosstab with suppression and totals"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"id": "ef42beb6",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:acro:[\"Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted.\"]\n",
+ "INFO:acro:get_summary(): fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; \n",
+ "INFO:acro:outcome_df:\n",
+ "------------------------------------------------------------------------------------------------------------------------------------------------|\n",
+ "grant_type |G |N |R |R/G |All|\n",
+ "survivor |Dead in 2015 Alive in 2015 |Alive in 2015 |Dead in 2015 Alive in 2015 |Alive in 2015 | |\n",
+ "year | | | | | |\n",
+ "------------------------------------------------------------------------------------------------------------------------------------------------|\n",
+ "2010 | threshold; p-ratio; nk-rule; ok | threshold; p-ratio; | ok ok | threshold; p-ratio; nk-rule; | ok|\n",
+ "2011 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n",
+ "2012 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n",
+ "2013 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n",
+ "2014 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n",
+ "2015 | threshold; p-ratio; nk-rule; threshold; | ok | ok ok | threshold; | ok|\n",
+ "All | ok ok | ok | ok ok | ok | ok|\n",
+ "------------------------------------------------------------------------------------------------------------------------------------------------|\n",
+ "\n",
+ "INFO:acro:records:add(): output_2\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "grant_type G N R \\\n",
+ "survivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \n",
+ "year \n",
+ "2010 2 12 5 40 \n",
+ "2011 3 12 58 45 \n",
+ "2012 3 12 59 45 \n",
+ "2013 3 12 59 47 \n",
+ "2014 3 12 59 43 \n",
+ "2015 3 9 58 28 \n",
+ "All 17 69 298 248 \n",
+ "\n",
+ "grant_type R/G All \n",
+ "survivor Alive in 2015 Alive in 2015 \n",
+ "year \n",
+ "2010 20 4 83 \n",
+ "2011 24 8 150 \n",
+ "2012 24 8 151 \n",
+ "2013 24 8 153 \n",
+ "2014 24 8 149 \n",
+ "2015 23 8 129 \n",
+ "All 139 44 815 \n"
+ ]
+ }
+ ],
"source": [
"acro.suppress = False\n",
"table = acro.crosstab(\n",
@@ -731,7 +783,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 10,
"id": "506135e0",
"metadata": {},
"outputs": [],
@@ -749,7 +801,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"id": "4ae844a0",
"metadata": {},
"outputs": [
@@ -771,7 +823,7 @@
"2015 | ok | ok | ok | threshold; |\n",
"---------------------------------------------------------------------------|\n",
"\n",
- "INFO:acro:records:add(): output_2\n"
+ "INFO:acro:records:add(): output_3\n"
]
},
{
@@ -866,7 +918,7 @@
"2015 11133433.0 146572.187500 10812888.0 18278624.0"
]
},
- "execution_count": 10,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -907,7 +959,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 12,
"id": "bf132239",
"metadata": {},
"outputs": [
@@ -930,7 +982,7 @@
"All | | | | | |\n",
"-------------------------------------------------------|\n",
"\n",
- "INFO:acro:records:add(): output_3\n"
+ "INFO:acro:records:add(): output_4\n"
]
},
{
@@ -1042,7 +1094,7 @@
"All 11412787.0 136158.859375 8006360.5 16648273.0 5968295.5"
]
},
- "execution_count": 11,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -1061,7 +1113,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 13,
"id": "7cc417a0",
"metadata": {},
"outputs": [],
@@ -1079,7 +1131,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 14,
"id": "15bcdc7c",
"metadata": {},
"outputs": [
@@ -1101,7 +1153,7 @@
"2015 | | negative | negative | |\n",
"----------------------------------------|\n",
"\n",
- "INFO:acro:records:add(): output_4\n"
+ "INFO:acro:records:add(): output_5\n"
]
},
{
@@ -1196,7 +1248,7 @@
"2015 11133433.0 146572.015625 10388613.0 18278624.0"
]
},
- "execution_count": 13,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -1219,7 +1271,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 15,
"id": "b13b5f7e",
"metadata": {},
"outputs": [
@@ -1241,7 +1293,7 @@
"All |\n",
"------------------------------------------------------------------|\n",
"\n",
- "INFO:acro:records:add(): output_5\n"
+ "INFO:acro:records:add(): output_6\n"
]
},
{
@@ -1367,7 +1419,7 @@
"All 839788672.0 4.888204e+09 "
]
},
- "execution_count": 14,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -1386,7 +1438,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 16,
"id": "3f016823",
"metadata": {},
"outputs": [
@@ -1503,7 +1555,7 @@
"All 839788672.0 4.888204e+09 "
]
},
- "execution_count": 15,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -1521,7 +1573,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 17,
"id": "6d4730c4",
"metadata": {},
"outputs": [
@@ -1542,7 +1594,7 @@
"R/G missing | missing |\n",
"---------------------------------|\n",
"\n",
- "INFO:acro:records:add(): output_6\n"
+ "INFO:acro:records:add(): output_7\n"
]
},
{
@@ -1619,7 +1671,7 @@
"R/G 1.664827e+07 1.583532e+07"
]
},
- "execution_count": 16,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -1641,7 +1693,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 18,
"id": "f3a87c20",
"metadata": {},
"outputs": [
@@ -1662,7 +1714,7 @@
"R/G missing | missing |\n",
"---------------------------------|\n",
"\n",
- "INFO:acro:records:add(): output_7\n"
+ "INFO:acro:records:add(): output_8\n"
]
},
{
@@ -1739,7 +1791,7 @@
"R/G 1.664827e+07 1.583532e+07"
]
},
- "execution_count": 17,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -1757,7 +1809,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 19,
"id": "8b603548",
"metadata": {},
"outputs": [],
@@ -1775,7 +1827,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 20,
"id": "de4266cd-b4d4-417b-ae44-5d972e8bfdde",
"metadata": {},
"outputs": [
@@ -1796,7 +1848,7 @@
"R/G | |\n",
"---------------------------------|\n",
"\n",
- "INFO:acro:records:add(): output_8\n"
+ "INFO:acro:records:add(): output_9\n"
]
},
{
@@ -1873,7 +1925,7 @@
"R/G 1.664827e+07 1.583532e+07"
]
},
- "execution_count": 19,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -1897,7 +1949,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 21,
"id": "a521cb83",
"metadata": {},
"outputs": [
@@ -1905,8 +1957,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "INFO:acro:ols() outcome: pass; dof=807.0 >= 10\n",
- "INFO:acro:records:add(): output_9\n"
+ "INFO:acro:ols() outcome: pass; dof=807.0 >= 10\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:acro:records:add(): output_10\n"
]
},
{
@@ -1924,10 +1982,10 @@
"
Method: | Least Squares | F-statistic: | 2261. | \n",
"\n",
"\n",
- " Date: | Wed, 04 Oct 2023 | Prob (F-statistic): | 0.00 | \n",
+ " Date: | Thu, 05 Oct 2023 | Prob (F-statistic): | 0.00 | \n",
"
\n",
"\n",
- " Time: | 16:08:40 | Log-Likelihood: | -14495. | \n",
+ " Time: | 18:04:09 | Log-Likelihood: | -14495. | \n",
"
\n",
"\n",
" No. Observations: | 811 | AIC: | 2.900e+04 | \n",
@@ -1982,8 +2040,8 @@
"Dep. Variable: inc_activity R-squared: 0.894\n",
"Model: OLS Adj. R-squared: 0.893\n",
"Method: Least Squares F-statistic: 2261.\n",
- "Date: Wed, 04 Oct 2023 Prob (F-statistic): 0.00\n",
- "Time: 16:08:40 Log-Likelihood: -14495.\n",
+ "Date: Thu, 05 Oct 2023 Prob (F-statistic): 0.00\n",
+ "Time: 18:04:09 Log-Likelihood: -14495.\n",
"No. Observations: 811 AIC: 2.900e+04\n",
"Df Residuals: 807 BIC: 2.902e+04\n",
"Df Model: 3 \n",
@@ -2009,7 +2067,7 @@
"\"\"\""
]
},
- "execution_count": 20,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -2036,7 +2094,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 22,
"id": "cc90f7c9",
"metadata": {},
"outputs": [
@@ -2044,8 +2102,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n",
- "INFO:acro:records:add(): output_10\n"
+ "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:acro:records:add(): output_11\n"
]
},
{
@@ -2063,10 +2127,10 @@
" Method: | Least Squares | F-statistic: | 2261. | \n",
"
\n",
"\n",
- " Date: | Wed, 04 Oct 2023 | Prob (F-statistic): | 0.00 | \n",
+ " Date: | Thu, 05 Oct 2023 | Prob (F-statistic): | 0.00 | \n",
"
\n",
"\n",
- " Time: | 16:08:40 | Log-Likelihood: | -14495. | \n",
+ " Time: | 18:04:09 | Log-Likelihood: | -14495. | \n",
"
\n",
"\n",
" No. Observations: | 811 | AIC: | 2.900e+04 | \n",
@@ -2121,8 +2185,8 @@
"Dep. Variable: inc_activity R-squared: 0.894\n",
"Model: OLS Adj. R-squared: 0.893\n",
"Method: Least Squares F-statistic: 2261.\n",
- "Date: Wed, 04 Oct 2023 Prob (F-statistic): 0.00\n",
- "Time: 16:08:40 Log-Likelihood: -14495.\n",
+ "Date: Thu, 05 Oct 2023 Prob (F-statistic): 0.00\n",
+ "Time: 18:04:09 Log-Likelihood: -14495.\n",
"No. Observations: 811 AIC: 2.900e+04\n",
"Df Residuals: 807 BIC: 2.902e+04\n",
"Df Model: 3 \n",
@@ -2148,7 +2212,7 @@
"\"\"\""
]
},
- "execution_count": 21,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -2170,7 +2234,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 23,
"id": "5b1a1611",
"metadata": {},
"outputs": [
@@ -2178,8 +2242,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "INFO:acro:probit() outcome: pass; dof=806.0 >= 10\n",
- "INFO:acro:records:add(): output_11\n"
+ "INFO:acro:probit() outcome: pass; dof=806.0 >= 10\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:acro:records:add(): output_12\n"
]
},
{
@@ -2206,10 +2276,10 @@
" Method: | MLE | Df Model: | 4 | \n",
"
\n",
"\n",
- " Date: | Wed, 04 Oct 2023 | Pseudo R-squ.: | 0.2140 | \n",
+ " Date: | Thu, 05 Oct 2023 | Pseudo R-squ.: | 0.2140 | \n",
"
\n",
"\n",
- " Time: | 16:08:40 | Log-Likelihood: | -400.46 | \n",
+ " Time: | 18:04:09 | Log-Likelihood: | -400.46 | \n",
"
\n",
"\n",
" converged: | True | LL-Null: | -509.50 | \n",
@@ -2247,8 +2317,8 @@
"Dep. Variable: survivor No. Observations: 811\n",
"Model: Probit Df Residuals: 806\n",
"Method: MLE Df Model: 4\n",
- "Date: Wed, 04 Oct 2023 Pseudo R-squ.: 0.2140\n",
- "Time: 16:08:40 Log-Likelihood: -400.46\n",
+ "Date: Thu, 05 Oct 2023 Pseudo R-squ.: 0.2140\n",
+ "Time: 18:04:09 Log-Likelihood: -400.46\n",
"converged: True LL-Null: -509.50\n",
"Covariance Type: nonrobust LLR p-value: 4.875e-46\n",
"=================================================================================\n",
@@ -2267,7 +2337,7 @@
"\"\"\""
]
},
- "execution_count": 22,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -2295,7 +2365,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 24,
"id": "dcf30f8f",
"metadata": {},
"outputs": [
@@ -2303,8 +2373,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n",
- "INFO:acro:records:add(): output_12\n"
+ "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:acro:records:add(): output_13\n"
]
},
{
@@ -2331,10 +2407,10 @@
" Method: | MLE | Df Model: | 4 | \n",
"
\n",
"\n",
- " Date: | Wed, 04 Oct 2023 | Pseudo R-squ.: | 0.2187 | \n",
+ " Date: | Thu, 05 Oct 2023 | Pseudo R-squ.: | 0.2187 | \n",
"
\n",
"\n",
- " Time: | 16:08:40 | Log-Likelihood: | -398.07 | \n",
+ " Time: | 18:04:09 | Log-Likelihood: | -398.07 | \n",
"
\n",
"\n",
" converged: | True | LL-Null: | -509.50 | \n",
@@ -2372,8 +2448,8 @@
"Dep. Variable: survivor No. Observations: 811\n",
"Model: Logit Df Residuals: 806\n",
"Method: MLE Df Model: 4\n",
- "Date: Wed, 04 Oct 2023 Pseudo R-squ.: 0.2187\n",
- "Time: 16:08:40 Log-Likelihood: -398.07\n",
+ "Date: Thu, 05 Oct 2023 Pseudo R-squ.: 0.2187\n",
+ "Time: 18:04:09 Log-Likelihood: -398.07\n",
"converged: True LL-Null: -509.50\n",
"Covariance Type: nonrobust LLR p-value: 4.532e-47\n",
"=================================================================================\n",
@@ -2392,7 +2468,7 @@
"\"\"\""
]
},
- "execution_count": 23,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -2412,7 +2488,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 25,
"id": "ec960039",
"metadata": {
"scrolled": true
@@ -2445,7 +2521,7 @@
"2013 15 59 71 8\n",
"2014 15 59 71 8\n",
"2015 15 59 71 8]\n",
- "timestamp: 2023-10-04T16:08:29.606127\n",
+ "timestamp: 2023-10-05T18:03:53.064163\n",
"comments: []\n",
"exception: \n",
"\n",
@@ -2472,7 +2548,7 @@
"2013 13557147.0 147937.796875 7202273.5 NaN\n",
"2014 13748147.0 133198.250000 8277525.5 NaN\n",
"2015 11133433.0 146572.187500 10812888.0 NaN]\n",
- "timestamp: 2023-10-04T16:08:29.806068\n",
+ "timestamp: 2023-10-05T18:03:54.913352\n",
"comments: []\n",
"exception: \n",
"\n",
@@ -2480,6 +2556,59 @@
"status: fail\n",
"type: table\n",
"properties: {'method': 'crosstab'}\n",
+ "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 14, 'p-ratio': 8, 'nk-rule': 7}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 2], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 1], [5, 5]], 'p-ratio': [[0, 0], [0, 2], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], 'nk-rule': [[0, 0], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]]}}\n",
+ "command: table = acro.crosstab(\n",
+ "summary: fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; \n",
+ "outcome: grant_type G N \\\n",
+ "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n",
+ "year \n",
+ "2010 threshold; p-ratio; nk-rule; ok threshold; p-ratio; \n",
+ "2011 threshold; p-ratio; nk-rule; ok ok \n",
+ "2012 threshold; p-ratio; nk-rule; ok ok \n",
+ "2013 threshold; p-ratio; nk-rule; ok ok \n",
+ "2014 threshold; p-ratio; nk-rule; ok ok \n",
+ "2015 threshold; p-ratio; nk-rule; threshold; ok \n",
+ "All ok ok ok \n",
+ "\n",
+ "grant_type R R/G All \n",
+ "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n",
+ "year \n",
+ "2010 ok ok threshold; p-ratio; nk-rule; ok \n",
+ "2011 ok ok threshold; ok \n",
+ "2012 ok ok threshold; ok \n",
+ "2013 ok ok threshold; ok \n",
+ "2014 ok ok threshold; ok \n",
+ "2015 ok ok threshold; ok \n",
+ "All ok ok ok ok \n",
+ "output: [grant_type G N R \\\n",
+ "survivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \n",
+ "year \n",
+ "2010 2 12 5 40 \n",
+ "2011 3 12 58 45 \n",
+ "2012 3 12 59 45 \n",
+ "2013 3 12 59 47 \n",
+ "2014 3 12 59 43 \n",
+ "2015 3 9 58 28 \n",
+ "All 17 69 298 248 \n",
+ "\n",
+ "grant_type R/G All \n",
+ "survivor Alive in 2015 Alive in 2015 \n",
+ "year \n",
+ "2010 20 4 83 \n",
+ "2011 24 8 150 \n",
+ "2012 24 8 151 \n",
+ "2013 24 8 153 \n",
+ "2014 24 8 149 \n",
+ "2015 23 8 129 \n",
+ "All 139 44 815 ]\n",
+ "timestamp: 2023-10-05T18:03:56.973956\n",
+ "comments: [\"Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted.\"]\n",
+ "exception: \n",
+ "\n",
+ "uid: output_3\n",
+ "status: fail\n",
+ "type: table\n",
+ "properties: {'method': 'crosstab'}\n",
"sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n",
"command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n",
"summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n",
@@ -2499,11 +2628,11 @@
"2013 13557147.0 147937.796875 7202273.5 16765625.0\n",
"2014 13748147.0 133198.250000 8277525.5 17845750.0\n",
"2015 11133433.0 146572.187500 10812888.0 18278624.0]\n",
- "timestamp: 2023-10-04T16:08:34.251644\n",
+ "timestamp: 2023-10-05T18:04:01.751627\n",
"comments: []\n",
"exception: \n",
"\n",
- "uid: output_3\n",
+ "uid: output_4\n",
"status: review\n",
"type: table\n",
"properties: {'method': 'crosstab'}\n",
@@ -2528,11 +2657,11 @@
"2014 13748147.0 135494.781250 8118565.0 17845750.0 6072600.0\n",
"2015 11133433.0 149143.625000 10596385.0 18278624.0 6442131.0\n",
"All 11412787.0 136158.859375 8006360.5 16648273.0 5968295.5]\n",
- "timestamp: 2023-10-04T16:08:39.363301\n",
+ "timestamp: 2023-10-05T18:04:05.126101\n",
"comments: []\n",
"exception: \n",
"\n",
- "uid: output_4\n",
+ "uid: output_5\n",
"status: review\n",
"type: table\n",
"properties: {'method': 'crosstab'}\n",
@@ -2555,11 +2684,11 @@
"2013 13557147.0 147937.625000 6988263.5 16765625.0\n",
"2014 13748147.0 133198.078125 7997392.5 17845750.0\n",
"2015 11133433.0 146572.015625 10388613.0 18278624.0]\n",
- "timestamp: 2023-10-04T16:08:39.557302\n",
+ "timestamp: 2023-10-05T18:04:08.961665\n",
"comments: []\n",
"exception: \n",
"\n",
- "uid: output_5\n",
+ "uid: output_6\n",
"status: review\n",
"type: table\n",
"properties: {'method': 'pivot_table'}\n",
@@ -2591,11 +2720,11 @@
"R 551457280.0 3.134120e+09 \n",
"R/G 146228992.0 7.325240e+08 \n",
"All 839788672.0 4.888204e+09 ]\n",
- "timestamp: 2023-10-04T16:08:39.760564\n",
+ "timestamp: 2023-10-05T18:04:09.105670\n",
"comments: []\n",
"exception: \n",
"\n",
- "uid: output_6\n",
+ "uid: output_7\n",
"status: review\n",
"type: table\n",
"properties: {'method': 'pivot_table'}\n",
@@ -2616,11 +2745,11 @@
"N 1.344319e+05 1.988737e+05\n",
"R 8.098502e+06 3.204495e+07\n",
"R/G 1.664827e+07 1.583532e+07]\n",
- "timestamp: 2023-10-04T16:08:39.951135\n",
+ "timestamp: 2023-10-05T18:04:09.203761\n",
"comments: []\n",
"exception: \n",
"\n",
- "uid: output_7\n",
+ "uid: output_8\n",
"status: review\n",
"type: table\n",
"properties: {'method': 'pivot_table'}\n",
@@ -2641,11 +2770,11 @@
"N 1.364700e+05 1.999335e+05\n",
"R 8.006360e+06 3.228216e+07\n",
"R/G 1.664827e+07 1.583532e+07]\n",
- "timestamp: 2023-10-04T16:08:40.063744\n",
+ "timestamp: 2023-10-05T18:04:09.264100\n",
"comments: []\n",
"exception: \n",
"\n",
- "uid: output_8\n",
+ "uid: output_9\n",
"status: review\n",
"type: table\n",
"properties: {'method': 'pivot_table'}\n",
@@ -2666,11 +2795,11 @@
"N 1.341800e+05 1.990196e+05\n",
"R 7.882231e+06 3.204558e+07\n",
"R/G 1.664827e+07 1.583532e+07]\n",
- "timestamp: 2023-10-04T16:08:40.206681\n",
+ "timestamp: 2023-10-05T18:04:09.342995\n",
"comments: []\n",
"exception: \n",
"\n",
- "uid: output_9\n",
+ "uid: output_10\n",
"status: pass\n",
"type: regression\n",
"properties: {'method': 'ols', 'dof': 807.0}\n",
@@ -2684,8 +2813,8 @@
"Dep. Variable: \n",
"Model: OLS Adj. R-squared: 0.893\n",
"Method: Least Squares F-statistic: 2261.000\n",
- "Date: Wed, 04 Oct 2023 Prob (F-statistic): 0.000\n",
- "Time: 16:08:40 Log-Likelihood: -14495.000\n",
+ "Date: Thu, 05 Oct 2023 Prob (F-statistic): 0.000\n",
+ "Time: 18:04:09 Log-Likelihood: -14495.000\n",
"No. Observations: 811 AIC: 29000.000\n",
"Df Residuals: 807 BIC: 29020.000\n",
"Df Model: 3 NaN NaN\n",
@@ -2698,11 +2827,11 @@
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n",
"Skew: 9.899 Prob(JB): 0.000000e+00\n",
"Kurtosis: 194.566 Cond. No. 1.050000e+08]\n",
- "timestamp: 2023-10-04T16:08:40.319370\n",
+ "timestamp: 2023-10-05T18:04:09.406745\n",
"comments: []\n",
"exception: \n",
"\n",
- "uid: output_10\n",
+ "uid: output_11\n",
"status: pass\n",
"type: regression\n",
"properties: {'method': 'olsr', 'dof': 807.0}\n",
@@ -2716,8 +2845,8 @@
"Dep. Variable: \n",
"Model: OLS Adj. R-squared: 0.893\n",
"Method: Least Squares F-statistic: 2261.000\n",
- "Date: Wed, 04 Oct 2023 Prob (F-statistic): 0.000\n",
- "Time: 16:08:40 Log-Likelihood: -14495.000\n",
+ "Date: Thu, 05 Oct 2023 Prob (F-statistic): 0.000\n",
+ "Time: 18:04:09 Log-Likelihood: -14495.000\n",
"No. Observations: 811 AIC: 29000.000\n",
"Df Residuals: 807 BIC: 29020.000\n",
"Df Model: 3 NaN NaN\n",
@@ -2730,11 +2859,11 @@
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n",
"Skew: 9.899 Prob(JB): 0.000000e+00\n",
"Kurtosis: 194.566 Cond. No. 1.050000e+08]\n",
- "timestamp: 2023-10-04T16:08:40.431387\n",
+ "timestamp: 2023-10-05T18:04:09.449726\n",
"comments: []\n",
"exception: \n",
"\n",
- "uid: output_11\n",
+ "uid: output_12\n",
"status: pass\n",
"type: regression\n",
"properties: {'method': 'probit', 'dof': 806.0}\n",
@@ -2748,8 +2877,8 @@
"Dep. Variable: \n",
"Model: Probit Df Residuals: 8.060000e+02\n",
"Method: MLE Df Model: 4.000000e+00\n",
- "Date: Wed, 04 Oct 2023 Pseudo R-squ.: 2.140000e-01\n",
- "Time: 16:08:40 Log-Likelihood: -4.004600e+02\n",
+ "Date: Thu, 05 Oct 2023 Pseudo R-squ.: 2.140000e-01\n",
+ "Time: 18:04:09 Log-Likelihood: -4.004600e+02\n",
"converged: True LL-Null: -5.095000e+02\n",
"Covariance Type: nonrobust LLR p-value: 4.875000e-46, coef std err z P>|z| [0.025 \\\n",
"const 4.740000e-02 5.700000e-02 0.838 0.402 -6.300000e-02 \n",
@@ -2764,11 +2893,11 @@
"inc_grants 1.620000e-07 \n",
"inc_donations 3.300000e-07 \n",
"total_costs -1.440000e-08 ]\n",
- "timestamp: 2023-10-04T16:08:40.540353\n",
+ "timestamp: 2023-10-05T18:04:09.499724\n",
"comments: []\n",
"exception: \n",
"\n",
- "uid: output_12\n",
+ "uid: output_13\n",
"status: pass\n",
"type: regression\n",
"properties: {'method': 'logit', 'dof': 806.0}\n",
@@ -2782,8 +2911,8 @@
"Dep. Variable: \n",
"Model: Logit Df Residuals: 8.060000e+02\n",
"Method: MLE Df Model: 4.000000e+00\n",
- "Date: Wed, 04 Oct 2023 Pseudo R-squ.: 2.187000e-01\n",
- "Time: 16:08:40 Log-Likelihood: -3.980700e+02\n",
+ "Date: Thu, 05 Oct 2023 Pseudo R-squ.: 2.187000e-01\n",
+ "Time: 18:04:09 Log-Likelihood: -3.980700e+02\n",
"converged: True LL-Null: -5.095000e+02\n",
"Covariance Type: nonrobust LLR p-value: 4.532000e-47, coef std err z P>|z| [0.025 \\\n",
"const 5.120000e-02 9.100000e-02 0.561 0.575 -1.280000e-01 \n",
@@ -2798,7 +2927,7 @@
"inc_grants 2.660000e-07 \n",
"inc_donations 7.160000e-07 \n",
"total_costs -2.150000e-08 ]\n",
- "timestamp: 2023-10-04T16:08:40.616654\n",
+ "timestamp: 2023-10-05T18:04:09.537725\n",
"comments: []\n",
"exception: \n",
"\n",
@@ -2820,7 +2949,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 26,
"id": "b1f77749",
"metadata": {},
"outputs": [
@@ -2848,7 +2977,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 27,
"id": "45ec04ef",
"metadata": {},
"outputs": [
@@ -2874,7 +3003,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 28,
"id": "0c826271",
"metadata": {},
"outputs": [
@@ -2902,7 +3031,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 29,
"id": "2816eac7",
"metadata": {},
"outputs": [
@@ -2910,7 +3039,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "INFO:acro:records:add_custom(): output_13\n"
+ "INFO:acro:records:add_custom(): output_14\n"
]
}
],
@@ -2930,7 +3059,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 30,
"id": "f38b4334",
"metadata": {},
"outputs": [
@@ -2962,7 +3091,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 31,
"id": "9e554eea",
"metadata": {},
"outputs": [
@@ -2989,10 +3118,39 @@
" inc_grants inc_grants\n",
"grant_type \n",
"G 1.141279e+07 2.283220e+07\n",
+ "N 1.344319e+05 1.988737e+05\n",
+ "R 8.098502e+06 3.204495e+07\n",
+ "R/G 1.664827e+07 1.583532e+07]\n",
+ "timestamp: 2023-10-05T18:04:09.203761\n",
+ "comments: []\n",
+ "exception: \n",
+ "\n",
+ "The status of the record above is: review.\n",
+ "Please explain why an exception should be granted.\n",
+ "\n",
+ "INFO:acro:records:\n",
+ "uid: output_8\n",
+ "status: review\n",
+ "type: table\n",
+ "properties: {'method': 'pivot_table'}\n",
+ "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 8, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0}, 'cells': {'negative': [], 'missing': [[0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1], [3, 0], [3, 1]], 'threshold': [], 'p-ratio': [], 'nk-rule': []}}\n",
+ "command: table = acro.pivot_table(\n",
+ "summary: review; missing values found\n",
+ "outcome: mean std\n",
+ " inc_grants inc_grants\n",
+ "grant_type \n",
+ "G missing missing\n",
+ "N missing missing\n",
+ "R missing missing\n",
+ "R/G missing missing\n",
+ "output: [ mean std\n",
+ " inc_grants inc_grants\n",
+ "grant_type \n",
+ "G 1.141279e+07 2.283220e+07\n",
"N 1.364700e+05 1.999335e+05\n",
"R 8.006360e+06 3.228216e+07\n",
"R/G 1.664827e+07 1.583532e+07]\n",
- "timestamp: 2023-10-04T16:08:40.063744\n",
+ "timestamp: 2023-10-05T18:04:09.264100\n",
"comments: []\n",
"exception: \n",
"\n",
@@ -3000,7 +3158,7 @@
"Please explain why an exception should be granted.\n",
"\n",
"INFO:acro:records:\n",
- "uid: output_8\n",
+ "uid: output_9\n",
"status: review\n",
"type: table\n",
"properties: {'method': 'pivot_table'}\n",
@@ -3021,7 +3179,7 @@
"N 1.341800e+05 1.990196e+05\n",
"R 7.882231e+06 3.204558e+07\n",
"R/G 1.664827e+07 1.583532e+07]\n",
- "timestamp: 2023-10-04T16:08:40.206681\n",
+ "timestamp: 2023-10-05T18:04:09.342995\n",
"comments: []\n",
"exception: \n",
"\n",
@@ -3033,34 +3191,60 @@
"status: fail\n",
"type: table\n",
"properties: {'method': 'crosstab'}\n",
- "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n",
- "command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n",
- "summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n",
- "outcome: grant_type G N R R/G\n",
- "year \n",
- "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; \n",
- "2011 ok ok ok threshold; \n",
- "2012 ok ok ok threshold; \n",
- "2013 ok ok ok threshold; \n",
- "2014 ok ok ok threshold; \n",
- "2015 ok ok ok threshold; \n",
- "output: [grant_type G N R R/G\n",
- "year \n",
- "2010 9921906.0 0.000000 8402284.0 11636000.0\n",
- "2011 8502247.0 124013.859375 7716880.0 16047500.0\n",
- "2012 11458580.0 131859.062500 6958050.5 16810000.0\n",
- "2013 13557147.0 147937.796875 7202273.5 16765625.0\n",
- "2014 13748147.0 133198.250000 8277525.5 17845750.0\n",
- "2015 11133433.0 146572.187500 10812888.0 18278624.0]\n",
- "timestamp: 2023-10-04T16:08:34.251644\n",
- "comments: []\n",
+ "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 14, 'p-ratio': 8, 'nk-rule': 7}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 2], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 1], [5, 5]], 'p-ratio': [[0, 0], [0, 2], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], 'nk-rule': [[0, 0], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]]}}\n",
+ "command: table = acro.crosstab(\n",
+ "summary: fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; \n",
+ "outcome: grant_type G N \\\n",
+ "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n",
+ "year \n",
+ "2010 threshold; p-ratio; nk-rule; ok threshold; p-ratio; \n",
+ "2011 threshold; p-ratio; nk-rule; ok ok \n",
+ "2012 threshold; p-ratio; nk-rule; ok ok \n",
+ "2013 threshold; p-ratio; nk-rule; ok ok \n",
+ "2014 threshold; p-ratio; nk-rule; ok ok \n",
+ "2015 threshold; p-ratio; nk-rule; threshold; ok \n",
+ "All ok ok ok \n",
+ "\n",
+ "grant_type R R/G All \n",
+ "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n",
+ "year \n",
+ "2010 ok ok threshold; p-ratio; nk-rule; ok \n",
+ "2011 ok ok threshold; ok \n",
+ "2012 ok ok threshold; ok \n",
+ "2013 ok ok threshold; ok \n",
+ "2014 ok ok threshold; ok \n",
+ "2015 ok ok threshold; ok \n",
+ "All ok ok ok ok \n",
+ "output: [grant_type G N R \\\n",
+ "survivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \n",
+ "year \n",
+ "2010 2 12 5 40 \n",
+ "2011 3 12 58 45 \n",
+ "2012 3 12 59 45 \n",
+ "2013 3 12 59 47 \n",
+ "2014 3 12 59 43 \n",
+ "2015 3 9 58 28 \n",
+ "All 17 69 298 248 \n",
+ "\n",
+ "grant_type R/G All \n",
+ "survivor Alive in 2015 Alive in 2015 \n",
+ "year \n",
+ "2010 20 4 83 \n",
+ "2011 24 8 150 \n",
+ "2012 24 8 151 \n",
+ "2013 24 8 153 \n",
+ "2014 24 8 149 \n",
+ "2015 23 8 129 \n",
+ "All 139 44 815 ]\n",
+ "timestamp: 2023-10-05T18:03:56.973956\n",
+ "comments: [\"Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted.\"]\n",
"exception: \n",
"\n",
"The status of the record above is: fail.\n",
"Please explain why an exception should be granted.\n",
"\n",
"INFO:acro:records:\n",
- "uid: output_13\n",
+ "uid: output_14\n",
"status: review\n",
"type: custom\n",
"properties: {}\n",
@@ -3071,7 +3255,7 @@
"Columns: []\n",
"Index: []\n",
"output: ['XandY.jpeg']\n",
- "timestamp: 2023-10-04T16:08:40.800408\n",
+ "timestamp: 2023-10-05T18:04:09.660560\n",
"comments: ['This output is an image showing the relationship between X and Y']\n",
"exception: \n",
"\n",
@@ -3099,7 +3283,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 32,
"id": "f78b5a08",
"metadata": {},
"outputs": [
@@ -3115,16 +3299,17 @@
"output_10_2.csv\n",
"output_11_0.csv\n",
"output_11_1.csv\n",
+ "output_11_2.csv\n",
"output_12_0.csv\n",
"output_12_1.csv\n",
+ "output_13_0.csv\n",
+ "output_13_1.csv\n",
"output_3_0.csv\n",
"output_5_0.csv\n",
"output_6_0.csv\n",
"output_7_0.csv\n",
"output_8_0.csv\n",
"output_9_0.csv\n",
- "output_9_1.csv\n",
- "output_9_2.csv\n",
"pivot_table_0.csv\n",
"results.json\n"
]
@@ -3150,7 +3335,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 33,
"id": "df2a02e0",
"metadata": {},
"outputs": [
@@ -3165,16 +3350,17 @@
"output_10_2.csv.txt\n",
"output_11_0.csv.txt\n",
"output_11_1.csv.txt\n",
+ "output_11_2.csv.txt\n",
"output_12_0.csv.txt\n",
"output_12_1.csv.txt\n",
+ "output_13_0.csv.txt\n",
+ "output_13_1.csv.txt\n",
"output_3_0.csv.txt\n",
"output_5_0.csv.txt\n",
"output_6_0.csv.txt\n",
"output_7_0.csv.txt\n",
"output_8_0.csv.txt\n",
"output_9_0.csv.txt\n",
- "output_9_1.csv.txt\n",
- "output_9_2.csv.txt\n",
"pivot_table_0.csv.txt\n",
"results.json.txt\n"
]
@@ -3201,7 +3387,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 34,
"id": "56d2b6a1",
"metadata": {},
"outputs": [
@@ -3302,7 +3488,7 @@
" },\n",
" \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type)\",\n",
" \"summary\": \"fail; threshold: 6 cells may need suppressing; \",\n",
- " \"timestamp\": \"2023-10-04T16:08:29.606127\",\n",
+ " \"timestamp\": \"2023-10-05T18:03:53.064163\",\n",
" \"comments\": [\n",
" \"This is a cross table between year and grant_type\",\n",
" \"6 cells were suppressed in this table\"\n",
@@ -3311,7 +3497,7 @@
" },\n",
" \"output_3\": {\n",
" \"uid\": \"output_3\",\n",
- " \"status\": \"review\",\n",
+ " \"status\": \"fail\",\n",
" \"type\": \"table\",\n",
" \"properties\": {\n",
" \"method\": \"crosstab\"\n",
@@ -3323,64 +3509,205 @@
" \"summary\": {\n",
" \"suppressed\": false,\n",
" \"negative\": 0,\n",
- " \"missing\": 14,\n",
+ " \"missing\": 0,\n",
" \"threshold\": 7,\n",
" \"p-ratio\": 2,\n",
" \"nk-rule\": 1\n",
" },\n",
" \"cells\": {\n",
" \"negative\": [],\n",
- " \"missing\": [\n",
- " [\n",
- " 0,\n",
- " 0\n",
- " ],\n",
+ " \"missing\": [],\n",
+ " \"threshold\": [\n",
" [\n",
" 0,\n",
" 1\n",
" ],\n",
" [\n",
" 0,\n",
- " 2\n",
- " ],\n",
- " [\n",
- " 0,\n",
" 3\n",
" ],\n",
" [\n",
" 1,\n",
- " 1\n",
- " ],\n",
- " [\n",
- " 1,\n",
- " 2\n",
+ " 3\n",
" ],\n",
" [\n",
" 2,\n",
- " 2\n",
- " ],\n",
- " [\n",
- " 3,\n",
- " 1\n",
+ " 3\n",
" ],\n",
" [\n",
" 3,\n",
- " 2\n",
- " ],\n",
- " [\n",
- " 4,\n",
- " 1\n",
+ " 3\n",
" ],\n",
" [\n",
" 4,\n",
- " 2\n",
+ " 3\n",
" ],\n",
" [\n",
" 5,\n",
- " 0\n",
- " ],\n",
+ " 3\n",
+ " ]\n",
+ " ],\n",
+ " \"p-ratio\": [\n",
" [\n",
- " 5,\n",
+ " 0,\n",
+ " 1\n",
+ " ],\n",
+ " [\n",
+ " 0,\n",
+ " 3\n",
+ " ]\n",
+ " ],\n",
+ " \"nk-rule\": [\n",
+ " [\n",
+ " 0,\n",
+ " 3\n",
+ " ]\n",
+ " ]\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ " ],\n",
+ " \"outcome\": {\n",
+ " \"G\": {\n",
+ " \"2010\": \"ok\",\n",
+ " \"2011\": \"ok\",\n",
+ " \"2012\": \"ok\",\n",
+ " \"2013\": \"ok\",\n",
+ " \"2014\": \"ok\",\n",
+ " \"2015\": \"ok\"\n",
+ " },\n",
+ " \"N\": {\n",
+ " \"2010\": \"threshold; p-ratio; \",\n",
+ " \"2011\": \"ok\",\n",
+ " \"2012\": \"ok\",\n",
+ " \"2013\": \"ok\",\n",
+ " \"2014\": \"ok\",\n",
+ " \"2015\": \"ok\"\n",
+ " },\n",
+ " \"R\": {\n",
+ " \"2010\": \"ok\",\n",
+ " \"2011\": \"ok\",\n",
+ " \"2012\": \"ok\",\n",
+ " \"2013\": \"ok\",\n",
+ " \"2014\": \"ok\",\n",
+ " \"2015\": \"ok\"\n",
+ " },\n",
+ " \"R/G\": {\n",
+ " \"2010\": \"threshold; p-ratio; nk-rule; \",\n",
+ " \"2011\": \"threshold; \",\n",
+ " \"2012\": \"threshold; \",\n",
+ " \"2013\": \"threshold; \",\n",
+ " \"2014\": \"threshold; \",\n",
+ " \"2015\": \"threshold; \"\n",
+ " }\n",
+ " },\n",
+ " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\\\"mean\\\")\",\n",
+ " \"summary\": \"fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \",\n",
+ " \"timestamp\": \"2023-10-05T18:04:01.751627\",\n",
+ " \"comments\": [],\n",
+ " \"exception\": \"This one is safe. Trust me, I'm a professor.\"\n",
+ " },\n",
+ " \"output_5\": {\n",
+ " \"uid\": \"output_5\",\n",
+ " \"status\": \"review\",\n",
+ " \"type\": \"table\",\n",
+ " \"properties\": {\n",
+ " \"method\": \"crosstab\"\n",
+ " },\n",
+ " \"files\": [\n",
+ " {\n",
+ " \"name\": \"output_5_0.csv\",\n",
+ " \"sdc\": {\n",
+ " \"summary\": {\n",
+ " \"suppressed\": false,\n",
+ " \"negative\": 10,\n",
+ " \"missing\": 11,\n",
+ " \"threshold\": 7,\n",
+ " \"p-ratio\": 2,\n",
+ " \"nk-rule\": 1\n",
+ " },\n",
+ " \"cells\": {\n",
+ " \"negative\": [\n",
+ " [\n",
+ " 0,\n",
+ " 2\n",
+ " ],\n",
+ " [\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " [\n",
+ " 1,\n",
+ " 2\n",
+ " ],\n",
+ " [\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " [\n",
+ " 3,\n",
+ " 1\n",
+ " ],\n",
+ " [\n",
+ " 3,\n",
+ " 2\n",
+ " ],\n",
+ " [\n",
+ " 4,\n",
+ " 1\n",
+ " ],\n",
+ " [\n",
+ " 4,\n",
+ " 2\n",
+ " ],\n",
+ " [\n",
+ " 5,\n",
+ " 1\n",
+ " ],\n",
+ " [\n",
+ " 5,\n",
+ " 2\n",
+ " ]\n",
+ " ],\n",
+ " \"missing\": [\n",
+ " [\n",
+ " 0,\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 0,\n",
+ " 1\n",
+ " ],\n",
+ " [\n",
+ " 0,\n",
+ " 2\n",
+ " ],\n",
+ " [\n",
+ " 0,\n",
+ " 3\n",
+ " ],\n",
+ " [\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " [\n",
+ " 1,\n",
+ " 2\n",
+ " ],\n",
+ " [\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " [\n",
+ " 4,\n",
+ " 2\n",
+ " ],\n",
+ " [\n",
+ " 5,\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 5,\n",
" 1\n",
" ],\n",
" [\n",
@@ -3440,59 +3767,46 @@
" ],\n",
" \"outcome\": {\n",
" \"G\": {\n",
- " \"2010\": \"missing\",\n",
+ " \"2010\": \"\",\n",
" \"2011\": \"\",\n",
" \"2012\": \"\",\n",
" \"2013\": \"\",\n",
" \"2014\": \"\",\n",
- " \"2015\": \"missing\",\n",
- " \"All\": \"\"\n",
+ " \"2015\": \"\"\n",
" },\n",
" \"N\": {\n",
- " \"2010\": \"missing\",\n",
- " \"2011\": \"missing\",\n",
+ " \"2010\": \"\",\n",
+ " \"2011\": \"negative\",\n",
" \"2012\": \"\",\n",
- " \"2013\": \"missing\",\n",
- " \"2014\": \"missing\",\n",
- " \"2015\": \"missing\",\n",
- " \"All\": \"\"\n",
+ " \"2013\": \"negative\",\n",
+ " \"2014\": \"negative\",\n",
+ " \"2015\": \"negative\"\n",
" },\n",
" \"R\": {\n",
- " \"2010\": \"missing\",\n",
- " \"2011\": \"missing\",\n",
- " \"2012\": \"missing\",\n",
- " \"2013\": \"missing\",\n",
- " \"2014\": \"missing\",\n",
- " \"2015\": \"missing\",\n",
- " \"All\": \"\"\n",
+ " \"2010\": \"negative\",\n",
+ " \"2011\": \"negative\",\n",
+ " \"2012\": \"negative\",\n",
+ " \"2013\": \"negative\",\n",
+ " \"2014\": \"negative\",\n",
+ " \"2015\": \"negative\"\n",
" },\n",
" \"R/G\": {\n",
- " \"2010\": \"missing\",\n",
- " \"2011\": \"\",\n",
- " \"2012\": \"\",\n",
- " \"2013\": \"\",\n",
- " \"2014\": \"\",\n",
- " \"2015\": \"\",\n",
- " \"All\": \"\"\n",
- " },\n",
- " \"All\": {\n",
" \"2010\": \"\",\n",
" \"2011\": \"\",\n",
" \"2012\": \"\",\n",
" \"2013\": \"\",\n",
" \"2014\": \"\",\n",
- " \"2015\": \"\",\n",
- " \"All\": \"\"\n",
+ " \"2015\": \"\"\n",
" }\n",
" },\n",
- " \"command\": \"safe_table = acro.crosstab(\",\n",
- " \"summary\": \"review; missing values found\",\n",
- " \"timestamp\": \"2023-10-04T16:08:39.363301\",\n",
+ " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc=\\\"mean\\\")\",\n",
+ " \"summary\": \"review; negative values found\",\n",
+ " \"timestamp\": \"2023-10-05T18:04:08.961665\",\n",
" \"comments\": [],\n",
- " \"exception\": \"This one is safe. Trust me, I'm a professor.\"\n",
+ " \"exception\": \"It's not disclosive, I promise.\"\n",
" },\n",
- " \"output_5\": {\n",
- " \"uid\": \"output_5\",\n",
+ " \"output_6\": {\n",
+ " \"uid\": \"output_6\",\n",
" \"status\": \"review\",\n",
" \"type\": \"table\",\n",
" \"properties\": {\n",
@@ -3500,7 +3814,7 @@
" },\n",
" \"files\": [\n",
" {\n",
- " \"name\": \"output_5_0.csv\",\n",
+ " \"name\": \"output_6_0.csv\",\n",
" \"sdc\": {\n",
" \"summary\": {\n",
" \"suppressed\": false,\n",
@@ -3661,12 +3975,12 @@
" },\n",
" \"command\": \"table = acro.pivot_table(\",\n",
" \"summary\": \"review; missing values found\",\n",
- " \"timestamp\": \"2023-10-04T16:08:39.760564\",\n",
+ " \"timestamp\": \"2023-10-05T18:04:09.105670\",\n",
" \"comments\": [],\n",
- " \"exception\": \"It's not disclosive, I promise.\"\n",
+ " \"exception\": \"I need this one too\"\n",
" },\n",
- " \"output_6\": {\n",
- " \"uid\": \"output_6\",\n",
+ " \"output_7\": {\n",
+ " \"uid\": \"output_7\",\n",
" \"status\": \"review\",\n",
" \"type\": \"table\",\n",
" \"properties\": {\n",
@@ -3674,7 +3988,7 @@
" },\n",
" \"files\": [\n",
" {\n",
- " \"name\": \"output_6_0.csv\",\n",
+ " \"name\": \"output_7_0.csv\",\n",
" \"sdc\": {\n",
" \"summary\": {\n",
" \"suppressed\": false,\n",
@@ -3743,12 +4057,12 @@
" },\n",
" \"command\": \"table = acro.pivot_table(\",\n",
" \"summary\": \"review; missing values found\",\n",
- " \"timestamp\": \"2023-10-04T16:08:39.951135\",\n",
+ " \"timestamp\": \"2023-10-05T18:04:09.203761\",\n",
" \"comments\": [],\n",
- " \"exception\": \"I need this one too\"\n",
+ " \"exception\": \"yes\"\n",
" },\n",
- " \"output_7\": {\n",
- " \"uid\": \"output_7\",\n",
+ " \"output_8\": {\n",
+ " \"uid\": \"output_8\",\n",
" \"status\": \"review\",\n",
" \"type\": \"table\",\n",
" \"properties\": {\n",
@@ -3756,7 +4070,7 @@
" },\n",
" \"files\": [\n",
" {\n",
- " \"name\": \"output_7_0.csv\",\n",
+ " \"name\": \"output_8_0.csv\",\n",
" \"sdc\": {\n",
" \"summary\": {\n",
" \"suppressed\": false,\n",
@@ -3825,12 +4139,12 @@
" },\n",
" \"command\": \"table = acro.pivot_table(\",\n",
" \"summary\": \"review; missing values found\",\n",
- " \"timestamp\": \"2023-10-04T16:08:40.063744\",\n",
+ " \"timestamp\": \"2023-10-05T18:04:09.264100\",\n",
" \"comments\": [],\n",
- " \"exception\": \"y\"\n",
+ " \"exception\": \"yes\"\n",
" },\n",
- " \"output_8\": {\n",
- " \"uid\": \"output_8\",\n",
+ " \"output_9\": {\n",
+ " \"uid\": \"output_9\",\n",
" \"status\": \"review\",\n",
" \"type\": \"table\",\n",
" \"properties\": {\n",
@@ -3838,7 +4152,7 @@
" },\n",
" \"files\": [\n",
" {\n",
- " \"name\": \"output_8_0.csv\",\n",
+ " \"name\": \"output_9_0.csv\",\n",
" \"sdc\": {\n",
" \"summary\": {\n",
" \"suppressed\": false,\n",
@@ -3924,12 +4238,12 @@
" },\n",
" \"command\": \"table = acro.pivot_table(\",\n",
" \"summary\": \"review; negative values found\",\n",
- " \"timestamp\": \"2023-10-04T16:08:40.206681\",\n",
+ " \"timestamp\": \"2023-10-05T18:04:09.342995\",\n",
" \"comments\": [],\n",
- " \"exception\": \"y\"\n",
+ " \"exception\": \"yes\"\n",
" },\n",
- " \"output_9\": {\n",
- " \"uid\": \"output_9\",\n",
+ " \"output_10\": {\n",
+ " \"uid\": \"output_10\",\n",
" \"status\": \"pass\",\n",
" \"type\": \"regression\",\n",
" \"properties\": {\n",
@@ -3938,27 +4252,27 @@
" },\n",
" \"files\": [\n",
" {\n",
- " \"name\": \"output_9_0.csv\",\n",
+ " \"name\": \"output_10_0.csv\",\n",
" \"sdc\": {}\n",
" },\n",
" {\n",
- " \"name\": \"output_9_1.csv\",\n",
+ " \"name\": \"output_10_1.csv\",\n",
" \"sdc\": {}\n",
" },\n",
" {\n",
- " \"name\": \"output_9_2.csv\",\n",
+ " \"name\": \"output_10_2.csv\",\n",
" \"sdc\": {}\n",
" }\n",
" ],\n",
" \"outcome\": {},\n",
" \"command\": \"results = acro.ols(y, x)\",\n",
" \"summary\": \"pass; dof=807.0 >= 10\",\n",
- " \"timestamp\": \"2023-10-04T16:08:40.319370\",\n",
+ " \"timestamp\": \"2023-10-05T18:04:09.406745\",\n",
" \"comments\": [],\n",
" \"exception\": \"\"\n",
" },\n",
- " \"output_10\": {\n",
- " \"uid\": \"output_10\",\n",
+ " \"output_11\": {\n",
+ " \"uid\": \"output_11\",\n",
" \"status\": \"pass\",\n",
" \"type\": \"regression\",\n",
" \"properties\": {\n",
@@ -3967,27 +4281,27 @@
" },\n",
" \"files\": [\n",
" {\n",
- " \"name\": \"output_10_0.csv\",\n",
+ " \"name\": \"output_11_0.csv\",\n",
" \"sdc\": {}\n",
" },\n",
" {\n",
- " \"name\": \"output_10_1.csv\",\n",
+ " \"name\": \"output_11_1.csv\",\n",
" \"sdc\": {}\n",
" },\n",
" {\n",
- " \"name\": \"output_10_2.csv\",\n",
+ " \"name\": \"output_11_2.csv\",\n",
" \"sdc\": {}\n",
" }\n",
" ],\n",
" \"outcome\": {},\n",
" \"command\": \"results = acro.olsr(\",\n",
" \"summary\": \"pass; dof=807.0 >= 10\",\n",
- " \"timestamp\": \"2023-10-04T16:08:40.431387\",\n",
+ " \"timestamp\": \"2023-10-05T18:04:09.449726\",\n",
" \"comments\": [],\n",
" \"exception\": \"\"\n",
" },\n",
- " \"output_11\": {\n",
- " \"uid\": \"output_11\",\n",
+ " \"output_12\": {\n",
+ " \"uid\": \"output_12\",\n",
" \"status\": \"pass\",\n",
" \"type\": \"regression\",\n",
" \"properties\": {\n",
@@ -3996,23 +4310,23 @@
" },\n",
" \"files\": [\n",
" {\n",
- " \"name\": \"output_11_0.csv\",\n",
+ " \"name\": \"output_12_0.csv\",\n",
" \"sdc\": {}\n",
" },\n",
" {\n",
- " \"name\": \"output_11_1.csv\",\n",
+ " \"name\": \"output_12_1.csv\",\n",
" \"sdc\": {}\n",
" }\n",
" ],\n",
" \"outcome\": {},\n",
" \"command\": \"results = acro.probit(y, x)\",\n",
" \"summary\": \"pass; dof=806.0 >= 10\",\n",
- " \"timestamp\": \"2023-10-04T16:08:40.540353\",\n",
+ " \"timestamp\": \"2023-10-05T18:04:09.499724\",\n",
" \"comments\": [],\n",
" \"exception\": \"\"\n",
" },\n",
- " \"output_12\": {\n",
- " \"uid\": \"output_12\",\n",
+ " \"output_13\": {\n",
+ " \"uid\": \"output_13\",\n",
" \"status\": \"pass\",\n",
" \"type\": \"regression\",\n",
" \"properties\": {\n",
@@ -4021,18 +4335,18 @@
" },\n",
" \"files\": [\n",
" {\n",
- " \"name\": \"output_12_0.csv\",\n",
+ " \"name\": \"output_13_0.csv\",\n",
" \"sdc\": {}\n",
" },\n",
" {\n",
- " \"name\": \"output_12_1.csv\",\n",
+ " \"name\": \"output_13_1.csv\",\n",
" \"sdc\": {}\n",
" }\n",
" ],\n",
" \"outcome\": {},\n",
" \"command\": \"results = acro.logit(y, x)\",\n",
" \"summary\": \"pass; dof=806.0 >= 10\",\n",
- " \"timestamp\": \"2023-10-04T16:08:40.616654\",\n",
+ " \"timestamp\": \"2023-10-05T18:04:09.537725\",\n",
" \"comments\": [],\n",
" \"exception\": \"\"\n",
" },\n",
@@ -4051,9 +4365,9 @@
" \"suppressed\": false,\n",
" \"negative\": 0,\n",
" \"missing\": 0,\n",
- " \"threshold\": 7,\n",
- " \"p-ratio\": 2,\n",
- " \"nk-rule\": 1\n",
+ " \"threshold\": 14,\n",
+ " \"p-ratio\": 8,\n",
+ " \"nk-rule\": 7\n",
" },\n",
" \"cells\": {\n",
" \"negative\": [],\n",
@@ -4061,47 +4375,123 @@
" \"threshold\": [\n",
" [\n",
" 0,\n",
- " 1\n",
+ " 0\n",
" ],\n",
" [\n",
" 0,\n",
- " 3\n",
+ " 2\n",
+ " ],\n",
+ " [\n",
+ " 0,\n",
+ " 5\n",
" ],\n",
" [\n",
" 1,\n",
- " 3\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 1,\n",
+ " 5\n",
" ],\n",
" [\n",
" 2,\n",
- " 3\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 2,\n",
+ " 5\n",
" ],\n",
" [\n",
" 3,\n",
- " 3\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 3,\n",
+ " 5\n",
" ],\n",
" [\n",
" 4,\n",
- " 3\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 4,\n",
+ " 5\n",
" ],\n",
" [\n",
" 5,\n",
- " 3\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 5,\n",
+ " 1\n",
+ " ],\n",
+ " [\n",
+ " 5,\n",
+ " 5\n",
" ]\n",
" ],\n",
" \"p-ratio\": [\n",
" [\n",
" 0,\n",
- " 1\n",
+ " 0\n",
" ],\n",
" [\n",
" 0,\n",
- " 3\n",
+ " 2\n",
+ " ],\n",
+ " [\n",
+ " 0,\n",
+ " 5\n",
+ " ],\n",
+ " [\n",
+ " 1,\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 2,\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 3,\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 4,\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 5,\n",
+ " 0\n",
" ]\n",
" ],\n",
" \"nk-rule\": [\n",
" [\n",
" 0,\n",
- " 3\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 0,\n",
+ " 5\n",
+ " ],\n",
+ " [\n",
+ " 1,\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 2,\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 3,\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 4,\n",
+ " 0\n",
+ " ],\n",
+ " [\n",
+ " 5,\n",
+ " 0\n",
" ]\n",
" ]\n",
" }\n",
@@ -4109,47 +4499,80 @@
" }\n",
" ],\n",
" \"outcome\": {\n",
- " \"G\": {\n",
+ " \"('G', 'Dead in 2015')\": {\n",
+ " \"2010\": \"threshold; p-ratio; nk-rule; \",\n",
+ " \"2011\": \"threshold; p-ratio; nk-rule; \",\n",
+ " \"2012\": \"threshold; p-ratio; nk-rule; \",\n",
+ " \"2013\": \"threshold; p-ratio; nk-rule; \",\n",
+ " \"2014\": \"threshold; p-ratio; nk-rule; \",\n",
+ " \"2015\": \"threshold; p-ratio; nk-rule; \",\n",
+ " \"All\": \"ok\"\n",
+ " },\n",
+ " \"('G', 'Alive in 2015')\": {\n",
" \"2010\": \"ok\",\n",
" \"2011\": \"ok\",\n",
" \"2012\": \"ok\",\n",
" \"2013\": \"ok\",\n",
" \"2014\": \"ok\",\n",
- " \"2015\": \"ok\"\n",
+ " \"2015\": \"threshold; \",\n",
+ " \"All\": \"ok\"\n",
" },\n",
- " \"N\": {\n",
+ " \"('N', 'Alive in 2015')\": {\n",
" \"2010\": \"threshold; p-ratio; \",\n",
" \"2011\": \"ok\",\n",
" \"2012\": \"ok\",\n",
" \"2013\": \"ok\",\n",
" \"2014\": \"ok\",\n",
- " \"2015\": \"ok\"\n",
+ " \"2015\": \"ok\",\n",
+ " \"All\": \"ok\"\n",
" },\n",
- " \"R\": {\n",
+ " \"('R', 'Dead in 2015')\": {\n",
" \"2010\": \"ok\",\n",
" \"2011\": \"ok\",\n",
" \"2012\": \"ok\",\n",
" \"2013\": \"ok\",\n",
" \"2014\": \"ok\",\n",
- " \"2015\": \"ok\"\n",
+ " \"2015\": \"ok\",\n",
+ " \"All\": \"ok\"\n",
" },\n",
- " \"R/G\": {\n",
+ " \"('R', 'Alive in 2015')\": {\n",
+ " \"2010\": \"ok\",\n",
+ " \"2011\": \"ok\",\n",
+ " \"2012\": \"ok\",\n",
+ " \"2013\": \"ok\",\n",
+ " \"2014\": \"ok\",\n",
+ " \"2015\": \"ok\",\n",
+ " \"All\": \"ok\"\n",
+ " },\n",
+ " \"('R/G', 'Alive in 2015')\": {\n",
" \"2010\": \"threshold; p-ratio; nk-rule; \",\n",
" \"2011\": \"threshold; \",\n",
" \"2012\": \"threshold; \",\n",
" \"2013\": \"threshold; \",\n",
" \"2014\": \"threshold; \",\n",
- " \"2015\": \"threshold; \"\n",
+ " \"2015\": \"threshold; \",\n",
+ " \"All\": \"ok\"\n",
+ " },\n",
+ " \"('All', '')\": {\n",
+ " \"2010\": \"ok\",\n",
+ " \"2011\": \"ok\",\n",
+ " \"2012\": \"ok\",\n",
+ " \"2013\": \"ok\",\n",
+ " \"2014\": \"ok\",\n",
+ " \"2015\": \"ok\",\n",
+ " \"All\": \"ok\"\n",
" }\n",
" },\n",
- " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\\\"mean\\\")\",\n",
- " \"summary\": \"fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \",\n",
- " \"timestamp\": \"2023-10-04T16:08:34.251644\",\n",
- " \"comments\": [],\n",
- " \"exception\": \"y\"\n",
+ " \"command\": \"table = acro.crosstab(\",\n",
+ " \"summary\": \"fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; \",\n",
+ " \"timestamp\": \"2023-10-05T18:03:56.973956\",\n",
+ " \"comments\": [\n",
+ " \"Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted.\"\n",
+ " ],\n",
+ " \"exception\": \"yes\"\n",
" },\n",
- " \"output_13\": {\n",
- " \"uid\": \"output_13\",\n",
+ " \"output_14\": {\n",
+ " \"uid\": \"output_14\",\n",
" \"status\": \"review\",\n",
" \"type\": \"custom\",\n",
" \"properties\": {},\n",
@@ -4162,11 +4585,11 @@
" \"outcome\": {},\n",
" \"command\": \"custom\",\n",
" \"summary\": \"review\",\n",
- " \"timestamp\": \"2023-10-04T16:08:40.800408\",\n",
+ " \"timestamp\": \"2023-10-05T18:04:09.660560\",\n",
" \"comments\": [\n",
" \"This output is an image showing the relationship between X and Y\"\n",
" ],\n",
- " \"exception\": \"y\"\n",
+ " \"exception\": \"yes\"\n",
" }\n",
" }\n",
"}\n"
diff --git a/test/test_initial.py b/test/test_initial.py
index 9987aa8..ef46863 100644
--- a/test/test_initial.py
+++ b/test/test_initial.py
@@ -15,7 +15,6 @@
# pylint: disable=redefined-outer-name
PATH: str = "RES_PYTEST"
-RUN_TEST = False
@pytest.fixture
@@ -42,6 +41,36 @@ def test_crosstab_without_suppression(data):
assert 48 == output.output[0]["R/G"].sum()
+def test_crosstab_with_aggfunc_sum(data, acro):
+ """Test that crosstab with aggfunc sum reports deleted empty rows/columns."""
+ acro = ACRO(suppress=False)
+ _ = acro.crosstab(
+ data.year,
+ [data.grant_type, data.survivor],
+ values=data.inc_grants,
+ aggfunc="sum",
+ )
+ _ = acro.crosstab(
+ [data.grant_type, data.survivor],
+ data.year,
+ values=data.inc_grants,
+ aggfunc="sum",
+ )
+ acro.add_exception("output_0", "Let me have it")
+ acro.add_exception("output_1", "I need this output")
+ results: Records = acro.finalise()
+ output_0 = results.get_index(0)
+ output_1 = results.get_index(1)
+ comment_0 = (
+ "Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted."
+ )
+ comment_1 = (
+ "Empty rows: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted."
+ )
+ assert output_0.comments == [comment_0]
+ assert output_1.comments == [comment_1]
+
+
def test_crosstab_threshold(data, acro):
"""Crosstab threshold test."""
_ = acro.crosstab(data.year, data.grant_type)
@@ -572,7 +601,7 @@ def test_crosstab_with_totals_with_suppression(data, acro):
assert "R/G" not in output.output[0].columns
-def test_crosstab_with_totals_with_suppression_herichical(data, acro):
+def test_crosstab_with_totals_with_suppression_hierarchical(data, acro):
"""Test the crosstab with both margins and suppression are true."""
_ = acro.crosstab(
[data.year, data.survivor], [data.grant_type, data.status], margins=True
@@ -641,7 +670,7 @@ def test_crosstab_with_manual_totals_with_suppression(data, acro):
assert "R/G" in output.output[0].columns
-def test_crosstab_with_manual_totals_with_suppression_herichical(data, acro):
+def test_crosstab_with_manual_totals_with_suppression_hierarchical(data, acro):
"""Test the crosstab with both margins and suppression
are true with multilevel indexes and columns while using the total manual function.
"""
@@ -682,7 +711,7 @@ def test_crosstab_with_manual_totals_with_suppression_with_aggfunc_mean(data, ac
assert "R/G" in output.output[0].columns
-def test_herichical_crosstab_with_manual_totals_with_mean(data, acro):
+def test_hierarchical_crosstab_with_manual_totals_with_mean(data, acro):
"""Test the crosstab with both margins and suppression are true, with
aggfunc mean and with multilevel columns and rows while using the total manual function.
"""
@@ -746,106 +775,96 @@ def test_pivot_table_with_totals_with_suppression(data, acro):
assert "R/G" not in output.output[0].columns
-if RUN_TEST:
+def test_crosstab_multiple_aggregate_function(data, acro):
+ """Crosstab with multiple agg funcs."""
+ acro = ACRO(suppress=False)
- def test_crosstab_with_sum(data, acro):
- """Test the crosstab with two columns and aggfunc sum."""
- acro = ACRO(suppress=False)
- _ = acro.crosstab(
- data.year,
- [data.grant_type, data.survivor],
- values=data.inc_grants,
- aggfunc="sum",
- )
- output = acro.results.get_index(0)
- assert (6, 8) == output.output[0].shape
+ _ = acro.crosstab(
+ data.year, data.grant_type, values=data.inc_grants, aggfunc=["mean", "std"]
+ )
+ output = acro.results.get_index(0)
+ correct_summary: str = (
+ "fail; threshold: 14 cells may need suppressing;"
+ " p-ratio: 4 cells may need suppressing; "
+ "nk-rule: 2 cells may need suppressing; "
+ )
+ assert (
+ output.summary == correct_summary
+ ), f"\n{output.summary}\n should be \n{correct_summary}\n"
+ print(f"{output.output[0]['mean'][ 'R/G'].sum()}")
+ correctval = 97383496.0
+ errmsg = f"{output.output[0]['mean']['R/G'].sum()} should be {correctval}"
+ assert correctval == output.output[0]["mean"]["R/G"].sum(), errmsg
- def test_crosstab_multiple_aggregate_function(data, acro):
- """Crosstab with multiple agg funcs."""
- acro = ACRO(suppress=False)
- _ = acro.crosstab(
- data.year, data.grant_type, values=data.inc_grants, aggfunc=["mean", "std"]
- )
- output = acro.results.get_index(0)
- correct_summary: str = (
- "fail; threshold: 14 cells may need suppressing;"
- " p-ratio: 4 cells may need suppressing; "
- "nk-rule: 2 cells may need suppressing; "
- )
- assert (
- output.summary == correct_summary
- ), f"\n{output.summary}\n should be \n{correct_summary}\n"
- print(f"{output.output[0]['mean'][ 'R/G'].sum()}")
- correctval = 97383496.0
- errmsg = f"{output.output[0]['mean']['R/G'].sum()} should be {correctval}"
- assert correctval == output.output[0]["mean"]["R/G"].sum(), errmsg
-
- def test_crosstab_with_totals_with_suppression_with_two_aggfuncs(data, acro):
- """Test the crosstab with both margins and suppression are true
- and with a list of aggfuncs while using the total manual function.
- """
- _ = acro.crosstab(
- data.year,
- data.grant_type,
- values=data.inc_grants,
- aggfunc=["count", "std"],
- margins=True,
- )
- _ = acro.crosstab(
- data.year,
- data.grant_type,
- values=data.inc_grants,
- aggfunc="count",
- margins=True,
- )
- _ = acro.crosstab(
- data.year,
- data.grant_type,
- values=data.inc_grants,
- aggfunc="std",
- margins=True,
- )
- output = acro.results.get_index(0)
- assert 8 == output.output[0].shape[1]
- output_1 = acro.results.get_index(1)
- output_2 = acro.results.get_index(2)
- output_3 = pd.concat([output_1.output[0], output_2.output[0]], axis=1)
- output_4 = (output.output[0]).droplevel(0, axis=1)
- assert output_3.equals(output_4)
-
- def test_crosstab_with_totals_with_suppression_with_two_aggfuncs_herichical(
- data, acro
- ):
- """Test the crosstab with both margins and suppression are true
- and with a list of aggfuncs and a list of columns while using
- the total manual function.
- """
- _ = acro.crosstab(
- data.year,
- [data.grant_type, data.survivor],
- values=data.inc_grants,
- aggfunc=["count", "std"],
- margins=True,
- )
- output = acro.results.get_index(0)
- assert ("G", "Dead in 2015") in output.output[0].columns
-
- def test_crosstab_with_manual_totals_with_suppression_with_two_aggfunc(
- data, acro, caplog
- ):
- """Test the crosstab with both margins and suppression are true
- and with a list of aggfuncs while using the total manual function.
- """
- _ = acro.crosstab(
- data.year,
- data.grant_type,
- values=data.inc_grants,
- aggfunc=["count", "std"],
- margins=True,
- show_suppressed=True,
- )
- assert (
- "We can not calculate the margins with a list of aggregation functions. "
- "Please create a table for each aggregation function" in caplog.text
- )
+def test_crosstab_with_totals_with_suppression_with_two_aggfuncs(data, acro):
+ """Test the crosstab when both margins and suppression are true
+ and a list of aggfuncs is given while using the total manual function.
+ """
+ _ = acro.crosstab(
+ data.year,
+ data.grant_type,
+ values=data.inc_grants,
+ aggfunc=["count", "std"],
+ margins=True,
+ )
+ _ = acro.crosstab(
+ data.year,
+ data.grant_type,
+ values=data.inc_grants,
+ aggfunc="count",
+ margins=True,
+ )
+ _ = acro.crosstab(
+ data.year,
+ data.grant_type,
+ values=data.inc_grants,
+ aggfunc="std",
+ margins=True,
+ )
+ output = acro.results.get_index(0)
+ assert 8 == output.output[0].shape[1]
+ output_1 = acro.results.get_index(1)
+ output_2 = acro.results.get_index(2)
+ output_3 = pd.concat([output_1.output[0], output_2.output[0]], axis=1)
+ output_4 = (output.output[0]).droplevel(0, axis=1)
+ assert output_3.equals(output_4)
+
+
+def test_crosstab_with_totals_with_suppression_with_two_aggfuncs_hierarchical(
+ data, acro
+):
+ """Test the crosstab when both margins and suppression are true
+ and a list of aggfuncs and a list of columns are given while using
+ the total manual function.
+ """
+ _ = acro.crosstab(
+ data.year,
+ [data.grant_type, data.survivor],
+ values=data.inc_grants,
+ aggfunc=["count", "std"],
+ margins=True,
+ )
+ output = acro.results.get_index(0)
+ assert ("count", "G", "Alive in 2015") in output.output[0].columns
+ assert ("std", "G", "Alive in 2015") in output.output[0].columns
+
+
+def test_crosstab_with_manual_totals_with_suppression_with_two_aggfunc(
+ data, acro, caplog
+):
+ """Test the crosstab when both margins and suppression are true
+ and a list of aggfuncs is given while using the total manual function.
+ """
+ _ = acro.crosstab(
+ data.year,
+ data.grant_type,
+ values=data.inc_grants,
+ aggfunc=["count", "std"],
+ margins=True,
+ show_suppressed=True,
+ )
+ assert (
+ "We can not calculate the margins with a list of aggregation functions. "
+ "Please create a table for each aggregation function" in caplog.text
+ )