diff --git a/acro/acro_tables.py b/acro/acro_tables.py index cd50361..07aa229 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -70,6 +70,9 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals default, computes a frequency table of the factors unless an array of values and an aggregation function are passed. + To provide consistent behaviour with different aggregation functions, + 'empty' rows or columns -i.e. that are all NaN or 0 (count,sum) are removed. + Parameters ---------- index : array-like, Series, or list of arrays/Series @@ -133,6 +136,28 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals dropna, normalize, ) + # delete empty rows and columns from table + deleted_rows = [] + deleted_cols = [] + # define empty columns and rows using boolean masks + empty_cols_mask = table.sum(axis=0) == 0 + empty_rows_mask = table.sum(axis=1) == 0 + + deleted_cols = list(table.columns[empty_cols_mask]) + table = table.loc[:, ~empty_cols_mask] + deleted_rows = list(table.index[empty_rows_mask]) + table = table.loc[~empty_rows_mask, :] + + # create a message with the deleted column's names + comments = [] + if deleted_cols: + msg_cols = ", ".join(str(col) for col in deleted_cols) + comments.append(f"Empty columns: {msg_cols} were deleted.") + if deleted_rows: + msg_rows = ", ".join(str(row) for row in deleted_rows) + comments.append(f"Empty rows: {msg_rows} were deleted.") + if comments: + logger.info(" ".join(comments)) masks = create_crosstab_masks( index, @@ -195,6 +220,7 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals summary=summary, outcome=outcome, output=[table], + comments=comments, ) return table @@ -548,10 +574,14 @@ def create_crosstab_masks( # pylint: disable=too-many-arguments,too-many-locals normalize=normalize, ) + # drop empty columns and rows if dropna or margins: - for col in t_values.columns: - if t_values[col].sum() == 0: - t_values = t_values.drop(col, axis=1) + empty_cols_mask = t_values.sum(axis=0) == 0 + empty_rows_mask = t_values.sum(axis=1) == 0 + + t_values = t_values.loc[:, ~empty_cols_mask] + t_values = t_values.loc[~empty_rows_mask, :] + t_values = t_values < THRESHOLD masks["threshold"] = t_values # check for negative values -- currently unsupported diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb index feeda2b..b37af78 100644 --- a/notebooks/test.ipynb +++ b/notebooks/test.ipynb @@ -569,7 +569,7 @@ "id": "6d4730c4", "metadata": {}, "source": [ - "### ACRO crosstab with supression" + "### ACRO crosstab with suppression" ] }, { @@ -708,15 +708,67 @@ "id": "0c695e09", "metadata": {}, "source": [ - "### ACRO crosstab with supression and totals" + "### ACRO crosstab with suppression and totals" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "ef42beb6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:acro:[\"Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted.\"]\n", + "INFO:acro:get_summary(): fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; \n", + "INFO:acro:outcome_df:\n", + "------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "grant_type |G |N |R |R/G |All|\n", + "survivor |Dead in 2015 Alive in 2015 |Alive in 2015 |Dead in 2015 Alive in 2015 |Alive in 2015 | |\n", + "year | | | | | |\n", + "------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "2010 | threshold; p-ratio; nk-rule; ok | threshold; p-ratio; | ok ok | threshold; p-ratio; nk-rule; | ok|\n", + "2011 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", + "2012 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", + "2013 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", + "2014 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", + "2015 | threshold; p-ratio; nk-rule; threshold; | ok | ok ok | threshold; | ok|\n", + "All | ok ok | ok | ok ok | ok | ok|\n", + "------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "\n", + "INFO:acro:records:add(): output_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "grant_type G N R \\\n", + "survivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \n", + "year \n", + "2010 2 12 5 40 \n", + "2011 3 12 58 45 \n", + "2012 3 12 59 45 \n", + "2013 3 12 59 47 \n", + "2014 3 12 59 43 \n", + "2015 3 9 58 28 \n", + "All 17 69 298 248 \n", + "\n", + "grant_type R/G All \n", + "survivor Alive in 2015 Alive in 2015 \n", + "year \n", + "2010 20 4 83 \n", + "2011 24 8 150 \n", + "2012 24 8 151 \n", + "2013 24 8 153 \n", + "2014 24 8 149 \n", + "2015 23 8 129 \n", + "All 139 44 815 \n" + ] + } + ], "source": [ "acro.suppress = False\n", "table = acro.crosstab(\n", @@ -731,7 +783,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "506135e0", "metadata": {}, "outputs": [], @@ -749,7 +801,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "4ae844a0", "metadata": {}, "outputs": [ @@ -771,7 +823,7 @@ "2015 | ok | ok | ok | threshold; |\n", "---------------------------------------------------------------------------|\n", "\n", - "INFO:acro:records:add(): output_2\n" + "INFO:acro:records:add(): output_3\n" ] }, { @@ -866,7 +918,7 @@ "2015 11133433.0 146572.187500 10812888.0 18278624.0" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -907,7 +959,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "bf132239", "metadata": {}, "outputs": [ @@ -930,7 +982,7 @@ "All | | | | | |\n", "-------------------------------------------------------|\n", "\n", - "INFO:acro:records:add(): output_3\n" + "INFO:acro:records:add(): output_4\n" ] }, { @@ -1042,7 +1094,7 @@ "All 11412787.0 136158.859375 8006360.5 16648273.0 5968295.5" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1061,7 +1113,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "7cc417a0", "metadata": {}, "outputs": [], @@ -1079,7 +1131,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "15bcdc7c", "metadata": {}, "outputs": [ @@ -1101,7 +1153,7 @@ "2015 | | negative | negative | |\n", "----------------------------------------|\n", "\n", - "INFO:acro:records:add(): output_4\n" + "INFO:acro:records:add(): output_5\n" ] }, { @@ -1196,7 +1248,7 @@ "2015 11133433.0 146572.015625 10388613.0 18278624.0" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1219,7 +1271,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "b13b5f7e", "metadata": {}, "outputs": [ @@ -1241,7 +1293,7 @@ "All |\n", "------------------------------------------------------------------|\n", "\n", - "INFO:acro:records:add(): output_5\n" + "INFO:acro:records:add(): output_6\n" ] }, { @@ -1367,7 +1419,7 @@ "All 839788672.0 4.888204e+09 " ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1386,7 +1438,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "3f016823", "metadata": {}, "outputs": [ @@ -1503,7 +1555,7 @@ "All 839788672.0 4.888204e+09 " ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1521,7 +1573,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "6d4730c4", "metadata": {}, "outputs": [ @@ -1542,7 +1594,7 @@ "R/G missing | missing |\n", "---------------------------------|\n", "\n", - "INFO:acro:records:add(): output_6\n" + "INFO:acro:records:add(): output_7\n" ] }, { @@ -1619,7 +1671,7 @@ "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1641,7 +1693,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "f3a87c20", "metadata": {}, "outputs": [ @@ -1662,7 +1714,7 @@ "R/G missing | missing |\n", "---------------------------------|\n", "\n", - "INFO:acro:records:add(): output_7\n" + "INFO:acro:records:add(): output_8\n" ] }, { @@ -1739,7 +1791,7 @@ "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1757,7 +1809,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "8b603548", "metadata": {}, "outputs": [], @@ -1775,7 +1827,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "de4266cd-b4d4-417b-ae44-5d972e8bfdde", "metadata": {}, "outputs": [ @@ -1796,7 +1848,7 @@ "R/G | |\n", "---------------------------------|\n", "\n", - "INFO:acro:records:add(): output_8\n" + "INFO:acro:records:add(): output_9\n" ] }, { @@ -1873,7 +1925,7 @@ "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1897,7 +1949,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "a521cb83", "metadata": {}, "outputs": [ @@ -1905,8 +1957,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:ols() outcome: pass; dof=807.0 >= 10\n", - "INFO:acro:records:add(): output_9\n" + "INFO:acro:ols() outcome: pass; dof=807.0 >= 10\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:acro:records:add(): output_10\n" ] }, { @@ -1924,10 +1982,10 @@ " Method: Least Squares F-statistic: 2261. \n", "\n", "\n", - " Date: Wed, 04 Oct 2023 Prob (F-statistic): 0.00 \n", + " Date: Thu, 05 Oct 2023 Prob (F-statistic): 0.00 \n", "\n", "\n", - " Time: 16:08:40 Log-Likelihood: -14495. \n", + " Time: 18:04:09 Log-Likelihood: -14495. \n", "\n", "\n", " No. Observations: 811 AIC: 2.900e+04\n", @@ -1982,8 +2040,8 @@ "Dep. Variable: inc_activity R-squared: 0.894\n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", - "Date: Wed, 04 Oct 2023 Prob (F-statistic): 0.00\n", - "Time: 16:08:40 Log-Likelihood: -14495.\n", + "Date: Thu, 05 Oct 2023 Prob (F-statistic): 0.00\n", + "Time: 18:04:09 Log-Likelihood: -14495.\n", "No. Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", @@ -2009,7 +2067,7 @@ "\"\"\"" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2036,7 +2094,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "id": "cc90f7c9", "metadata": {}, "outputs": [ @@ -2044,8 +2102,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n", - "INFO:acro:records:add(): output_10\n" + "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:acro:records:add(): output_11\n" ] }, { @@ -2063,10 +2127,10 @@ " Method: Least Squares F-statistic: 2261. \n", "\n", "\n", - " Date: Wed, 04 Oct 2023 Prob (F-statistic): 0.00 \n", + " Date: Thu, 05 Oct 2023 Prob (F-statistic): 0.00 \n", "\n", "\n", - " Time: 16:08:40 Log-Likelihood: -14495. \n", + " Time: 18:04:09 Log-Likelihood: -14495. \n", "\n", "\n", " No. Observations: 811 AIC: 2.900e+04\n", @@ -2121,8 +2185,8 @@ "Dep. Variable: inc_activity R-squared: 0.894\n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", - "Date: Wed, 04 Oct 2023 Prob (F-statistic): 0.00\n", - "Time: 16:08:40 Log-Likelihood: -14495.\n", + "Date: Thu, 05 Oct 2023 Prob (F-statistic): 0.00\n", + "Time: 18:04:09 Log-Likelihood: -14495.\n", "No. Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", @@ -2148,7 +2212,7 @@ "\"\"\"" ] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2170,7 +2234,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "id": "5b1a1611", "metadata": {}, "outputs": [ @@ -2178,8 +2242,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:probit() outcome: pass; dof=806.0 >= 10\n", - "INFO:acro:records:add(): output_11\n" + "INFO:acro:probit() outcome: pass; dof=806.0 >= 10\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:acro:records:add(): output_12\n" ] }, { @@ -2206,10 +2276,10 @@ " Method: MLE Df Model: 4 \n", "\n", "\n", - " Date: Wed, 04 Oct 2023 Pseudo R-squ.: 0.2140 \n", + " Date: Thu, 05 Oct 2023 Pseudo R-squ.: 0.2140 \n", "\n", "\n", - " Time: 16:08:40 Log-Likelihood: -400.46 \n", + " Time: 18:04:09 Log-Likelihood: -400.46 \n", "\n", "\n", " converged: True LL-Null: -509.50 \n", @@ -2247,8 +2317,8 @@ "Dep. Variable: survivor No. Observations: 811\n", "Model: Probit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", - "Date: Wed, 04 Oct 2023 Pseudo R-squ.: 0.2140\n", - "Time: 16:08:40 Log-Likelihood: -400.46\n", + "Date: Thu, 05 Oct 2023 Pseudo R-squ.: 0.2140\n", + "Time: 18:04:09 Log-Likelihood: -400.46\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.875e-46\n", "=================================================================================\n", @@ -2267,7 +2337,7 @@ "\"\"\"" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -2295,7 +2365,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "id": "dcf30f8f", "metadata": {}, "outputs": [ @@ -2303,8 +2373,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n", - "INFO:acro:records:add(): output_12\n" + "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:acro:records:add(): output_13\n" ] }, { @@ -2331,10 +2407,10 @@ " Method: MLE Df Model: 4 \n", "\n", "\n", - " Date: Wed, 04 Oct 2023 Pseudo R-squ.: 0.2187 \n", + " Date: Thu, 05 Oct 2023 Pseudo R-squ.: 0.2187 \n", "\n", "\n", - " Time: 16:08:40 Log-Likelihood: -398.07 \n", + " Time: 18:04:09 Log-Likelihood: -398.07 \n", "\n", "\n", " converged: True LL-Null: -509.50 \n", @@ -2372,8 +2448,8 @@ "Dep. Variable: survivor No. Observations: 811\n", "Model: Logit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", - "Date: Wed, 04 Oct 2023 Pseudo R-squ.: 0.2187\n", - "Time: 16:08:40 Log-Likelihood: -398.07\n", + "Date: Thu, 05 Oct 2023 Pseudo R-squ.: 0.2187\n", + "Time: 18:04:09 Log-Likelihood: -398.07\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.532e-47\n", "=================================================================================\n", @@ -2392,7 +2468,7 @@ "\"\"\"" ] }, - "execution_count": 23, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -2412,7 +2488,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "id": "ec960039", "metadata": { "scrolled": true @@ -2445,7 +2521,7 @@ "2013 15 59 71 8\n", "2014 15 59 71 8\n", "2015 15 59 71 8]\n", - "timestamp: 2023-10-04T16:08:29.606127\n", + "timestamp: 2023-10-05T18:03:53.064163\n", "comments: []\n", "exception: \n", "\n", @@ -2472,7 +2548,7 @@ "2013 13557147.0 147937.796875 7202273.5 NaN\n", "2014 13748147.0 133198.250000 8277525.5 NaN\n", "2015 11133433.0 146572.187500 10812888.0 NaN]\n", - "timestamp: 2023-10-04T16:08:29.806068\n", + "timestamp: 2023-10-05T18:03:54.913352\n", "comments: []\n", "exception: \n", "\n", @@ -2480,6 +2556,59 @@ "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", + "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 14, 'p-ratio': 8, 'nk-rule': 7}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 2], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 1], [5, 5]], 'p-ratio': [[0, 0], [0, 2], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], 'nk-rule': [[0, 0], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]]}}\n", + "command: table = acro.crosstab(\n", + "summary: fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; \n", + "outcome: grant_type G N \\\n", + "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n", + "year \n", + "2010 threshold; p-ratio; nk-rule; ok threshold; p-ratio; \n", + "2011 threshold; p-ratio; nk-rule; ok ok \n", + "2012 threshold; p-ratio; nk-rule; ok ok \n", + "2013 threshold; p-ratio; nk-rule; ok ok \n", + "2014 threshold; p-ratio; nk-rule; ok ok \n", + "2015 threshold; p-ratio; nk-rule; threshold; ok \n", + "All ok ok ok \n", + "\n", + "grant_type R R/G All \n", + "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n", + "year \n", + "2010 ok ok threshold; p-ratio; nk-rule; ok \n", + "2011 ok ok threshold; ok \n", + "2012 ok ok threshold; ok \n", + "2013 ok ok threshold; ok \n", + "2014 ok ok threshold; ok \n", + "2015 ok ok threshold; ok \n", + "All ok ok ok ok \n", + "output: [grant_type G N R \\\n", + "survivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \n", + "year \n", + "2010 2 12 5 40 \n", + "2011 3 12 58 45 \n", + "2012 3 12 59 45 \n", + "2013 3 12 59 47 \n", + "2014 3 12 59 43 \n", + "2015 3 9 58 28 \n", + "All 17 69 298 248 \n", + "\n", + "grant_type R/G All \n", + "survivor Alive in 2015 Alive in 2015 \n", + "year \n", + "2010 20 4 83 \n", + "2011 24 8 150 \n", + "2012 24 8 151 \n", + "2013 24 8 153 \n", + "2014 24 8 149 \n", + "2015 23 8 129 \n", + "All 139 44 815 ]\n", + "timestamp: 2023-10-05T18:03:56.973956\n", + "comments: [\"Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted.\"]\n", + "exception: \n", + "\n", + "uid: output_3\n", + "status: fail\n", + "type: table\n", + "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", "command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n", "summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", @@ -2499,11 +2628,11 @@ "2013 13557147.0 147937.796875 7202273.5 16765625.0\n", "2014 13748147.0 133198.250000 8277525.5 17845750.0\n", "2015 11133433.0 146572.187500 10812888.0 18278624.0]\n", - "timestamp: 2023-10-04T16:08:34.251644\n", + "timestamp: 2023-10-05T18:04:01.751627\n", "comments: []\n", "exception: \n", "\n", - "uid: output_3\n", + "uid: output_4\n", "status: review\n", "type: table\n", "properties: {'method': 'crosstab'}\n", @@ -2528,11 +2657,11 @@ "2014 13748147.0 135494.781250 8118565.0 17845750.0 6072600.0\n", "2015 11133433.0 149143.625000 10596385.0 18278624.0 6442131.0\n", "All 11412787.0 136158.859375 8006360.5 16648273.0 5968295.5]\n", - "timestamp: 2023-10-04T16:08:39.363301\n", + "timestamp: 2023-10-05T18:04:05.126101\n", "comments: []\n", "exception: \n", "\n", - "uid: output_4\n", + "uid: output_5\n", "status: review\n", "type: table\n", "properties: {'method': 'crosstab'}\n", @@ -2555,11 +2684,11 @@ "2013 13557147.0 147937.625000 6988263.5 16765625.0\n", "2014 13748147.0 133198.078125 7997392.5 17845750.0\n", "2015 11133433.0 146572.015625 10388613.0 18278624.0]\n", - "timestamp: 2023-10-04T16:08:39.557302\n", + "timestamp: 2023-10-05T18:04:08.961665\n", "comments: []\n", "exception: \n", "\n", - "uid: output_5\n", + "uid: output_6\n", "status: review\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", @@ -2591,11 +2720,11 @@ "R 551457280.0 3.134120e+09 \n", "R/G 146228992.0 7.325240e+08 \n", "All 839788672.0 4.888204e+09 ]\n", - "timestamp: 2023-10-04T16:08:39.760564\n", + "timestamp: 2023-10-05T18:04:09.105670\n", "comments: []\n", "exception: \n", "\n", - "uid: output_6\n", + "uid: output_7\n", "status: review\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", @@ -2616,11 +2745,11 @@ "N 1.344319e+05 1.988737e+05\n", "R 8.098502e+06 3.204495e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-10-04T16:08:39.951135\n", + "timestamp: 2023-10-05T18:04:09.203761\n", "comments: []\n", "exception: \n", "\n", - "uid: output_7\n", + "uid: output_8\n", "status: review\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", @@ -2641,11 +2770,11 @@ "N 1.364700e+05 1.999335e+05\n", "R 8.006360e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-10-04T16:08:40.063744\n", + "timestamp: 2023-10-05T18:04:09.264100\n", "comments: []\n", "exception: \n", "\n", - "uid: output_8\n", + "uid: output_9\n", "status: review\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", @@ -2666,11 +2795,11 @@ "N 1.341800e+05 1.990196e+05\n", "R 7.882231e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-10-04T16:08:40.206681\n", + "timestamp: 2023-10-05T18:04:09.342995\n", "comments: []\n", "exception: \n", "\n", - "uid: output_9\n", + "uid: output_10\n", "status: pass\n", "type: regression\n", "properties: {'method': 'ols', 'dof': 807.0}\n", @@ -2684,8 +2813,8 @@ "Dep. Variable: \n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", - "Date: Wed, 04 Oct 2023 Prob (F-statistic): 0.000\n", - "Time: 16:08:40 Log-Likelihood: -14495.000\n", + "Date: Thu, 05 Oct 2023 Prob (F-statistic): 0.000\n", + "Time: 18:04:09 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", @@ -2698,11 +2827,11 @@ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 1.050000e+08]\n", - "timestamp: 2023-10-04T16:08:40.319370\n", + "timestamp: 2023-10-05T18:04:09.406745\n", "comments: []\n", "exception: \n", "\n", - "uid: output_10\n", + "uid: output_11\n", "status: pass\n", "type: regression\n", "properties: {'method': 'olsr', 'dof': 807.0}\n", @@ -2716,8 +2845,8 @@ "Dep. Variable: \n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", - "Date: Wed, 04 Oct 2023 Prob (F-statistic): 0.000\n", - "Time: 16:08:40 Log-Likelihood: -14495.000\n", + "Date: Thu, 05 Oct 2023 Prob (F-statistic): 0.000\n", + "Time: 18:04:09 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", @@ -2730,11 +2859,11 @@ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 1.050000e+08]\n", - "timestamp: 2023-10-04T16:08:40.431387\n", + "timestamp: 2023-10-05T18:04:09.449726\n", "comments: []\n", "exception: \n", "\n", - "uid: output_11\n", + "uid: output_12\n", "status: pass\n", "type: regression\n", "properties: {'method': 'probit', 'dof': 806.0}\n", @@ -2748,8 +2877,8 @@ "Dep. Variable: \n", "Model: Probit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", - "Date: Wed, 04 Oct 2023 Pseudo R-squ.: 2.140000e-01\n", - "Time: 16:08:40 Log-Likelihood: -4.004600e+02\n", + "Date: Thu, 05 Oct 2023 Pseudo R-squ.: 2.140000e-01\n", + "Time: 18:04:09 Log-Likelihood: -4.004600e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.875000e-46, coef std err z P>|z| [0.025 \\\n", "const 4.740000e-02 5.700000e-02 0.838 0.402 -6.300000e-02 \n", @@ -2764,11 +2893,11 @@ "inc_grants 1.620000e-07 \n", "inc_donations 3.300000e-07 \n", "total_costs -1.440000e-08 ]\n", - "timestamp: 2023-10-04T16:08:40.540353\n", + "timestamp: 2023-10-05T18:04:09.499724\n", "comments: []\n", "exception: \n", "\n", - "uid: output_12\n", + "uid: output_13\n", "status: pass\n", "type: regression\n", "properties: {'method': 'logit', 'dof': 806.0}\n", @@ -2782,8 +2911,8 @@ "Dep. Variable: \n", "Model: Logit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", - "Date: Wed, 04 Oct 2023 Pseudo R-squ.: 2.187000e-01\n", - "Time: 16:08:40 Log-Likelihood: -3.980700e+02\n", + "Date: Thu, 05 Oct 2023 Pseudo R-squ.: 2.187000e-01\n", + "Time: 18:04:09 Log-Likelihood: -3.980700e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.532000e-47, coef std err z P>|z| [0.025 \\\n", "const 5.120000e-02 9.100000e-02 0.561 0.575 -1.280000e-01 \n", @@ -2798,7 +2927,7 @@ "inc_grants 2.660000e-07 \n", "inc_donations 7.160000e-07 \n", "total_costs -2.150000e-08 ]\n", - "timestamp: 2023-10-04T16:08:40.616654\n", + "timestamp: 2023-10-05T18:04:09.537725\n", "comments: []\n", "exception: \n", "\n", @@ -2820,7 +2949,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "id": "b1f77749", "metadata": {}, "outputs": [ @@ -2848,7 +2977,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "id": "45ec04ef", "metadata": {}, "outputs": [ @@ -2874,7 +3003,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "id": "0c826271", "metadata": {}, "outputs": [ @@ -2902,7 +3031,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "id": "2816eac7", "metadata": {}, "outputs": [ @@ -2910,7 +3039,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:records:add_custom(): output_13\n" + "INFO:acro:records:add_custom(): output_14\n" ] } ], @@ -2930,7 +3059,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "id": "f38b4334", "metadata": {}, "outputs": [ @@ -2962,7 +3091,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "id": "9e554eea", "metadata": {}, "outputs": [ @@ -2989,10 +3118,39 @@ " inc_grants inc_grants\n", "grant_type \n", "G 1.141279e+07 2.283220e+07\n", + "N 1.344319e+05 1.988737e+05\n", + "R 8.098502e+06 3.204495e+07\n", + "R/G 1.664827e+07 1.583532e+07]\n", + "timestamp: 2023-10-05T18:04:09.203761\n", + "comments: []\n", + "exception: \n", + "\n", + "The status of the record above is: review.\n", + "Please explain why an exception should be granted.\n", + "\n", + "INFO:acro:records:\n", + "uid: output_8\n", + "status: review\n", + "type: table\n", + "properties: {'method': 'pivot_table'}\n", + "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 8, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0}, 'cells': {'negative': [], 'missing': [[0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1], [3, 0], [3, 1]], 'threshold': [], 'p-ratio': [], 'nk-rule': []}}\n", + "command: table = acro.pivot_table(\n", + "summary: review; missing values found\n", + "outcome: mean std\n", + " inc_grants inc_grants\n", + "grant_type \n", + "G missing missing\n", + "N missing missing\n", + "R missing missing\n", + "R/G missing missing\n", + "output: [ mean std\n", + " inc_grants inc_grants\n", + "grant_type \n", + "G 1.141279e+07 2.283220e+07\n", "N 1.364700e+05 1.999335e+05\n", "R 8.006360e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-10-04T16:08:40.063744\n", + "timestamp: 2023-10-05T18:04:09.264100\n", "comments: []\n", "exception: \n", "\n", @@ -3000,7 +3158,7 @@ "Please explain why an exception should be granted.\n", "\n", "INFO:acro:records:\n", - "uid: output_8\n", + "uid: output_9\n", "status: review\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", @@ -3021,7 +3179,7 @@ "N 1.341800e+05 1.990196e+05\n", "R 7.882231e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-10-04T16:08:40.206681\n", + "timestamp: 2023-10-05T18:04:09.342995\n", "comments: []\n", "exception: \n", "\n", @@ -3033,34 +3191,60 @@ "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", - "command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n", - "summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", - "outcome: grant_type G N R R/G\n", - "year \n", - "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; \n", - "2011 ok ok ok threshold; \n", - "2012 ok ok ok threshold; \n", - "2013 ok ok ok threshold; \n", - "2014 ok ok ok threshold; \n", - "2015 ok ok ok threshold; \n", - "output: [grant_type G N R R/G\n", - "year \n", - "2010 9921906.0 0.000000 8402284.0 11636000.0\n", - "2011 8502247.0 124013.859375 7716880.0 16047500.0\n", - "2012 11458580.0 131859.062500 6958050.5 16810000.0\n", - "2013 13557147.0 147937.796875 7202273.5 16765625.0\n", - "2014 13748147.0 133198.250000 8277525.5 17845750.0\n", - "2015 11133433.0 146572.187500 10812888.0 18278624.0]\n", - "timestamp: 2023-10-04T16:08:34.251644\n", - "comments: []\n", + "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 14, 'p-ratio': 8, 'nk-rule': 7}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 2], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 1], [5, 5]], 'p-ratio': [[0, 0], [0, 2], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], 'nk-rule': [[0, 0], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]]}}\n", + "command: table = acro.crosstab(\n", + "summary: fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; \n", + "outcome: grant_type G N \\\n", + "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n", + "year \n", + "2010 threshold; p-ratio; nk-rule; ok threshold; p-ratio; \n", + "2011 threshold; p-ratio; nk-rule; ok ok \n", + "2012 threshold; p-ratio; nk-rule; ok ok \n", + "2013 threshold; p-ratio; nk-rule; ok ok \n", + "2014 threshold; p-ratio; nk-rule; ok ok \n", + "2015 threshold; p-ratio; nk-rule; threshold; ok \n", + "All ok ok ok \n", + "\n", + "grant_type R R/G All \n", + "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n", + "year \n", + "2010 ok ok threshold; p-ratio; nk-rule; ok \n", + "2011 ok ok threshold; ok \n", + "2012 ok ok threshold; ok \n", + "2013 ok ok threshold; ok \n", + "2014 ok ok threshold; ok \n", + "2015 ok ok threshold; ok \n", + "All ok ok ok ok \n", + "output: [grant_type G N R \\\n", + "survivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \n", + "year \n", + "2010 2 12 5 40 \n", + "2011 3 12 58 45 \n", + "2012 3 12 59 45 \n", + "2013 3 12 59 47 \n", + "2014 3 12 59 43 \n", + "2015 3 9 58 28 \n", + "All 17 69 298 248 \n", + "\n", + "grant_type R/G All \n", + "survivor Alive in 2015 Alive in 2015 \n", + "year \n", + "2010 20 4 83 \n", + "2011 24 8 150 \n", + "2012 24 8 151 \n", + "2013 24 8 153 \n", + "2014 24 8 149 \n", + "2015 23 8 129 \n", + "All 139 44 815 ]\n", + "timestamp: 2023-10-05T18:03:56.973956\n", + "comments: [\"Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted.\"]\n", "exception: \n", "\n", "The status of the record above is: fail.\n", "Please explain why an exception should be granted.\n", "\n", "INFO:acro:records:\n", - "uid: output_13\n", + "uid: output_14\n", "status: review\n", "type: custom\n", "properties: {}\n", @@ -3071,7 +3255,7 @@ "Columns: []\n", "Index: []\n", "output: ['XandY.jpeg']\n", - "timestamp: 2023-10-04T16:08:40.800408\n", + "timestamp: 2023-10-05T18:04:09.660560\n", "comments: ['This output is an image showing the relationship between X and Y']\n", "exception: \n", "\n", @@ -3099,7 +3283,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "id": "f78b5a08", "metadata": {}, "outputs": [ @@ -3115,16 +3299,17 @@ "output_10_2.csv\n", "output_11_0.csv\n", "output_11_1.csv\n", + "output_11_2.csv\n", "output_12_0.csv\n", "output_12_1.csv\n", + "output_13_0.csv\n", + "output_13_1.csv\n", "output_3_0.csv\n", "output_5_0.csv\n", "output_6_0.csv\n", "output_7_0.csv\n", "output_8_0.csv\n", "output_9_0.csv\n", - "output_9_1.csv\n", - "output_9_2.csv\n", "pivot_table_0.csv\n", "results.json\n" ] @@ -3150,7 +3335,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "id": "df2a02e0", "metadata": {}, "outputs": [ @@ -3165,16 +3350,17 @@ "output_10_2.csv.txt\n", "output_11_0.csv.txt\n", "output_11_1.csv.txt\n", + "output_11_2.csv.txt\n", "output_12_0.csv.txt\n", "output_12_1.csv.txt\n", + "output_13_0.csv.txt\n", + "output_13_1.csv.txt\n", "output_3_0.csv.txt\n", "output_5_0.csv.txt\n", "output_6_0.csv.txt\n", "output_7_0.csv.txt\n", "output_8_0.csv.txt\n", "output_9_0.csv.txt\n", - "output_9_1.csv.txt\n", - "output_9_2.csv.txt\n", "pivot_table_0.csv.txt\n", "results.json.txt\n" ] @@ -3201,7 +3387,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "id": "56d2b6a1", "metadata": {}, "outputs": [ @@ -3302,7 +3488,7 @@ " },\n", " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type)\",\n", " \"summary\": \"fail; threshold: 6 cells may need suppressing; \",\n", - " \"timestamp\": \"2023-10-04T16:08:29.606127\",\n", + " \"timestamp\": \"2023-10-05T18:03:53.064163\",\n", " \"comments\": [\n", " \"This is a cross table between year and grant_type\",\n", " \"6 cells were suppressed in this table\"\n", @@ -3311,7 +3497,7 @@ " },\n", " \"output_3\": {\n", " \"uid\": \"output_3\",\n", - " \"status\": \"review\",\n", + " \"status\": \"fail\",\n", " \"type\": \"table\",\n", " \"properties\": {\n", " \"method\": \"crosstab\"\n", @@ -3323,64 +3509,205 @@ " \"summary\": {\n", " \"suppressed\": false,\n", " \"negative\": 0,\n", - " \"missing\": 14,\n", + " \"missing\": 0,\n", " \"threshold\": 7,\n", " \"p-ratio\": 2,\n", " \"nk-rule\": 1\n", " },\n", " \"cells\": {\n", " \"negative\": [],\n", - " \"missing\": [\n", - " [\n", - " 0,\n", - " 0\n", - " ],\n", + " \"missing\": [],\n", + " \"threshold\": [\n", " [\n", " 0,\n", " 1\n", " ],\n", " [\n", " 0,\n", - " 2\n", - " ],\n", - " [\n", - " 0,\n", " 3\n", " ],\n", " [\n", " 1,\n", - " 1\n", - " ],\n", - " [\n", - " 1,\n", - " 2\n", + " 3\n", " ],\n", " [\n", " 2,\n", - " 2\n", - " ],\n", - " [\n", - " 3,\n", - " 1\n", + " 3\n", " ],\n", " [\n", " 3,\n", - " 2\n", - " ],\n", - " [\n", - " 4,\n", - " 1\n", + " 3\n", " ],\n", " [\n", " 4,\n", - " 2\n", + " 3\n", " ],\n", " [\n", " 5,\n", - " 0\n", - " ],\n", + " 3\n", + " ]\n", + " ],\n", + " \"p-ratio\": [\n", " [\n", - " 5,\n", + " 0,\n", + " 1\n", + " ],\n", + " [\n", + " 0,\n", + " 3\n", + " ]\n", + " ],\n", + " \"nk-rule\": [\n", + " [\n", + " 0,\n", + " 3\n", + " ]\n", + " ]\n", + " }\n", + " }\n", + " }\n", + " ],\n", + " \"outcome\": {\n", + " \"G\": {\n", + " \"2010\": \"ok\",\n", + " \"2011\": \"ok\",\n", + " \"2012\": \"ok\",\n", + " \"2013\": \"ok\",\n", + " \"2014\": \"ok\",\n", + " \"2015\": \"ok\"\n", + " },\n", + " \"N\": {\n", + " \"2010\": \"threshold; p-ratio; \",\n", + " \"2011\": \"ok\",\n", + " \"2012\": \"ok\",\n", + " \"2013\": \"ok\",\n", + " \"2014\": \"ok\",\n", + " \"2015\": \"ok\"\n", + " },\n", + " \"R\": {\n", + " \"2010\": \"ok\",\n", + " \"2011\": \"ok\",\n", + " \"2012\": \"ok\",\n", + " \"2013\": \"ok\",\n", + " \"2014\": \"ok\",\n", + " \"2015\": \"ok\"\n", + " },\n", + " \"R/G\": {\n", + " \"2010\": \"threshold; p-ratio; nk-rule; \",\n", + " \"2011\": \"threshold; \",\n", + " \"2012\": \"threshold; \",\n", + " \"2013\": \"threshold; \",\n", + " \"2014\": \"threshold; \",\n", + " \"2015\": \"threshold; \"\n", + " }\n", + " },\n", + " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\\\"mean\\\")\",\n", + " \"summary\": \"fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \",\n", + " \"timestamp\": \"2023-10-05T18:04:01.751627\",\n", + " \"comments\": [],\n", + " \"exception\": \"This one is safe. Trust me, I'm a professor.\"\n", + " },\n", + " \"output_5\": {\n", + " \"uid\": \"output_5\",\n", + " \"status\": \"review\",\n", + " \"type\": \"table\",\n", + " \"properties\": {\n", + " \"method\": \"crosstab\"\n", + " },\n", + " \"files\": [\n", + " {\n", + " \"name\": \"output_5_0.csv\",\n", + " \"sdc\": {\n", + " \"summary\": {\n", + " \"suppressed\": false,\n", + " \"negative\": 10,\n", + " \"missing\": 11,\n", + " \"threshold\": 7,\n", + " \"p-ratio\": 2,\n", + " \"nk-rule\": 1\n", + " },\n", + " \"cells\": {\n", + " \"negative\": [\n", + " [\n", + " 0,\n", + " 2\n", + " ],\n", + " [\n", + " 1,\n", + " 1\n", + " ],\n", + " [\n", + " 1,\n", + " 2\n", + " ],\n", + " [\n", + " 2,\n", + " 2\n", + " ],\n", + " [\n", + " 3,\n", + " 1\n", + " ],\n", + " [\n", + " 3,\n", + " 2\n", + " ],\n", + " [\n", + " 4,\n", + " 1\n", + " ],\n", + " [\n", + " 4,\n", + " 2\n", + " ],\n", + " [\n", + " 5,\n", + " 1\n", + " ],\n", + " [\n", + " 5,\n", + " 2\n", + " ]\n", + " ],\n", + " \"missing\": [\n", + " [\n", + " 0,\n", + " 0\n", + " ],\n", + " [\n", + " 0,\n", + " 1\n", + " ],\n", + " [\n", + " 0,\n", + " 2\n", + " ],\n", + " [\n", + " 0,\n", + " 3\n", + " ],\n", + " [\n", + " 1,\n", + " 1\n", + " ],\n", + " [\n", + " 1,\n", + " 2\n", + " ],\n", + " [\n", + " 2,\n", + " 2\n", + " ],\n", + " [\n", + " 4,\n", + " 2\n", + " ],\n", + " [\n", + " 5,\n", + " 0\n", + " ],\n", + " [\n", + " 5,\n", " 1\n", " ],\n", " [\n", @@ -3440,59 +3767,46 @@ " ],\n", " \"outcome\": {\n", " \"G\": {\n", - " \"2010\": \"missing\",\n", + " \"2010\": \"\",\n", " \"2011\": \"\",\n", " \"2012\": \"\",\n", " \"2013\": \"\",\n", " \"2014\": \"\",\n", - " \"2015\": \"missing\",\n", - " \"All\": \"\"\n", + " \"2015\": \"\"\n", " },\n", " \"N\": {\n", - " \"2010\": \"missing\",\n", - " \"2011\": \"missing\",\n", + " \"2010\": \"\",\n", + " \"2011\": \"negative\",\n", " \"2012\": \"\",\n", - " \"2013\": \"missing\",\n", - " \"2014\": \"missing\",\n", - " \"2015\": \"missing\",\n", - " \"All\": \"\"\n", + " \"2013\": \"negative\",\n", + " \"2014\": \"negative\",\n", + " \"2015\": \"negative\"\n", " },\n", " \"R\": {\n", - " \"2010\": \"missing\",\n", - " \"2011\": \"missing\",\n", - " \"2012\": \"missing\",\n", - " \"2013\": \"missing\",\n", - " \"2014\": \"missing\",\n", - " \"2015\": \"missing\",\n", - " \"All\": \"\"\n", + " \"2010\": \"negative\",\n", + " \"2011\": \"negative\",\n", + " \"2012\": \"negative\",\n", + " \"2013\": \"negative\",\n", + " \"2014\": \"negative\",\n", + " \"2015\": \"negative\"\n", " },\n", " \"R/G\": {\n", - " \"2010\": \"missing\",\n", - " \"2011\": \"\",\n", - " \"2012\": \"\",\n", - " \"2013\": \"\",\n", - " \"2014\": \"\",\n", - " \"2015\": \"\",\n", - " \"All\": \"\"\n", - " },\n", - " \"All\": {\n", " \"2010\": \"\",\n", " \"2011\": \"\",\n", " \"2012\": \"\",\n", " \"2013\": \"\",\n", " \"2014\": \"\",\n", - " \"2015\": \"\",\n", - " \"All\": \"\"\n", + " \"2015\": \"\"\n", " }\n", " },\n", - " \"command\": \"safe_table = acro.crosstab(\",\n", - " \"summary\": \"review; missing values found\",\n", - " \"timestamp\": \"2023-10-04T16:08:39.363301\",\n", + " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc=\\\"mean\\\")\",\n", + " \"summary\": \"review; negative values found\",\n", + " \"timestamp\": \"2023-10-05T18:04:08.961665\",\n", " \"comments\": [],\n", - " \"exception\": \"This one is safe. Trust me, I'm a professor.\"\n", + " \"exception\": \"It's not disclosive, I promise.\"\n", " },\n", - " \"output_5\": {\n", - " \"uid\": \"output_5\",\n", + " \"output_6\": {\n", + " \"uid\": \"output_6\",\n", " \"status\": \"review\",\n", " \"type\": \"table\",\n", " \"properties\": {\n", @@ -3500,7 +3814,7 @@ " },\n", " \"files\": [\n", " {\n", - " \"name\": \"output_5_0.csv\",\n", + " \"name\": \"output_6_0.csv\",\n", " \"sdc\": {\n", " \"summary\": {\n", " \"suppressed\": false,\n", @@ -3661,12 +3975,12 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; missing values found\",\n", - " \"timestamp\": \"2023-10-04T16:08:39.760564\",\n", + " \"timestamp\": \"2023-10-05T18:04:09.105670\",\n", " \"comments\": [],\n", - " \"exception\": \"It's not disclosive, I promise.\"\n", + " \"exception\": \"I need this one too\"\n", " },\n", - " \"output_6\": {\n", - " \"uid\": \"output_6\",\n", + " \"output_7\": {\n", + " \"uid\": \"output_7\",\n", " \"status\": \"review\",\n", " \"type\": \"table\",\n", " \"properties\": {\n", @@ -3674,7 +3988,7 @@ " },\n", " \"files\": [\n", " {\n", - " \"name\": \"output_6_0.csv\",\n", + " \"name\": \"output_7_0.csv\",\n", " \"sdc\": {\n", " \"summary\": {\n", " \"suppressed\": false,\n", @@ -3743,12 +4057,12 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; missing values found\",\n", - " \"timestamp\": \"2023-10-04T16:08:39.951135\",\n", + " \"timestamp\": \"2023-10-05T18:04:09.203761\",\n", " \"comments\": [],\n", - " \"exception\": \"I need this one too\"\n", + " \"exception\": \"yes\"\n", " },\n", - " \"output_7\": {\n", - " \"uid\": \"output_7\",\n", + " \"output_8\": {\n", + " \"uid\": \"output_8\",\n", " \"status\": \"review\",\n", " \"type\": \"table\",\n", " \"properties\": {\n", @@ -3756,7 +4070,7 @@ " },\n", " \"files\": [\n", " {\n", - " \"name\": \"output_7_0.csv\",\n", + " \"name\": \"output_8_0.csv\",\n", " \"sdc\": {\n", " \"summary\": {\n", " \"suppressed\": false,\n", @@ -3825,12 +4139,12 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; missing values found\",\n", - " \"timestamp\": \"2023-10-04T16:08:40.063744\",\n", + " \"timestamp\": \"2023-10-05T18:04:09.264100\",\n", " \"comments\": [],\n", - " \"exception\": \"y\"\n", + " \"exception\": \"yes\"\n", " },\n", - " \"output_8\": {\n", - " \"uid\": \"output_8\",\n", + " \"output_9\": {\n", + " \"uid\": \"output_9\",\n", " \"status\": \"review\",\n", " \"type\": \"table\",\n", " \"properties\": {\n", @@ -3838,7 +4152,7 @@ " },\n", " \"files\": [\n", " {\n", - " \"name\": \"output_8_0.csv\",\n", + " \"name\": \"output_9_0.csv\",\n", " \"sdc\": {\n", " \"summary\": {\n", " \"suppressed\": false,\n", @@ -3924,12 +4238,12 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; negative values found\",\n", - " \"timestamp\": \"2023-10-04T16:08:40.206681\",\n", + " \"timestamp\": \"2023-10-05T18:04:09.342995\",\n", " \"comments\": [],\n", - " \"exception\": \"y\"\n", + " \"exception\": \"yes\"\n", " },\n", - " \"output_9\": {\n", - " \"uid\": \"output_9\",\n", + " \"output_10\": {\n", + " \"uid\": \"output_10\",\n", " \"status\": \"pass\",\n", " \"type\": \"regression\",\n", " \"properties\": {\n", @@ -3938,27 +4252,27 @@ " },\n", " \"files\": [\n", " {\n", - " \"name\": \"output_9_0.csv\",\n", + " \"name\": \"output_10_0.csv\",\n", " \"sdc\": {}\n", " },\n", " {\n", - " \"name\": \"output_9_1.csv\",\n", + " \"name\": \"output_10_1.csv\",\n", " \"sdc\": {}\n", " },\n", " {\n", - " \"name\": \"output_9_2.csv\",\n", + " \"name\": \"output_10_2.csv\",\n", " \"sdc\": {}\n", " }\n", " ],\n", " \"outcome\": {},\n", " \"command\": \"results = acro.ols(y, x)\",\n", " \"summary\": \"pass; dof=807.0 >= 10\",\n", - " \"timestamp\": \"2023-10-04T16:08:40.319370\",\n", + " \"timestamp\": \"2023-10-05T18:04:09.406745\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", - " \"output_10\": {\n", - " \"uid\": \"output_10\",\n", + " \"output_11\": {\n", + " \"uid\": \"output_11\",\n", " \"status\": \"pass\",\n", " \"type\": \"regression\",\n", " \"properties\": {\n", @@ -3967,27 +4281,27 @@ " },\n", " \"files\": [\n", " {\n", - " \"name\": \"output_10_0.csv\",\n", + " \"name\": \"output_11_0.csv\",\n", " \"sdc\": {}\n", " },\n", " {\n", - " \"name\": \"output_10_1.csv\",\n", + " \"name\": \"output_11_1.csv\",\n", " \"sdc\": {}\n", " },\n", " {\n", - " \"name\": \"output_10_2.csv\",\n", + " \"name\": \"output_11_2.csv\",\n", " \"sdc\": {}\n", " }\n", " ],\n", " \"outcome\": {},\n", " \"command\": \"results = acro.olsr(\",\n", " \"summary\": \"pass; dof=807.0 >= 10\",\n", - " \"timestamp\": \"2023-10-04T16:08:40.431387\",\n", + " \"timestamp\": \"2023-10-05T18:04:09.449726\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", - " \"output_11\": {\n", - " \"uid\": \"output_11\",\n", + " \"output_12\": {\n", + " \"uid\": \"output_12\",\n", " \"status\": \"pass\",\n", " \"type\": \"regression\",\n", " \"properties\": {\n", @@ -3996,23 +4310,23 @@ " },\n", " \"files\": [\n", " {\n", - " \"name\": \"output_11_0.csv\",\n", + " \"name\": \"output_12_0.csv\",\n", " \"sdc\": {}\n", " },\n", " {\n", - " \"name\": \"output_11_1.csv\",\n", + " \"name\": \"output_12_1.csv\",\n", " \"sdc\": {}\n", " }\n", " ],\n", " \"outcome\": {},\n", " \"command\": \"results = acro.probit(y, x)\",\n", " \"summary\": \"pass; dof=806.0 >= 10\",\n", - " \"timestamp\": \"2023-10-04T16:08:40.540353\",\n", + " \"timestamp\": \"2023-10-05T18:04:09.499724\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", - " \"output_12\": {\n", - " \"uid\": \"output_12\",\n", + " \"output_13\": {\n", + " \"uid\": \"output_13\",\n", " \"status\": \"pass\",\n", " \"type\": \"regression\",\n", " \"properties\": {\n", @@ -4021,18 +4335,18 @@ " },\n", " \"files\": [\n", " {\n", - " \"name\": \"output_12_0.csv\",\n", + " \"name\": \"output_13_0.csv\",\n", " \"sdc\": {}\n", " },\n", " {\n", - " \"name\": \"output_12_1.csv\",\n", + " \"name\": \"output_13_1.csv\",\n", " \"sdc\": {}\n", " }\n", " ],\n", " \"outcome\": {},\n", " \"command\": \"results = acro.logit(y, x)\",\n", " \"summary\": \"pass; dof=806.0 >= 10\",\n", - " \"timestamp\": \"2023-10-04T16:08:40.616654\",\n", + " \"timestamp\": \"2023-10-05T18:04:09.537725\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ -4051,9 +4365,9 @@ " \"suppressed\": false,\n", " \"negative\": 0,\n", " \"missing\": 0,\n", - " \"threshold\": 7,\n", - " \"p-ratio\": 2,\n", - " \"nk-rule\": 1\n", + " \"threshold\": 14,\n", + " \"p-ratio\": 8,\n", + " \"nk-rule\": 7\n", " },\n", " \"cells\": {\n", " \"negative\": [],\n", @@ -4061,47 +4375,123 @@ " \"threshold\": [\n", " [\n", " 0,\n", - " 1\n", + " 0\n", " ],\n", " [\n", " 0,\n", - " 3\n", + " 2\n", + " ],\n", + " [\n", + " 0,\n", + " 5\n", " ],\n", " [\n", " 1,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 1,\n", + " 5\n", " ],\n", " [\n", " 2,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 2,\n", + " 5\n", " ],\n", " [\n", " 3,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 3,\n", + " 5\n", " ],\n", " [\n", " 4,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 4,\n", + " 5\n", " ],\n", " [\n", " 5,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 5,\n", + " 1\n", + " ],\n", + " [\n", + " 5,\n", + " 5\n", " ]\n", " ],\n", " \"p-ratio\": [\n", " [\n", " 0,\n", - " 1\n", + " 0\n", " ],\n", " [\n", " 0,\n", - " 3\n", + " 2\n", + " ],\n", + " [\n", + " 0,\n", + " 5\n", + " ],\n", + " [\n", + " 1,\n", + " 0\n", + " ],\n", + " [\n", + " 2,\n", + " 0\n", + " ],\n", + " [\n", + " 3,\n", + " 0\n", + " ],\n", + " [\n", + " 4,\n", + " 0\n", + " ],\n", + " [\n", + " 5,\n", + " 0\n", " ]\n", " ],\n", " \"nk-rule\": [\n", " [\n", " 0,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 0,\n", + " 5\n", + " ],\n", + " [\n", + " 1,\n", + " 0\n", + " ],\n", + " [\n", + " 2,\n", + " 0\n", + " ],\n", + " [\n", + " 3,\n", + " 0\n", + " ],\n", + " [\n", + " 4,\n", + " 0\n", + " ],\n", + " [\n", + " 5,\n", + " 0\n", " ]\n", " ]\n", " }\n", @@ -4109,47 +4499,80 @@ " }\n", " ],\n", " \"outcome\": {\n", - " \"G\": {\n", + " \"('G', 'Dead in 2015')\": {\n", + " \"2010\": \"threshold; p-ratio; nk-rule; \",\n", + " \"2011\": \"threshold; p-ratio; nk-rule; \",\n", + " \"2012\": \"threshold; p-ratio; nk-rule; \",\n", + " \"2013\": \"threshold; p-ratio; nk-rule; \",\n", + " \"2014\": \"threshold; p-ratio; nk-rule; \",\n", + " \"2015\": \"threshold; p-ratio; nk-rule; \",\n", + " \"All\": \"ok\"\n", + " },\n", + " \"('G', 'Alive in 2015')\": {\n", " \"2010\": \"ok\",\n", " \"2011\": \"ok\",\n", " \"2012\": \"ok\",\n", " \"2013\": \"ok\",\n", " \"2014\": \"ok\",\n", - " \"2015\": \"ok\"\n", + " \"2015\": \"threshold; \",\n", + " \"All\": \"ok\"\n", " },\n", - " \"N\": {\n", + " \"('N', 'Alive in 2015')\": {\n", " \"2010\": \"threshold; p-ratio; \",\n", " \"2011\": \"ok\",\n", " \"2012\": \"ok\",\n", " \"2013\": \"ok\",\n", " \"2014\": \"ok\",\n", - " \"2015\": \"ok\"\n", + " \"2015\": \"ok\",\n", + " \"All\": \"ok\"\n", " },\n", - " \"R\": {\n", + " \"('R', 'Dead in 2015')\": {\n", " \"2010\": \"ok\",\n", " \"2011\": \"ok\",\n", " \"2012\": \"ok\",\n", " \"2013\": \"ok\",\n", " \"2014\": \"ok\",\n", - " \"2015\": \"ok\"\n", + " \"2015\": \"ok\",\n", + " \"All\": \"ok\"\n", " },\n", - " \"R/G\": {\n", + " \"('R', 'Alive in 2015')\": {\n", + " \"2010\": \"ok\",\n", + " \"2011\": \"ok\",\n", + " \"2012\": \"ok\",\n", + " \"2013\": \"ok\",\n", + " \"2014\": \"ok\",\n", + " \"2015\": \"ok\",\n", + " \"All\": \"ok\"\n", + " },\n", + " \"('R/G', 'Alive in 2015')\": {\n", " \"2010\": \"threshold; p-ratio; nk-rule; \",\n", " \"2011\": \"threshold; \",\n", " \"2012\": \"threshold; \",\n", " \"2013\": \"threshold; \",\n", " \"2014\": \"threshold; \",\n", - " \"2015\": \"threshold; \"\n", + " \"2015\": \"threshold; \",\n", + " \"All\": \"ok\"\n", + " },\n", + " \"('All', '')\": {\n", + " \"2010\": \"ok\",\n", + " \"2011\": \"ok\",\n", + " \"2012\": \"ok\",\n", + " \"2013\": \"ok\",\n", + " \"2014\": \"ok\",\n", + " \"2015\": \"ok\",\n", + " \"All\": \"ok\"\n", " }\n", " },\n", - " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\\\"mean\\\")\",\n", - " \"summary\": \"fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \",\n", - " \"timestamp\": \"2023-10-04T16:08:34.251644\",\n", - " \"comments\": [],\n", - " \"exception\": \"y\"\n", + " \"command\": \"table = acro.crosstab(\",\n", + " \"summary\": \"fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; \",\n", + " \"timestamp\": \"2023-10-05T18:03:56.973956\",\n", + " \"comments\": [\n", + " \"Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted.\"\n", + " ],\n", + " \"exception\": \"yes\"\n", " },\n", - " \"output_13\": {\n", - " \"uid\": \"output_13\",\n", + " \"output_14\": {\n", + " \"uid\": \"output_14\",\n", " \"status\": \"review\",\n", " \"type\": \"custom\",\n", " \"properties\": {},\n", @@ -4162,11 +4585,11 @@ " \"outcome\": {},\n", " \"command\": \"custom\",\n", " \"summary\": \"review\",\n", - " \"timestamp\": \"2023-10-04T16:08:40.800408\",\n", + " \"timestamp\": \"2023-10-05T18:04:09.660560\",\n", " \"comments\": [\n", " \"This output is an image showing the relationship between X and Y\"\n", " ],\n", - " \"exception\": \"y\"\n", + " \"exception\": \"yes\"\n", " }\n", " }\n", "}\n" diff --git a/test/test_initial.py b/test/test_initial.py index 9987aa8..ef46863 100644 --- a/test/test_initial.py +++ b/test/test_initial.py @@ -15,7 +15,6 @@ # pylint: disable=redefined-outer-name PATH: str = "RES_PYTEST" -RUN_TEST = False @pytest.fixture @@ -42,6 +41,36 @@ def test_crosstab_without_suppression(data): assert 48 == output.output[0]["R/G"].sum() +def test_crosstab_with_aggfunc_sum(data, acro): + """Test the crosstab with two columns and aggfunc sum.""" + acro = ACRO(suppress=False) + _ = acro.crosstab( + data.year, + [data.grant_type, data.survivor], + values=data.inc_grants, + aggfunc="sum", + ) + _ = acro.crosstab( + [data.grant_type, data.survivor], + data.year, + values=data.inc_grants, + aggfunc="sum", + ) + acro.add_exception("output_0", "Let me have it") + acro.add_exception("output_1", "I need this output") + results: Records = acro.finalise() + output_0 = results.get_index(0) + output_1 = results.get_index(1) + comment_0 = ( + "Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted." + ) + comment_1 = ( + "Empty rows: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted." + ) + assert output_0.comments == [comment_0] + assert output_1.comments == [comment_1] + + def test_crosstab_threshold(data, acro): """Crosstab threshold test.""" _ = acro.crosstab(data.year, data.grant_type) @@ -572,7 +601,7 @@ def test_crosstab_with_totals_with_suppression(data, acro): assert "R/G" not in output.output[0].columns -def test_crosstab_with_totals_with_suppression_herichical(data, acro): +def test_crosstab_with_totals_with_suppression_hierarchical(data, acro): """Test the crosstab with both margins and suppression are true.""" _ = acro.crosstab( [data.year, data.survivor], [data.grant_type, data.status], margins=True @@ -641,7 +670,7 @@ def test_crosstab_with_manual_totals_with_suppression(data, acro): assert "R/G" in output.output[0].columns -def test_crosstab_with_manual_totals_with_suppression_herichical(data, acro): +def test_crosstab_with_manual_totals_with_suppression_hierarchical(data, acro): """Test the crosstab with both margins and suppression are true with multilevel indexes and columns while using the total manual function. """ @@ -682,7 +711,7 @@ def test_crosstab_with_manual_totals_with_suppression_with_aggfunc_mean(data, ac assert "R/G" in output.output[0].columns -def test_herichical_crosstab_with_manual_totals_with_mean(data, acro): +def test_hierarchical_crosstab_with_manual_totals_with_mean(data, acro): """Test the crosstab with both margins and suppression are true, with aggfunc mean and with multilevel columns and rows while using the total manual function. """ @@ -746,106 +775,96 @@ def test_pivot_table_with_totals_with_suppression(data, acro): assert "R/G" not in output.output[0].columns -if RUN_TEST: +def test_crosstab_multiple_aggregate_function(data, acro): + """Crosstab with multiple agg funcs.""" + acro = ACRO(suppress=False) - def test_crosstab_with_sum(data, acro): - """Test the crosstab with two columns and aggfunc sum.""" - acro = ACRO(suppress=False) - _ = acro.crosstab( - data.year, - [data.grant_type, data.survivor], - values=data.inc_grants, - aggfunc="sum", - ) - output = acro.results.get_index(0) - assert (6, 8) == output.output[0].shape + _ = acro.crosstab( + data.year, data.grant_type, values=data.inc_grants, aggfunc=["mean", "std"] + ) + output = acro.results.get_index(0) + correct_summary: str = ( + "fail; threshold: 14 cells may need suppressing;" + " p-ratio: 4 cells may need suppressing; " + "nk-rule: 2 cells may need suppressing; " + ) + assert ( + output.summary == correct_summary + ), f"\n{output.summary}\n should be \n{correct_summary}\n" + print(f"{output.output[0]['mean'][ 'R/G'].sum()}") + correctval = 97383496.0 + errmsg = f"{output.output[0]['mean']['R/G'].sum()} should be {correctval}" + assert correctval == output.output[0]["mean"]["R/G"].sum(), errmsg - def test_crosstab_multiple_aggregate_function(data, acro): - """Crosstab with multiple agg funcs.""" - acro = ACRO(suppress=False) - _ = acro.crosstab( - data.year, data.grant_type, values=data.inc_grants, aggfunc=["mean", "std"] - ) - output = acro.results.get_index(0) - correct_summary: str = ( - "fail; threshold: 14 cells may need suppressing;" - " p-ratio: 4 cells may need suppressing; " - "nk-rule: 2 cells may need suppressing; " - ) - assert ( - output.summary == correct_summary - ), f"\n{output.summary}\n should be \n{correct_summary}\n" - print(f"{output.output[0]['mean'][ 'R/G'].sum()}") - correctval = 97383496.0 - errmsg = f"{output.output[0]['mean']['R/G'].sum()} should be {correctval}" - assert correctval == output.output[0]["mean"]["R/G"].sum(), errmsg - - def test_crosstab_with_totals_with_suppression_with_two_aggfuncs(data, acro): - """Test the crosstab with both margins and suppression are true - and with a list of aggfuncs while using the total manual function. - """ - _ = acro.crosstab( - data.year, - data.grant_type, - values=data.inc_grants, - aggfunc=["count", "std"], - margins=True, - ) - _ = acro.crosstab( - data.year, - data.grant_type, - values=data.inc_grants, - aggfunc="count", - margins=True, - ) - _ = acro.crosstab( - data.year, - data.grant_type, - values=data.inc_grants, - aggfunc="std", - margins=True, - ) - output = acro.results.get_index(0) - assert 8 == output.output[0].shape[1] - output_1 = acro.results.get_index(1) - output_2 = acro.results.get_index(2) - output_3 = pd.concat([output_1.output[0], output_2.output[0]], axis=1) - output_4 = (output.output[0]).droplevel(0, axis=1) - assert output_3.equals(output_4) - - def test_crosstab_with_totals_with_suppression_with_two_aggfuncs_herichical( - data, acro - ): - """Test the crosstab with both margins and suppression are true - and with a list of aggfuncs and a list of columns while using - the total manual function. - """ - _ = acro.crosstab( - data.year, - [data.grant_type, data.survivor], - values=data.inc_grants, - aggfunc=["count", "std"], - margins=True, - ) - output = acro.results.get_index(0) - assert ("G", "Dead in 2015") in output.output[0].columns - - def test_crosstab_with_manual_totals_with_suppression_with_two_aggfunc( - data, acro, caplog - ): - """Test the crosstab with both margins and suppression are true - and with a list of aggfuncs while using the total manual function. - """ - _ = acro.crosstab( - data.year, - data.grant_type, - values=data.inc_grants, - aggfunc=["count", "std"], - margins=True, - show_suppressed=True, - ) - assert ( - "We can not calculate the margins with a list of aggregation functions. " - "Please create a table for each aggregation function" in caplog.text - ) +def test_crosstab_with_totals_with_suppression_with_two_aggfuncs(data, acro): + """Test the crosstab with both margins and suppression are true + and with a list of aggfuncs while using the total manual function. + """ + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc=["count", "std"], + margins=True, + ) + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc="count", + margins=True, + ) + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc="std", + margins=True, + ) + output = acro.results.get_index(0) + assert 8 == output.output[0].shape[1] + output_1 = acro.results.get_index(1) + output_2 = acro.results.get_index(2) + output_3 = pd.concat([output_1.output[0], output_2.output[0]], axis=1) + output_4 = (output.output[0]).droplevel(0, axis=1) + assert output_3.equals(output_4) + + +def test_crosstab_with_totals_with_suppression_with_two_aggfuncs_hierarchical( + data, acro +): + """Test the crosstab with both margins and suppression are true + and with a list of aggfuncs and a list of columns while using + the total manual function. + """ + _ = acro.crosstab( + data.year, + [data.grant_type, data.survivor], + values=data.inc_grants, + aggfunc=["count", "std"], + margins=True, + ) + output = acro.results.get_index(0) + assert ("count", "G", "Alive in 2015") in output.output[0].columns + assert ("std", "G", "Alive in 2015") in output.output[0].columns + + +def test_crosstab_with_manual_totals_with_suppression_with_two_aggfunc( + data, acro, caplog +): + """Test the crosstab with both margins and suppression are true + and with a list of aggfuncs while using the total manual function. + """ + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc=["count", "std"], + margins=True, + show_suppressed=True, + ) + assert ( + "We can not calculate the margins with a list of aggregation functions. " + "Please create a table for each aggregation function" in caplog.text + )