diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb index 1bc6777..a82dd96 100644 --- a/notebooks/test.ipynb +++ b/notebooks/test.ipynb @@ -929,198 +929,29 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
meanstd
grant_typeGNRR/GAllGNRR/GAll
year
20109921906.00.0000008402284.011636000.08308286.01.855055e+070.0000003.059557e+071.701088e+072.727398e+07
20118502247.0124013.8593757716880.016047500.05303808.01.688595e+07205959.4929032.954322e+071.561638e+072.137658e+07
201211458580.0131859.0625006958050.516810000.05259893.02.061090e+07210476.5391752.721184e+071.646449e+072.026400e+07
201313557147.0147937.7968757202273.516765625.05605045.52.486844e+07203747.4170172.989833e+071.671112e+072.251787e+07
201413748147.0133198.2500008277525.517845750.06117054.53.134559e+07181865.9255803.546348e+071.741251e+072.641722e+07
201511133433.0146572.18750010812888.018278624.06509989.52.553919e+07201602.8008324.130935e+071.730471e+072.784636e+07
All11412787.0134431.8906258098502.016648273.05997796.52.283220e+07198873.7266563.204495e+071.583532e+072.405324e+07
\n", - "
" - ], - "text/plain": [ - " mean \\\n", - "grant_type G N R R/G All \n", - "year \n", - "2010 9921906.0 0.000000 8402284.0 11636000.0 8308286.0 \n", - "2011 8502247.0 124013.859375 7716880.0 16047500.0 5303808.0 \n", - "2012 11458580.0 131859.062500 6958050.5 16810000.0 5259893.0 \n", - "2013 13557147.0 147937.796875 7202273.5 16765625.0 5605045.5 \n", - "2014 13748147.0 133198.250000 8277525.5 17845750.0 6117054.5 \n", - "2015 11133433.0 146572.187500 10812888.0 18278624.0 6509989.5 \n", - "All 11412787.0 134431.890625 8098502.0 16648273.0 5997796.5 \n", - "\n", - " std \\\n", - "grant_type G N R R/G \n", - "year \n", - "2010 1.855055e+07 0.000000 3.059557e+07 1.701088e+07 \n", - "2011 1.688595e+07 205959.492903 2.954322e+07 1.561638e+07 \n", - "2012 2.061090e+07 210476.539175 2.721184e+07 1.646449e+07 \n", - "2013 2.486844e+07 203747.417017 2.989833e+07 1.671112e+07 \n", - "2014 3.134559e+07 181865.925580 3.546348e+07 1.741251e+07 \n", - "2015 2.553919e+07 201602.800832 4.130935e+07 1.730471e+07 \n", - "All 2.283220e+07 198873.726656 3.204495e+07 1.583532e+07 \n", - "\n", - " \n", - "grant_type All \n", - "year \n", - "2010 2.727398e+07 \n", - "2011 2.137658e+07 \n", - "2012 2.026400e+07 \n", - "2013 2.251787e+07 \n", - "2014 2.641722e+07 \n", - "2015 2.784636e+07 \n", - "All 2.405324e+07 " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:172: PerformanceWarning: indexing past lexsort depth may impact performance.\n", + " if t_values[col].sum() == 0:\n" + ] + }, + { + "ename": "ValueError", + "evalue": "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\notebooks\\test.ipynb Cell 21\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m safe_table \u001b[39m=\u001b[39m acro\u001b[39m.\u001b[39;49mcrosstab(\n\u001b[0;32m 2\u001b[0m df\u001b[39m.\u001b[39;49myear, df\u001b[39m.\u001b[39;49mgrant_type, values\u001b[39m=\u001b[39;49mdf\u001b[39m.\u001b[39;49minc_grants, aggfunc\u001b[39m=\u001b[39;49m[\u001b[39m\"\u001b[39;49m\u001b[39mmean\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstd\u001b[39;49m\u001b[39m\"\u001b[39;49m], margins\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m\n\u001b[0;32m 3\u001b[0m )\n\u001b[0;32m 4\u001b[0m safe_table\n", + "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:172\u001b[0m, in \u001b[0;36mTables.crosstab\u001b[1;34m(self, index, columns, values, rownames, colnames, aggfunc, margins, margins_name, dropna, normalize, show_suppressed)\u001b[0m\n\u001b[0;32m 170\u001b[0m \u001b[39mif\u001b[39;00m dropna \u001b[39mor\u001b[39;00m margins:\n\u001b[0;32m 171\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m t_values\u001b[39m.\u001b[39mcolumns:\n\u001b[1;32m--> 172\u001b[0m \u001b[39mif\u001b[39;00m t_values[col]\u001b[39m.\u001b[39;49msum() \u001b[39m==\u001b[39;49m \u001b[39m0\u001b[39;49m:\n\u001b[0;32m 173\u001b[0m t_values \u001b[39m=\u001b[39m t_values\u001b[39m.\u001b[39mdrop(col, axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)\n\u001b[0;32m 174\u001b[0m t_values \u001b[39m=\u001b[39m t_values \u001b[39m<\u001b[39m THRESHOLD\n", + "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\SACRO\\venvs\\acro_venv\\lib\\site-packages\\pandas\\core\\generic.py:1527\u001b[0m, in \u001b[0;36mNDFrame.__nonzero__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1525\u001b[0m \u001b[39m@final\u001b[39m\n\u001b[0;32m 1526\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__nonzero__\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m-> 1527\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 1528\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mThe truth value of a \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mtype\u001b[39m(\u001b[39mself\u001b[39m)\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m is ambiguous. \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1529\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mUse a.empty, a.bool(), a.item(), a.any() or a.all().\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1530\u001b[0m )\n", + "\u001b[1;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." + ] } ], "source": [ - "safe_table = pd.crosstab(\n", + "safe_table = acro.crosstab(\n", " df.year, df.grant_type, values=df.inc_grants, aggfunc=[\"mean\", \"std\"], margins=True\n", ")\n", "safe_table" @@ -1136,7 +967,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "bf132239", "metadata": {}, "outputs": [ @@ -1271,7 +1102,7 @@ "All 11412787.0 136158.859375 8006360.5 16648273.0 5968295.5" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1290,7 +1121,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "7cc417a0", "metadata": {}, "outputs": [], @@ -1308,7 +1139,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "15bcdc7c", "metadata": {}, "outputs": [ @@ -1425,7 +1256,7 @@ "2015 11133433.0 146572.015625 10388613.0 18278624.0" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1448,7 +1279,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "6d4730c4", "metadata": {}, "outputs": [ @@ -1546,7 +1377,7 @@ "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1568,7 +1399,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "f3a87c20", "metadata": {}, "outputs": [ @@ -1666,7 +1497,7 @@ "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1684,7 +1515,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "8b603548", "metadata": {}, "outputs": [], @@ -1702,7 +1533,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "de4266cd-b4d4-417b-ae44-5d972e8bfdde", "metadata": {}, "outputs": [ @@ -1800,7 +1631,7 @@ "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1824,7 +1655,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "id": "a521cb83", "metadata": {}, "outputs": [ @@ -1860,7 +1691,7 @@ " Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00 \n", "\n", "\n", - " Time: 12:25:20 Log-Likelihood: -14495. \n", + " Time: 16:44:14 Log-Likelihood: -14495. \n", "\n", "\n", " No. Observations: 811 AIC: 2.900e+04\n", @@ -1916,7 +1747,7 @@ "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00\n", - "Time: 12:25:20 Log-Likelihood: -14495.\n", + "Time: 16:44:14 Log-Likelihood: -14495.\n", "No. Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", @@ -1942,7 +1773,7 @@ "\"\"\"" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1969,7 +1800,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "id": "cc90f7c9", "metadata": {}, "outputs": [ @@ -1977,7 +1808,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n", + "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "INFO:acro:records:add(): output_10\n" ] }, @@ -1999,7 +1836,7 @@ " Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00 \n", "\n", "\n", - " Time: 12:25:20 Log-Likelihood: -14495. \n", + " Time: 16:44:14 Log-Likelihood: -14495. \n", "\n", "\n", " No. Observations: 811 AIC: 2.900e+04\n", @@ -2055,7 +1892,7 @@ "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00\n", - "Time: 12:25:20 Log-Likelihood: -14495.\n", + "Time: 16:44:14 Log-Likelihood: -14495.\n", "No. Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", @@ -2081,7 +1918,7 @@ "\"\"\"" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2103,7 +1940,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "id": "5b1a1611", "metadata": {}, "outputs": [ @@ -2111,13 +1948,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:probit() outcome: pass; dof=806.0 >= 10\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "INFO:acro:probit() outcome: pass; dof=806.0 >= 10\n", "INFO:acro:records:add(): output_11\n" ] }, @@ -2148,7 +1979,7 @@ " Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2140 \n", "\n", "\n", - " Time: 12:25:20 Log-Likelihood: -400.46 \n", + " Time: 16:44:15 Log-Likelihood: -400.46 \n", "\n", "\n", " converged: True LL-Null: -509.50 \n", @@ -2187,7 +2018,7 @@ "Model: Probit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2140\n", - "Time: 12:25:20 Log-Likelihood: -400.46\n", + "Time: 16:44:15 Log-Likelihood: -400.46\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.875e-46\n", "=================================================================================\n", @@ -2206,7 +2037,7 @@ "\"\"\"" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2234,7 +2065,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "id": "dcf30f8f", "metadata": {}, "outputs": [ @@ -2242,13 +2073,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n", "INFO:acro:records:add(): output_12\n" ] }, @@ -2279,7 +2104,7 @@ " Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2187 \n", "\n", "\n", - " Time: 12:25:20 Log-Likelihood: -398.07 \n", + " Time: 16:44:15 Log-Likelihood: -398.07 \n", "\n", "\n", " converged: True LL-Null: -509.50 \n", @@ -2318,7 +2143,7 @@ "Model: Logit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2187\n", - "Time: 12:25:20 Log-Likelihood: -398.07\n", + "Time: 16:44:15 Log-Likelihood: -398.07\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.532e-47\n", "=================================================================================\n", @@ -2337,7 +2162,7 @@ "\"\"\"" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2357,7 +2182,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "id": "ec960039", "metadata": { "scrolled": true @@ -2390,7 +2215,7 @@ "2013 15 59 71 8\n", "2014 15 59 71 8\n", "2015 15 59 71 8]\n", - "timestamp: 2023-09-26T12:25:19.155653\n", + "timestamp: 2023-09-26T16:43:37.007348\n", "comments: []\n", "exception: \n", "\n", @@ -2417,7 +2242,7 @@ "2013 13557147.0 147937.796875 7202273.5 NaN\n", "2014 13748147.0 133198.250000 8277525.5 NaN\n", "2015 11133433.0 146572.187500 10812888.0 NaN]\n", - "timestamp: 2023-09-26T12:25:19.230978\n", + "timestamp: 2023-09-26T16:43:37.137368\n", "comments: []\n", "exception: \n", "\n", @@ -2459,7 +2284,7 @@ "2014 17182166.0 133198.250000 1370867.125 20651954.0 5569847.0\n", "2015 NaN 146572.187500 1779520.625 21810030.0 5137233.5\n", "All 14128150.0 136725.953125 1425355.125 20004548.0 5434959.5]\n", - "timestamp: 2023-09-26T12:25:19.445330\n", + "timestamp: 2023-09-26T16:43:37.469934\n", "comments: []\n", "exception: \n", "\n", @@ -2486,7 +2311,7 @@ "2013 13557147.0 147937.796875 7202273.5 16765625.0\n", "2014 13748147.0 133198.250000 8277525.5 17845750.0\n", "2015 11133433.0 146572.187500 10812888.0 18278624.0]\n", - "timestamp: 2023-09-26T12:25:19.553369\n", + "timestamp: 2023-09-26T16:43:37.654102\n", "comments: []\n", "exception: \n", "\n", @@ -2515,7 +2340,7 @@ "2014 13748147.0 135494.781250 8118565.0 17845750.0 6072600.0\n", "2015 11133433.0 149143.625000 10596385.0 18278624.0 6442131.0\n", "All 11412787.0 136158.859375 8006360.5 16648273.0 5968295.5]\n", - "timestamp: 2023-09-26T12:25:19.750575\n", + "timestamp: 2023-09-26T16:44:04.817429\n", "comments: []\n", "exception: \n", "\n", @@ -2542,7 +2367,7 @@ "2013 13557147.0 147937.625000 6988263.5 16765625.0\n", "2014 13748147.0 133198.078125 7997392.5 17845750.0\n", "2015 11133433.0 146572.015625 10388613.0 18278624.0]\n", - "timestamp: 2023-09-26T12:25:19.848126\n", + "timestamp: 2023-09-26T16:44:14.222443\n", "comments: []\n", "exception: \n", "\n", @@ -2567,7 +2392,7 @@ "N 1.344319e+05 1.988737e+05\n", "R 8.098502e+06 3.204495e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-26T12:25:19.915731\n", + "timestamp: 2023-09-26T16:44:14.374409\n", "comments: []\n", "exception: \n", "\n", @@ -2592,7 +2417,7 @@ "N 1.364700e+05 1.999335e+05\n", "R 8.006360e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-26T12:25:19.994753\n", + "timestamp: 2023-09-26T16:44:14.514747\n", "comments: []\n", "exception: \n", "\n", @@ -2617,7 +2442,7 @@ "N 1.341800e+05 1.990196e+05\n", "R 7.882231e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-26T12:25:20.122274\n", + "timestamp: 2023-09-26T16:44:14.676251\n", "comments: []\n", "exception: \n", "\n", @@ -2636,7 +2461,7 @@ "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.000\n", - "Time: 12:25:20 Log-Likelihood: -14495.000\n", + "Time: 16:44:14 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", @@ -2649,7 +2474,7 @@ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 1.050000e+08]\n", - "timestamp: 2023-09-26T12:25:20.194182\n", + "timestamp: 2023-09-26T16:44:14.841394\n", "comments: []\n", "exception: \n", "\n", @@ -2668,7 +2493,7 @@ "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.000\n", - "Time: 12:25:20 Log-Likelihood: -14495.000\n", + "Time: 16:44:14 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", @@ -2681,7 +2506,7 @@ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 1.050000e+08]\n", - "timestamp: 2023-09-26T12:25:20.256095\n", + "timestamp: 2023-09-26T16:44:14.939403\n", "comments: []\n", "exception: \n", "\n", @@ -2700,7 +2525,7 @@ "Model: Probit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 2.140000e-01\n", - "Time: 12:25:20 Log-Likelihood: -4.004600e+02\n", + "Time: 16:44:15 Log-Likelihood: -4.004600e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.875000e-46, coef std err z P>|z| [0.025 \\\n", "const 4.740000e-02 5.700000e-02 0.838 0.402 -6.300000e-02 \n", @@ -2715,7 +2540,7 @@ "inc_grants 1.620000e-07 \n", "inc_donations 3.300000e-07 \n", "total_costs -1.440000e-08 ]\n", - "timestamp: 2023-09-26T12:25:20.316852\n", + "timestamp: 2023-09-26T16:44:15.025370\n", "comments: []\n", "exception: \n", "\n", @@ -2734,7 +2559,7 @@ "Model: Logit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 2.187000e-01\n", - "Time: 12:25:20 Log-Likelihood: -3.980700e+02\n", + "Time: 16:44:15 Log-Likelihood: -3.980700e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.532000e-47, coef std err z P>|z| [0.025 \\\n", "const 5.120000e-02 9.100000e-02 0.561 0.575 -1.280000e-01 \n", @@ -2749,7 +2574,7 @@ "inc_grants 2.660000e-07 \n", "inc_donations 7.160000e-07 \n", "total_costs -2.150000e-08 ]\n", - "timestamp: 2023-09-26T12:25:20.356849\n", + "timestamp: 2023-09-26T16:44:15.133359\n", "comments: []\n", "exception: \n", "\n", @@ -2771,7 +2596,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "b1f77749", "metadata": {}, "outputs": [ @@ -2799,7 +2624,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "id": "45ec04ef", "metadata": {}, "outputs": [ @@ -2825,7 +2650,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "id": "0c826271", "metadata": {}, "outputs": [ @@ -2853,7 +2678,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 27, "id": "2816eac7", "metadata": {}, "outputs": [ @@ -2881,7 +2706,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 28, "id": "f38b4334", "metadata": {}, "outputs": [ @@ -2913,7 +2738,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "id": "9e554eea", "metadata": {}, "outputs": [ @@ -2943,7 +2768,7 @@ "N 1.364700e+05 1.999335e+05\n", "R 8.006360e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-26T12:25:19.994753\n", + "timestamp: 2023-09-26T16:44:14.514747\n", "comments: []\n", "exception: \n", "\n", @@ -2972,7 +2797,7 @@ "N 1.341800e+05 1.990196e+05\n", "R 7.882231e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-26T12:25:20.122274\n", + "timestamp: 2023-09-26T16:44:14.676251\n", "comments: []\n", "exception: \n", "\n", @@ -3018,7 +2843,7 @@ "2014 17182166.0 133198.250000 1370867.125 20651954.0 5569847.0\n", "2015 NaN 146572.187500 1779520.625 21810030.0 5137233.5\n", "All 14128150.0 136725.953125 1425355.125 20004548.0 5434959.5]\n", - "timestamp: 2023-09-26T12:25:19.445330\n", + "timestamp: 2023-09-26T16:43:37.469934\n", "comments: []\n", "exception: \n", "\n", @@ -3037,7 +2862,7 @@ "Columns: []\n", "Index: []\n", "output: ['XandY.jpeg']\n", - "timestamp: 2023-09-26T12:25:20.472783\n", + "timestamp: 2023-09-26T16:44:15.336345\n", "comments: ['This output is an image showing the relationship between X and Y']\n", "exception: \n", "\n", @@ -3065,7 +2890,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "id": "f78b5a08", "metadata": {}, "outputs": [ @@ -3116,7 +2941,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "id": "df2a02e0", "metadata": {}, "outputs": [ @@ -3167,7 +2992,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 32, "id": "56d2b6a1", "metadata": {}, "outputs": [ @@ -3268,7 +3093,7 @@ " },\n", " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type)\",\n", " \"summary\": \"fail; threshold: 6 cells may need suppressing; \",\n", - " \"timestamp\": \"2023-09-26T12:25:19.155653\",\n", + " \"timestamp\": \"2023-09-26T16:43:37.007348\",\n", " \"comments\": [\n", " \"This is a cross table between year and grant_type\",\n", " \"6 cells were suppressed in this table\"\n", @@ -3383,7 +3208,7 @@ " },\n", " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\\\"mean\\\")\",\n", " \"summary\": \"fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \",\n", - " \"timestamp\": \"2023-09-26T12:25:19.553369\",\n", + " \"timestamp\": \"2023-09-26T16:43:37.654102\",\n", " \"comments\": [],\n", " \"exception\": \"This one is safe. Trust me, I'm a professor.\"\n", " },\n", @@ -3581,7 +3406,7 @@ " },\n", " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc=\\\"mean\\\")\",\n", " \"summary\": \"review; negative values found\",\n", - " \"timestamp\": \"2023-09-26T12:25:19.848126\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.222443\",\n", " \"comments\": [],\n", " \"exception\": \"It's not disclosive, I promise.\"\n", " },\n", @@ -3663,7 +3488,7 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; missing values found\",\n", - " \"timestamp\": \"2023-09-26T12:25:19.915731\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.374409\",\n", " \"comments\": [],\n", " \"exception\": \"I need this one too\"\n", " },\n", @@ -3745,7 +3570,7 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; missing values found\",\n", - " \"timestamp\": \"2023-09-26T12:25:19.994753\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.514747\",\n", " \"comments\": [],\n", " \"exception\": \"yes\"\n", " },\n", @@ -3844,7 +3669,7 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; negative values found\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.122274\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.676251\",\n", " \"comments\": [],\n", " \"exception\": \"yes\"\n", " },\n", @@ -3873,7 +3698,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.ols(y, x)\",\n", " \"summary\": \"pass; dof=807.0 >= 10\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.194182\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.841394\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ -3902,7 +3727,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.olsr(\",\n", " \"summary\": \"pass; dof=807.0 >= 10\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.256095\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.939403\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ -3927,7 +3752,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.probit(y, x)\",\n", " \"summary\": \"pass; dof=806.0 >= 10\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.316852\",\n", + " \"timestamp\": \"2023-09-26T16:44:15.025370\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ -3952,7 +3777,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.logit(y, x)\",\n", " \"summary\": \"pass; dof=806.0 >= 10\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.356849\",\n", + " \"timestamp\": \"2023-09-26T16:44:15.133359\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ -4171,7 +3996,7 @@ " },\n", " \"command\": \"table = acro.crosstab(\",\n", " \"summary\": \"fail; threshold: 14 cells suppressed; p-ratio: 8 cells suppressed; nk-rule: 7 cells suppressed; \",\n", - " \"timestamp\": \"2023-09-26T12:25:19.445330\",\n", + " \"timestamp\": \"2023-09-26T16:43:37.469934\",\n", " \"comments\": [],\n", " \"exception\": \"yes\"\n", " },\n", @@ -4189,11 +4014,11 @@ " \"outcome\": {},\n", " \"command\": \"custom\",\n", " \"summary\": \"review\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.472783\",\n", + " \"timestamp\": \"2023-09-26T16:44:15.336345\",\n", " \"comments\": [\n", " \"This output is an image showing the relationship between X and Y\"\n", " ],\n", - " \"exception\": \"d\"\n", + " \"exception\": \"yes\"\n", " }\n", " }\n", "}\n" diff --git a/test/test_initial.py b/test/test_initial.py index aa7430e..1071ce2 100644 --- a/test/test_initial.py +++ b/test/test_initial.py @@ -585,6 +585,7 @@ def test_crosstab_with_totals_with_suppression_herichical(data, acro): 12 ] assert total_cols == total_rows == output.output[0]["All"].iat[12] == 852 + assert ("G", "dead") not in output.output[0].columns def test_crosstab_with_totals_with_suppression_with_aggfunc(data, acro): @@ -599,6 +600,7 @@ def test_crosstab_with_totals_with_suppression_with_aggfunc(data, acro): output = acro.results.get_index(0) assert 8689781 == output.output[0]["All"].iat[0] assert 5425170.5 == output.output[0]["All"].iat[6] + assert "R/G" not in output.output[0].columns def test_crosstab_with_manual_totals_with_suppression(data, acro): @@ -615,6 +617,87 @@ def test_crosstab_with_manual_totals_with_suppression(data, acro): assert "R/G" in output.output[0].columns +def test_crosstab_with_manual_totals_with_suppression_herichical(data, acro): + """Test the crosstab with both margins and suprression + are true with multilevel index and columns while using the total manual function. + """ + _ = acro.crosstab( + [data.year, data.survivor], + [data.grant_type, data.status], + margins=True, + show_suppressed=True, + ) + output = acro.results.get_index(0) + assert 47 == output.output[0]["All"].iat[0] + + total_rows = (output.output[0].loc[("All", ""), :].sum()) - output.output[0][ + "All" + ].iat[12] + total_cols = (output.output[0].loc[:, "All"].sum()) - output.output[0]["All"].iat[ + 12 + ] + assert total_cols == total_rows == output.output[0]["All"].iat[12] == 852 + assert ("G", "dead") in output.output[0].columns + + +def test_crosstab_with_manual_totals_with_suppression_with_aggfunc_mean(data, acro): + """Test the crosstab with both margins and suprression are true + and with aggfunc mean while using the total manual function. + """ + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc="mean", + margins=True, + show_suppressed=True, + ) + output = acro.results.get_index(0) + assert 8689780 == round(output.output[0]["All"].iat[0]) + assert 5425170 == round(output.output[0]["All"].iat[6]) + assert "R/G" in output.output[0].columns + + +def test_herichical_crosstab_with_manual_totals_with_mean(data, acro): + """Test the crosstab with both margins and suprression are true,with + aggfunc mean and with multilevel columns and rows while using the total manual function. + """ + _ = acro.crosstab( + [data.year, data.survivor], + [data.grant_type, data.survivor], + values=data.inc_grants, + aggfunc="mean", + margins=True, + show_suppressed=True, + ) + output = acro.results.get_index(0) + assert 1385162 == round(output.output[0]["All"].iat[0]) + assert 5434959 == round(output.output[0]["All"].iat[12]) + assert ("G", "Dead in 2015") in output.output[0].columns + + +def test_crosstab_with_manual_totals_with_suppression_with_aggfunc_std( + data, acro, caplog +): + """Test the crosstab with both margins and suprression are true and with + aggfunc std while using the total manual function. + """ + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc="std", + margins=True, + show_suppressed=True, + ) + output = acro.results.get_index(0) + assert "All" not in output.output[0].columns + assert ( + "The margins with the std agg func can not be calculated. " + "Please set the show_suppressed to false to calculate it." in caplog.text + ) + + if RUN_TEST: def test_crosstab_with_sum(data, acro): @@ -649,3 +732,22 @@ def test_crosstab_multiple_aggregate_function(data, acro): correctval = 97383496.0 errmsg = f"{output.output[0]['mean']['R/G'].sum()} should be {correctval}" assert correctval == output.output[0]["mean"]["R/G"].sum(), errmsg + + def test_crosstab_with_manual_totals_with_suppression_with_two_aggfunc( + data, acro, caplog + ): + """Test the crosstab with both margins and suprression are true + and with a list of aggfuncs while using the total manual function. + """ + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc=["count", "std"], + margins=True, + show_suppressed=True, + ) + assert ( + "We can not calculate the margins with a list of aggregation functions. " + "Please create a table for each aggregation function" in caplog.text + )