From ed4734d07ffce840cfd0d03a77a2e61fcae149c5 Mon Sep 17 00:00:00 2001 From: Francois Caud Date: Fri, 22 Dec 2023 18:28:12 +0100 Subject: [PATCH] update starting_kit script in notebook --- tephra_starting_kit.ipynb | 131 +++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 64 deletions(-) diff --git a/tephra_starting_kit.ipynb b/tephra_starting_kit.ipynb index c42d884..4f4fed7 100644 --- a/tephra_starting_kit.ipynb +++ b/tephra_starting_kit.ipynb @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -189,7 +189,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -588,7 +588,7 @@ "[6220 rows x 95 columns]" ] }, - "execution_count": 12, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -600,7 +600,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -999,7 +999,7 @@ "[839 rows x 95 columns]" ] }, - "execution_count": 13, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -1011,7 +1011,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -1038,7 +1038,7 @@ " dtype='object')" ] }, - "execution_count": 15, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -1049,7 +1049,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -1058,7 +1058,7 @@ "" ] }, - "execution_count": 17, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, @@ -1091,7 +1091,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -1106,7 +1106,7 @@ " dtype=object))" ] }, - "execution_count": 39, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -1124,7 +1124,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -1163,7 +1163,7 @@ "Name: Event, dtype: int64" ] }, - "execution_count": 22, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -1174,7 +1174,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -1210,7 +1210,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -2176,7 +2176,7 @@ "[30 rows x 455 columns]" ] }, - "execution_count": 40, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -2189,7 +2189,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -2231,7 +2231,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -2253,7 +2253,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -2264,7 +2264,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -2637,7 +2637,7 @@ "[6220 rows x 35 columns]" ] }, - "execution_count": 32, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -2648,7 +2648,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -2657,7 +2657,7 @@ "" ] }, - "execution_count": 33, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, @@ -2687,7 +2687,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -2698,7 +2698,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -3071,7 +3071,7 @@ "[839 rows x 35 columns]" ] }, - "execution_count": 35, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -3082,7 +3082,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -3091,7 +3091,7 @@ "" ] }, - "execution_count": 36, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" }, @@ -3121,7 +3121,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -3142,7 +3142,7 @@ " 4.26585366, 0.99751983]])" ] }, - "execution_count": 30, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -3154,7 +3154,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -3175,7 +3175,7 @@ " nan, nan]])" ] }, - "execution_count": 37, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -3195,7 +3195,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -3238,7 +3238,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -3247,7 +3247,7 @@ "(array([19, 19, 19, ..., 28, 28, 8], dtype=int8), (6220,))" ] }, - "execution_count": 45, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -3259,7 +3259,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -3318,7 +3318,7 @@ " (839,))" ] }, - "execution_count": 47, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -3337,7 +3337,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -3355,7 +3355,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -3373,20 +3373,20 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
Pipeline(steps=[('transformer',\n",
+       "
Pipeline(steps=[('transformer',\n",
        "                 Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),\n",
        "                                 ('scaler', StandardScaler())])),\n",
-       "                ('classifier', LogisticRegression(max_iter=500))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),\n",
+       "                ('scaler', StandardScaler())])
SimpleImputer(strategy='median')
StandardScaler()
LogisticRegression(max_iter=500)
" ], "text/plain": [ "Pipeline(steps=[('transformer',\n", @@ -3395,7 +3395,7 @@ " ('classifier', LogisticRegression(max_iter=500))])" ] }, - "execution_count": 51, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -3414,7 +3414,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -3423,7 +3423,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -3432,7 +3432,7 @@ "(839,)" ] }, - "execution_count": 53, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -3450,7 +3450,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -3459,7 +3459,7 @@ "0.7377830750893921" ] }, - "execution_count": 54, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -3470,7 +3470,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -3530,7 +3530,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -3562,7 +3562,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -3571,7 +3571,7 @@ "0.7158235028382088" ] }, - "execution_count": 56, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -3614,7 +3614,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -3637,12 +3637,15 @@ " self.pipe = make_pipeline(self.transformer, self.model)\n", "\n", " def fit(self, X, y):\n", + " X = X.drop([\"groups\"], axis=1)\n", " self.pipe.fit(X, y)\n", "\n", " def predict(self, X):\n", + " X = X.drop([\"groups\"], axis=1)\n", " return self.pipe.predict(X)\n", "\n", " def predict_proba(self, X):\n", + " X = X.drop([\"groups\"], axis=1)\n", " return self.pipe.predict_proba(X)" ] }, @@ -3656,7 +3659,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -3669,20 +3672,20 @@ "\u001b[38;5;178m\u001b[1mTraining submissions/starting_kit ...\u001b[0m\n", "\u001b[38;5;178m\u001b[1mCV fold 0\u001b[0m\n", "\t\u001b[38;5;178m\u001b[1mscore bal_acc acc time\u001b[0m\n", - "\t\u001b[38;5;10m\u001b[1mtrain\u001b[0m \u001b[38;5;10m\u001b[1m0.833\u001b[0m \u001b[38;5;150m0.864\u001b[0m \u001b[38;5;150m1.809391\u001b[0m\n", - "\t\u001b[38;5;12m\u001b[1mvalid\u001b[0m \u001b[38;5;12m\u001b[1m0.682\u001b[0m \u001b[38;5;105m0.713\u001b[0m \u001b[38;5;105m0.008721\u001b[0m\n", - "\t\u001b[38;5;1m\u001b[1mtest\u001b[0m \u001b[38;5;1m\u001b[1m0.635\u001b[0m \u001b[38;5;218m0.695\u001b[0m \u001b[38;5;218m0.001404\u001b[0m\n", + "\t\u001b[38;5;10m\u001b[1mtrain\u001b[0m \u001b[38;5;10m\u001b[1m0.833\u001b[0m \u001b[38;5;150m0.864\u001b[0m \u001b[38;5;150m1.835487\u001b[0m\n", + "\t\u001b[38;5;12m\u001b[1mvalid\u001b[0m \u001b[38;5;12m\u001b[1m0.682\u001b[0m \u001b[38;5;105m0.713\u001b[0m \u001b[38;5;105m0.020979\u001b[0m\n", + "\t\u001b[38;5;1m\u001b[1mtest\u001b[0m \u001b[38;5;1m\u001b[1m0.635\u001b[0m \u001b[38;5;218m0.695\u001b[0m \u001b[38;5;218m0.005458\u001b[0m\n", "\u001b[38;5;178m\u001b[1mCV fold 1\u001b[0m\n", "\t\u001b[38;5;178m\u001b[1mscore bal_acc acc time\u001b[0m\n", - "\t\u001b[38;5;10m\u001b[1mtrain\u001b[0m \u001b[38;5;10m\u001b[1m0.826\u001b[0m \u001b[38;5;150m0.879\u001b[0m \u001b[38;5;150m1.175791\u001b[0m\n", - "\t\u001b[38;5;12m\u001b[1mvalid\u001b[0m \u001b[38;5;12m\u001b[1m0.671\u001b[0m \u001b[38;5;105m0.692\u001b[0m \u001b[38;5;105m0.012650\u001b[0m\n", - "\t\u001b[38;5;1m\u001b[1mtest\u001b[0m \u001b[38;5;1m\u001b[1m0.609\u001b[0m \u001b[38;5;218m0.648\u001b[0m \u001b[38;5;218m0.001484\u001b[0m\n", + "\t\u001b[38;5;10m\u001b[1mtrain\u001b[0m \u001b[38;5;10m\u001b[1m0.826\u001b[0m \u001b[38;5;150m0.879\u001b[0m \u001b[38;5;150m1.536761\u001b[0m\n", + "\t\u001b[38;5;12m\u001b[1mvalid\u001b[0m \u001b[38;5;12m\u001b[1m0.670\u001b[0m \u001b[38;5;105m0.692\u001b[0m \u001b[38;5;105m0.013410\u001b[0m\n", + "\t\u001b[38;5;1m\u001b[1mtest\u001b[0m \u001b[38;5;1m\u001b[1m0.609\u001b[0m \u001b[38;5;218m0.648\u001b[0m \u001b[38;5;218m0.004637\u001b[0m\n", "\u001b[38;5;178m\u001b[1m----------------------------\u001b[0m\n", "\u001b[38;5;178m\u001b[1mMean CV scores\u001b[0m\n", "\u001b[38;5;178m\u001b[1m----------------------------\u001b[0m\n", "\t\u001b[38;5;178m\u001b[1mscore bal_acc acc time\u001b[0m\n", - "\t\u001b[38;5;10m\u001b[1mtrain\u001b[0m \u001b[38;5;10m\u001b[1m0.829\u001b[0m \u001b[38;5;150m\u001b[38;5;150m\u001b[38;5;150m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;150m0.0035\u001b[0m \u001b[38;5;150m0.871\u001b[0m \u001b[38;5;150m\u001b[38;5;150m\u001b[38;5;150m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;150m0.0076\u001b[0m \u001b[38;5;150m1.5\u001b[0m \u001b[38;5;150m\u001b[38;5;150m\u001b[38;5;150m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;150m0.32\u001b[0m\n", - "\t\u001b[38;5;12m\u001b[1mvalid\u001b[0m \u001b[38;5;12m\u001b[1m0.676\u001b[0m \u001b[38;5;105m\u001b[38;5;105m\u001b[38;5;105m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;105m\u001b[38;5;105m\u001b[38;5;105m0.0\u001b[0m\u001b[0m055\u001b[0m \u001b[38;5;105m0.703\u001b[0m \u001b[38;5;105m\u001b[38;5;105m\u001b[38;5;105m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;105m\u001b[38;5;105m\u001b[38;5;105m0.0\u001b[0m\u001b[0m103\u001b[0m \u001b[38;5;105m\u001b[38;5;105m0.0\u001b[0m\u001b[0m \u001b[38;5;105m\u001b[38;5;105m\u001b[38;5;105m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;105m\u001b[38;5;105m0.0\u001b[0m\u001b[0m\n", + "\t\u001b[38;5;10m\u001b[1mtrain\u001b[0m \u001b[38;5;10m\u001b[1m0.829\u001b[0m \u001b[38;5;150m\u001b[38;5;150m\u001b[38;5;150m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;150m0.0035\u001b[0m \u001b[38;5;150m0.871\u001b[0m \u001b[38;5;150m\u001b[38;5;150m\u001b[38;5;150m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;150m0.0076\u001b[0m \u001b[38;5;150m1.7\u001b[0m \u001b[38;5;150m\u001b[38;5;150m\u001b[38;5;150m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;150m0.15\u001b[0m\n", + "\t\u001b[38;5;12m\u001b[1mvalid\u001b[0m \u001b[38;5;12m\u001b[1m0.676\u001b[0m \u001b[38;5;105m\u001b[38;5;105m\u001b[38;5;105m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;105m\u001b[38;5;105m\u001b[38;5;105m0.0\u001b[0m\u001b[0m056\u001b[0m \u001b[38;5;105m0.703\u001b[0m \u001b[38;5;105m\u001b[38;5;105m\u001b[38;5;105m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;105m\u001b[38;5;105m\u001b[38;5;105m0.0\u001b[0m\u001b[0m103\u001b[0m \u001b[38;5;105m\u001b[38;5;105m0.0\u001b[0m\u001b[0m \u001b[38;5;105m\u001b[38;5;105m\u001b[38;5;105m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;105m\u001b[38;5;105m0.0\u001b[0m\u001b[0m\n", "\t\u001b[38;5;1m\u001b[1mtest\u001b[0m \u001b[38;5;1m\u001b[1m0.622\u001b[0m \u001b[38;5;218m\u001b[38;5;218m\u001b[38;5;218m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;218m\u001b[38;5;218m\u001b[38;5;218m0.0\u001b[0m\u001b[0m129\u001b[0m \u001b[38;5;218m0.672\u001b[0m \u001b[38;5;218m\u001b[38;5;218m\u001b[38;5;218m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;218m\u001b[38;5;218m\u001b[38;5;218m0.0\u001b[0m\u001b[0m232\u001b[0m \u001b[38;5;218m\u001b[38;5;218m0.0\u001b[0m\u001b[0m \u001b[38;5;218m\u001b[38;5;218m\u001b[38;5;218m±\u001b[0m\u001b[0m\u001b[0m \u001b[38;5;218m\u001b[38;5;218m0.0\u001b[0m\u001b[0m\n", "\u001b[38;5;178m\u001b[1m----------------------------\u001b[0m\n", "\u001b[38;5;178m\u001b[1mBagged scores\u001b[0m\n",