diff --git a/img/DATAIA-h.png b/img/DATAIA-h.png new file mode 100644 index 0000000..f9ea606 Binary files /dev/null and b/img/DATAIA-h.png differ diff --git a/template_starting_kit.ipynb b/template_starting_kit.ipynb index e15635d..c0cc708 100644 --- a/template_starting_kit.ipynb +++ b/template_starting_kit.ipynb @@ -5,8 +5,8 @@ "metadata": {}, "source": [ "
\n", "\n", "# Template Kit for RAMP challenge\n", @@ -36,7 +36,7 @@ "The goal of this section is to show what's in the data, and how to play with it.\n", "This is the first set in any data science project, and here, you should give a sense of the data the participants will be working with.\n", "\n", - "You can first load and describe the data, and then show some intersting properties of it." + "You can first load and describe the data, and then show some interesting properties of it." ] }, { @@ -83,7 +83,7 @@ "source": [ "## The pipeline workflow\n", "\n", - "The input data are stored in a dataframe. To go from a dataframe to a numpy array we will a scikit-learn column transformer. The first example we will write will just consist in selecting a subset of columns we want to work with." + "The input data are stored in a dataframe. To go from a dataframe to a numpy array we will use a scikit-learn column transformer. The first example we will write will just consist in selecting a subset of columns we want to work with." ] }, { @@ -117,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -129,11 +129,8 @@ } ], "source": [ - "import problem\n", "from sklearn.model_selection import cross_val_score\n", "\n", - "X_df, y = problem.get_train_data()\n", - "\n", "scores = cross_val_score(get_estimator(), X_df, y, cv=5, scoring='accuracy')\n", "print(scores)" ]