diff --git a/docs/api/datasets/fetch_criteo.rst b/docs/api/datasets/fetch_criteo.rst index b3f72da..7a116d2 100644 --- a/docs/api/datasets/fetch_criteo.rst +++ b/docs/api/datasets/fetch_criteo.rst @@ -6,4 +6,16 @@ .. autofunction:: sklift.datasets.datasets.fetch_criteo -.. include:: ../../../sklift/datasets/descr/criteo.rst \ No newline at end of file +.. include:: ../../../sklift/datasets/descr/criteo.rst + +About Criteo +################## + +.. figure:: https://upload.wikimedia.org/wikipedia/commons/d/d2/Criteo_logo21.svg + +`Criteo `__ is an advertising company that provides online display advertisements. +The company was founded and is headquartered in Paris, France. Criteo's product is a form of display advertising, +which displays interactive banner advertisements, generated based on the online browsing preferences and behaviour for each customer. +The solution operates on a pay per click/cost per click (CPC) basis. + +Link to the company's website: https://www.criteo.com/ \ No newline at end of file diff --git a/docs/api/datasets/fetch_hillstrom.rst b/docs/api/datasets/fetch_hillstrom.rst index d71d722..8eb036f 100644 --- a/docs/api/datasets/fetch_hillstrom.rst +++ b/docs/api/datasets/fetch_hillstrom.rst @@ -6,4 +6,12 @@ .. autofunction:: sklift.datasets.datasets.fetch_hillstrom -.. include:: ../../../sklift/datasets/descr/hillstrom.rst \ No newline at end of file +.. include:: ../../../sklift/datasets/descr/hillstrom.rst + +About Hillstrom +################## + +The dataset was provided by Kevin Hillstrom. +Kevin is President of MineThatData, a consultancy that helps CEOs understand the complex relationship between Customers, Advertising, Products, Brands, and Channels. 
+ +Link to the blog: https://blog.minethatdata.com/ \ No newline at end of file diff --git a/docs/api/datasets/fetch_lenta.rst b/docs/api/datasets/fetch_lenta.rst index dd2f225..65d5ecc 100644 --- a/docs/api/datasets/fetch_lenta.rst +++ b/docs/api/datasets/fetch_lenta.rst @@ -6,4 +6,14 @@ .. autofunction:: sklift.datasets.datasets.fetch_lenta -.. include:: ../../../sklift/datasets/descr/lenta.rst \ No newline at end of file +.. include:: ../../../sklift/datasets/descr/lenta.rst + +About Lenta +################## + +.. figure:: https://upload.wikimedia.org/wikipedia/commons/7/73/Lenta_logo.svg + +`Lenta (Russian: Лентa) `__ is a Russian super- and hypermarket chain. With 149 locations across the country, +it is one of Russia's largest retail chains in addition to being the country's second largest hypermarket chain. + +Link to the company's website: https://www.lenta.com/ \ No newline at end of file diff --git a/docs/api/datasets/fetch_megafon.rst b/docs/api/datasets/fetch_megafon.rst index 0829df8..8af63ea 100644 --- a/docs/api/datasets/fetch_megafon.rst +++ b/docs/api/datasets/fetch_megafon.rst @@ -6,4 +6,14 @@ .. autofunction:: sklift.datasets.datasets.fetch_megafon -.. include:: ../../../sklift/datasets/descr/megafon.rst \ No newline at end of file +.. include:: ../../../sklift/datasets/descr/megafon.rst + +About MegaFon +################## + +.. figure:: https://upload.wikimedia.org/wikipedia/commons/9/9e/MegaFon_logo.svg + +`MegaFon (Russian: МегаФон) `__ , previously known as North-West GSM, is the second largest mobile phone operator and the third largest telecom operator in Russia. +It works in the GSM, UMTS and LTE standards. As of June 2012, the company serves 62.1 million subscribers in Russia and 1.6 million in Tajikistan. It is headquartered in Moscow. 
+ +Link to the company's website: https://megafon.ru/ \ No newline at end of file diff --git a/docs/api/datasets/fetch_x5.rst b/docs/api/datasets/fetch_x5.rst index cb42b2f..fdda3a7 100644 --- a/docs/api/datasets/fetch_x5.rst +++ b/docs/api/datasets/fetch_x5.rst @@ -6,4 +6,16 @@ .. autofunction:: sklift.datasets.datasets.fetch_x5 -.. include:: ../../../sklift/datasets/descr/x5.rst \ No newline at end of file +.. include:: ../../../sklift/datasets/descr/x5.rst + +About X5 +################## + +.. figure:: https://upload.wikimedia.org/wikipedia/en/8/83/X5_Retail_Group_logo_2015.png + +`X5 Group `__ is a leading Russian food retailer. +The Company operates several retail formats: proximity stores under the Pyaterochka brand, +supermarkets under the Perekrestok brand and hypermarkets under the Karusel brand, as well as the Perekrestok.ru online market, +the 5Post parcel service, and the Dostavka.Pyaterochka and Perekrestok.Bystro food delivery services. + +Link to the company's website: https://www.x5.ru/ \ No newline at end of file diff --git a/docs/changelog.md b/docs/changelog.md index 642b1fc..849f5d2 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -8,6 +8,13 @@ * 🔨 something that previously didn’t work as documented – or according to reasonable expectations – should now work. * ❗️ you will need to change your code to have the same effect in the future; or a feature will be removed in the future. +## Version 0.4.1 + +### [sklift.datasets](https://www.uplift-modeling.com/en/v0.4.0/api/datasets/index.html) + +* 🔨 Fix bug in dataset links. 
+* 📝 Add an "About" section describing the data provider to the dataset pages. + ## Version 0.4.0 ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.4.0/api/index/metrics.html) diff --git a/notebooks/uplift_metrics_tutorial_advanced.ipynb b/notebooks/uplift_metrics_tutorial_advanced.ipynb new file mode 100644 index 0000000..9906d35 --- /dev/null +++ b/notebooks/uplift_metrics_tutorial_advanced.ipynb @@ -0,0 +1,2166 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "uplift_metrics_tutorial_advanced.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "sklift-env", + "language": "python", + "name": "sklift-env" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "12a2acaf31694c63a2813f34d37300dd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_7553ee7087e245e694f7f85837e088ec", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_60d81df4c86240999f4be223efec1533", + "IPY_MODEL_db7f043fbefb4dcda7d3486cffd28faf", + "IPY_MODEL_f7418cb987f9437c9627dceea703ecd6" + ] + } + }, + "7553ee7087e245e694f7f85837e088ec": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "60d81df4c86240999f4be223efec1533": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_9f9467cab1f2430fa89e2fc396ced9f8", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_ecf054c1199c45389e7dbf94c5ac41df" + } + }, + "db7f043fbefb4dcda7d3486cffd28faf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_f309aeffa2824070afc241d3477c6bcd", + 
"_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "", + "max": 144735744, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 144735744, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_353821f9d47f4434b71b956c3946ad01" + } + }, + "f7418cb987f9437c9627dceea703ecd6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_60a724915a0a4c9f86197e8b7066b513", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 145M/145M [00:20<00:00, 29.6MiB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_16efe430cd6a4b608dd690598ece62cb" + } + }, + "9f9467cab1f2430fa89e2fc396ced9f8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "ecf054c1199c45389e7dbf94c5ac41df": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": 
null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "f309aeffa2824070afc241d3477c6bcd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "353821f9d47f4434b71b956c3946ad01": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": 
null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "60a724915a0a4c9f86197e8b7066b513": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "16efe430cd6a4b608dd690598ece62cb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + 
"padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ATqE_EOoymEd" + }, + "source": [ + "# 🎯 Uplift modeling `metrics` advanced\n", + "\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + " SCIKIT-UPLIFT REPO | \n", + " SCIKIT-UPLIFT DOCS | \n", + " USER GUIDE\n", + "
\n", + "
" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jmg3AprtymEg" + }, + "source": [ + "import sys\n", + "\n", + "# install uplift library scikit-uplift and other libraries \n", + "!{sys.executable} -m pip install scikit-uplift dill catboost\n", + "from IPython.display import clear_output\n", + "clear_output()" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cHFiqvnrymEh" + }, + "source": [ + "# 📝 Load data\n", + "\n", + "We are going to use a `Lenta dataset` from the BigTarget Hackathon hosted in summer 2020 by Lenta and Microsoft.\n", + "\n", + "Lenta is a Russian food retailer. \n", + "\n", + "### Data description\n", + "\n", + "✏️ Dataset can be loaded from `sklift.datasets` module using `fetch_lenta` function.\n", + "\n", + "Read more about dataset in the api docs. \n", + "\n", + "This is an uplift modeling dataset containing data about Lenta's customers' grocery shopping, marketing campaign communications as `treatment` and store visits as `target`.\n", + "\n", + "#### ✏️ Major columns:\n", + "\n", + "- `group` - treatment / control flag\n", + "- `response_att` - binary target\n", + "- `CardHolder` - customer id\n", + "- `gender` - customer gender \n", + "- `age` - customer age" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5o0Hm-iqymEi", + "scrolled": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "12a2acaf31694c63a2813f34d37300dd", + "7553ee7087e245e694f7f85837e088ec", + "60d81df4c86240999f4be223efec1533", + "db7f043fbefb4dcda7d3486cffd28faf", + "f7418cb987f9437c9627dceea703ecd6", + "9f9467cab1f2430fa89e2fc396ced9f8", + "ecf054c1199c45389e7dbf94c5ac41df", + "f309aeffa2824070afc241d3477c6bcd", + "353821f9d47f4434b71b956c3946ad01", + "60a724915a0a4c9f86197e8b7066b513", + "16efe430cd6a4b608dd690598ece62cb" + ] + }, + "outputId": "233891ac-d09c-440f-8d54-50ea3ebfc3cd" + }, + "source": [ + "from sklift.datasets import 
fetch_lenta\n", + "\n", + "# returns sklearn Bunch object\n", + "# with data, target, treatment keys\n", + "# data features (pd.DataFrame), target (pd.Series), treatment (pd.Series) values \n", + "dataset = fetch_lenta()" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "12a2acaf31694c63a2813f34d37300dd", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + " 0%| | 0.00/145M [00:00\n", + "\n", + "Dataset features shape: (687029, 193)\n", + "Dataset target shape: (687029,)\n", + "Dataset treatment shape: (687029,)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F4-jlzDbymEk" + }, + "source": [ + "# 📝 EDA" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 406 + }, + "id": "t7kA0MxxymEk", + "outputId": "d5d5637e-ce85-4fcb-eb8c-695733919b99" + }, + "source": [ + "dataset.data.head().append(dataset.data.tail())" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agecheque_count_12m_g20cheque_count_12m_g21cheque_count_12m_g25cheque_count_12m_g32cheque_count_12m_g33cheque_count_12m_g38cheque_count_12m_g39cheque_count_12m_g41cheque_count_12m_g42cheque_count_12m_g45cheque_count_12m_g46cheque_count_12m_g48cheque_count_12m_g52cheque_count_12m_g56cheque_count_12m_g57cheque_count_12m_g58cheque_count_12m_g79cheque_count_3m_g20cheque_count_3m_g21cheque_count_3m_g25cheque_count_3m_g42cheque_count_3m_g45cheque_count_3m_g52cheque_count_3m_g56cheque_count_3m_g57cheque_count_3m_g79cheque_count_6m_g20cheque_count_6m_g21cheque_count_6m_g25cheque_count_6m_g32cheque_count_6m_g33cheque_count_6m_g38cheque_count_6m_g39cheque_count_6m_g40cheque_count_6m_g41cheque_count_6m_g42cheque_count_6m_g45cheque_count_6m_g46cheque_count_6m_g48...perdelta_days_between_visits_15_30dpromo_share_15dresponse_smsresponse_vibersale_count_12m_g32sale_count_12m_g33sale_count_12m_g49sale_count_12m_g54sale_count_12m_g57sale_count_3m_g24sale_count_3m_g33sale_count_3m_g57sale_count_6m_g24sale_count_6m_g25sale_count_6m_g32sale_count_6m_g33sale_count_6m_g44sale_count_6m_g54sale_count_6m_g57sale_sum_12m_g24sale_sum_12m_g25sale_sum_12m_g26sale_sum_12m_g27sale_sum_12m_g32sale_sum_12m_g44sale_sum_12m_g54sale_sum_3m_g24sale_sum_3m_g26sale_sum_3m_g32sale_sum_3m_g33sale_sum_6m_g24sale_sum_6m_g25sale_sum_6m_g26sale_sum_6m_g32sale_sum_6m_g33sale_sum_6m_g44sale_sum_6m_g54stdev_days_between_visits_15dstdev_discount_depth_15dstdev_discount_depth_1m
047.03.022.019.03.028.08.07.06.01.013.012.016.03.015.011.00.04.00.07.08.00.05.01.06.06.01.00.012.09.01.06.04.02.05.01.00.05.05.06.0...1.33930.58210.9230770.07142910.084.31498.016.011.0137.28228.7766.0169.65810.6807.028.77621.08.09.04469.86658.851286.327736.05418.803233.31811.732321.61182.82283.843648.233141.25356.67237.25283.843648.231195.37535.421.70780.27980.3008
157.01.00.02.01.01.01.00.01.00.01.00.01.00.00.00.00.01.00.00.02.00.01.00.00.00.01.01.00.02.01.01.01.00.03.01.00.01.00.01.0...0.00000.00001.0000000.0000001.01.0002.02.00.00.0001.0000.01.7442.0001.01.0000.02.00.0113.3962.6958.7193.3587.010.00122.980.0058.7187.01179.83113.3962.6958.7187.01179.830.00122.980.00000.00000.0000
238.07.00.015.04.09.05.09.014.07.06.010.014.05.011.00.03.02.02.00.03.02.01.01.00.00.02.06.00.09.02.05.01.07.07.08.03.02.06.06.0...0.00000.72561.0000000.2500005.021.10250.0109.00.00.0007.5940.025.29411.0843.011.15831.059.00.01564.91971.09177.933257.49975.212555.276351.290.000.000.00783.871239.19533.4683.37593.131217.431336.833709.820.0000NaN0.0803
365.06.03.025.02.010.014.011.08.01.00.02.06.07.02.00.00.00.01.00.05.00.00.01.00.00.00.02.01.011.02.03.05.05.04.02.01.00.01.03.0...0.00000.00000.9090910.0000002.012.54449.039.00.00.0002.7780.02.00034.2122.03.7782.013.00.0358.223798.18680.931425.07175.73602.813544.760.00119.9973.24346.74139.681849.91360.40175.73496.73172.581246.210.00000.00000.0000
461.00.01.02.00.02.01.00.03.02.01.01.05.05.00.00.00.01.00.01.01.00.00.02.00.00.01.00.01.02.00.02.01.00.08.02.02.01.01.04.0...0.00000.78651.0000000.1000000.01.45425.025.00.00.0000.4540.03.03612.0000.01.4548.023.00.0226.98168.05960.371560.210.00342.451039.850.0066.180.0087.94226.98168.05461.370.00237.93225.51995.271.41420.34950.3495
68702435.00.00.04.00.02.00.01.00.03.02.02.03.02.01.00.01.00.00.00.03.02.01.02.01.00.00.00.00.03.00.02.00.00.05.00.02.02.02.02.0...1.33330.40020.0000000.1666670.03.00014.02.00.019.8563.0000.019.85629.0000.03.00015.01.00.0550.09695.32111.87114.210.001173.84147.68550.09111.870.00330.96550.09669.33111.870.00330.961173.84119.992.64580.36460.3282
68702533.00.00.00.00.00.00.00.00.00.00.00.02.00.00.00.00.00.0NaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.00.00.00.01.00.00.00.00.02.0...0.00000.00001.0000000.0000000.00.0001.01.00.0NaNNaNNaN0.0000.0000.00.0000.01.00.00.000.000.000.000.000.0028.01NaNNaNNaNNaN0.000.000.000.000.000.0028.010.00000.00000.0000
68702636.00.00.03.00.00.00.00.01.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0...0.00000.98471.0000000.0000000.00.0005.03.00.00.0000.0000.00.0000.0000.00.00015.00.00.00.00155.9723.9941.510.00615.7787.470.000.000.000.000.000.000.000.000.00449.010.000.0000NaNNaN
68702737.00.01.02.00.00.00.00.00.01.00.01.00.01.00.00.00.00.00.00.01.00.00.01.00.00.00.00.00.01.00.00.00.00.01.00.00.00.00.00.0...0.00000.83181.0000000.0000000.00.0001.00.00.00.0000.0000.00.0000.4760.00.0000.00.00.00.0081.9029.820.000.000.000.000.000.000.000.000.0046.720.000.000.000.000.000.0000NaNNaN
68702840.00.01.00.00.02.00.00.02.02.02.02.03.01.01.02.01.04.00.01.00.01.00.00.01.01.03.00.01.00.00.01.00.00.00.00.01.00.02.02.0...0.00000.00001.0000000.1000000.06.45225.017.03.06.6601.3441.06.6600.0000.01.34418.04.01.0531.250.000.00916.440.002407.561304.03290.010.000.00228.47290.010.000.000.00228.47752.32596.860.00000.00000.0000
\n", + "

10 rows × 193 columns

\n", + "
" + ], + "text/plain": [ + " age ... stdev_discount_depth_1m\n", + "0 47.0 ... 0.3008\n", + "1 57.0 ... 0.0000\n", + "2 38.0 ... 0.0803\n", + "3 65.0 ... 0.0000\n", + "4 61.0 ... 0.3495\n", + "687024 35.0 ... 0.3282\n", + "687025 33.0 ... 0.0000\n", + "687026 36.0 ... NaN\n", + "687027 37.0 ... NaN\n", + "687028 40.0 ... 0.0000\n", + "\n", + "[10 rows x 193 columns]" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cNSQsJcqymEk" + }, + "source": [ + "### 🤔 target share for `treatment / control` " + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 142 + }, + "id": "d0BPrhjnymEl", + "outputId": "a57f8423-1dd4-42ed-f2d4-63b939077770" + }, + "source": [ + "import pandas as pd \n", + "\n", + "pd.crosstab(dataset.treatment, dataset.target, normalize='index')" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
response_att01
group
control0.8974210.102579
test0.8898740.110126
\n", + "
" + ], + "text/plain": [ + "response_att 0 1\n", + "group \n", + "control 0.897421 0.102579\n", + "test 0.889874 0.110126" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "q48zr_exymEl" + }, + "source": [ + "# make treatment binary\n", + "treat_dict = {\n", + " 'test': 1,\n", + " 'control': 0\n", + "}\n", + "\n", + "dataset.treatment = dataset.treatment.map(treat_dict)" + ], + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9oGypq_JymEm", + "outputId": "90909780-1bb9-4166-90a2-1f343803959e" + }, + "source": [ + "# fill NaNs in the categorical feature `gender` \n", + "# for CatBoostClassifier\n", + "dataset.data['gender'] = dataset.data['gender'].fillna(value='Не определен')\n", + "\n", + "print(dataset.data['gender'].value_counts(dropna=False))" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Ж 433448\n", + "М 243910\n", + "Не определен 9671\n", + "Name: gender, dtype: int64\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MKHT1JzsymEm" + }, + "source": [ + "### ✂️ train test split\n", + "\n", + "- stratify by two columns: treatment and target. \n", + "\n", + "`Intuition:` In a binary classification problem definition we stratify train set by splitting target `0/1` column. In uplift modeling we have two columns instead of one. 
" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u4lM58UMymEm", + "outputId": "84cc2abf-b854-41a4-8bb8-bbddeb97baab" + }, + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "stratify_cols = pd.concat([dataset.treatment, dataset.target], axis=1)\n", + "\n", + "X_train, X_val, trmnt_train, trmnt_val, y_train, y_val = train_test_split(\n", + " dataset.data,\n", + " dataset.treatment,\n", + " dataset.target,\n", + " stratify=stratify_cols,\n", + " test_size=0.3,\n", + " random_state=42\n", + ")\n", + "\n", + "print(f\"Train shape: {X_train.shape}\")\n", + "print(f\"Validation shape: {X_val.shape}\")" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Train shape: (480920, 193)\n", + "Validation shape: (206109, 193)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BYzpcKwuymEn" + }, + "source": [ + "# 👾 Class Transformation uplift model and Two Models\n", + "\n", + "### For example, let's take the models [ Class Transformation ](https://github.com/maks-sh/scikit-uplift/blob/c9dd56aa0277e81ef7c4be62bf2fd33432e46f36/sklift/models/models.py#L181) and [Two Models](https://github.com/maks-sh/scikit-uplift/blob/c9dd56aa0277e81ef7c4be62bf2fd33432e46f36/sklift/models/models.py#L271). 
Let's display their uplift scores on one graph" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PBwZVdIEymEn" + }, + "source": [ + "from catboost import CatBoostClassifier\n", + "from sklearn.base import clone\n", + "\n", + "from sklift.models import TwoModels\n", + "from sklift.models import ClassTransformation\n", + "\n", + "first_estimator = CatBoostClassifier(verbose=100,\n", + " task_type=\"GPU\",\n", + " devices='0:1',\n", + " cat_features=['gender'],\n", + " random_state=42,\n", + " thread_count=1)\n", + "second_estimator = clone(first_estimator)\n", + "\n", + "transform_model = ClassTransformation(estimator=first_estimator)\n", + "two_model = TwoModels(estimator_trmnt=first_estimator, estimator_ctrl=second_estimator)" + ], + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6IcMWundymEn", + "outputId": "5f59657c-2a7d-47aa-cb21-89a900e47e23" + }, + "source": [ + "transform_model = transform_model.fit(\n", + " X=X_train, \n", + " y=y_train, \n", + " treatment=trmnt_train\n", + ")\n", + "\n", + "two_model = two_model.fit(\n", + " X=X_train, \n", + " y=y_train, \n", + " treatment=trmnt_train\n", + ")" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Learning rate set to 0.024003\n", + "0:\tlearn: 0.6893849\ttotal: 59ms\tremaining: 58.9s\n", + "100:\tlearn: 0.6100331\ttotal: 5.39s\tremaining: 48s\n", + "200:\tlearn: 0.6019326\ttotal: 12s\tremaining: 47.8s\n", + "300:\tlearn: 0.6000429\ttotal: 18.7s\tremaining: 43.4s\n", + "400:\tlearn: 0.5992161\ttotal: 25.4s\tremaining: 37.9s\n", + "500:\tlearn: 0.5986674\ttotal: 32s\tremaining: 31.8s\n", + "600:\tlearn: 0.5982996\ttotal: 38.5s\tremaining: 25.5s\n", + "700:\tlearn: 0.5980941\ttotal: 44.8s\tremaining: 19.1s\n", + "800:\tlearn: 0.5979237\ttotal: 51.2s\tremaining: 12.7s\n", + "900:\tlearn: 0.5976503\ttotal: 57.5s\tremaining: 6.32s\n", + "999:\tlearn: 
0.5975015\ttotal: 1m 3s\tremaining: 0us\n", + "Learning rate set to 0.02591\n", + "0:\tlearn: 0.6711650\ttotal: 23.1ms\tremaining: 23.1s\n", + "100:\tlearn: 0.2887976\ttotal: 3.03s\tremaining: 27s\n", + "200:\tlearn: 0.2763838\ttotal: 6.53s\tremaining: 26s\n", + "300:\tlearn: 0.2729584\ttotal: 10.2s\tremaining: 23.7s\n", + "400:\tlearn: 0.2713649\ttotal: 13.9s\tremaining: 20.8s\n", + "500:\tlearn: 0.2703728\ttotal: 17.6s\tremaining: 17.6s\n", + "600:\tlearn: 0.2696703\ttotal: 21.3s\tremaining: 14.1s\n", + "700:\tlearn: 0.2691328\ttotal: 24.9s\tremaining: 10.6s\n", + "800:\tlearn: 0.2686616\ttotal: 28.6s\tremaining: 7.11s\n", + "900:\tlearn: 0.2682632\ttotal: 32.3s\tremaining: 3.55s\n", + "999:\tlearn: 0.2678762\ttotal: 36s\tremaining: 0us\n", + "Learning rate set to 0.024384\n", + "0:\tlearn: 0.6735712\ttotal: 44.9ms\tremaining: 44.9s\n", + "100:\tlearn: 0.3063022\ttotal: 4.82s\tremaining: 42.9s\n", + "200:\tlearn: 0.2925770\ttotal: 10.2s\tremaining: 40.4s\n", + "300:\tlearn: 0.2895685\ttotal: 15.6s\tremaining: 36.3s\n", + "400:\tlearn: 0.2880540\ttotal: 21.3s\tremaining: 31.9s\n", + "500:\tlearn: 0.2872389\ttotal: 26.9s\tremaining: 26.8s\n", + "600:\tlearn: 0.2866951\ttotal: 32.6s\tremaining: 21.6s\n", + "700:\tlearn: 0.2863474\ttotal: 38.1s\tremaining: 16.3s\n", + "800:\tlearn: 0.2860138\ttotal: 43.6s\tremaining: 10.8s\n", + "900:\tlearn: 0.2857359\ttotal: 49.2s\tremaining: 5.41s\n", + "999:\tlearn: 0.2854954\ttotal: 54.8s\tremaining: 0us\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "POrn2kgwymEo" + }, + "source": [ + "### Uplift prediction" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Xx_hHajjymEo" + }, + "source": [ + "uplift_transform_model_val = transform_model.predict(X_val)\n", + "uplift_transform_model_train = transform_model.predict(X_train)\n", + "\n", + "uplift_two_model = two_model.predict(X_val)" + ], + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "-5PofV6aymEp" + }, + "source": [ + "# 🚀🚀🚀 Uplift metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SmvFxIALymEp" + }, + "source": [ + "### 🚀 `uplift@k`\n", + "\n", + "- uplift at first k%\n", + "- usually falls between [0; 1] depending on k, model quality and data\n", + "\n", + "\n", + "### `uplift@k` = `target mean at k% in the treatment group` - `target mean at k% in the control group`\n", + "\n", + "___\n", + "\n", + "How to count `uplift@k`:\n", + "\n", + "1. sort by predicted uplift\n", + "2. select first k%\n", + "3. count target mean in the treatment group\n", + "4. count target mean in the control group\n", + "5. subtract the mean in the control group from the mean in the treatment group\n", + "\n", + "---\n", + "\n", + "Code parameter options:\n", + "\n", + "- `strategy='overall'` - sort by uplift treatment and control together\n", + "- `strategy='by_group'` - sort by uplift treatment and control separately" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KWBOEv0Z6daH" + }, + "source": [ + "## `🚀uplift@k with a small step of the k parameter`\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZWjC06aQymEp" + }, + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "from sklift.metrics import uplift_at_k\n", + "\n", + "values_uplift_k_transform = []\n", + "values_uplift_k_two = []\n", + "values_k = []\n", + "for k in np.arange(0.01,1,0.01):\n", + " values_uplift_k_transform.append(uplift_at_k(y_val, uplift_transform_model_val, trmnt_val, strategy='overall', k=k))\n", + " values_uplift_k_two.append(uplift_at_k(y_val, uplift_two_model, trmnt_val, strategy='overall', k=k))\n", + " values_k.append(k)" + ], + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oshHc_VWlKmw" + }, + "source": [ + "### `For ClassTransformation model`" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/", + "height": 295 + }, + "id": "O6tfXwaLlHJV", + "outputId": "03c001f3-b386-4838-a6dd-e7a215b235f2" + }, + "source": [ + "plt.plot(values_k, values_uplift_k_transform)\n", + "plt.title('Dependence of uplift@k on k')\n", + "plt.xlabel('The value of k')\n", + "plt.ylabel('The value of uplift@k')\n", + "plt.show()" + ], + "execution_count": 13, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7eRHptiLlXpb" + }, + "source": [ + "### `For TwoModels`" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 295 + }, + "id": "lQGkD3dTlEQn", + "outputId": "ff2debcb-030e-4158-ed64-24216a341985" + }, + "source": [ + "plt.plot(values_k, values_uplift_k_two)\n", + "plt.title('Dependence of uplift@k on k')\n", + "plt.xlabel('The value of k')\n", + "plt.ylabel('The value of uplift@k')\n", + "plt.show()" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KD5ZUzlEJN1b" + }, + "source": [ + "# 🚀 `ASD metric`\n", + "### `The average squared deviation (ASD) is a model stability metric that shows how much the model overfits the training data. Larger values of ASD mean greater overfit.`\n", + "\n", + "## Code parameter options:\n", + "\n", + "- `strategy='overall'` - The first step is taking the first k observations of all test data ordered by uplift prediction (overall both groups - control and treatment) and conversions in treatment and control groups calculated only on them. Then the difference between these conversions is calculated.\n", + "- `strategy='by_group'` - Separately calculates conversions in top k observations in each group (control and treatment) sorted by uplift predictions. Then the difference between these conversions is calculated\n", + "- `bins=10` - Determines the number of bins (and the relative percentile) in the data." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FnqffihzymEv", + "outputId": "30a329cc-387b-4500-8c3e-f8f6ab043f37" + }, + "source": [ + "from sklift.metrics import average_squared_deviation\n", + "\n", + "asd_overall = average_squared_deviation(y_train, uplift_transform_model_train, trmnt_train, y_val,\n", + " uplift_transform_model_val, trmnt_val, strategy='overall')\n", + "asd_by_group = average_squared_deviation(y_train, uplift_transform_model_train, trmnt_train, y_val, \n", + " uplift_transform_model_val, trmnt_val, strategy='by_group')\n", + "\n", + "print(f\"average squared deviation by overall strategy for the ClassTransformation model: {asd_overall:.6f}\")\n", + "print(f\"average squared deviation by group strategy for the ClassTransformation model: {asd_by_group:.6f}\")" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "text": [ + "average squared deviation by overall strategy for the ClassTransformation model: 0.000007\n", + "average squared deviation by group strategy for the ClassTransformation model: 0.000011\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VSg7zXHGG_76" + }, + "source": [ + "# `↗️Display 2 different model uplift scores on one qini plot`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BrRjY_zlYThJ" + }, + "source": [ + "### `Only Qini curves`" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 296 + }, + "id": "-WFTIgynAI28", + "outputId": "b37fb698-7522-44ac-c786-21ea6f8e21a7" + }, + "source": [ + "from sklift.viz import plot_qini_curve\n", + "\n", + "fig, ax_roc = plt.subplots(1, 1)\n", + "plot_qini_curve(y_val, uplift_transform_model_val, trmnt_val, name='Transform model', random=False, perfect=False, ax=ax_roc)\n", + "plot_qini_curve(y_val, uplift_two_model, trmnt_val, name='Two models', random=False, 
perfect=False, ax=ax_roc)" + ], + "execution_count": 16, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 16 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QI7Rn8viY2eF" + }, + "source": [ + "### `Qini curves with a random curve and with a perfect curve`" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 296 + }, + "id": "plgwP2Srq7ed", + "outputId": "3cc745f4-b95d-4238-89b7-69127b234339" + }, + "source": [ + "fig, ax_roc = plt.subplots(1, 1)\n", + "plot_qini_curve(y_val, uplift_transform_model_val, trmnt_val, name='Transform model', random=True, perfect=True, ax=ax_roc)\n", + "plot_qini_curve(y_val, uplift_two_model, trmnt_val, name='Two models', random=True, perfect=True, ax=ax_roc)" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 17 + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAEGCAYAAABB6hAxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeXyTVfY/8M9J0jZJ941SKG0RWkoLlKWDoCCCyiKKCAI6uKC4gPIdR3REHR0dR5YZZUbrKMjPBVABFREZRBEFBxVFisjSUkrBljZdKBTapHuS8/sjTzoBu6S0Sbqc9+uVV/Pc3Od5Tst2uPc+5xIzQwghhBBCXBqVpwMQQgghhOjIJJkSQgghhGgFSaaEEEIIIVpBkikhhBBCiFaQZEoIIYQQohU0ng7A3cLCwjg2NtbTYQghRIeyf//+M8wc7uk4hGiPulwyFRsbi7S0NE+HIYQQHQoR5Xo6BiHaK5dN8xHR20R0moiOOLR9QES/KK8cIvpFaY8loiqHz1Y6nDOMiA4TUTYRpRIRKe0hRLSDiI4rX4Nd9b0IIYQQQjTGlWumVgOY6NjAzLOYeTAzDwbwMYBNDh+fsH/GzPMc2lcAuA9AnPKyX/MJAF8zcxyAr5VjIYQQQgi3clkyxcy7AZQ29JkyujQTwPqmrkFEkQACmPlHtpVqXwtgqvLxTQDWKO/XOLQLIYQQQriNp9ZMjQZQzMzHHdp6E9EBAOUAnmbmbwH0BJDv0CdfaQOACGYuVN4XAYho7GZEdD+A+wEgOjq6bb4DIYTo4vbv399No9G8CWAA5Olw0XlZARwxm833Dhs27HRDHTyVTN2GC0elCgFEM/NZIhoGYDMRJTl7MWZmImp0k0FmXgVgFQCkpKTIZoRCCNEGNBrNm927d+8fHh5+TqVSyd+tolOyWq1UUlKSWFRU9CaAKQ31cfv/JIhIA2AagA/sbcxcw8xnlff7AZwAEA/AACDK4fQopQ0AipVpQPt0YIPZohBCCJc
ZEB4eXi6JlOjMVCoVh4eHl8E2AttwHzfGY3ctgExmrp++I6JwIlIr7y+DbaH5SWUar5yIRijrrO4E8Kly2hYAdynv73JoF0II4R4qSaREV6D8Pm80Z3LZNB8RrQdwNYAwIsoH8CwzvwXgVvx24flVAJ4nojrY5ibnMbN98fqDsD0ZqAPwufICgGUAPiSiuQByYVvQ3qHllefhPyf/AytbPR2KEKKD8VZ7Y1a/WQj0CfR0KEJ0OS5Lppj5tkba5zTQ9jFspRIa6p+GBobWlGnBa1oXZfvy7tF3sT5zPQjk6VCEEB0IwzY4FK4Lx81xN3s4GiG6ni5XAb09yzfmo39If3x444eeDkUI0YGU1ZRh1IZRqKir8HQoblVUVKS++uqr+wHAmTNnvFQqFYeEhJgB4Jdffjmq1WrbbArywIED2ttuu+0yIsLGjRtPJCUl1bTVtV3l2LFj3jfccEPc8ePH0y+lT25urtecOXNidu3alW1vu+eee3pt3bo1uLCw8JBarQYALFy4sIefn5/l+eefL7b369mz58C0tLSjkZGR5lOnTmkefPDB6IMHD+oDAgIsYWFhda+++mreoEGDLvlnWFVVRbfcckvvw4cP64OCgswfffTRyX79+tVe3G/jxo0Bjz32WLTVasXtt99+ZsmSJUUAsGTJkvCVK1dG5OXl+RQUFByMjIw0A8D69esD9+7d6/vyyy8XtCQeeZS1HTGYDOjh18PTYQghOhitRgsAqDJXeTgS9+revbslMzMzIzMzM+POO+8smTdvXrH9WKvVcl1dXZvd66OPPgqaMmXKuaNHj2Y4k0hZrVZYLJY2u78nLFmyJGLu3Lln7McWiwVffPFFUGRkZO22bdv8nbmG1WrFlClT+l511VXGvLy8I+np6UeXLVtmKCgo8GpNbK+88kpYYGCg+dSpU0cWLFhQvHDhwqiL+5jNZjzyyCPR27Zty8rKykr/+OOPQ/bv368FgDFjxph27NiR1aNHjwsSsFmzZpVt3749yGg0tig/kpGpdoKZUWAqwKieozwdihCig/FWeUNFKo8mU3/aeLBXVpFR35bXjO/uX/niLcl5LTln+vTpsT4+PtYjR47ohw8fbpo9e3bpI488El1TU6PSarXW1atX/5qcnFyTmpoaunXr1qCqqirVqVOnfCZNmnR+5cqV+WazGbNmzYo9dOiQLxHx7NmzzyQkJNSsWrUqQqVS8X//+1//vXv3Zj333HMR77//fhgA3HHHHSV/+ctfTh87dsx7woQJ8UOGDDEdPnzY95VXXsldsGBBzNChQyv279/vN2jQoIp77rnnzPPPP9/z7NmzmtWrV58cO3ZspWP8qampoVu2bAmqrKxU5ebmah966KGi2tpa1QcffBDq7e1t/fLLL49HRERY9uzZo5s/f35MVVWVKiYmpmbdunU54eHhlm+//VZ/7733xgLA1VdfXW6/rtlsxkMPPRT1/fff+9fW1tJ99913+k9/+tMZNOGzzz4Lfvnllw0Ox/5xcXFVt9xyy7l169aF3Hjjjcbmfj22bt3qr9Fo+PHHHy+xt40cObLVv1G3bt0a9NxzzxUAwN13331u0aJF0VarFSrV/3Kgb775xjcmJqYmMTGxFgCmTZtWunHjxqBhw4YVXXnllQ3GoFKpcMUVVxg/+OCDwHvvvfecs/HIyFQ7cbb6LKot1ejp17P5zkII4YCIoNPoutzIVGMKCwu9f/7558w333wzPzk5uXrfvn2ZR48ezXj22WcNjz/+eP0IRkZGhn7z5s0njx49mr5ly5bg7Oxsrx9++EFfWFjodfz48fSsrKyMhx566OysWbPK7CNfe/fuzfr222/169atC92/f//RtLS0o2vXrg3//vvvdQBw6tQpnwULFpRkZ2en9+3btzYvL0+7aNGi4hMnThw5ceKE9v333w9NS0vLXLx4cf7ixYsjG4o/KytL99lnn53Yt2/f0aVLl/bU6/XWo0ePZqSkpFS88cYboQAwZ86
c3kuWLMnPysrKSEpKqlq0aFEPAJg7d27syy+/fOrYsWMZjtd8+eWXwwIDAy1Hjhw5evDgwaNr1qwJz8zM9G7sZ5iZmekdGBho1ul09VOl69atC5k5c2bp7Nmzz3399deBNTU1zS7wPXTokC45ObmyuX4AMGzYsH4JCQmJF782b978m1Gw4uJi7969e9cCgJeXF/z8/CzFxcUXDBDl5eV59+zZs37kKSoqqtZgMDT6PdulpKRUfPvtt37OxGwnI1PthMFkS/6j/H8zUimEEM3ydDLV0hEkV5o2bdo5jcb2z1tpaal61qxZvXNycrRExHV1dfUJwKhRo8pDQ0MtANC3b9/qEydO+AwdOrQqLy/P56677up14403lt18883lF1//m2++8bv++uvPBwQEWAFg8uTJ53bt2uU/Y8aM85GRkbXXXHNN/eK1nj171gwfPrwKAOLj46vGjRtXrlKpMHTo0MoXXnihwXUdV1xxhTE4ONgaHBxs9fPzs8yYMeM8AAwcOLDy0KFD+rNnz6qNRqN68uTJJgC47777zs6YMeOyM2fOqI1Go3rSpEkmALjnnnvO7ty5MxAAvvrqq4DMzEz9li1bggHAaDSqMzIytElJSdUNxZCXl+dlX38GANXV1bRz587AFStW5AUHB1sHDx5csWnTpoDbbrutrLGi2U0V027I/v37j7Wkv6t0797dXFRU1GzS5UiSqXbCYLQlUz18Zc2UEKLlPJ1MtSd+fn719WUWLVrUc8yYMcYdO3acOHbsmPe4ceP62T/z9vau/8derVZzXV0dhYeHW44cOZLxySefBKxcuTL8gw8+CPnoo49ynL23Xq+/oLaN4z1UKhXsi+LVajUsFkuDIzuNnaNSqWA2my/pcW9mpuXLl5+aPn36BcnhsWPHGkwa9Hq9taampn72atOmTQFGo1E9YMCAJACoqqpSabVa62233VYWGhpqLiwsvOA6FRUV6rCwMMvAgQOrNm/eHOxMjMOGDetXUVGhvrh92bJleVOnTr1gSjEiIqL2119/9e7Tp09dXV0dTCaTOiIiwuzYp1evXheMROXn518wUtWYqqoq0mq1LapRJNN87YR9ZEoWoAshLoUkUw0rLy9XR0VF1QLAG2+8EdZc/8LCQo3FYsGcOXPOL1261HD48OHfrAMbO3asadu2bUFGo1FVXl6u2rZtW/DYsWObXT/UVkJDQy0BAQGWL774wg8A3nrrrdCRI0eawsLCLP7+/pbt27f7AcDq1atD7Odcd911ZStWrAi3T80dOnTIp7y8vNEcYODAgTWOicj69etDXn755VyDwXDYYDAczsnJOfzdd98FGI1G1TXXXGPavn174Llz51QAsGbNmqCEhIRKjUaDG2+80VhbW0svvfRS/c9+7969Onvsjvbv33/M/gCB4+viRAoAJk+efP7tt98OBYB33nkneOTIkUbH9VIAMGbMmIqcnBxtZmamd3V1NW3atClk+vTp55v7+R47dkyblJTUoj9Mkky1EwaTASHaEOi92nT9phCii5BkqmGLFi0qeu6556L69++faDabm+2fk5PjNWrUqH4JCQmJd9xxx2XPP/98/sV9Ro0aVfn73//+7NChQ/sPGzas/x133FHS2IJmV3nnnXd+XbRoUVR8fHzioUOHdMuWLSsAgLfeeivnD3/4Q3RCQkIiM9ePYj3yyCNnEhISqgcOHNg/Li4u6b777otxnPK8WEBAgDU6OrrmyJEjPkajUbV79+5A+3Sj/fOUlBTThg0bAi+//PKq++677/SIESMSEhISEletWhX+9ttv5wC20bQtW7ac2LlzZ0CvXr0G9O3bN2nRokU9e/bs2apHLR9++OEz586d00RHRw949dVXu7/00kv5gO3Xb8yYMX0B21qq5cuXn5o4cWJ8XFxc0tSpU0tTUlKqAeCFF17oFhERMai4uNg7OTk5cdasWTH2a+/evdt/6tSpZS2Jh5i71k4AKSkpnJaW5ukwfuO+L+9DZV0
l3p/8vqdDEUJ0QPd+eS+qzdV47/r3XHJ9ItrPzCmObQcPHsxJTk5u8okw0XGtXbs2KC0tTZ+amtqimksdWV5enmbmzJmX/fDDD1kXf3bw4MGw5OTk2IbOk5GpdkJqTAkhWkNGpkRbu/POO8/HxsY2u8aoMzl58qT38uXLW/wwhSxAbwcsVgsKKwoxPma8p0MRQnRQkkwJV1i4cGGXGnkcM2aMU2UcLiYjU+1ASVUJzFYzevpLjSkhxKXRa/SoNjf4lLsQwsUkmWoH8o229Y1SsFMIcalkZEoIz5Fkqh2wl0WQZEoIcansyVRXe6hIiPZAkql2oMBUAAIh0rfBnQWEEKJZOo0OFragztp2m/t2BGq1elhCQkJiXFxc0rhx4/qeOXPmN0UfL0VqamronXfeGd0W1xKdnyRT7UC+KR/d9N3grW5R9XohhKin1WgBoMtN9fn4+FgzMzMzjh8/nh4UFGR+8cUXwz0dk+h6JJlqBwwmg0zxCSFaRafRAeh6yZSjESNGVNirdu/atUs/ePDghP79+ycOGTIk4eDBgz6AbcRp/PjxfUaPHh0XExMzYN68efUbor7yyiuhsbGxAwYOHNh/z5499RW6jx075j1ixIj4+Pj4xJEjR8YfP37cGwCmT58eO3v27Ojk5OSEqKiogVu3bvWfMWNG7GWXXZY0ffr0WDd/+8KDpDRCO2AwGfC7iN95OgwhRAdmT6YqzZf0ZHfrbX6oF05ntO0WDt0SKzH1Nadq/pjNZuzatct/7ty5ZwAgOTm5et++fZleXl7YvHmz/+OPPx61ffv2EwCQkZGhP3jwYIZOp7P27dt3wGOPPVbs5eWFZcuW9di/f//RkJAQyxVXXNFvwIABlQAwf/786NmzZ5/9v//7v7Mvv/xy6Pz583t99dVXJwCgrKxMc+DAgcx169YF3XrrrX137tyZOWzYsKpBgwb137Nnj+6KK67outltFyLJlIfVWepwuvK0lEUQQrSKPZnqauURampqVAkJCYnFxcVeffr0qZ46dWo5AJSWlqpnzZrVOycnR0tE7Lh1yqhRo8pDQ0MtANC3b9/qEydO+Jw+fVozYsQIY48ePcwAMG3atNKsrCwtABw4cMD3888/PwEA8+fPL/3rX/9aP5o1efLk8yqVCkOHDq0MDQ2tGz58eBUAxMfHV504ccJHkqmuodlkioj0AB4FEM3M9xFRHIB+zLzV5dF1AUUVRbCyVab5hBCt4vFpPidHkNqafc2U0WhUXX311XHLli3r9vTTT59etGhRzzFjxhh37Nhx4tixY97jxo3rZz/H29u7/pFHtVrNTe1R1xytVsvKdS64rkqlgtlsvuTrio7FmTVT7wCoATBSOTYAeKG5k4jobSI6TURHHNqeIyIDEf2ivK53+OxJIsomomNENMGhfaLSlk1ETzi09yaivUr7B0TUIVdv55ukxpQQovU8nkx5mL+/vzU1NfXU66+/HlFXV4fy8nJ1VFRULQC88cYbYc2df9VVV1Xs3bvXv6ioSF1TU0OffPJJsP2zIUOGVLz55pvByrVCUlJSTK77TkRH5Ewy1YeZ/wGgDgCYuRKAM9n2agATG2j/FzMPVl7bAICIEgHcCiBJOed1IlITkRrAawAmAUgEcJvSFwD+rlyrL4BzAOY6EVO7IzWmhBBtoasnUwBw5ZVXViUkJFStWrUqZNGiRUXPPfdcVP/+/RPNZnOz58bExNQtWrSoYMSIEf1TUlIS4uPj6+dLV65ceerdd98Ni4+PT1y/fn3o66+/7pFRONF+UXMF3ohoD4BrAHzPzEOJqA+A9cw8vNmLE8UC2MrMA5Tj5wCYmPmli/o9CQDMvFQ53g7gOeXj55h5gmM/AMsAlADozsxmIhrp2K8pKSkpnJaW1lw3t0n9ORXvHHkHabenQa1qk/IoQogu6FT5KUz+ZDIWj1qMKX2mtPn1iWg/M6c4th08eDAnOTm5S+3dJrqugwcPhiUnJ8c29JkzI1PPAvgCQC8
ieh/A1wAeb0U8C4jokDINaB9G7QnAMdPPV9oaaw8FcJ6ZzRe1N4iI7ieiNCJKKykpaUXobS/flI/uvt0lkRJCtEr9yFRd1x2ZEsJTmk2mmHkHgGkA5gBYDyCFmb+5xPutANAHwGAAhQCWX+J1WoSZVzFzCjOnhIe3r3puUmNKCNEWZJpPCM9xtmhnTwBqAN4AriKiaZdyM2YuZmYLM1sB/D8A9qlCA4BeDl2jlLbG2s8CCCIizUXtHY7BaJCyCEKIVqtPpiySTAnhbs6URngbwCAA6QCsSjMD2NTSmxFRJDMXKoc3A7A/6bcFwDoi+ieAHgDiAPwE20L3OCLqDVuydCuA3zMzE9EuALcA2ADgLgCftjQeT6s2V+Ns9VkZmRJCtJpapYa3yltGpoTwAGeKdo5g5sTmu12IiNYDuBpAGBHlw7b26moiGgxbMpYD4AEAYOZ0IvoQQAYAM4CHmNmiXGcBgO2wjYy9zczpyi0WAdhARC8AOADgrZbG6GkFpgIA8iSfEKJt6Lx0smZKCA9wJpn6gYgSmTmjJRdm5tsaaG404WHmxQAWN9C+DcC2BtpP4n/ThB2S1JgSQrQlnUYnI1NCeIAza6bWwpZQHVOewjtMRIdcHVhXIDWmhBBtqSsmU2q1elhCQkJiXFxc0qRJky4zGo3OrgUGADzwwANRffv2TXrggQeimu99oSeeeKJ7S88RnZMzv+neAnAHbMU0bwRwg/JVtFKBqQA+ah+E6ZotziuEEM3SqrVdLpmybydz/PjxdC8vL16+fLlTj2zX1dUBANatWxeWmZmZ/sYbb+S39N6pqamRLT1HdE7OTPOVMPMWl0fSBRlMBvTw6wEi2b5JCNF6XXFkytGoUaNMhw4d0pWXl6vmzp0bnZmZqTObzfTnP/+54Pbbbz+fmpoaunnz5uDKykqVxWIhf39/S2VlpXrAgAGJjz76aOHkyZONd999d4zBYPAGgH/+85+nxo8fX1FWVqaaO3du9KFDh/QA8NRTTxX89NNPvvZNluPj46u2bNnyq2e/e+FJziRTB4hoHYD/wLZHHwCAmVv8NJ+4UL4xHz38eng6DCFEJ6Hz0qGsuswj937m+2d6ZZ/L1rflNfsG963825V/c2rrlrq6Omzfvj1g/Pjx5U899VTk2LFjyz/66KOcM2fOqFNSUvpPmTKlHADS09P1hw4dSo+IiLAAgF6vH5KZmZkBADfeeGPvhQsXFk+YMMF0/Phx7wkTJsSdPHky/YknnogMCAiwZGVlZQBASUmJes6cOedXr17dzX6u6NqcSaZ0sCVR4x3aLqk0griQwWTAoPBBng5DCNFJ6DV6FJmLPB2GW9lHhwDg8ssvNz788MNnUlJSErZv3x6UmpraXelD2dnZ3gAwevTocnsidbHvv/8+4Pjx4zr7sclkUpeVlal2794dsGHDhpP29vDw8AbPF11Xs8kUM9/tjkC6GmOtEeW15bL4XAjRZjw5zefsCFJbs6+ZcmxjZmzcuDE7OTm5xrH9u+++89Xr9VY0gpnx888/H9Xr9U1vWivERZpdgE5EUUT0CRGdVl4fE1GLn3oQF5IaU0KIttbV10zZjR07tnz58uURVqstb/r+++91zZwCABg1alT50qVLu9mP9+zZowOAMWPGlP/rX/+qby8pKVEDgEaj4ZqaGln0Kpx6mu8d2CqU91Be/1HaRCtIjSkhRFuTZMpm2bJlBWazmRISEhL79u2b9PTTTzv1F+2qVavyfv75Z9/4+PjEPn36JP373/8OB4ClS5cWnj9/Xh0XF5fUr1+/xG3btvkDwOzZs0v69++fOGXKlN6u/H5E++fMmqlwZnZMnlYT0R9dFVBXISNTQoi2ptVoUW2phpWtUFGLyi11WJWVlQcubvPz8+N169blXtz+hz/84Sxse7s2eH5kZKT5s88+O3nxeYGBgdZNmzblXNy+YsUK+x6yootz5k/bWSK6nYjUyut2XPSbUbScwWSAr5cvAn0CPR2KEKK
TsG92XG2u9nAkQnQtziRT9wCYCaAIQCFsmwvLovRWMhilxpQQom3ZkymZ6hPCvZx5mi8XwBQ3xNKl5JvyEeUv6/iFEG1HkikhPMOZp/nWEFGQw3EwEb3t2rA6N2ZGgakAUX6STAkh2o4kU0J4hjPTfIOY+bz9gJnPARjiupA6v/M151FprpTF50KINiXJlBCe4UwypSKiYPsBEYXAuacARSMMJtvDH7KVjBCiLUkyJYRnOJMULQfwAxF9pBzPALDEdSF1flJjSgjhCpJMCeEZzY5MMfNaANMAFCuvaUqbuET2GlOyAF0I0Za6WjJVVFSkTkhISExISEgMCwtL7tat2yD7cXV1dbt4VHrr1q3+Y8eO7dvaPo5MJhP97ne/62c2m+vbnn/++W4+Pj5Dz549q7a3paamht55553RjucOHz683+7du/UAUFZWpvr9738f06tXrwFJSUn9hw8f3m/nzp2+Tn9zDbBarZgzZ06v6OjoAfHx8Ynfffddg5tff/vtt/r4+PjE6OjoAXPmzOllr1b/wAMPRPXu3TspPj4+8brrrutz5swZNQD89NNPuunTp8e2JjZXcmYB+rvMnMHM/1ZeGUT0rjuC66wMRgOCfILg69Wq37NCCHGBrpZMde/e3ZKZmZmRmZmZceedd5bMmzev2H6s1Wo77f56r776atiUKVPOaTT/m1zauHFjyIABAyree++9oCZOvcDs2bNjg4ODzTk5OUfS09OPrl279tfTp0+3ahnPRx99FHjy5EltTk7OkRUrVuQ++OCD0Q31e/DBB2NWrFiRm5OTc+TkyZPajRs3BgDAhAkTyrOystKzsrIy+vbtW/3MM890B4Dhw4dXFRYWeh8/fty7NfG5ijM/tCTHAyJSAxjmmnC6BoPJIOulhBBtzpPJVMFTf+5Vc/x4g6MQl8onLq6yx5LFTm+gbLVakZSU1D89Pf3oDz/8oLviiisSs7KyDsfFxdX26tVrQEZGRkZBQYHmrrvuii0tLdWEhoaa165dmxMXF1freJ2FCxf2yMnJ8c7NzfUpLCz0Xrp0ad4PP/zgt3PnzoCIiIi6r776KtvHx4c//fRT/yeeeKKXxWJBcnJy5dq1a3N1Oh1v3Lgx4E9/+lMvnU5nHT58uMl+3fLyctXcuXOjMzMzdWazmf785z8X3H777ecd7/3ZZ5/5Pfroo9EAQETYs2dPZnBw8AWbM3/44YehGzZsqK/Unp6e7lNZWal+5ZVXcpcsWRL58MMPN1tYOz093efAgQO+mzdvPqlW2wazEhISahMSEmqbObVJn376adDs2bPPqlQqXHPNNRXl5eWa3Nxcr5iYmDp7n9zcXC+TyaS65pprKgBg9uzZZzdv3hw8c+bM8mnTppXb+40cObJi48aN9Wu2J02adH7NmjXBL7zwQnFrYnSFRkemiOhJIjICGERE5URkVI5PA/jUbRF2QgaTQdZLCSHaXFcbmbqYSqVCTU2NqrS0VLVr1y6/pKSkyq+++sovKyvLOzQ01Ozv72+dP39+9OzZs89mZWVlzJo16+z8+fN7NXSt3Nxcnz179mR9/PHH2fPmzes9bty48qysrAytVmv98MMPAysrK+mBBx7o/cEHH5zIysrKMJvNePHFF8MrKytpwYIFsVu2bMk+cuTI0dOnT3vZr/nUU09Fjh07tvzw4cNHv/3222NPP/10VHl5+QX/Di9fvrx7ampqbmZmZsaPP/6Y6efnd0EiVV1dTXl5eT79+vWrT3rWrl0bfPPNN5dOnDjR9Ouvv2rz8vKaHSj55ZdftImJiZWOo1uNmTx58mX26VPH17///e/Qi/sWFhZ6xcbG1scWGRlZm5ub6+XYJzc31ysyMrI+uYqJiaktLCy8oA8ArF69OmzixIll9uPLL7+8Ys+ePf7NBuwBjf4UmXkpgKVEtJSZn2zphZVaVDcAOM3MA5S2FwHcCKAWwAkAdzPzeSKKBXAUwDHl9B+ZeZ5yzjAAqwHoAGwD8DAzs/J
U4QcAYgHkAJiplG1o16xsRYGpAGN7jfV0KEKITsZH7QMCeSSZaskIkiulpKSYvvrqK7/vvvvO//HHHy/84osvApkZI0aMMFb/hh8AACAASURBVAHAgQMHfD///PMTADB//vzSv/71rw0uXr322mvLfHx8ePjw4VUWi4VuueWWcgBISkqq+vXXX70PHjyojYqKqhk0aFANAMyZM+fsa6+91u3aa681RkVF1QwcOLAGsI26vPnmm+EA8M033wRs3749KDU1tTsA1NTUUHZ29gXTViNGjDA99thjvWbOnFl62223nevTp88FyVRRUZHG39/f7Ni2adOm0E2bNmWr1Wpcf/315959993gp556qqSxHTZauvNGQ/sVutqiRYu6q9VqnjdvXqm9LTIy0lxcXPybpKs9cGaa73MiuuriRmbe3cx5qwH8G4DjYvUdAJ5kZjMR/R3AkwAWKZ+dYObBDVxnBYD7AOyFLZmaCOBzAE8A+JqZlxHRE8rxogbOb1fOVJ1BrbVWRqaEEG2OiKDT6LrsyBQAjB492rh7927//Px879mzZ59fvnx5dwB8ww03lDV7sgMfHx8GALVaDY1GwyqVbQBJpVLBbDZf0uJ2ZsbGjRuzk5OTaxzbCwoK6hOEJUuWFE2dOrXs008/DRw9enTCZ599dnzIkCH1my36+vpaa2tr60ezfvrpJ11ubq7PxIkT4wGgrq6OoqKiap966qmSsLAw8/nz59WO9zp//rw6IiLCHBISYjl69KjebDajudGpyZMnX3bixAntxe0LFiwoXrBgwQVTipGRkXU5OTn1CWJhYaG34xQfAMTExNQ5jkTl5uZ6O45Upaamhm7fvj3o22+/zbL/3AGgqqpKpdVqL0gu2wtn6kz9yeH1DID/AHiuuZOUZKv0orYvmdmeUf8IoMnH2YgoEkAAM//IzAxbYjZV+fgmAGuU92sc2ts1qTElhHAlrUbbpZOpa6+91vTxxx+H9O7du0atViMoKMi8a9euwOuuu84EAEOGDKl48803gwHgjTfeCElJSTE1fcWGJScnVxsMBu8jR474AMDatWtDR48ebRw8eHC1wWDwTk9P9wGADRs2hNjPGTt2bPny5csj7E+uff/997qLr5uenu4zfPjwqsWLFxcNGjSo4siRIxckMeHh4RaLxUKVlZWk3Dfk0UcfLTAYDIcNBsPh06dPHyouLvbKysryHjVqVMX+/fv9Tp06pQGA3bt362tra1V9+vSpTUpKqhk0aFDFwoULe9jjOXbsmPeGDRsCL47ps88+O2lf2O/4ujiRAoApU6acf//990OtViu+/vprX39/f0tDyZSfn5/166+/9rVarXj//fdDb7rppvMAsHHjxoBXXnml+7Zt27L9/f0vSJwyMjJ8+vXr1y5/cztTGuFGh9d1AAYAaIvptHtgG2Gy601EB4jov0Q0WmnrCSDfoU++0gYAEcxcqLwvAhDR2I2I6H4iSiOitJKSkjYI/dLlG5UaU/4yMiWEaHtdfWSqX79+tcxMo0ePNgLAyJEjTf7+/pbw8HALAKxcufLUu+++GxYfH5+4fv360Ndff/2Spif1ej2vXLkyZ8aMGX3i4+MTVSoVHnvssRK9Xs+vvvpq7g033NA3MTGxf1hYWP2U3LJlywrMZjMlJCQk9u3bN+npp5/+zT8E//jHP7rFxcUlxcfHJ3p5efEtt9zymxG1q666quzLL7/0A4DNmzeHzJw584JF7JMmTTq3Zs2akF69epn//ve/502cODEuISEh8ZFHHun13nvv1S84f++993JOnz7tFRMTMyAuLi7pjjvu6O04QnQpZs6cWRYTE1MTExMzYP78+TGvvfZarv2zhISERPv71157LXfevHmxMTExA2JjY2tmzJhRBgALFy6MrqioUI8bNy4+ISEh8fe//33904A7d+4MaOkIo7uQbcCnBSfYJlvTmTnRib6xALba10w5tP8ZQApsNauYiHwA+DHzWWWN1GbYniKMB7C
Mma9VzhsNYBEz30BE55nZcc/Ac8wcjGakpKRwWlqas99um3vj4Bv49y//xr7Z+6DV/GbUVAghWuXmT29GtH80Xhn3Sptel4j2M3OKY9vBgwdzkpOTz7TpjUSzvvvuO/1LL70UsXnz5l89HYu7VFVV0YgRI/qlpaVlenl5ZtnUwYMHw5KTk2Mb+qzZNVNE9CoAe8alAjAYwM+XGgwRzYFtYfo1ytQdmLkGQI3yfj8RnYAtkTLgwqnAKKUNAIqJKJKZC5XpwNOXGpM7GUwGhOnCJJESQriEXqNHtaW6+Y6iwxo1alRlWlpauTPrnTqL7Oxs78WLFxs8lUg1x5lfBcdhHDOA9cz8/aXcjIgmAngcwBhmrnRoDwdQyswWIroMQByAk8xcqpRlGAHbAvQ7AbyqnLYFwF0AlilfO0S5BimLIIRwJTdP81mtViupVKpOWyCzvfrjH//YbC2pzmTgwIH1T0h6gtVqJQCNLn53Zs3UGgDrAewHcBDAT87cmIjWA/gBQD8iyieiubA93ecPYAcR/UJEK5XuVwE4RES/ANgIYB4z2xevPwjgTQDZsJVTsK+zWgbgOiI6DuBa5bjdk2RKCOFKbk6mjpSUlAQq/9AI0SlZrVYqKSkJBHCksT7OTPNdDdvTcjkACEAvIrqrudIIzHxbA81vNdL3YwAfN/JZGmyL3i9uPwvgmqZiaG/MVjOKKopwfe/rPR2KEKKTcmcyZTab7y0qKnqzqKhoAJx7OlyIjsgK4IjZbL63sQ7OTPMtBzCemY8BABHFwzZSJVvKtFBxZTEsbJGRKSGEy2g1WlTVuSeZGjZs2GkAU9xyMyHaMWf+J+FlT6QAgJmzALTPFWDtnMFoWzsvZRGEEK7S1UsjCOEJTi1AJ6I3AbynHM/GhYvShZPsBTtlZEoI4SqSTAnhfs4kU/MBPATgD8rxtwBed1lEnZjBZICKVOju293ToQghOimdRgczm1FnrYOXSiYRhHCHZpMppQbUP5WXaAWDyYAIfYT8BSeEcBmdxrZDSZW5Cl7e8neNEO4gT1+4kZRFEEK4ms5LSabctAhdCCHJlFtJMiWEcDXHkSkhhHtIMuUmtZZalFSWyJN8QgiX0qklmRLC3RpdM0VE/8H/9uT7DWaW2iItUGAqAINlZEoI4VIyMiWE+zW1AP0lt0XRBUhZBCGEO9SvmZJkSgi3aTSZYub/ujOQzk6SKSGEO9hHpqrN1R6ORIiuw5m9+eIALAWQCEBrb2fmy1wYV6djMBmgUWnQTd/N06EIIToxezJVaa70cCRCdB3OLEB/B8AKAGYAYwGsxf+qoQsnGUwG9PDtARXJmn8hhOvImikh3M+Zf9l1zPw1AGLmXGZ+DsBk14bV+RiMUhZBCOF6kkwJ4X7OJFM1RKQCcJyIFhDRzQD8XBxXp1NQUSBlEYQQLqfV2FZjSDIlhPs4k0w9DEAP2958wwDcDuBOVwbV2VTWVaK0ulRGpoQQLuel8oJGpZFkSgg3ciaZimVmEzPnM/PdzDwdQLSrA+tM5Ek+IYQ76TQ6SaaEcCNnkqknnWwTjZBkSgjhTjqNTkojCOFGTVVAnwTgegA9iSjV4aMA2J7sE06SZEoI4U56jV5GpoRwo6ZGpgoApAGoBrDf4bUFwARnLk5EbxPRaSI64tAWQkQ7iOi48jVYaSciSiWibCI6RERDHc65S+l/nIjucmgfRkSHlXNSiYha8s27i8FkgE6jQ4g2xNOhCCG6AJnmE8K9Gk2mmPkgM68B0JeZ1zi8NjHzOSevvxrAxIvangDwNTPHAfhaOQaASQDilNf9sNW2AhGFAHgWwOUAhgN41p6AKX3uczjv4nu1CwajrcZUO831hBCdjCRTQriXM2umhisjSFlEdJKIfiWik85cnJl3Ayi9qPkmAGuU92sATHVoX8s2PwIIIqJI2EbBdjBzqZLE7QAwUfksgJl/ZGaGrZjoVLRDUhZBCOFOWo1Wkikh3KjZ7WQAvAXgEdim+CxtcM8
IZi5U3hcBiFDe9wSQ59AvX2lrqj2/gfZ2x2A0YEi3IZ4OQwjRReg0OpyuPO3pMIToMpxJpsqY+XNX3JyZmYjYFdd2RET3wzZ1iOho91Z1KKspg7HOKIvPhRBuI9N8QriXM9N8u4joRSIaSURD7a9W3LNYmaKD8tX+3ycDgF4O/aKUtqbaoxpo/w1mXsXMKcycEh4e3orQW06e5BNCuJskU0K4lzMjU5crX1Mc2hjAuEu85xYAdwFYpnz91KF9ARFtUO5ZxsyFRLQdwBKHRefjATzJzKVEVE5EIwDsha0q+6uXGJPLFJgKAEgyJYRwH0mmhHCvZpMpZh57qRcnovUArgYQRkT5sD2VtwzAh0Q0F0AugJlK922w1bXKBlAJ4G7l/qVE9DcA+5R+zzOzfVH7g7A9MagD8LnyalfqR6ZkAboQwk3sRTuZWZ4iFsINmk2miCgCwBIAPZh5EhElAhjJzG81dy4z39bIR9c00JcBPNTIdd4G8HYD7WkABjQXhyflG/Ph7+WPAO8AT4cihOgidBodGIxqSzV0Gp2nwxGi03NmzdRqANsB9FCOswD80VUBdTYGk0FGpYQQbqXVaAFApvqEcBNnkqkwZv4QgBUAmNmMtimR0CUUmApkvZQQwq30Gj0ASaaEcBdnkqkKIgqFbdE5lAXfZS6NqpNgZlvBTkmmhBBuZJ/aq6qTZEoId3Dmab6FsD1p14eIvgcQDuAWl0bVSZytPosqcxV6+PVovrMQQrSR+mRKRqaEcAtnnub7mYjGAOgHgAAcY+Y6l0fWCdif5Ivyi2qmpxBCtB17MlVtqfZwJEJ0Dc48zaeGrWRBrNJ/PBGBmf/p4tg6PKkxJYTwBBmZEsK9nJnm+w+AagCHoSxCF86xj0zJNJ8Qwp3syVSludLDkQjRNTiTTEUx8yCXR9IJ5RvzEaINgd5L7+lQhBBdSH1pBFmALoRbOPM03+dENN7lkXRCBpNBpviEEG4n03xCuJczydSPAD4hoiplLzwjEZW7OrDOQGpMCSE8QZIpIdzLmWTqnwBGAtAzcwAz+zOz7I3SDIvVgoKKAlkvJYRwO6mALoR7OZNM5QE4ouydJ5xUUlUCs9UsI1NCCLdTkap+s2MhhOs5swD9JIBviOhzADX2RimN0LR8Yz4AqTElhPAMnUYnI1NCuIkzydSvystbeQknFFQoNaZkk2MhhAdIMiWE+zhTAf2vAEBEemaWoiVOMhgNIBAifSM9HYoQoguSZEoI92l2zRQRjSSiDACZynEyEb3u8sg6uHxTPsL14fBWy2CeEML9tGqtJFNCuIkzC9BfBjABwFkAYOaDAK5yZVCdQYGpQNZLCSE8RuclI1NCuIszyRSYOe+iJosLYulUpGCnEMKTZJpPCPdxZgF6HhFdAYCJyAvAwwCOujasjq3OWofiymKpMSWE8BhJpoRwH2dGpuYBeAhATwAGAIOVY9GIIlMRrGyVkSkhhMdIMiWE+zSZTBGRGsArzDybmSOYuRsz387MZy/1hkTUj4h+cXiVE9Efieg5IjI4tF/vcM6TRJRNRMeIaIJD+0SlLZuInrjUmNqaocIAAIjylzVTQgjPkGRKCPdpcpqPmS1EFENE3sxc2xY3ZOZjsI1u2ZM1A4BPANwN4F/M/JJjfyJKBHArgCQAPQB8RUTxysevAbgOQD6AfUS0hZkz2iLO1jAYbcmUTPMJITxFkikh3MfZCujfE9EWABX2xjaqgH4NgBPMnEtEjfW5CcAGZq4B8CsRZQMYrnyWzcwnAYCINih9PZ9MmQxQkxoR+ghPhyKE6KK0Gi3qrHUwW83QqJz5q14IcamcWTN1AsBWpa+/w6st3ApgvcPxAiI6RERvE1Gw0tYTtv0B7fKVtsbaf4OI7ieiNCJKKykpaaPQG5dvykd33+7yF5gQwmP0Gj0A2exYCHdwugJ6WyMibwBTADypNK0A8DcArHxdDuCetrgXM68CsAoAUlJSXL5hs9SYEkJ4mk6jA2BLpvy
92+r/v0KIhjhTAX0HEQU5HAcT0fY2uPckAD8zczEAMHMxM1uY2Qrg/+F/U3kGAL0czotS2hpr9ziDySDrpYQQHmVPpqrN1R6ORIjOz5lpvnBmPm8/YOZzALq1wb1vg8MUHxE5bmJ3M4AjyvstAG4lIh8i6g0gDsBPAPYBiCOi3soo161KX4+qNlfjTNUZKYsghPAox5EpIYRrObOox0JE0cx8CgCIKAa2qbhLRkS+sD2F94BD8z+IaLBy7Rz7Z8ycTkQfwraw3AzgIWa2KNdZAGA7ADWAt5k5vTVxtYUCUwEAoKe/JFNCCM+RZEoI93EmmfozgO+I6L8ACMBoAPe35qbMXAEg9KK2O5rovxjA4gbatwHY1ppY2prBpNSYkjVTQggPsidTleZKD0ciROfnzAL0L4hoKIARStMfmfmMa8PquOzJlKyZEkJ4klajBSAjU0K4g7PP7vsAKFX6JxIRmHm368LquAwmA7xV3gjThXk6FCFEFybTfEK4T7PJFBH9HcAsAOkArEozA5BkqgH2J/lU5MzafiGEcA1JpoRwH2dGpqYC6KdUIBfNMJgMsvhcCOFxUhpBCPdxZvjkJAAvVwfSWRhMBvT0lWRKCOFZUgFdCPdxZmSqEsAvRPQ1gPrRKWb+g8ui6qBMtSaU1ZTJyJQQwuO81F7QkEaSKSHcwJlkagvaQTHMjsD+JJ8U7BRCtAc6jU6SKSHcwJnSCGvcEUhnIDWmhBDtiVajlWRKCDdoNJkiog+ZeSYRHUYDFc+ZeZBLI+uApMaUEKI90Wl0qKqTZEoIV2tqZOph5esN7gikMzCYDNBr9AjyCWq+sxBCuJhM8wnhHo0mU8xcqHzNdV84HZu9LAIReToUIYSwJVMWSaaEcDWpLNmGpCyCEKI9kZEpIdxDkqk2wswwGKVgpxCi/ZBkSgj3aDSZUupK2beTEc04X3MeleZKKYsghGg3dF6yAF0Id2hqAXokEV0BYAoRbQBwwUIgZv7ZpZF1MAWmAgBSY0oI0X5o1VIaQQh3aCqZ+guAZwBEAfjnRZ8xgHGuCqojyjflA5BkSgjRfsg0nxDu0dTTfBsBbCSiZ5j5b26MqUOS6udCiPbGnkwxszxlLIQLOVMB/W9ENAXAVUrTN8y81bVhdTwGowGBPoHw8/bzdChCCAEA0HvpwWDUWGqg1Wg9HY4QnVazT/MR0VLYCnhmKK+HiWiJqwPraAwVBhmVEkK0KzqNDgBQba72cCRCdG7ObHQ8GcBgZrYCABGtAXAAwFOuDKyjMRgNiAuO83QYQghRz55MVZmrEATZmUEIV3G2zpTjn8LAtrgxEeUQ0WEi+oWI0pS2ECLaQUTHla/BSjsRUSoRZRPRISIa6nCdu5T+x4norraIraWsbEWBqUBGpoQQ7YpjMiWEcB1nkqmlAA4Q0WplVGo/gMVtdP+xzDyYmVOU4ycAfM3McQC+Vo4BYBKAOOV1P4AVgC35AvAsgMsBDAfwrD0Bc6czVWdQa62VZEoI0a5o1bZ1UpJMCeFazSZTzLwewAgAmwB8DGAkM3/gonhuArBGeb8GwFSH9rVs8yOAICKKBDABwA5mLmXmcwB2AJjootgaJTWmhBDtkc7LNjJVaa70cCRCdG7OrJmyb3q8pY3vzQC+JCIG8AYzrwIQYd9gGUARgAjlfU8AeQ7n5ittjbVfgIjuh21EC9HR0W35PdhuKjWmhBDtkEzzCeEeTiVTLjKKmQ1E1A3ADiLKdPyQmVlJtFpNSdRWAUBKSkqbXNORwWirMdXDr0dbX1oIIS6ZJFNCuIfHNjpmZoPy9TSAT2Bb81SsTN9B+Xpa6W4A0Mvh9CilrbF2tzKYDAjThUkdFyFEuyKlEYRwjyaTKSJSXzxi1BaIyJeI/O3vAYwHcAS2qUT7E3l3AfhUeb8FwJ3KU30jAJQp04HbAYwnomBl4fl4pc2tCkwFMiolhGh3ZGRKCPdocpqPmS1EdIyIopn5VBveNwLAJ8r2BhoA65j5CyL
aB+BDIpoLIBfATKX/NgDXA8gGUAngbiW+UiL6G4B9Sr/nmbm0DeN0Sr4pH4PCB7n7tkII0SS9Rg9AkikhXM2ZNVPBANKJ6CcAFfZGZp5yqTdl5pMAkhtoPwvgmgbaGcBDjVzrbQBvX2osrWW2mlFUUYTre1/vqRCEEKJBPmofAJJMCeFqziRTz7g8ig6suLIYFrbIk3xCiHZHrVLDR+0jyZQQLubMRsf/JaIYAHHM/BUR6QGoXR9ax2CvMSVrpoQQ7ZFOo5NkSggXc2aj4/sAbATwhtLUE8BmVwbVkeQbbTWmovyiPByJEEL8liRTQrieM6URHgJwJYByAGDm4wC6uTKojsRgMkBFKnT36+7pUIQQ4jckmRLC9ZxJpmqYudZ+QEQa2KqXC9im+SL0EfBSeXk6FCGE+A1JpoRwPWeSqf8S0VMAdER0HYCPAPzHtWF1HAaTQdZLCSHaLUmmhHA9Z5KpJwCUADgM4AHYaj497cqgOpJ8U748ySeEaLe0Gq0kU0K4mDNP81mJaA2AvbBN7x1T6j51ebWWWpRUlsjicyFEuyUjU0K4XrPJFBFNBrASwAkABKA3ET3AzJ+7Orj2rrCiEAxGT38ZmRJCtE+STAnhes4U7VwOYCwzZwMAEfUB8BmALp9MGYy2PZV7+MqaKSFE+yTJlBCu58yaKaM9kVKcBGB0UTwdSr5JqTHlL9N8Qoj2Sa/Ro9pc7ekwhOjUGh2ZIqJpyts0ItoG4EPY1kzNwP82Fu7SDCYDNCoNwnXhng5FCCEapNPoUGOpgcVqgVolm1cI4QpNTfPd6PC+GMAY5X0JAJ3LIupACkwF6OHbQ/6CEkK0WzqN7a/raks1fFW+Ho5GiM6p0WSKme92ZyAdkdSYEkK0d1qNFgBQZa6Cr5ckU0K4gjNP8/UG8H8AYh37M/MU14XVMRhMBoztNdbTYQghRKPsI1NVdVUypyCEizjzNN9mAG/BVvXc6tpwOo7KukqUVpfK4nMhRLtmT6YqzZUejkSIzsuZZKqamVNdHkkHU2AqACBlEYQQ7Vv9yJSURxDCZZxJpl4homcBfAmgxt7IzD+7LKoOwGCy1ZiSgp1CiPbMcQG6EMI1nEmmBgK4A8A4/G+aj5XjLsteY0r25RNCtGc6L4c1U0IIl3AmmZoB4DJmrnV1MB2JwWSAVq1FqDbU06EIIUSjZJpPCNdzpgL6EQBBbXVDIupFRLuIKIOI0onoYaX9OSIyENEvyut6h3OeJKJsIjpGRBMc2icqbdlE9ERbxeiMAlMBevj1ABG587ZCCNEiOrUkU0K4mjMjU0EAMoloHy5cM3WppRHMAB5l5p+JyB/AfiLaoXz2L2Z+ybEzESUCuBVAEoAeAL4ionjl49cAXAcgH8A+ItrCzBmXGFeLGEwGmeITQrR7MjIlhOs5k0w925Y3ZOZCAIXKeyMRHQXQVFZyE4ANzFwD4FciygYwXPksm5lPAgARbVD6uieZMhowOHywO24lhBCXrH7NlCRTQrhMs8kUM//XVTcnolgAQwDsBXAlgAVEdCeANNhGr87Blmj96HBaPv6XfOVd1H55I/e5H8D9ABAdHd3quMtqymCsM0qNKSFEu+et8oaKVJJMCeFCza6ZIiIjEZUrr2oishBReWtvTER+AD4G8EdmLgewAkAfAINhG7la3tp72DHzKmZOYeaU8PDWb0pcX2NKtpIRQrRzRASdRifJlBAu5MzIlL/9PdlWW98EYERrbkpEXrAlUu8z8yblPsUOn/8/AFuVQwOAXg6nRyltaKLdpeprTMmaKSFEByDJlBCu5czTfPXYZjOACc12boSSkL0F4Cgz/9OhPdKh282wPUUIAFsA3EpEPso+gXEAfgKwD0AcEfUmIm/YFqlvudS4WkKSKSFERyLJlBCu5cxGx9McDlUAUgC0ppTulbAVAT1MRL8obU8BuI2IBsNWEDQHwAMAwMzpRPQhbAvLzQAeYmaLEtsCANsBqAG8zczprYjLaQaTAf5
e/gj0CXTH7YQQolW0Gq0kU0K4kDNP893o8N4MW6Jz06XekJm/A9BQcaZtTZyzGMDiBtq3NXWeqxhMBlkvJYToMGRkSgjXcmbN1N3uCKQjMRgNiAmI8XQYQgjhFEmmhHCtRpMpIvpLE+cxM//NBfG0e8yMgooCXNHzCk+HIoQQTtFpdDhXfc7TYQjRaTU1MlXRQJsvgLkAQgF0yWSqtLoUVeYqWXwuhOgwZGRKCNdqNJli5vo6T8q2Lw8DuBvABrRhDaiORp7kE0J0NHqNHtXm1jw3JIRoSpNrpogoBMBCALMBrAEwVKlK3mVJMiWE6GhkZEoI12pqzdSLAKYBWAVgIDOb3BZVOybJlBCio7EnU8wMW6k/IURbaqpo56MAegB4GkCBw5YyxrbYTqajMpgMCPYJht5L7+lQhBDCKVqNFha2oM5a5+lQhOiUmloz1aLq6F2FwWiQUSkhRIei0+gAAFXmKnirvT0cjRCdjzNFO4UDg8mA/qH9PR2GEKITYmYAts2JrVYrzlTUombDOpiWv/ibvtXde0Jb5Nx2pCkAPgRQOvmM7NwghAtIMtUCFqsFBRUFuDbmWk+HIjoRa1UVanNyoA4IgHHnLvhfew28Im1bVbLVChCBKytRnZUF3YABgMo2aExq9QXX4dpa1GRno+zTTxE4bTrUAf4wnz4NtlqhDgyCd68okJdXi+Pj2lrUGgwgL2+ofLzBdXUAEVR6PVT+/uCqKtQaDLCcPw9z8WnoU4aBvL2hDgoCiAAiWMvKYD53DuX/2QpNZHfUHD0KXXIy6goKUH00E3UFBfCOjYVu8GCAGcWLbRseaCIiYD59GuqwUBAIuiFDoA4JRtnmTxF44w3wvfJKWE0mQKMB19SiLu8ULOVGQEWwlhtRvs22QYJu8GCQRgPS62AuOYOqM2eB8+egqrNNe1l1vihLGgL/nOPQnClu9Gdxwc/F1w9UYUJpZAyM4T1Rl5uLOh8d4ktO4pM+o+GrAiZl/bfJaxwO7Y2TwT2QYCxEv+KTLfp1cTaRclT8e4QtDgAAEv1JREFUzQ70vqVPi88TQjSN7P8T6ipSUlI4LS3tks4tqijCdRuvwzMjnsHMfjPbODLRUVhra1H017+i6peDqD1xwtPhtDlNZHeYC4s8HUa9Or0vvCobKnvnvJ/D49G98ix6VJxto6jcK3xgOUITTbCqvWFW6VBbq0f+h7aF5D7xOqjGdUf1iVrUBPmjpnsIVFp/WE1W+K/YAeugPqjoHoQzWitSFr2IbqGXtkyBiPYzc0pbfl9CdBYyMtUC8iRf51T/H4q6Opx64AFU/vCjW+9P3l7g2jr4XTEMpj37m+yrH5yIyl8yGvysNK4/NKWlCDj7v5EVDvAHlRsBnRZV0EBX1fxDuebCIqj89LCaKgEA6pG/A4KDoPHVocZYhepDGdAWKKMil8VC3yMcqpAgnCUtqmssOHuuApfttY3IVMf3gzbrGCoDQ3DGqoHGasGmvmNQq9LgnNYfXhYLzmn9UKILhsZqBgDUqr1Qo/ZGpZe2yThjywoRWGsCMaNK4wOtpRa13hr4kBll/npoVWboUQ091UCPGuhRDV+qgc5ahb4BtairrUWAugaBqmpEqqtRYaqCb201SF8LPdVCVVsNr/NmBIVUQKOyghkAA6QCzDWEqjPeqDVqcO64L6JGlaL6vBfU3lbUlHuj5KA/ACByrAY+3fWwmL3hExkEdYAfSOsPVmtRe86KihNlOP1xGkInp6DuXBV0/XojZNoEwNsX8NYD3n6Al9723ssXarUGagA+APo/3+wvpa06oBDC5SSZagFJptyr+uhRQKWGT98+qM7MRM70Wxrsp//d71C5b1/9sf/48fCOiUFNdjaCZsxAbW4uSlevhrnYuekbZ/j0iUHkn+6Dtk/Mb6bbWsvKjFNlFmw8Womvfq1G5hnzhR1i2/R2zqsDoAUwfELjfbQAIgFMddgfPdGxA8MHddChBr6ohk5JdOIpH/+/vTsPkqM
87zj+fWZ2Z/bQSmglJASSQNxXjJEAI5tgLAinK4BDUsQpmwKqCMQYG8ekILhiTKWIsE0SO1AhoAiQjcHchuKQsFDwUQaEsRACDAhxaY0E0gppV7szuzPz5I9+d9V7zGp2R3uM9vep6pp33u5+++1XPbWP3vft7lpC0GMZ6sjSWJUlkc90B0J1lu3eb1q6g9r6LJ5rpz6Ukaaj9HPJglelo0ClOgQtk+ux1B5RIFNdFwKaHWmLpatS9TSE/aakorya6igAaqiqYepOHj9goalqgCl9XuEuIpVGwdQgNLVEwdTeE/Ye5ZpUhuy6d2hZtpTOjRv55J57h+048UAKoGXZsu5064oVQyqz8bwz2OOsBaT3HVrg/Nib7Ty5tp0n1lbmU6cTFKgjEwUvIbipJcshEzvY3NJOXcjvHQDVWYYZ6Q46s5nuvImJLJOS0f5VhQzmhZLr4dUJCEFKd9CTasBSE7p7a7p7cYqlu3t3egVHiV0bBIvI+KVgahCaWpuYVjtNtxYHuU2boknTy5fTtnIlW352z7Aer3rGNKZ/7St0frSZ/NYWJp1yApvufoStTz3LjKsuofbIQ+h4bz3rr/8x1VMbqZ4+lc4NH9O5cRM1B89h6lfOpeFz0ZSPQkcn+ZZWqhr3KOshhtmcc+kTzax4N7urTrOHbx1bw8n7VZNK9lNHd6zQgeUyJPJZErl2LJ8hkeuVzrdj3Z+xbUOaXDu5jijY6d4vX6SXJwv0c/lHvTz1PQIWS00PwU88EBognarfETiFAMiq0tEkdhGRMUwT0AfhwqcuJO95lpyxZBfXamzr3LCB5iU/oXnx4rLKmXbJ37LHF08mUV87YABTyGSxdGrYntTc1lkgnTSSCcPdceDX72d58U8drNuS4/FYb9JBjVW81ZwrXlgJzjkwweWfIgpi8hkKne1U5TN8tK2dTVvbmFXXQWdHhr3THSTzGRL5TAh0MiRymVhQ1LVuR9AzqF4eS8Z6aGIBS6q+T69Nn/RA66rrQL08uz1NQBcpTj1Tg9DU2sS86fNGuxpDlt+2jTeP+8ywHqP2yIOZdcNVJCfUD7mMRE26T17BnY48HH3bBtpzw/EfgGguTz0ZZsbm5tR+kmV2IkM9WWot5MWHtWLpuVM6qbUOErlYQJTLYOs7YH3fI84uVpOqmj4Bi9VPLWEYq1g6lJFMqZdHRGQYKJgqUWehk41tG8fs5HMvFGhZvpyGBQuwZJLtv/sd71940S49Rt1RhzHrhqvoWL+B9IH7DrnnqOv9YLmCc+DNHw5q3yR5GmITkut6pMPcnhD0xCcs1/bZtud+tWRJWulBmieqdgQq3QFLQ2yS8s6GsYqkq+s0l0dEpMIomCrRhu0bKHhh2IKpjQtvpPnOO0lOnsyBzyxn6yOPsOF70b3PjRddxNTLLiXZ0EBnUxPbn3uOD6/9zpCPtf8dPyA1e2/aVr3G5nsfY8+L/4aag+ZgZnguh1UNfFnUHLTfgOuznQXO/Ol6trS09duDEw9oLk/2DHq602Fyc+9AKG2DG3LLJ2shVUcitaMnx1KNgx/G6pGegFVp3pyIiEQUTJWonMcieD7PH484svv7hAULaH3mmX63zW/ZwhtHz+2R17x48ZDnK6Vm7sWcRQtJpPr+8a8/6hDqD58NuQytH79HnWUpdLST72gn7Vm8s533NrVyz6rmfoOi+J1e8VvX68iy3Dy677sEnZ6kUF1HSz7FdmqY1jiZmvo9+r9ja6dDWrF0dR3JhF4xKSIiw6vigykzOx34EZAEFrn7wuE4TtdjEfZpKB5M5Vta2HTzzTTfNfAE9WKB1FC1XHYa7xXgC/s496/e3Od29U2Lr+iRt1e6k85se49engnhM8GOi8KIHml0TXgDyXZP00aadk+znRraSdPmaTYzkTbStBVqok/StHkN1bUTuOL0o9iaT1NdM4F2SzN9ypTYhOcoUKoOvTxpYOoubRkREZHhV9HBlJklgVuAvyCa4rvSzB519/4fEV2
GptYmkpZket103J23P27l8dUbuHjunnzw2fkllTHp2NlsXfl+9/dpp86kZlqS+n0SkMvgne2Qy0BnBnLt4TODsZO5PFvu4DiArXBxMkkbNX2Cnk0+KcorpNnetiMQ6to2nu7arysoaiPNyX+2L//15XmkC86eycH19nQFSHq9qoiI7I4q+tEIZjYfuM7dTwvfrwFw938rts9QH42w5vDDSJZ+FzoAe8/fwsSZ7Viv+cQFN9pJRcFL6O3Zke4b0PTuCeratk8eaXJU8fvvnMK8f/1lj2MuOHQa1599BCfcuOMhlqcfsRfXn30E0yb2HY/rmiQuIgJ6NILIQCq6ZwrYB/gg9n090OfefzO7BLgEYPbsYjekD6yUQOqhcz5PS7KeNk/TTgh2cmm253oGRRlSRINoQ/P2DWeSTBj5gpNM9F/OuwvPGlR+bwqkRERESlPpwVRJ3P024DaIeqaGUsZhf3ydtj/8ga0PPcRe3/1uv3e8fasjz2Or/8Q/PbAagKevPJGZk+s47F+e6rHdZ+Y08vw7zX32b0hXseiCYzhkrwZaMjlmNdYNWKdigZSIiIiMHA3ziYjITmmYT6S4Sr9vfCVwkJnNMbMUcD7w6CjXSURERMaRih7mc/ecmV0OLCV6NMJid391lKslIiIi40hFB1MA7v4E8MRo10NERETGp0of5hMREREZVQqmRERERMqgYEpERESkDAqmRERERMqgYEpERESkDBX90M6hMLOPgfeGuPtUYNMurM7uSu1UGrVTadROOzcSbbSvu+85zMcQqUjjLpgqh5m9qCcA75zaqTRqp9KonXZObSQyujTMJyIiIlIGBVMiIiIiZVAwNTi3jXYFKoTaqTRqp9KonXZObSQyijRnSkRERKQM6pkSERERKYOCKREREZEyKJgqkZmdbmZvmNlaM7t6tOszEszsXTN7xcxWmdmLIa/RzJ42s7fC5+SQb2b249A+q81sbqycC8L2b5nZBbH8eaH8tWFfG/mzHDwzW2xmH5nZmljesLdLsWOMVUXa6TozawrX1CozOzO27ppwzm+Y2Wmx/H5/e2Y2x8yeD/k/N7NUyE+H72vD+v1G5owHz8xmmdkKM3vNzF41s2+EfF1PIpXE3bXsZAGSwNvA/kAKeBk4fLTrNQLn/S4wtVfe94GrQ/pq4MaQPhN4EjDgeOD5kN8IrAufk0N6clj3QtjWwr5njPY5l9guJwJzgTUj2S7FjjFWlyLtdB3w7X62PTz8rtLAnPB7Sw702wPuA84P6VuBy0L6H4BbQ/p84Oej3RYDtNEMYG5INwBvhrbQ9aRFSwUt6pkqzXHAWndf5+4dwL3A2aNcp9FyNnBXSN8FnBPLX+KR54A9zGwGcBrwtLs3u/sW4Gng9LBuors/5+4OLImVNaa5+6+A5l7ZI9EuxY4xJhVpp2LOBu5196y7vwOsJfrd9fvbC70rC4AHwv6927yrnR4ATh6rvZ7u/qG7vxTSLcDrwD7oehKpKAqmSrMP8EHs+/qQt7tzYJmZ/d7MLgl50939w5DeAEwP6WJtNFD++n7yK9VItEuxY1Say8MQ1eLY0NJg22kK8Im753rl9ygrrN8ath/TwnDk0cDz6HoSqSgKpmQgJ7j7XOAM4GtmdmJ8Zfifrp6t0ctItEsFt/1/AwcAnwY+BG4a3eqMDWY2AXgQ+Ka7b4uv0/UkMvYpmCpNEzAr9n1myNutuXtT+PwIeJhoyGVjGDogfH4UNi/WRgPlz+wnv1KNRLsUO0bFcPeN7p539wJwO9E1BYNvp81EQ1xVvfJ7lBXWTwrbj0lmVk0USN3t7g+FbF1PIhVEwVRpVgIHhbuHUkSTWh8d5ToNKzOrN7OGrjRwKrCG6Ly77hS6APhFSD8KfDXcbXQ8sDUMISwFTjWzyWFI51RgaVi3zcyOD/NZvhorqxKNRLsUO0bF6PrjHZxLdE1BdG7nhzvx5gAHEU2c7ve3F3pSVgDnhf17t3lXO50HPBO2H3PCv/H
/Aq+7+7/HVul6Eqkkoz0DvlIWorto3iS6s+ja0a7PCJzv/kR3Tr0MvNp1zkRzT5YDbwG/BBpDvgG3hPZ5BTgmVtZFRBOK1wIXxvKPIfpj+jZwM+GJ/GN9Ae4hGqLqJJqDcvFItEuxY4zVpUg7/SS0w2qiP+YzYttfG875DWJ3dhb77YVr9IXQfvcD6ZBfE76vDev3H+22GKCNTiAaXlsNrArLmbqetGiprEWvkxEREREpg4b5RERERMqgYEpERESkDAqmRERERMqgYEpERESkDAqmRERERMqgYEp2e2bmZnZT7Pu3zey6XVT2nWZ23s63LLm8f95VZe3kOCeZ2WeHsN+7ZjZ1OOokIlKpFEzJeJAFvjTWgoDY07vjBh1MmVlyCIc/CRh0MCUiIn0pmJLxIAfcBlzZe0XvniUzaw2fJ5nZs2b2CzNbZ2YLzezvzOwFM3vFzA6IFXOKmb1oZm+a2RfD/kkz+4GZrQwv9f37WLm/NrNHgdd61WUhUGtmq8zs7pD3SHjR9Kuxl01jZq1mdpOZvQzMN7OLw/FfMLPbzezmsN2eZvZgqMdKM/tceKHupcCV4Vh/3t92Yf8pZrYsHH8R0UMjRUQkpr//GYvsjm4BVpvZ9wexz1HAYUAzsA5Y5O7Hmdk3gK8D3wzb7Uf0jrkDgBVmdiDRazu2uvuxZpYGfmtmy8L2c4Ej3f2d+MHc/Wozu9zdPx3Lvsjdm82sFlhpZg+6+2agHnje3f/RzPYGfhrKbQGeIXpyPcCPgP9w99+Y2WyiV4wcZma3Aq3u/kMAM/tZ7+3CuX8X+I27X29mZxE9xVxERGIUTMm44O7bzGwJcAXQXuJuKz16txlm9jbQFQy9Anwhtt19Hr249y0zWwccSvRutE/Fer0mEb1vrgN4oXcgNYArzOzckJ4VytgM5IlejgtRIPesuzeHut4PHBzWnQIcHr2WDYCJZjahn+MU2+5E4EsA7v64mW0psd4iIuOGgikZT/4TeAm4I5aXIwx3m1kCSMXWZWPpQux7gZ6/nd7vZHKi4bCvu/vS+AozOwnYXkplw7anAPPdvc3M/o/ovXMAGXfPl1BMAjje3TO9yh7qdiIi0ovmTMm4EXpu7qPnUNW7wLyQ/kugeghF/7WZJcI8qv2JXtS7FLjMzKoBzOxgM6svoazOrn2IerO2hEDqUOD4IvusBD5vZpPDpPa/iq1bRjQkSahH1xBiC9BQwna/Ar4c8s4AJpdwDiIi44qCKRlvbgLid/XdThSIvAzMp8Reo17eB14AngQuDb07i4gmmL9kZmuA/6G0nuDbiOZ23Q08BVSZ2evAQuC5/nZw9ybghlCH3xIFiFvD6iuAY8Ik+NeIJp4DPAac2zUBfYDtvgecaGavEg33vV9Kg4iIjCfm3nuEQkQqjZlNcPfW0DP1MLDY3R8e7XqJiIwH6pkS2T1cZ2argDXAO8Ajo1wfEZFxQz1TIiIiImVQz5SIiIhIGRRMiYiIiJRBwZSIiIhIGRRMiYiIiJRBwZSIiIhIGf4fGyMusCRB0pIAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7vCf0C89Oito" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/sklift/__init__.py b/sklift/__init__.py index abeeedb..f0ede3d 100644 --- a/sklift/__init__.py +++ b/sklift/__init__.py @@ -1 +1 @@ -__version__ = '0.4.0' +__version__ = '0.4.1' diff --git a/sklift/datasets/datasets.py b/sklift/datasets/datasets.py index 1cc8a1f..ae5c24b 100644 --- a/sklift/datasets/datasets.py +++ b/sklift/datasets/datasets.py @@ -262,7 +262,7 @@ def fetch_x5(data_home=None, dest_subdir=None, download_if_missing=True): :func:`.fetch_megafon`: Load and return the MegaFon Uplift Competition dataset (classification). """ - url_train = 'https://timds.s3.eu-central-1.amazonaws.com/uplift_train.csv.gz' + url_train = 'https://sklift.s3.eu-west-2.amazonaws.com/uplift_train.csv.gz' file_train = url_train.split('/')[-1] csv_train_path = _get_data(data_home=data_home, url=url_train, dest_subdir=dest_subdir, dest_filename=file_train, @@ -277,7 +277,7 @@ def fetch_x5(data_home=None, dest_subdir=None, download_if_missing=True): train = train.drop([target_col, treatment_col], axis=1) - url_clients = 'https://timds.s3.eu-central-1.amazonaws.com/clients.csv.gz' + url_clients = 'https://sklift.s3.eu-west-2.amazonaws.com/clients.csv.gz' file_clients = url_clients.split('/')[-1] csv_clients_path = _get_data(data_home=data_home, url=url_clients, dest_subdir=dest_subdir, dest_filename=file_clients, @@ -285,7 +285,7 @@ def fetch_x5(data_home=None, dest_subdir=None, download_if_missing=True): clients = pd.read_csv(csv_clients_path) clients_features = list(clients.columns) - url_purchases = 'https://timds.s3.eu-central-1.amazonaws.com/purchases.csv.gz' + url_purchases = 'https://sklift.s3.eu-west-2.amazonaws.com/purchases.csv.gz' file_purchases = url_purchases.split('/')[-1] csv_purchases_path = 
_get_data(data_home=data_home, url=url_purchases, dest_subdir=dest_subdir, dest_filename=file_purchases, diff --git a/sklift/tests/test_datasets.py b/sklift/tests/test_datasets.py index cf38206..a6dc418 100644 --- a/sklift/tests/test_datasets.py +++ b/sklift/tests/test_datasets.py @@ -34,24 +34,23 @@ def test_fetch_lenta(lenta_dataset): assert data.target.shape == lenta_dataset['target.shape'] assert data.treatment.shape == lenta_dataset['treatment.shape'] - -# @pytest.fixture -# def x5_dataset() -> dict: -# data = {'keys': ['data', 'target', 'treatment', 'DESCR', 'feature_names', 'target_name', 'treatment_name'], +#@pytest.fixture +#def x5_dataset() -> dict: +# data = {'keys': ['data', 'target', 'treatment', 'DESCR', 'feature_names', 'target_name', 'treatment_name'], # 'data.keys': ['clients', 'train', 'purchases'], 'clients.shape': (400162, 5), -# 'train.shape': (200039, 1), 'target.shape': (200039,), 'treatment.shape': (200039,)} -# return data -# +# 'train.shape': (200039, 1), 'target.shape': (200039,), 'treatment.shape': (200039,)} +# return data + # -# def test_fetch_x5(x5_dataset): -# data = fetch_x5() -# assert isinstance(data, sklearn.utils.Bunch) -# assert set(data.keys()) == set(x5_dataset['keys']) -# assert set(data.data.keys()) == set(x5_dataset['data.keys']) -# assert data.data.clients.shape == x5_dataset['clients.shape'] -# assert data.data.train.shape == x5_dataset['train.shape'] -# assert data.target.shape == x5_dataset['target.shape'] -# assert data.treatment.shape == x5_dataset['treatment.shape'] +#def test_fetch_x5(x5_dataset): +# data = fetch_x5() +# assert isinstance(data, sklearn.utils.Bunch) +# assert set(data.keys()) == set(x5_dataset['keys']) +# assert set(data.data.keys()) == set(x5_dataset['data.keys']) +# assert data.data.clients.shape == x5_dataset['clients.shape'] +# assert data.data.train.shape == x5_dataset['train.shape'] +# assert data.target.shape == x5_dataset['target.shape'] +# assert data.treatment.shape == 
x5_dataset['treatment.shape'] @pytest.fixture @@ -85,6 +84,14 @@ def test_fetch_criteo10( assert data.target.shape == target_shape assert data.treatment.shape == treatment_shape +@pytest.mark.parametrize( + 'target_col, treatment_col', + [('visit','new_trmnt'), ('new_target','treatment')] + ) +def test_fetch_criteo_errors(target_col, treatment_col): + with pytest.raises(ValueError): + fetch_criteo(target_col=target_col, treatment_col=treatment_col) + @pytest.fixture def hillstrom_dataset() -> dict: @@ -111,6 +118,10 @@ def test_fetch_hillstrom( assert data.target.shape == target_shape assert data.treatment.shape == hillstrom_dataset['treatment.shape'] +def test_fetch_hillstrom_error(): + with pytest.raises(ValueError): + fetch_hillstrom(target_col='new_target') + @pytest.fixture def megafon_dataset() -> dict: diff --git a/sklift/tests/test_metrics.py b/sklift/tests/test_metrics.py index 4d011bf..47fc4f5 100644 --- a/sklift/tests/test_metrics.py +++ b/sklift/tests/test_metrics.py @@ -7,10 +7,11 @@ from sklearn.utils._testing import assert_array_almost_equal +from ..metrics import make_uplift_scorer from ..metrics import uplift_curve, uplift_auc_score, perfect_uplift_curve from ..metrics import qini_curve, qini_auc_score, perfect_qini_curve from ..metrics import (uplift_at_k, response_rate_by_percentile, - weighted_average_uplift, uplift_by_percentile, treatment_balance_curve) + weighted_average_uplift, uplift_by_percentile, treatment_balance_curve, average_squared_deviation) def make_predictions(binary): @@ -221,6 +222,12 @@ def test_perfect_qini_curve_hard(): assert_array_almost_equal(x_actual, np.array([0., 0., 3.])) assert_array_almost_equal(y_actual, np.array([0.0, 0.0, 0.0])) + +def test_perfect_qini_curve_error(): + y_true, uplift, treatment = make_predictions(binary=True) + with pytest.raises(TypeError): + perfect_qini_curve(y_true, treatment, negative_effect=5) + def test_qini_auc_score(): @@ -255,11 +262,33 @@ def test_qini_auc_score(): treatment = [1, 0, 
1] assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), 0.75) +def test_qini_auc_score_error(): + y_true = [1, 0] + uplift = [0.1, 0.3] + treatment = [0, 1] + with pytest.raises(TypeError): + qini_auc_score(y_true, uplift, treatment, negative_effect=5) + def test_uplift_at_k(): y_true, uplift, treatment = make_predictions(binary=True) assert_array_almost_equal(uplift_at_k(y_true, uplift, treatment, strategy='by_group', k=1), np.array([0.])) + #assert_array_almost_equal(uplift_at_k(y_true, uplift, treatment, strategy='overall', k=2), np.array([0.])) + +@pytest.mark.parametrize( + "strategy, k", + [ + ('new_strategy', 1), + ('by_group', -0.5), + ('by_group', '1'), + ('by_group', 2) + ] +) +def test_uplift_at_k_errors(strategy, k): + y_true, uplift, treatment = make_predictions(binary=True) + with pytest.raises(ValueError): + uplift_at_k(y_true, uplift, treatment, strategy, k) @pytest.mark.parametrize( @@ -277,6 +306,19 @@ def test_response_rate_by_percentile(strategy, group, response_rate): assert_array_almost_equal(response_rate_by_percentile(y_true, uplift, treatment, group, strategy, bins=1), response_rate) +@pytest.mark.parametrize( + "strategy, group, bins", + [ + ('new_strategy', 'control', 1), + ('by_group', 'ctrl', 1), + ('by_group', 'control', 0.5), + ('by_group', 'control', 9999) + ] +) +def test_response_rate_by_percentile_errors(strategy, group, bins): + y_true, uplift, treatment = make_predictions(binary=True) + with pytest.raises(ValueError): + response_rate_by_percentile(y_true, uplift, treatment, group=group, strategy=strategy, bins=bins) @pytest.mark.parametrize( "strategy, weighted_average", @@ -289,7 +331,21 @@ def test_weighted_average_uplift(strategy, weighted_average): y_true, uplift, treatment = make_predictions(binary=True) assert_array_almost_equal(weighted_average_uplift(y_true, uplift, treatment, strategy, bins=1), weighted_average) + +@pytest.mark.parametrize( + "strategy, bins", + [ + ('new_strategy', 1), + ('by_group', 
0.5), + ('by_group', 9999) + ] +) +def test_weighted_average_uplift_errors(strategy, bins): + y_true, uplift, treatment = make_predictions(binary=True) + with pytest.raises(ValueError): + weighted_average_uplift(y_true, uplift, treatment, strategy=strategy, bins=bins) + @pytest.mark.parametrize( "strategy, bins, std, total, string_percentiles, data", @@ -307,6 +363,22 @@ def test_uplift_by_percentile(strategy, bins, std, total, string_percentiles, da assert_array_almost_equal( uplift_by_percentile(y_true, uplift, treatment, strategy, bins, std, total, string_percentiles), data) + +@pytest.mark.parametrize( + "strategy, bins, std, total, string_percentiles", + [ + ('new_strategy', 1, True, True, True), + ('by_group', 0.5, True, True, True), + ('by_group', 9999, True, True, True), + ('by_group', 1, 2, True, True), + ('by_group', 1, True, True, 2), + ('by_group', 1, True, 2, True) + ] +) +def test_uplift_by_percentile_errors(strategy, bins, std, total, string_percentiles): + y_true, uplift, treatment = make_predictions(binary=True) + with pytest.raises(ValueError): + uplift_by_percentile(y_true, uplift, treatment, strategy, bins, std, total, string_percentiles) def test_treatment_balance_curve(): @@ -314,4 +386,45 @@ def test_treatment_balance_curve(): idx, balance = treatment_balance_curve(uplift, treatment, winsize=2) assert_array_almost_equal(idx, np.array([1., 100.])) - assert_array_almost_equal(balance, np.array([1., 0.5])) \ No newline at end of file + assert_array_almost_equal(balance, np.array([1., 0.5])) + +@pytest.mark.parametrize( + "strategy", + [ + ('overall'), + ('by_group') + ] +) +def test_average_squared_deviation(strategy): + y_true, uplift, treatment = make_predictions(binary=True) + assert (average_squared_deviation(y_true, uplift, treatment, y_true, uplift, treatment, strategy, bins=1) == 0) + +@pytest.mark.parametrize( + "strategy, bins", + [ + ('new_strategy', 1), + ('by_group', 0.5), + ('by_group', 9999) + ] +) +def 
test_average_squared_deviation_errors(strategy, bins): + y_true, uplift, treatment = make_predictions(binary=True) + with pytest.raises(ValueError): + average_squared_deviation(y_true, uplift, treatment, y_true, uplift, treatment, strategy=strategy, bins=bins) + +def test_metric_name_error(): + with pytest.raises(ValueError): + make_uplift_scorer('new_scorer', [0, 1]) + +def test_make_scorer_error(): + with pytest.raises(TypeError): + make_uplift_scorer('qini_auc_score', []) + + + + + + + + + \ No newline at end of file diff --git a/sklift/tests/test_models.py b/sklift/tests/test_models.py index 2c9943a..1afa939 100644 --- a/sklift/tests/test_models.py +++ b/sklift/tests/test_models.py @@ -1,4 +1,5 @@ import pytest +import numpy as np from sklearn.linear_model import LogisticRegression, LinearRegression from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler @@ -43,3 +44,50 @@ def test_shape_regression(model, random_xy_dataset_regr): assert model.fit(X, y, treat).predict(X).shape[0] == y.shape[0] pipe = Pipeline(steps=[("scaler", StandardScaler()), ("clf", model)]) assert pipe.fit(X, y, clf__treatment=treat).predict(X).shape[0] == y.shape[0] + +@pytest.mark.parametrize( + "model", + [ + SoloModel(LogisticRegression(), method='dummy'), + SoloModel(LogisticRegression(), method='treatment_interaction'), + ] +) +def test_solomodel_fit_error(model): + X, y, treatment = [[1., 0., 0.],[1., 0., 0.],[1., 0., 0.]], [1., 2., 3.], [0., 1., 0.] 
+ with pytest.raises(TypeError): + model.fit(X, y, treatment) + +@pytest.mark.parametrize( + "model", + [ + SoloModel(LogisticRegression(), method='dummy'), + SoloModel(LogisticRegression(), method='treatment_interaction'), + ] +) +def test_solomodel_pred_error(model): + X_train, y_train, treat_train = (np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]]), + np.array([0.0, 0.0, 1.0]), np.array([0.0, 1.0, 1.0])) + model.fit(X_train, y_train, treat_train) + with pytest.raises(TypeError): + model.predict(1) + +@pytest.mark.parametrize("method", ['method']) +def test_solomodel_method_error(method): + with pytest.raises(ValueError): + SoloModel(LogisticRegression(), method=method) + +def test_classtransformation_fit_error(): + X, y, treatment = [[1., 0., 0.],[1., 0., 0.],[1., 0., 0.]], [1., 2., 3.], [0., 1., 0.] + with pytest.raises(ValueError): + ClassTransformation(LogisticRegression()).fit(X, y, treatment) + +@pytest.mark.parametrize("method", ['method']) +def test_twomodels_method_error(method): + with pytest.raises(ValueError): + TwoModels(LinearRegression(), LinearRegression(), method=method) + +def test_same_estimator_error(): + est = LinearRegression() + with pytest.raises(ValueError): + TwoModels(est, est) + diff --git a/sklift/tests/test_viz.py b/sklift/tests/test_viz.py index 165deaf..b4b8157 100644 --- a/sklift/tests/test_viz.py +++ b/sklift/tests/test_viz.py @@ -3,7 +3,7 @@ from numpy.testing import assert_allclose -from ..viz import plot_qini_curve, plot_uplift_curve, plot_uplift_preds, plot_uplift_by_percentile +from ..viz import plot_qini_curve, plot_uplift_curve, plot_uplift_preds, plot_uplift_by_percentile, plot_treatment_balance_curve from ..metrics import qini_curve, perfect_qini_curve, uplift_curve, perfect_uplift_curve from ..viz import UpliftCurveDisplay @@ -51,8 +51,6 @@ def test_plot_qini_curve(random, perfect, negative_effect): assert_allclose(viz.x_perfect, x_perfect) assert_allclose(viz.y_perfect, y_perfect) - import 
matplotlib as mpl - assert isinstance(viz.line_, mpl.lines.Line2D) assert isinstance(viz.ax_, mpl.axes.Axes) assert isinstance(viz.figure_, mpl.figure.Figure) @@ -62,7 +60,8 @@ def test_plot_qini_curve(random, perfect, negative_effect): "qini_auc, estimator_name, expected_label", [ (0.61, None, "plot_qini_curve = 0.61"), - (0.61, "first", "first (plot_qini_curve = 0.61)") + (0.61, "first", "first (plot_qini_curve = 0.61)"), + (None, "None", "None") ] ) def test_default_labels(qini_auc, estimator_name, expected_label): @@ -77,8 +76,6 @@ def test_default_labels(qini_auc, estimator_name, expected_label): assert disp.line_.get_label() == expected_label -from ..viz import plot_uplift_curve -from ..metrics import uplift_curve, perfect_uplift_curve @pytest.mark.parametrize("random", [True, False]) @pytest.mark.parametrize("perfect", [True, False]) @@ -104,8 +101,6 @@ def test_plot_uplift_curve(random, perfect): assert_allclose(viz.x_perfect, x_perfect) assert_allclose(viz.y_perfect, y_perfect) - import matplotlib as mpl - assert isinstance(viz.line_, mpl.lines.Line2D) assert isinstance(viz.ax_, mpl.axes.Axes) assert isinstance(viz.figure_, mpl.figure.Figure) @@ -115,7 +110,8 @@ def test_plot_uplift_curve(random, perfect): "uplift_auc, estimator_name, expected_label", [ (0.75, None, "plot_uplift_curve = 0.75"), - (0.75, "first", "first (plot_uplift_curve = 0.75)") + (0.75, "first", "first (plot_uplift_curve = 0.75)"), + (None, "None", "None") ] ) def test_default_labels(uplift_auc, estimator_name, expected_label): @@ -130,16 +126,19 @@ def test_default_labels(uplift_auc, estimator_name, expected_label): assert disp.line_.get_label() == expected_label + def test_plot_uplift_preds(): trmnt_preds = np.array([1,1,0,1,1,1]) ctrl_preds = np.array([0,1,0,1,0,1]) - + viz = plot_uplift_preds(trmnt_preds, ctrl_preds, log=True, bins=5) - - import matplotlib as mpl + assert isinstance(viz[0], mpl.axes.Axes) assert isinstance(viz[1], mpl.axes.Axes) assert isinstance(viz[2], 
mpl.axes.Axes) + + with pytest.raises(ValueError): + plot_uplift_preds(trmnt_preds, ctrl_preds, log=True, bins=0) def test_plot_uplift_by_percentile(): y_true, uplift, treatment = make_predictions() @@ -157,8 +156,36 @@ def test_plot_uplift_by_percentile(): assert viz[1].get_title() == "Response rate by percentile" assert isinstance(viz[0], mpl.axes.Axes) assert isinstance(viz[1], mpl.axes.Axes) + viz = plot_uplift_by_percentile(y_true, uplift, treatment, strategy='by_group',kind='bar', bins=1, string_percentiles=True) -def plot_treatment_balance_curve(): + assert viz[0].get_title() == "Uplift by percentile\nweighted average uplift = 0.5000" + assert viz[1].get_xlabel() == "Percentile" + assert viz[1].get_title() == "Response rate by percentile" + assert isinstance(viz[0], mpl.axes.Axes) + assert isinstance(viz[1], mpl.axes.Axes) + + viz = plot_uplift_by_percentile(y_true, uplift, treatment, strategy='by_group',kind='line', bins=1, string_percentiles=False) + assert isinstance(viz, mpl.axes.Axes) + + +@pytest.mark.parametrize( + "strategy, kind, bins, string_percentiles", + [ + ("new_strategy", "bar", 1, False), + ("by_group", "new_bar", 1, False), + ("by_group", "bar", 0, False), + ("by_group", "bar", 100, False), + ("by_group", "bar", 1, 5) + + ] +) +def test_plot_uplift_by_percentile_errors(strategy, kind, bins, string_percentiles): + y_true, uplift, treatment = make_predictions() + with pytest.raises(ValueError): + viz = plot_uplift_by_percentile(y_true, uplift, treatment, strategy=strategy, kind=kind, bins=bins, string_percentiles=string_percentiles) + + +def test_plot_treatment_balance_curve(): y_true, uplift, treatment = make_predictions() viz = plot_treatment_balance_curve(uplift, treatment, winsize=0.5) @@ -166,4 +193,9 @@ def plot_treatment_balance_curve(): assert viz.get_title() == "Treatment balance curve" assert viz.get_xlabel() == "Percentage targeted" assert viz.get_ylabel() == "Balance: treatment / (treatment + control)" - assert isinstance(viz, 
mpl.axes.Axes) \ No newline at end of file + assert isinstance(viz, mpl.axes.Axes) + +def test_plot_treatment_balance_errors(): + y_true, uplift, treatment = make_predictions() + with pytest.raises(ValueError): + viz = plot_treatment_balance_curve(uplift, treatment, winsize=5) \ No newline at end of file