diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 44640b3d..1b66b8fb 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -10,16 +10,16 @@ on:
- main
workflow_dispatch:
jobs:
- unit_tests:
- name: ${{ matrix.python_version }} unit tests ${{ matrix.type_of_tests }}
+ tests:
+ name: ${{ matrix.python-version }} ${{ matrix.type_of_tests }} tests  # key is python-version (hyphen), as in the matrix definition
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.11"]
- type_of_tests: ["unit tests", "integration tests"]
+ type_of_tests: ["unit", "integration"]
exclude:
- python-version: "3.8"
- type_of_tests: "integration tests"
+ type_of_tests: "integration"
steps:
- uses: actions/checkout@v3
- name: Set up python ${{ matrix.python-version }}
@@ -40,6 +40,7 @@ jobs:
if: (steps.cache.outputs.cache-hit == 'true') && ( github.event.pull_request.title != 'Automated Latest Dependency Updates')
run: python -m pip install --no-dependencies .
- name: Run unit tests
+ if: ${{ matrix.type_of_tests != 'integration' }}  # must match a matrix.type_of_tests value ("unit" / "integration")
run: make unit-tests
- name: Run integration tests
- if: ${{ matrix.type_of_tests == 'integration tests' }}
+ if: ${{ matrix.type_of_tests == 'integration' }}  # matrix value was renamed from "integration tests"
diff --git a/Examples/chicago_example.ipynb b/Examples/chicago_example.ipynb
deleted file mode 100644
index a368af84..00000000
--- a/Examples/chicago_example.ipynb
+++ /dev/null
@@ -1,1191 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "7efd117b",
- "metadata": {
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "import warnings\n",
- "\n",
- "# warnings.filterwarnings(\"ignore\")\n",
- "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n",
- "\n",
- "import copy\n",
- "import json\n",
- "import pandas as pd\n",
- "import os\n",
- "import sys\n",
- "import featuretools as ft\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "import matplotlib\n",
- "\n",
- "%matplotlib inline\n",
- "\n",
- "sys.path.append(\"../../\")\n",
- "from Trane import trane as trane\n",
- "from datetime import datetime, timedelta"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "8d67ebc9",
- "metadata": {},
- "source": [
- "### Upload of the dataset and metadata"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "f1b249d7",
- "metadata": {},
- "outputs": [],
- "source": [
- "df = pd.read_csv(\"./chicago-bike/data/bike-sampled.csv\", sep=\",\")\n",
- "df[\"date\"] = df[\"date\"].apply(lambda x: datetime.strptime(x, \"%Y-%m-%d\"))\n",
- "df = df.sort_values(by=[\"date\"])\n",
- "df = df.fillna(0)\n",
- "meta = trane.TableMeta(json.loads(open(\"./chicago-bike/data/meta.json\").read()))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "2eef38b2",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " date | \n",
- " hour | \n",
- " usertype | \n",
- " gender | \n",
- " tripduration | \n",
- " temperature | \n",
- " from_station_id | \n",
- " dpcapacity_start | \n",
- " to_station_id | \n",
- " dpcapacity_end | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 103805 | \n",
- " 2017-01-02 | \n",
- " 0 | \n",
- " Subscriber | \n",
- " Male | \n",
- " 10.900000 | \n",
- " 30.0 | \n",
- " 130 | \n",
- " 15.0 | \n",
- " 119 | \n",
- " 19.0 | \n",
- "
\n",
- " \n",
- " 102617 | \n",
- " 2017-01-02 | \n",
- " 15 | \n",
- " Subscriber | \n",
- " Male | \n",
- " 5.766667 | \n",
- " 37.9 | \n",
- " 332 | \n",
- " 19.0 | \n",
- " 153 | \n",
- " 19.0 | \n",
- "
\n",
- " \n",
- " 102618 | \n",
- " 2017-01-02 | \n",
- " 15 | \n",
- " Subscriber | \n",
- " Female | \n",
- " 18.550000 | \n",
- " 37.9 | \n",
- " 5 | \n",
- " 23.0 | \n",
- " 176 | \n",
- " 27.0 | \n",
- "
\n",
- " \n",
- " 102619 | \n",
- " 2017-01-02 | \n",
- " 15 | \n",
- " Subscriber | \n",
- " Male | \n",
- " 7.166667 | \n",
- " 37.9 | \n",
- " 313 | \n",
- " 19.0 | \n",
- " 340 | \n",
- " 15.0 | \n",
- "
\n",
- " \n",
- " 102620 | \n",
- " 2017-01-02 | \n",
- " 15 | \n",
- " Subscriber | \n",
- " Male | \n",
- " 2.883333 | \n",
- " 37.9 | \n",
- " 84 | \n",
- " 19.0 | \n",
- " 133 | \n",
- " 27.0 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 3304 | \n",
- " 2017-01-31 | \n",
- " 10 | \n",
- " Subscriber | \n",
- " Female | \n",
- " 6.966667 | \n",
- " 37.9 | \n",
- " 247 | \n",
- " 19.0 | \n",
- " 247 | \n",
- " 19.0 | \n",
- "
\n",
- " \n",
- " 3305 | \n",
- " 2017-01-31 | \n",
- " 10 | \n",
- " Subscriber | \n",
- " Male | \n",
- " 6.483333 | \n",
- " 37.9 | \n",
- " 425 | \n",
- " 15.0 | \n",
- " 426 | \n",
- " 19.0 | \n",
- "
\n",
- " \n",
- " 3306 | \n",
- " 2017-01-31 | \n",
- " 10 | \n",
- " Subscriber | \n",
- " Female | \n",
- " 8.250000 | \n",
- " 37.9 | \n",
- " 175 | \n",
- " 19.0 | \n",
- " 45 | \n",
- " 15.0 | \n",
- "
\n",
- " \n",
- " 3299 | \n",
- " 2017-01-31 | \n",
- " 10 | \n",
- " Subscriber | \n",
- " Male | \n",
- " 16.266667 | \n",
- " 37.9 | \n",
- " 202 | \n",
- " 15.0 | \n",
- " 317 | \n",
- " 23.0 | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 2017-01-31 | \n",
- " 23 | \n",
- " Subscriber | \n",
- " Male | \n",
- " 3.316667 | \n",
- " 35.1 | \n",
- " 230 | \n",
- " 19.0 | \n",
- " 131 | \n",
- " 15.0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
103806 rows × 10 columns
\n",
- "
"
- ],
- "text/plain": [
- " date hour usertype gender tripduration temperature \\\n",
- "103805 2017-01-02 0 Subscriber Male 10.900000 30.0 \n",
- "102617 2017-01-02 15 Subscriber Male 5.766667 37.9 \n",
- "102618 2017-01-02 15 Subscriber Female 18.550000 37.9 \n",
- "102619 2017-01-02 15 Subscriber Male 7.166667 37.9 \n",
- "102620 2017-01-02 15 Subscriber Male 2.883333 37.9 \n",
- "... ... ... ... ... ... ... \n",
- "3304 2017-01-31 10 Subscriber Female 6.966667 37.9 \n",
- "3305 2017-01-31 10 Subscriber Male 6.483333 37.9 \n",
- "3306 2017-01-31 10 Subscriber Female 8.250000 37.9 \n",
- "3299 2017-01-31 10 Subscriber Male 16.266667 37.9 \n",
- "0 2017-01-31 23 Subscriber Male 3.316667 35.1 \n",
- "\n",
- " from_station_id dpcapacity_start to_station_id dpcapacity_end \n",
- "103805 130 15.0 119 19.0 \n",
- "102617 332 19.0 153 19.0 \n",
- "102618 5 23.0 176 27.0 \n",
- "102619 313 19.0 340 15.0 \n",
- "102620 84 19.0 133 27.0 \n",
- "... ... ... ... ... \n",
- "3304 247 19.0 247 19.0 \n",
- "3305 425 15.0 426 19.0 \n",
- "3306 175 19.0 45 15.0 \n",
- "3299 202 15.0 317 23.0 \n",
- "0 230 19.0 131 15.0 \n",
- "\n",
- "[103806 rows x 10 columns]"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1bbdae66",
- "metadata": {},
- "source": [
- "### Defining entity column, time column and cutoff strategy"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "4916b1ac",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "b3b557696b4e42d09a750ae46984a0ed",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- " 0%| | 0/1702 [00:00, ?it/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Success/Attempt = 735/1702\n"
- ]
- }
- ],
- "source": [
- "entity = \"usertype\"\n",
- "time = \"date\"\n",
- "cutoff = \"1h\"\n",
- "cutoff_base = pd.Timestamp(datetime.strptime(\"2017-01-02\", \"%Y-%m-%d\"))\n",
- "cutoff_end = pd.Timestamp(datetime.strptime(\"2017-01-31\", \"%Y-%m-%d\"))\n",
- "cutoff_strategy = trane.CutoffStrategy(entity, cutoff, cutoff_base, cutoff_end)\n",
- "\n",
- "problem_generator = trane.PredictionProblemGenerator(\n",
- " table_meta=meta, entity_col=entity, cutoff_strategy=cutoff_strategy, time_col=time\n",
- ")\n",
- "\n",
- "problems = problem_generator.generate(df, generate_thresholds=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "cce81137",
- "metadata": {},
- "source": [
- "### Generating prediction problems"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "fe494410",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "9b5c5cc12cf34b2190436f0ab957b94b",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- " 0%| | 0/1702 [00:00, ?it/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Success/Attempt = 735/1702\n"
- ]
- }
- ],
- "source": [
- "problem_generator = trane.PredictionProblemGenerator(\n",
- " table_meta=meta, entity_col=entity, time_col=time, cutoff_strategy=cutoff_strategy\n",
- ")\n",
- "\n",
- "problems = problem_generator.generate(df, generate_thresholds=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "f0bc9b88",
- "metadata": {},
- "source": [
- "### Labeling the prediction tasks"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "afd64f8a",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "For each predict the number of records in next 1h days\n",
- "For each predict the number of records with greater than 13.05 in next 1h days\n",
- "For each predict the number of records with greater than 8.033333333333333 in next 1h days\n",
- "For each predict the number of records with greater than 5.116666666666666 in next 1h days\n",
- "For each predict the number of records with greater than 37.9 in next 1h days\n",
- "For each predict the number of records with greater than 34.0 in next 1h days\n",
- "For each predict the number of records with greater than 26.1 in next 1h days\n",
- "For each predict the number of records with greater than 27.0 in next 1h days\n",
- "For each predict the number of records with greater than 19.0 in next 1h days\n",
- "For each predict the number of records with greater than 15.0 in next 1h days\n",
- "For each predict the number of records with greater than 27.0 in next 1h days\n",
- "For each predict the number of records with greater than 19.0 in next 1h days\n",
- "For each predict the number of records with greater than 15.0 in next 1h days\n",
- "For each predict the number of records with equal to 17 in next 1h days\n",
- "For each predict the number of records with equal to 8 in next 1h days\n",
- "For each predict the number of records with equal to 16 in next 1h days\n",
- "For each predict the number of records with equal to Male in next 1h days\n",
- "For each predict the number of records with equal to Female in next 1h days\n",
- "For each predict the number of records with not equal to 17 in next 1h days\n",
- "For each predict the number of records with not equal to 8 in next 1h days\n",
- "For each predict the number of records with not equal to 16 in next 1h days\n",
- "For each predict the number of records with not equal to Male in next 1h days\n",
- "For each predict the number of records with not equal to Female in next 1h days\n",
- "For each predict the number of records with less than 5.15 in next 1h days\n",
- "For each predict the number of records with less than 8.1 in next 1h days\n",
- "For each predict the number of records with less than 12.75 in next 1h days\n",
- "For each predict the number of records with less than 27.0 in next 1h days\n",
- "For each predict the number of records with less than 35.1 in next 1h days\n",
- "For each predict the number of records with less than 39.0 in next 1h days\n",
- "For each predict the number of records with less than 16.0 in next 1h days\n",
- "For each predict the number of records with less than 20.0 in next 1h days\n",
- "For each predict the number of records with less than 28.0 in next 1h days\n",
- "For each predict the number of records with less than 16.0 in next 1h days\n",
- "For each predict the number of records with less than 20.0 in next 1h days\n",
- "For each predict the number of records with less than 28.0 in next 1h days\n",
- "For each predict the total in all related records in next 1h days\n",
- "For each predict the total in all related records in next 1h days\n",
- "For each predict the total in all related records in next 1h days\n",
- "For each predict the total in all related records in next 1h days\n",
- "For each predict the total in all related records with greater than 12.116666666666667 in next 1h days\n",
- "For each predict the total in all related records with greater than 8.25 in next 1h days\n",
- "For each predict the total in all related records with greater than 5.1 in next 1h days\n",
- "For each predict the total in all related records with greater than 12.383333333333333 in next 1h days\n",
- "For each predict the total in all related records with greater than 7.833333333333332 in next 1h days\n",
- "For each predict the total in all related records with greater than 5.183333333333334 in next 1h days\n",
- "For each predict the total in all related records with greater than 12.166666666666664 in next 1h days\n",
- "For each predict the total in all related records with greater than 8.116666666666667 in next 1h days\n",
- "For each predict the total in all related records with greater than 5.15 in next 1h days\n",
- "For each predict the total in all related records with greater than 12.833333333333336 in next 1h days\n",
- "For each predict the total in all related records with greater than 7.95 in next 1h days\n",
- "For each predict the total in all related records with greater than 5.216666666666667 in next 1h days\n",
- "For each predict the total in all related records with greater than 37.9 in next 1h days\n",
- "For each predict the total in all related records with greater than 35.1 in next 1h days\n",
- "For each predict the total in all related records with greater than 26.6 in next 1h days\n",
- "For each predict the total in all related records with greater than 37.9 in next 1h days\n",
- "For each predict the total in all related records with greater than 34.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 26.6 in next 1h days\n",
- "For each predict the total in all related records with greater than 37.9 in next 1h days\n",
- "For each predict the total in all related records with greater than 34.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 26.6 in next 1h days\n",
- "For each predict the total in all related records with greater than 37.9 in next 1h days\n",
- "For each predict the total in all related records with greater than 34.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 26.6 in next 1h days\n",
- "For each predict the total in all related records with greater than 23.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 19.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 15.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 27.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 19.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 15.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 27.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 19.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 15.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 27.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 19.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 15.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 27.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 19.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 15.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 27.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 19.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 15.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 27.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 19.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 15.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 27.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 19.0 in next 1h days\n",
- "For each predict the total in all related records with greater than 15.0 in next 1h days\n",
- "For each predict the total in all related records with equal to 17 in next 1h days\n",
- "For each predict the total in all related records with equal to 8 in next 1h days\n",
- "For each predict the total in all related records with equal to 16 in next 1h days\n",
- "For each predict the total in all related records with equal to 17 in next 1h days\n",
- "For each predict the total in all related records with equal to 8 in next 1h days\n",
- "For each predict the total in all related records with equal to 16 in next 1h days\n",
- "For each predict the total in all related records with equal to 17 in next 1h days\n",
- "For each predict the total in all related records with equal to 8 in next 1h days\n",
- "For each predict the total in all related records with equal to 16 in next 1h days\n",
- "For each predict the total in all related records with equal to 17 in next 1h days\n",
- "For each predict the total in all related records with