address deprecation warnings (#69)

Nixtla · Mar 6, 2024 · a66dcea
1 parent 081d5a4
commit a66dcea
Show file tree

Hide file tree

Showing 2 changed files with 33 additions and 22 deletions.
diff --git a/nbs/preprocessing.ipynb b/nbs/preprocessing.ipynb
@@ -35,7 +35,8 @@
     "import pandas as pd\n",
     "\n",
     "from utilsforecast.compat import DataFrame, pl, pl_DataFrame, pl_Series\n",
-    "from utilsforecast.processing import group_by, repeat"
+    "from utilsforecast.processing import group_by, repeat\n",
+    "from utilsforecast.validation import _is_int_dtype"
    ]
   },
   {
@@ -156,7 +157,7 @@
     "        starts = _determine_bound_pl(start, times_by_id, 'min')\n",
     "        ends = _determine_bound_pl(end, times_by_id, 'max')\n",
     "        grid = pl_DataFrame({id_col: times_by_id[id_col]})\n",
-    "        if starts.is_integer():\n",
+    "        if _is_int_dtype(starts):\n",
     "            grid = grid.with_columns(\n",
     "                pl.int_ranges(starts, ends + freq, step=freq, eager=True).alias(time_col)\n",
     "            )\n",
@@ -186,8 +187,10 @@
     "                raise NotImplementedError('Multiple of a business day')\n",
     "            freq = 'D'\n",
     "        if offset.n > 1:\n",
-    "            freq = freq.replace(str(offset.n), '')        \n",
-    "        if not hasattr(offset, 'delta'):\n",
+    "            freq = freq.replace(str(offset.n), '')\n",
+    "        try:\n",
+    "            pd.Timedelta(offset)\n",
+    "        except ValueError:\n",
     "            # irregular freq, try using first letter of abbreviation\n",
     "            # such as MS = 'Month Start' -> 'M', YS = 'Year Start' -> 'Y'\n",
     "            freq = freq[0]\n",
@@ -241,7 +244,7 @@
       "text/markdown": [
        "---\n",
        "\n",
-       "[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/preprocessing.py#L55){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/preprocessing.py#L56){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "### fill_gaps\n",
        "\n",
@@ -267,7 +270,7 @@
       "text/plain": [
        "---\n",
        "\n",
-       "[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/preprocessing.py#L55){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/Nixtla/utilsforecast/blob/main/utilsforecast/preprocessing.py#L56){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "### fill_gaps\n",
        "\n",
@@ -1238,8 +1241,8 @@
      "data": {
       "text/html": [
        "<div><style>\n",
-       ".dataframe > thead > tr > th,\n",
-       ".dataframe > tbody > tr > td {\n",
+       ".dataframe > thead > tr,\n",
+       ".dataframe > tbody > tr {\n",
        "  text-align: right;\n",
        "  white-space: pre-wrap;\n",
        "}\n",
@@ -1290,8 +1293,8 @@
      "data": {
       "text/html": [
        "<div><style>\n",
-       ".dataframe > thead > tr > th,\n",
-       ".dataframe > tbody > tr > td {\n",
+       ".dataframe > thead > tr,\n",
+       ".dataframe > tbody > tr {\n",
        "  text-align: right;\n",
        "  white-space: pre-wrap;\n",
        "}\n",
@@ -1342,8 +1345,8 @@
      "data": {
       "text/html": [
        "<div><style>\n",
-       ".dataframe > thead > tr > th,\n",
-       ".dataframe > tbody > tr > td {\n",
+       ".dataframe > thead > tr,\n",
+       ".dataframe > tbody > tr {\n",
        "  text-align: right;\n",
        "  white-space: pre-wrap;\n",
        "}\n",
@@ -1394,8 +1397,8 @@
      "data": {
       "text/html": [
        "<div><style>\n",
-       ".dataframe > thead > tr > th,\n",
-       ".dataframe > tbody > tr > td {\n",
+       ".dataframe > thead > tr,\n",
+       ".dataframe > tbody > tr {\n",
        "  text-align: right;\n",
        "  white-space: pre-wrap;\n",
        "}\n",
@@ -1446,8 +1449,8 @@
      "data": {
       "text/html": [
        "<div><style>\n",
-       ".dataframe > thead > tr > th,\n",
-       ".dataframe > tbody > tr > td {\n",
+       ".dataframe > thead > tr,\n",
+       ".dataframe > tbody > tr {\n",
        "  text-align: right;\n",
        "  white-space: pre-wrap;\n",
        "}\n",
@@ -1496,8 +1499,8 @@
      "data": {
       "text/html": [
        "<div><style>\n",
-       ".dataframe > thead > tr > th,\n",
-       ".dataframe > tbody > tr > td {\n",
+       ".dataframe > thead > tr,\n",
+       ".dataframe > tbody > tr {\n",
        "  text-align: right;\n",
        "  white-space: pre-wrap;\n",
        "}\n",
@@ -1617,8 +1620,13 @@
     "        assert max_dates[0] == expected_end\n",
     "\n",
     "n_periods = 100\n",
-    "for freq in ['Y', 'YS', 'M', 'MS', 'W', 'W-TUE', 'D', 's', 'ms', 1, 2, '20D', '30s', '2Y', '3YS', '30min', 'B']:\n",
-    "    if isinstance(freq, str):         \n",
+    "freqs = ['YE', 'YS', 'ME', 'MS', 'W', 'W-TUE', 'D', 's', 'ms', 1, 2, '20D', '30s', '2YE', '3YS', '30min', 'B']\n",
+    "try:\n",
+    "    pd.tseries.frequencies.to_offset('YE')\n",
+    "except ValueError:\n",
+    "    freqs = [f.replace('YE', 'Y').replace('ME', 'M') if isinstance(f, str) else f for f in freqs]\n",
+    "for freq in freqs:\n",
+    "    if isinstance(freq, (pd.offsets.BaseOffset, str)):         \n",
     "        dates = pd.date_range('1900-01-01', periods=n_periods, freq=freq)\n",
     "        offset = pd.tseries.frequencies.to_offset(freq)\n",
     "    else:\n",

diff --git a/utilsforecast/preprocessing.py b/utilsforecast/preprocessing.py
@@ -13,6 +13,7 @@
 
 from .compat import DataFrame, pl, pl_DataFrame, pl_Series
 from .processing import group_by, repeat
+from .validation import _is_int_dtype
 
 # %% ../nbs/preprocessing.ipynb 4
 def _determine_bound(bound, freq, times_by_id, agg) -> np.ndarray:
@@ -100,7 +101,7 @@ def fill_gaps(
         starts = _determine_bound_pl(start, times_by_id, "min")
         ends = _determine_bound_pl(end, times_by_id, "max")
         grid = pl_DataFrame({id_col: times_by_id[id_col]})
-        if starts.is_integer():
+        if _is_int_dtype(starts):
             grid = grid.with_columns(
                 pl.int_ranges(starts, ends + freq, step=freq, eager=True).alias(
                     time_col
@@ -133,7 +134,9 @@ def fill_gaps(
             freq = "D"
         if offset.n > 1:
             freq = freq.replace(str(offset.n), "")
-        if not hasattr(offset, "delta"):
+        try:
+            pd.Timedelta(offset)
+        except ValueError:
             # irregular freq, try using first letter of abbreviation
             # such as MS = 'Month Start' -> 'M', YS = 'Year Start' -> 'Y'
             freq = freq[0]