Skip to content

Commit

Permalink
support hours in fill_gaps (#73)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmoralez authored Mar 26, 2024
1 parent a094f50 commit c4d6229
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 16 deletions.
18 changes: 10 additions & 8 deletions nbs/preprocessing.ipynb
Original file line number Diff line number Diff line change
@@ -178,14 +178,16 @@
 " return grid.join(df, on=[id_col, time_col], how='left')\n",
 " if isinstance(freq, str):\n",
 " offset = pd.tseries.frequencies.to_offset(freq)\n",
-" if 'min' in freq:\n",
+" if isinstance(offset.base, pd.offsets.Minute):\n",
 " # minutes are represented as 'm' in numpy\n",
-" freq = freq.replace('min', 'm')\n",
-" elif 'B' in freq:\n",
-" # business day\n",
-" if freq != 'B':\n",
+" freq = 'm'\n",
+" elif isinstance(offset.base, pd.offsets.BusinessDay):\n",
+" if offset.n != 1:\n",
 " raise NotImplementedError('Multiple of a business day')\n",
 " freq = 'D'\n",
+" elif isinstance(offset.base, pd.offsets.Hour):\n",
+" # hours are represented as 'h' in numpy\n",
+" freq = 'h'\n",
 " if offset.n > 1:\n",
 " freq = freq.replace(str(offset.n), '')\n",
 " try:\n",
@@ -208,7 +210,7 @@
 " )\n",
 " uids = np.repeat(times_by_id.index, sizes) \n",
 " if isinstance(freq, str):\n",
-" if offset.base.name == 'B':\n",
+" if isinstance(offset.base, pd.offsets.BusinessDay):\n",
 " # data was generated daily, we need to keep only business days \n",
 " bdays = np.is_busday(times)\n",
 " uids = uids[bdays]\n",
@@ -1620,11 +1622,11 @@
 " assert max_dates[0] == expected_end\n",
 "\n",
 "n_periods = 100\n",
-"freqs = ['YE', 'YS', 'ME', 'MS', 'W', 'W-TUE', 'D', 's', 'ms', 1, 2, '20D', '30s', '2YE', '3YS', '30min', 'B']\n",
+"freqs = ['YE', 'YS', 'ME', 'MS', 'W', 'W-TUE', 'D', 's', 'ms', 1, 2, '20D', '30s', '2YE', '3YS', '30min', 'B', '1h']\n",
 "try:\n",
 " pd.tseries.frequencies.to_offset('YE')\n",
 "except ValueError:\n",
-" freqs = [f.replace('YE', 'Y').replace('ME', 'M') for f in freqs if isinstance(f, str)]\n",
+" freqs = [f.replace('YE', 'Y').replace('ME', 'M').replace('h', 'H') for f in freqs if isinstance(f, str)]\n",
 "for freq in freqs:\n",
 " if isinstance(freq, (pd.offsets.BaseOffset, str)): \n",
 " dates = pd.date_range('1900-01-01', periods=n_periods, freq=freq)\n",
Expand Down
2 changes: 1 addition & 1 deletion settings.ini
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
 [DEFAULT]
 repo = utilsforecast
 lib_name = utilsforecast
-version = 0.1.1
+version = 0.1.2
 min_python = 3.8
 license = apache2
 black_formatting = True
Expand Down
2 changes: 1 addition & 1 deletion utilsforecast/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-__version__ = "0.1.1"
+__version__ = "0.1.2"
14 changes: 8 additions & 6 deletions utilsforecast/preprocessing.py
Original file line number Diff line number Diff line change
@@ -124,14 +124,16 @@ def fill_gaps(
 return grid.join(df, on=[id_col, time_col], how="left")
 if isinstance(freq, str):
 offset = pd.tseries.frequencies.to_offset(freq)
-if "min" in freq:
+if isinstance(offset.base, pd.offsets.Minute):
 # minutes are represented as 'm' in numpy
-freq = freq.replace("min", "m")
-elif "B" in freq:
-# business day
-if freq != "B":
+freq = "m"
+elif isinstance(offset.base, pd.offsets.BusinessDay):
+if offset.n != 1:
 raise NotImplementedError("Multiple of a business day")
 freq = "D"
+elif isinstance(offset.base, pd.offsets.Hour):
+# hours are represented as 'h' in numpy
+freq = "h"
 if offset.n > 1:
 freq = freq.replace(str(offset.n), "")
 try:
@@ -152,7 +154,7 @@ def fill_gaps(
 )
 uids = np.repeat(times_by_id.index, sizes)
 if isinstance(freq, str):
-if offset.base.name == "B":
+if isinstance(offset.base, pd.offsets.BusinessDay):
 # data was generated daily, we need to keep only business days
 bdays = np.is_busday(times)
 uids = uids[bdays]
Expand Down

0 comments on commit c4d6229

Please sign in to comment.