feat: add forecast bias metric (#122)
jmoralez authored Sep 13, 2024
1 parent 5dbd994 commit 1e3ae5b
Showing 7 changed files with 143 additions and 15 deletions.
2 changes: 1 addition & 1 deletion nbs/evaluation.ipynb
@@ -228,7 +228,7 @@
" models : list of str, optional (default=None)\n",
" Names of the models to evaluate.\n",
" If `None` will use every column in the dataframe after removing id, time and target.\n",
" train_df :pandas, polars, dask or spark DataFrame, optional (default=None)\n",
" train_df : pandas, polars, dask or spark DataFrame, optional (default=None)\n",
" Training set. Used to evaluate metrics such as `mase`.\n",
" level : list of int, optional (default=None)\n",
" Prediction interval levels. Used to compute losses that rely on quantiles.\n",
100 changes: 100 additions & 0 deletions nbs/losses.ipynb
@@ -574,6 +574,106 @@
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"@_base_docstring\n",
"def bias(\n",
" df: DFType,\n",
" models: List[str],\n",
" id_col: str = 'unique_id',\n",
" target_col: str = 'y',\n",
") -> DFType:\n",
" \"\"\"Forecast estimator bias.\n",
" \n",
" Defined as prediction - actual\"\"\"\n",
" if isinstance(df, pd.DataFrame):\n",
" res = (df[models].sub(df[target_col], axis=0)).groupby(df[id_col], observed=True).mean()\n",
" res.index.name = id_col\n",
" res = res.reset_index()\n",
" else:\n",
" def gen_expr(model):\n",
" return pl.col(model).sub(pl.col(target_col)).alias(model)\n",
"\n",
" res = _pl_agg_expr(df, models, id_col, gen_expr)\n",
" return res"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"---\n",
"\n",
"#### bias\n",
"\n",
"> bias (df:~DFType, models:List[str], id_col:str='unique_id',\n",
"> target_col:str='y')\n",
"\n",
"*Forecast estimator bias.\n",
"\n",
"Defined as prediction - actual*\n",
"\n",
"| | **Type** | **Default** | **Details** |\n",
"| -- | -------- | ----------- | ----------- |\n",
"| df | DFType | | Input dataframe with id, actual values and predictions. |\n",
"| models | List | | Columns that identify the models predictions. |\n",
"| id_col | str | unique_id | Column that identifies each serie. |\n",
"| target_col | str | y | Column that contains the target. |\n",
"| **Returns** | **DFType** | | **dataframe with one row per id and one column per model.** |"
],
"text/plain": [
"---\n",
"\n",
"#### bias\n",
"\n",
"> bias (df:~DFType, models:List[str], id_col:str='unique_id',\n",
"> target_col:str='y')\n",
"\n",
"*Forecast estimator bias.\n",
"\n",
"Defined as prediction - actual*\n",
"\n",
"| | **Type** | **Default** | **Details** |\n",
"| -- | -------- | ----------- | ----------- |\n",
"| df | DFType | | Input dataframe with id, actual values and predictions. |\n",
"| models | List | | Columns that identify the models predictions. |\n",
"| id_col | str | unique_id | Column that identifies each serie. |\n",
"| target_col | str | y | Column that contains the target. |\n",
"| **Returns** | **DFType** | | **dataframe with one row per id and one column per model.** |"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"show_doc(bias, title_level=4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| polars\n",
"pd_vs_pl(\n",
" bias(series, models),\n",
" bias(series_pl, models),\n",
" models,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
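For context, a minimal sketch of calling the new metric on a toy pandas frame. The `unique_id` and `y` column names are the defaults from the signature above; the data values are made up for illustration and are not taken from the repository's test fixtures.

import pandas as pd

from utilsforecast.losses import bias

df = pd.DataFrame(
    {
        "unique_id": ["a", "a", "b", "b"],
        "y": [10.0, 12.0, 5.0, 7.0],
        "model1": [11.0, 13.0, 4.0, 6.0],  # over-forecasts series "a", under-forecasts "b"
        "model2": [10.0, 12.0, 5.0, 7.0],  # matches the actuals, so zero bias
    }
)
# one row per id, one column per model; each value is mean(prediction - actual)
print(bias(df, models=["model1", "model2"]))
#   unique_id  model1  model2
# 0         a     1.0     0.0
# 1         b    -1.0     0.0

Unlike `mae`, errors of opposite sign cancel out, so a value near zero means the forecasts are centered on the actuals, not that they are accurate.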
2 changes: 1 addition & 1 deletion settings.ini
@@ -1,7 +1,7 @@
[DEFAULT]
repo = utilsforecast
lib_name = utilsforecast
version = 0.2.4
version = 0.2.5
min_python = 3.8
license = apache2
black_formatting = True
2 changes: 1 addition & 1 deletion utilsforecast/__init__.py
@@ -1 +1 @@
__version__ = "0.2.4"
__version__ = "0.2.5"
1 change: 1 addition & 0 deletions utilsforecast/_modidx.py
@@ -71,6 +71,7 @@
'utilsforecast.losses': { 'utilsforecast.losses._base_docstring': ('losses.html#_base_docstring', 'utilsforecast/losses.py'),
'utilsforecast.losses._pl_agg_expr': ('losses.html#_pl_agg_expr', 'utilsforecast/losses.py'),
'utilsforecast.losses._zero_to_nan': ('losses.html#_zero_to_nan', 'utilsforecast/losses.py'),
'utilsforecast.losses.bias': ('losses.html#bias', 'utilsforecast/losses.py'),
'utilsforecast.losses.calibration': ('losses.html#calibration', 'utilsforecast/losses.py'),
'utilsforecast.losses.coverage': ('losses.html#coverage', 'utilsforecast/losses.py'),
'utilsforecast.losses.mae': ('losses.html#mae', 'utilsforecast/losses.py'),
2 changes: 1 addition & 1 deletion utilsforecast/evaluation.py
@@ -174,7 +174,7 @@ def evaluate(
models : list of str, optional (default=None)
Names of the models to evaluate.
If `None` will use every column in the dataframe after removing id, time and target.
train_df :pandas, polars, dask or spark DataFrame, optional (default=None)
train_df : pandas, polars, dask or spark DataFrame, optional (default=None)
Training set. Used to evaluate metrics such as `mase`.
level : list of int, optional (default=None)
Prediction interval levels. Used to compute losses that rely on quantiles.
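The new metric is meant to plug into `evaluate` like the existing losses. The sketch below assumes `evaluate` takes loss callables through its `metrics` argument (that argument is not visible in this diff) and reuses the library's default `unique_id`/`ds`/`y` column names.

import pandas as pd

from utilsforecast.evaluation import evaluate
from utilsforecast.losses import bias, mae

df = pd.DataFrame(
    {
        "unique_id": ["a", "a", "b", "b"],
        "ds": pd.to_datetime(["2024-01-01", "2024-01-02"] * 2),
        "y": [10.0, 12.0, 5.0, 7.0],
        "model1": [11.0, 13.0, 4.0, 6.0],
    }
)
# models=None would use every column left after removing id, time and target
# (as documented above); it is passed explicitly here for clarity
print(evaluate(df, metrics=[bias, mae], models=["model1"]))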
49 changes: 38 additions & 11 deletions utilsforecast/losses.py
@@ -1,7 +1,7 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/losses.ipynb.

# %% auto 0
__all__ = ['mae', 'mse', 'rmse', 'mape', 'smape', 'mase', 'rmae', 'quantile_loss', 'mqloss', 'coverage', 'calibration',
__all__ = ['mae', 'mse', 'rmse', 'bias', 'mape', 'smape', 'mase', 'rmae', 'quantile_loss', 'mqloss', 'coverage', 'calibration',
'scaled_crps']

# %% ../nbs/losses.ipynb 3
@@ -141,15 +141,42 @@ def rmse(
res = res.with_columns(*[pl.col(c).pow(0.5) for c in models])
return res

# %% ../nbs/losses.ipynb 30
# %% ../nbs/losses.ipynb 27
@_base_docstring
def bias(
df: DFType,
models: List[str],
id_col: str = "unique_id",
target_col: str = "y",
) -> DFType:
"""Forecast estimator bias.
Defined as prediction - actual"""
if isinstance(df, pd.DataFrame):
res = (
(df[models].sub(df[target_col], axis=0))
.groupby(df[id_col], observed=True)
.mean()
)
res.index.name = id_col
res = res.reset_index()
else:

def gen_expr(model):
return pl.col(model).sub(pl.col(target_col)).alias(model)

res = _pl_agg_expr(df, models, id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 33
def _zero_to_nan(series: Union[pd.Series, "pl.Expr"]) -> Union[pd.Series, "pl.Expr"]:
if isinstance(series, pd.Series):
res = series.replace(0, np.nan)
else:
res = pl.when(series == 0).then(float("nan")).otherwise(series.abs())
return res

# %% ../nbs/losses.ipynb 31
# %% ../nbs/losses.ipynb 34
@_base_docstring
def mape(
df: DFType,
@@ -187,7 +214,7 @@ def gen_expr(model):
res = _pl_agg_expr(df, models, id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 35
# %% ../nbs/losses.ipynb 38
@_base_docstring
def smape(
df: DFType,
@@ -225,7 +252,7 @@ def gen_expr(model):
res = _pl_agg_expr(df, models, id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 41
# %% ../nbs/losses.ipynb 44
def mase(
df: DFType,
models: List[str],
@@ -293,7 +320,7 @@ def gen_expr(model):
res = _pl_agg_expr(full_df, models, id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 46
# %% ../nbs/losses.ipynb 49
def rmae(
df: DFType,
models: List[str],
@@ -347,7 +374,7 @@ def gen_expr(model, baseline) -> pl_Expr:
res = res.select([id_col, *exprs])
return res

# %% ../nbs/losses.ipynb 52
# %% ../nbs/losses.ipynb 55
def quantile_loss(
df: DFType,
models: Dict[str, str],
@@ -409,7 +436,7 @@ def gen_expr(model):
res = _pl_agg_expr(df, list(models.items()), id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 58
# %% ../nbs/losses.ipynb 61
def mqloss(
df: DFType,
models: Dict[str, List[str]],
@@ -468,7 +495,7 @@ def mqloss(
res = ufp.assign_columns(res, model, model_res[model])
return res

# %% ../nbs/losses.ipynb 64
# %% ../nbs/losses.ipynb 67
def coverage(
df: DFType,
models: List[str],
@@ -527,7 +554,7 @@ def gen_expr(model):
res = _pl_agg_expr(df, models, id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 68
# %% ../nbs/losses.ipynb 71
def calibration(
df: DFType,
models: Dict[str, str],
@@ -577,7 +604,7 @@ def gen_expr(model):
res = _pl_agg_expr(df, list(models.items()), id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 72
# %% ../nbs/losses.ipynb 75
def scaled_crps(
df: DFType,
models: Dict[str, List[str]],
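The pandas and polars branches of `bias` above should produce the same result, which is what the notebook's `pd_vs_pl` cell checks. An informal version of that check on the polars side, reusing the same toy values as the pandas sketch earlier:

import polars as pl

from utilsforecast.losses import bias

df_pl = pl.DataFrame(
    {
        "unique_id": ["a", "a", "b", "b"],
        "y": [10.0, 12.0, 5.0, 7.0],
        "model1": [11.0, 13.0, 4.0, 6.0],
    }
)
# expected to match the pandas branch: bias of 1.0 for "a" and -1.0 for "b"
print(bias(df_pl, models=["model1"]))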
