feat: add forecast bias metric #122

Merged · 2 commits · Sep 13, 2024
2 changes: 1 addition & 1 deletion nbs/evaluation.ipynb
@@ -228,7 +228,7 @@
" models : list of str, optional (default=None)\n",
" Names of the models to evaluate.\n",
" If `None` will use every column in the dataframe after removing id, time and target.\n",
" train_df :pandas, polars, dask or spark DataFrame, optional (default=None)\n",
" train_df : pandas, polars, dask or spark DataFrame, optional (default=None)\n",
" Training set. Used to evaluate metrics such as `mase`.\n",
" level : list of int, optional (default=None)\n",
" Prediction interval levels. Used to compute losses that rely on quantiles.\n",
100 changes: 100 additions & 0 deletions nbs/losses.ipynb
@@ -574,6 +574,106 @@
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"@_base_docstring\n",
"def bias(\n",
" df: DFType,\n",
" models: List[str],\n",
" id_col: str = 'unique_id',\n",
" target_col: str = 'y',\n",
") -> DFType:\n",
" \"\"\"Forecast estimator bias.\n",
" \n",
" Defined as prediction - actual\"\"\"\n",
" if isinstance(df, pd.DataFrame):\n",
" res = (df[models].sub(df[target_col], axis=0)).groupby(df[id_col], observed=True).mean()\n",
" res.index.name = id_col\n",
" res = res.reset_index()\n",
" else:\n",
" def gen_expr(model):\n",
" return pl.col(model).sub(pl.col(target_col)).alias(model)\n",
"\n",
" res = _pl_agg_expr(df, models, id_col, gen_expr)\n",
" return res"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"---\n",
"\n",
"#### bias\n",
"\n",
"> bias (df:~DFType, models:List[str], id_col:str='unique_id',\n",
"> target_col:str='y')\n",
"\n",
"*Forecast estimator bias.\n",
"\n",
"Defined as prediction - actual*\n",
"\n",
"| | **Type** | **Default** | **Details** |\n",
"| -- | -------- | ----------- | ----------- |\n",
"| df | DFType | | Input dataframe with id, actual values and predictions. |\n",
"| models | List | | Columns that identify the models predictions. |\n",
"| id_col | str | unique_id | Column that identifies each serie. |\n",
"| target_col | str | y | Column that contains the target. |\n",
"| **Returns** | **DFType** | | **dataframe with one row per id and one column per model.** |"
],
"text/plain": [
"---\n",
"\n",
"#### bias\n",
"\n",
"> bias (df:~DFType, models:List[str], id_col:str='unique_id',\n",
"> target_col:str='y')\n",
"\n",
"*Forecast estimator bias.\n",
"\n",
"Defined as prediction - actual*\n",
"\n",
"| | **Type** | **Default** | **Details** |\n",
"| -- | -------- | ----------- | ----------- |\n",
"| df | DFType | | Input dataframe with id, actual values and predictions. |\n",
"| models | List | | Columns that identify the models predictions. |\n",
"| id_col | str | unique_id | Column that identifies each serie. |\n",
"| target_col | str | y | Column that contains the target. |\n",
"| **Returns** | **DFType** | | **dataframe with one row per id and one column per model.** |"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"show_doc(bias, title_level=4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| polars\n",
"pd_vs_pl(\n",
" bias(series, models),\n",
" bias(series_pl, models),\n",
" models,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
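For context, a minimal usage sketch of the new metric (not part of the diff; it assumes the package is installed with this change and uses the default `unique_id`/`y` column names):

```python
import pandas as pd

from utilsforecast.losses import bias

# toy data: one model's predictions for two series
series = pd.DataFrame(
    {
        "unique_id": ["a", "a", "b", "b"],
        "y": [10.0, 12.0, 5.0, 7.0],
        "model1": [11.0, 13.0, 4.0, 6.0],
    }
)
# bias = mean(prediction - actual) per series:
# +1.0 for "a" (consistent over-forecasting), -1.0 for "b" (under-forecasting)
print(bias(series, models=["model1"]))
```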
2 changes: 1 addition & 1 deletion settings.ini
@@ -1,7 +1,7 @@
[DEFAULT]
repo = utilsforecast
lib_name = utilsforecast
version = 0.2.4
version = 0.2.5
min_python = 3.8
license = apache2
black_formatting = True
2 changes: 1 addition & 1 deletion utilsforecast/__init__.py
@@ -1 +1 @@
__version__ = "0.2.4"
__version__ = "0.2.5"
1 change: 1 addition & 0 deletions utilsforecast/_modidx.py
@@ -71,6 +71,7 @@
'utilsforecast.losses': { 'utilsforecast.losses._base_docstring': ('losses.html#_base_docstring', 'utilsforecast/losses.py'),
'utilsforecast.losses._pl_agg_expr': ('losses.html#_pl_agg_expr', 'utilsforecast/losses.py'),
'utilsforecast.losses._zero_to_nan': ('losses.html#_zero_to_nan', 'utilsforecast/losses.py'),
'utilsforecast.losses.bias': ('losses.html#bias', 'utilsforecast/losses.py'),
'utilsforecast.losses.calibration': ('losses.html#calibration', 'utilsforecast/losses.py'),
'utilsforecast.losses.coverage': ('losses.html#coverage', 'utilsforecast/losses.py'),
'utilsforecast.losses.mae': ('losses.html#mae', 'utilsforecast/losses.py'),
2 changes: 1 addition & 1 deletion utilsforecast/evaluation.py
@@ -174,7 +174,7 @@ def evaluate(
models : list of str, optional (default=None)
Names of the models to evaluate.
If `None` will use every column in the dataframe after removing id, time and target.
train_df :pandas, polars, dask or spark DataFrame, optional (default=None)
train_df : pandas, polars, dask or spark DataFrame, optional (default=None)
Training set. Used to evaluate metrics such as `mase`.
level : list of int, optional (default=None)
Prediction interval levels. Used to compute losses that rely on quantiles.
49 changes: 38 additions & 11 deletions utilsforecast/losses.py
@@ -1,7 +1,7 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/losses.ipynb.

# %% auto 0
__all__ = ['mae', 'mse', 'rmse', 'mape', 'smape', 'mase', 'rmae', 'quantile_loss', 'mqloss', 'coverage', 'calibration',
__all__ = ['mae', 'mse', 'rmse', 'bias', 'mape', 'smape', 'mase', 'rmae', 'quantile_loss', 'mqloss', 'coverage', 'calibration',
'scaled_crps']

# %% ../nbs/losses.ipynb 3
@@ -141,15 +141,42 @@ def rmse(
res = res.with_columns(*[pl.col(c).pow(0.5) for c in models])
return res

# %% ../nbs/losses.ipynb 30
# %% ../nbs/losses.ipynb 27
@_base_docstring
def bias(
df: DFType,
models: List[str],
id_col: str = "unique_id",
target_col: str = "y",
) -> DFType:
"""Forecast estimator bias.

Defined as prediction - actual"""
if isinstance(df, pd.DataFrame):
res = (
(df[models].sub(df[target_col], axis=0))
.groupby(df[id_col], observed=True)
.mean()
)
res.index.name = id_col
res = res.reset_index()
else:

def gen_expr(model):
return pl.col(model).sub(pl.col(target_col)).alias(model)

res = _pl_agg_expr(df, models, id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 33
def _zero_to_nan(series: Union[pd.Series, "pl.Expr"]) -> Union[pd.Series, "pl.Expr"]:
if isinstance(series, pd.Series):
res = series.replace(0, np.nan)
else:
res = pl.when(series == 0).then(float("nan")).otherwise(series.abs())
return res

# %% ../nbs/losses.ipynb 31
# %% ../nbs/losses.ipynb 34
@_base_docstring
def mape(
df: DFType,
@@ -187,7 +214,7 @@ def gen_expr(model):
res = _pl_agg_expr(df, models, id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 35
# %% ../nbs/losses.ipynb 38
@_base_docstring
def smape(
df: DFType,
@@ -225,7 +252,7 @@ def gen_expr(model):
res = _pl_agg_expr(df, models, id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 41
# %% ../nbs/losses.ipynb 44
def mase(
df: DFType,
models: List[str],
@@ -293,7 +320,7 @@ def gen_expr(model):
res = _pl_agg_expr(full_df, models, id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 46
# %% ../nbs/losses.ipynb 49
def rmae(
df: DFType,
models: List[str],
@@ -347,7 +374,7 @@ def gen_expr(model, baseline) -> pl_Expr:
res = res.select([id_col, *exprs])
return res

# %% ../nbs/losses.ipynb 52
# %% ../nbs/losses.ipynb 55
def quantile_loss(
df: DFType,
models: Dict[str, str],
@@ -409,7 +436,7 @@ def gen_expr(model):
res = _pl_agg_expr(df, list(models.items()), id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 58
# %% ../nbs/losses.ipynb 61
def mqloss(
df: DFType,
models: Dict[str, List[str]],
@@ -468,7 +495,7 @@ def mqloss(
res = ufp.assign_columns(res, model, model_res[model])
return res

# %% ../nbs/losses.ipynb 64
# %% ../nbs/losses.ipynb 67
def coverage(
df: DFType,
models: List[str],
@@ -527,7 +554,7 @@ def gen_expr(model):
res = _pl_agg_expr(df, models, id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 68
# %% ../nbs/losses.ipynb 71
def calibration(
df: DFType,
models: Dict[str, str],
@@ -577,7 +604,7 @@ def gen_expr(model):
res = _pl_agg_expr(df, list(models.items()), id_col, gen_expr)
return res

# %% ../nbs/losses.ipynb 72
# %% ../nbs/losses.ipynb 75
def scaled_crps(
df: DFType,
models: Dict[str, List[str]],
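A similar sketch for the polars path (again illustrative, not part of the diff), mirroring the notebook's `pd_vs_pl` consistency check; it should yield the same per-series values as the pandas call above:

```python
import polars as pl

from utilsforecast.losses import bias

series_pl = pl.DataFrame(
    {
        "unique_id": ["a", "a", "b", "b"],
        "y": [10.0, 12.0, 5.0, 7.0],
        "model1": [11.0, 13.0, 4.0, 6.0],
    }
)
# grouped mean of (model1 - y) per unique_id, computed via polars expressions
print(bias(series_pl, models=["model1"]))
```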