[Minor] Make LaggedRegressorsConfig a dataclass and Clean up model config #1640

Merged · 4 commits · Aug 30, 2024
14 changes: 9 additions & 5 deletions neuralprophet/configure.py
@@ -22,7 +22,6 @@

@dataclass
class Model:
lagged_reg_layers: Optional[List[int]]
quantiles: Optional[List[float]] = None

def setup_quantiles(self):
@@ -346,7 +345,7 @@
log.error("Invalid growth for global_local mode '{}'. Set to 'global'".format(self.trend_global_local))
self.trend_global_local = "global"

if self.trend_local_reg < 0:
[Check failure (GitHub Actions / pyright) on line 348 in neuralprophet/configure.py: Operator "<" not supported for "None" (reportOptionalOperand)]
log.error("Invalid negative trend_local_reg '{}'. Set to False".format(self.trend_local_reg))
self.trend_local_reg = False
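These Optional-operand failures (here and at line 433 below) occur because pyright cannot prove the attribute is non-None before the comparison. A minimal narrowing sketch that would satisfy the checker, assuming the attributes are declared Optional (the declared types are not visible in this diff):

    # Hypothetical fix, not part of this PR: narrow away None before comparing.
    if self.trend_local_reg is not None and self.trend_local_reg < 0:
        log.error("Invalid negative trend_local_reg '{}'. Set to False".format(self.trend_local_reg))
        self.trend_local_reg = False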

@@ -395,13 +394,13 @@
log.error("Invalid global_local mode '{}'. Set to 'global'".format(self.global_local))
self.global_local = "global"

self.periods = OrderedDict(
[Check failure (GitHub Actions / pyright) on line 397 in neuralprophet/configure.py: No overloads for "__init__" match the provided arguments (reportCallIssue)]
{
[Check failure (GitHub Actions / pyright) on line 398 in neuralprophet/configure.py: Argument of type "dict[str, Season]" cannot be assigned to parameter "iterable" of type "Iterable[list[bytes]]" in function "__init__" (reportArgumentType)]
"yearly": Season(
resolution=6,
period=365.25,
arg=self.yearly_arg,
global_local=(
[Check failure (GitHub Actions / pyright) on line 403 in neuralprophet/configure.py: Argument of type "SeasonGlobalLocalMode | Literal['auto']" cannot be assigned to parameter "global_local" of type "SeasonGlobalLocalMode" in function "__init__" (reportArgumentType)]
self.yearly_global_local
if self.yearly_global_local in ["global", "local"]
else self.global_local
@@ -412,7 +411,7 @@
resolution=3,
period=7,
arg=self.weekly_arg,
global_local=(
[Check failure (GitHub Actions / pyright) on line 414 in neuralprophet/configure.py: Argument of type "SeasonGlobalLocalMode | Literal['auto']" cannot be assigned to parameter "global_local" of type "SeasonGlobalLocalMode" in function "__init__" (reportArgumentType)]
self.weekly_global_local
if self.weekly_global_local in ["global", "local"]
else self.global_local
@@ -423,7 +422,7 @@
resolution=6,
period=1,
arg=self.daily_arg,
global_local=(
[Check failure (GitHub Actions / pyright) on line 425 in neuralprophet/configure.py: Argument of type "SeasonGlobalLocalMode | Literal['auto']" cannot be assigned to parameter "global_local" of type "SeasonGlobalLocalMode" in function "__init__" (reportArgumentType)]
self.daily_global_local if self.daily_global_local in ["global", "local"] else self.global_local
),
condition_name=None,
@@ -431,7 +430,7 @@
}
)
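The recurring SeasonGlobalLocalMode failures above arise because a membership test against a list (`in ["global", "local"]`) does not narrow the declared `SeasonGlobalLocalMode | Literal['auto']` union. One way to appease pyright, sketched under the assumption that an explicit cast is acceptable to the project:

    from typing import cast

    # Hypothetical fix: the runtime check already guarantees the invariant,
    # so a cast only documents it for the type checker.
    resolved = (
        self.yearly_global_local
        if self.yearly_global_local in ("global", "local")
        else self.global_local
    )
    yearly_mode = cast(SeasonGlobalLocalMode, resolved)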

assert self.seasonality_local_reg >= 0, "Invalid seasonality_local_reg '{}'.".format(self.seasonality_local_reg)
[Check failure (GitHub Actions / pyright) on line 433 in neuralprophet/configure.py: Operator ">=" not supported for "None" (reportOptionalOperand)]

if self.seasonality_local_reg is True:
log.warning("seasonality_local_reg = True. Default seasonality_local_reg value set to 1")
@@ -449,7 +448,7 @@
resolution=resolution,
period=period,
arg=arg,
global_local=global_local if global_local in ["global", "local"] else self.global_local,
[Check failure (GitHub Actions / pyright) on line 451 in neuralprophet/configure.py: Argument of type "str" cannot be assigned to parameter "global_local" of type "SeasonGlobalLocalMode" in function "__init__"; "str" is not assignable to "Literal['global']", "Literal['local']", or "Literal['glocal']" (reportArgumentType)]
condition_name=condition_name,
)

@@ -499,15 +498,21 @@
as_scalar: bool
normalize: Union[bool, str]
n_lags: int
lagged_reg_layers: Optional[List[int]]

def __post_init__(self):
if self.reg_lambda is not None:
if self.reg_lambda < 0:
raise ValueError("regularization must be >= 0")


ConfigLaggedRegressors = OrderedDictType[str, LaggedRegressor]
@dataclass
class ConfigLaggedRegressors:
layers: Optional[List[int]] = field(default_factory=list)
# List of hidden layers for shared NN across LaggedReg. The default value is ``[]``, which initializes no hidden layers.
regressors: OrderedDict[LaggedRegressor] = field(init=False)
[Check failure (GitHub Actions / pyright) on line 512 in neuralprophet/configure.py: Too few type arguments provided for "OrderedDict"; expected 2 but received 1 (reportInvalidTypeArguments)]

def __post_init__(self):
self.regressors = None


@dataclass
@@ -521,8 +526,7 @@
class ConfigFutureRegressors:
model: str
regressors_layers: Optional[List[int]]

regressors: OrderedDict = field(init=False) # contains RegressorConfig objects
regressors: OrderedDict = field(init=False) # contains Regressor objects

def __post_init__(self):
self.regressors = None
22 changes: 11 additions & 11 deletions neuralprophet/data/process.py
@@ -101,8 +101,8 @@ def _reshape_raw_predictions_to_forecst_df(
lagged_components = [
"ar",
]
if config_lagged_regressors is not None:
for name in config_lagged_regressors.keys():
if config_lagged_regressors is not None and config_lagged_regressors.regressors is not None:
for name in config_lagged_regressors.regressors.keys():
lagged_components.append(f"lagged_regressor_{name}")
for comp in lagged_components:
if comp in components:
@@ -362,8 +362,8 @@ def _validate_column_name(
if seasons and config_seasonality is not None:
if name in config_seasonality.periods:
raise ValueError(f"Name {name!r} already used for a seasonality.")
if covariates and config_lagged_regressors is not None:
if name in config_lagged_regressors:
if covariates and config_lagged_regressors is not None and config_lagged_regressors.regressors is not None:
if name in config_lagged_regressors.regressors.keys():
raise ValueError(f"Name {name!r} already used for an added covariate.")
if regressors and config_regressors.regressors is not None:
if name in config_regressors.regressors.keys():
@@ -410,7 +410,7 @@ def _check_dataframe(
df, regressors_to_remove, lag_regressors_to_remove = df_utils.check_dataframe(
df=df,
check_y=check_y,
covariates=model.config_lagged_regressors if exogenous else None,
covariates=model.config_lagged_regressors.regressors if exogenous else None,
regressors=model.config_regressors.regressors if exogenous else None,
events=model.config_events if exogenous else None,
seasonalities=model.config_seasonality if exogenous else None,
@@ -423,12 +423,12 @@
model.config_regressors.regressors.pop(reg)
if model.config_regressors.regressors is not None and len(model.config_regressors.regressors) == 0:
model.config_regressors.regressors = None
if model.config_lagged_regressors is not None:
if model.config_lagged_regressors is not None and model.config_lagged_regressors.regressors is not None:
for reg in lag_regressors_to_remove:
log.warning(f"Removing lagged regressor {reg} because it is not present in the data.")
model.config_lagged_regressors.pop(reg)
if len(model.config_lagged_regressors) == 0:
model.config_lagged_regressors = None
model.config_lagged_regressors.regressors.pop(reg)
if len(model.config_lagged_regressors.regressors) == 0:
model.config_lagged_regressors.regressors = None
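The paired `is not None` guard on the config and its `regressors` field now repeats across process.py, df_utils.py, plot_utils.py, time_dataset.py, and time_net.py. A hypothetical helper (not part of this PR) could centralize the check:

    def has_lagged_regressors(config) -> bool:
        # True only when the config exists and regressors were registered.
        return config is not None and config.regressors is not None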
return df


@@ -528,8 +528,8 @@ def _handle_missing_data(
data_columns = []
if n_lags > 0:
data_columns.append("y")
if config_lagged_regressors is not None:
data_columns.extend(config_lagged_regressors.keys())
if config_lagged_regressors is not None and config_lagged_regressors.regressors is not None:
data_columns.extend(config_lagged_regressors.regressors.keys())
if config_regressors is not None and config_regressors.regressors is not None:
data_columns.extend(config_regressors.regressors.keys())
if config_events is not None:
12 changes: 6 additions & 6 deletions neuralprophet/df_utils.py
@@ -103,11 +103,11 @@ def get_max_num_lags(n_lags: int, config_lagged_regressors: Optional[ConfigLagge
int
Maximum number of lags between the autoregression lags and the covariates lags.
"""
if config_lagged_regressors is not None:
if config_lagged_regressors is not None and config_lagged_regressors.regressors is not None:
# log.debug("config_lagged_regressors exists")
return max([n_lags] + [val.n_lags for key, val in config_lagged_regressors.items()])
return max([n_lags] + [val.n_lags for key, val in config_lagged_regressors.regressors.items()])
else:
# log.debug("config_lagged_regressors does not exist")
# log.debug("config_lagged_regressors.regressors does not exist")
return n_lags


@@ -203,11 +203,11 @@ def data_params_definition(
norm_type=normalize,
)

if config_lagged_regressors is not None:
for covar in config_lagged_regressors.keys():
if config_lagged_regressors is not None and config_lagged_regressors.regressors is not None:
for covar in config_lagged_regressors.regressors.keys():
if covar not in df.columns:
raise ValueError(f"Lagged regressor {covar} not found in DataFrame.")
norm_type_lag = config_lagged_regressors[covar].normalize
norm_type_lag = config_lagged_regressors.regressors[covar].normalize
if local_run_despite_global:
if len(df[covar].unique()) < 2:
norm_type_lag = "soft"
20 changes: 9 additions & 11 deletions neuralprophet/forecaster.py
@@ -503,7 +503,6 @@ def __init__(

# Model
self.config_model = configure.Model(
lagged_reg_layers=lagged_reg_layers,
quantiles=quantiles,
)
self.config_model.setup_quantiles()
@@ -554,8 +553,11 @@ def __init__(
self.config_events: Optional[configure.ConfigEvents] = None
self.config_country_holidays: Optional[configure.ConfigCountryHolidays] = None

# Extra Regressors
self.config_lagged_regressors: Optional[configure.ConfigLaggedRegressors] = None
# Lagged Regressors
self.config_lagged_regressors = configure.ConfigLaggedRegressors(
layers=lagged_reg_layers,
)
# Future Regressors
self.config_regressors = configure.ConfigFutureRegressors(
model=future_regressors_model,
regressors_layers=future_regressors_layers,
@@ -604,8 +606,6 @@ def add_lagged_regressor(
optional, specify whether this regressor will be normalized prior to fitting.
if ``auto``, binary regressors will not be normalized.
"""
lagged_reg_layers = self.config_model.lagged_reg_layers

if n_lags == 0 or n_lags is None:
raise ValueError(
f"Received n_lags {n_lags} for lagged regressor {names}. Please set n_lags > 0 or use options 'scalar' or 'auto'."
@@ -640,14 +640,13 @@
config_lagged_regressors=self.config_lagged_regressors,
config_regressors=self.config_regressors,
)
if self.config_lagged_regressors is None:
self.config_lagged_regressors = OrderedDict()
self.config_lagged_regressors[name] = configure.LaggedRegressor(
if self.config_lagged_regressors.regressors is None:
self.config_lagged_regressors.regressors = OrderedDict()
self.config_lagged_regressors.regressors[name] = configure.LaggedRegressor(
reg_lambda=regularization,
normalize=normalize,
as_scalar=only_last_value,
n_lags=n_lags,
lagged_reg_layers=lagged_reg_layers,
)
return self
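At the API surface the behaviour is unchanged; only the storage moved. A minimal usage sketch (the column name and layer sizes are illustrative):

    from neuralprophet import NeuralProphet

    m = NeuralProphet(n_lags=12, lagged_reg_layers=[32, 16])
    m = m.add_lagged_regressor(names="temperature", n_lags=24)
    # The shared hidden layers now live on the config object itself,
    # and individual regressors sit under .regressors:
    assert m.config_lagged_regressors.layers == [32, 16]
    assert "temperature" in m.config_lagged_regressors.regressors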

@@ -1036,7 +1035,7 @@ def fit(
self.config_events,
self.config_country_holidays,
self.config_trend,
self.config_lagged_regressors,
self.config_lagged_regressors.regressors,
]
)
if reg_enabled:
@@ -2675,7 +2674,6 @@ def _init_model(self):
n_lags=self.n_lags,
max_lags=self.max_lags,
ar_layers=self.config_ar.ar_layers,
lagged_reg_layers=self.config_model.lagged_reg_layers,
metrics=self.metrics,
id_list=self.id_list,
num_trends_modelled=self.num_trends_modelled,
17 changes: 9 additions & 8 deletions neuralprophet/plot_utils.py
@@ -190,9 +190,10 @@ def check_if_configured(m, components, error_flag=False):  # move to utils
if "autoregression" in components and not m.config_ar.n_lags > 0:
components.remove("autoregression")
invalid_components.append("autoregression")
if "lagged_regressors" in components and m.config_lagged_regressors is None:
components.remove("lagged_regressors")
invalid_components.append("lagged_regressors")
if "lagged_regressors" in components:
if m.config_lagged_regressors is None or m.config_lagged_regressors.regressors is None:
components.remove("lagged_regressors")
invalid_components.append("lagged_regressors")
if "events" in components and (m.config_events is None and m.config_country_holidays is None):
components.remove("events")
invalid_components.append("events")
@@ -209,7 +210,7 @@ def check_if_configured(m, components, error_flag=False):  # move to utils
return components


def get_valid_configuration( # move to utils
def get_valid_configuration(
m, components=None, df_name=None, valid_set=None, validator=None, forecast_in_focus=None, quantile=0.5
):
"""Validate and adapt the selected components to be plotted.
@@ -382,7 +383,7 @@ def get_valid_configuration(
if "lagged_regressors" in components:
if validator == "plot_components":
if forecast_in_focus is None:
for name in m.config_lagged_regressors.keys():
for name in m.config_lagged_regressors.regressors.keys():
plot_components.append(
{
"plot_name": f'Lagged Regressor "{name}"',
Expand All @@ -392,16 +393,16 @@ def get_valid_configuration( # move to utils
}
)
else:
for name in m.config_lagged_regressors.keys():
for name in m.config_lagged_regressors.regressors.keys():
plot_components.append(
{
"plot_name": f'Lagged Regressor "{name}" ({forecast_in_focus})-ahead',
"comp_name": f"lagged_regressor_{name}{forecast_in_focus}",
}
)
elif validator == "plot_parameters":
for name in m.config_lagged_regressors.keys():
if m.config_lagged_regressors[name].as_scalar:
for name in m.config_lagged_regressors.regressors.keys():
if m.config_lagged_regressors.regressors[name].as_scalar:
lagged_scalar_regressors.append((name, m.model.get_covar_weights()[name].detach().numpy()))
else:
plot_components.append(
12 changes: 8 additions & 4 deletions neuralprophet/time_dataset.py
@@ -389,7 +389,9 @@ def tabularize_univariate_datetime_single_index(
inputs["lags"] = lags

# COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS
if config_lagged_regressors is not None: # and max_lags > 0:
if (
config_lagged_regressors is not None and config_lagged_regressors.regressors is not None
): # and max_lags > 0:
inputs["covariates"] = self.get_sample_lagged_regressors(
df_tensors=df_tensors, origin_index=origin_index, config_lagged_regressors=config_lagged_regressors
)
@@ -651,9 +653,11 @@ def create_nan_mask(
valid_origins &= y_lags_valid

# LAGGED REGRESSORS
if config_lagged_regressors is not None: # and max_lags > 0:
if (
config_lagged_regressors is not None and config_lagged_regressors.regressors is not None
): # and max_lags > 0:
reg_lags_valid = torch.ones(tensor_length, dtype=torch.bool)
for name, lagged_regressor in config_lagged_regressors.items():
for name, lagged_regressor in config_lagged_regressors.regressors.items():
n_reg_lags = lagged_regressor.n_lags
if n_reg_lags > 0:
# boolean vector, starting at origin_index = n_lags -1
@@ -724,7 +728,7 @@ def get_sample_targets(self, df_tensors, origin_index, n_forecasts, max_lags, pr
def get_sample_lagged_regressors(self, df_tensors, origin_index, config_lagged_regressors):
lagged_regressors = OrderedDict({})
# Future TODO: optimize this computation for many lagged_regressors
for name, lagged_regressor in config_lagged_regressors.items():
for name, lagged_regressor in config_lagged_regressors.regressors.items():
covar_lags = lagged_regressor.n_lags
assert covar_lags > 0
# Indexing tensors instead of DataFrame
34 changes: 14 additions & 20 deletions neuralprophet/time_net.py
@@ -1,5 +1,5 @@
import logging
import math
[Check failure (GitHub Actions / flake8) on line 2 in neuralprophet/time_net.py: 'math' imported but unused]
from collections import OrderedDict
from functools import reduce
from typing import Dict, List, Optional, Union
@@ -56,7 +56,6 @@
n_lags: int = 0,
max_lags: int = 0,
ar_layers: Optional[List[int]] = [],
lagged_reg_layers: Optional[List[int]] = [],
compute_components_flag: bool = False,
metrics: Optional[np_types.CollectMetricsMode] = {},
id_list: List[str] = ["__df__"],
@@ -99,14 +98,6 @@
----
The default value is ``[]``, which initializes no hidden layers.

lagged_reg_layers : list
List of hidden layers (for covariate-Net).

Note
----
The default value is ``[]``, which initializes no hidden layers.


compute_components_flag : bool
Flag whether to compute the components of the model or not.
metrics : dict
@@ -283,12 +274,11 @@
nn.init.kaiming_normal_(lay.weight, mode="fan_in")

# Lagged regressors
self.lagged_reg_layers = lagged_reg_layers
self.config_lagged_regressors = config_lagged_regressors
if self.config_lagged_regressors is not None:
if self.config_lagged_regressors is not None and self.config_lagged_regressors.regressors is not None:
covar_net_layers = []
d_inputs = sum([covar.n_lags for _, covar in self.config_lagged_regressors.items()])
for d_hidden_i in self.lagged_reg_layers:
d_inputs = sum([covar.n_lags for _, covar in self.config_lagged_regressors.regressors.items()])
for d_hidden_i in self.config_lagged_regressors.layers:
covar_net_layers.append(nn.Linear(d_inputs, d_hidden_i, bias=True))
covar_net_layers.append(nn.ReLU())
d_inputs = d_hidden_i
@@ -325,16 +315,16 @@
"""
Get attributions of covariates network w.r.t. the model input.
"""
if self.config_lagged_regressors is not None:
if self.config_lagged_regressors is not None and self.config_lagged_regressors.regressors is not None:
# Accumulate the lags of the covariates
covar_splits = np.add.accumulate(
[covar.n_lags for _, covar in self.config_lagged_regressors.items()][:-1]
[covar.n_lags for _, covar in self.config_lagged_regressors.regressors.items()][:-1]
).tolist()
# If actual covariates are provided, use them to compute the attributions
if covar_input is not None:
covar_input = torch.cat([covar for _, covar in covar_input.items()], axis=1)
# Calculate the attributions w.r.t. the inputs
if self.lagged_reg_layers == []:
if self.config_lagged_regressors.layers == []:
attributions = self.covar_net[0].weight
else:
attributions = interprete_model(self, "covar_net", "forward_covar_net", covar_input)
@@ -345,7 +335,7 @@
axis=1,
)
# Combine attributions and covariate name
covar_attributions = dict(zip(self.config_lagged_regressors.keys(), attributions_split))
covar_attributions = dict(zip(self.config_lagged_regressors.regressors.keys(), attributions_split))
else:
covar_attributions = None
return covar_attributions
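The split indexing above is compact; a small worked example of what the accumulated lag counts produce (values invented for illustration):

    import numpy as np

    # Hypothetical: three covariates with 4, 2, and 3 lags.
    n_lags_per_covar = [4, 2, 3]
    covar_splits = np.add.accumulate(n_lags_per_covar[:-1]).tolist()  # [4, 6]
    weights = np.arange(18).reshape(2, 9)  # 9 = 4 + 2 + 3 attribution columns
    parts = np.split(weights, covar_splits, axis=1)
    # parts[0] has 4 columns, parts[1] has 2, parts[2] has 3: one per covariate.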
@@ -692,7 +682,11 @@
)
if self.n_lags > 0 and "lags" in inputs:
components["ar"] = components_raw["lags"]
if self.config_lagged_regressors is not None and "covariates" in inputs:
if (
self.config_lagged_regressors is not None
and self.config_lagged_regressors.regressors is not None
and "covariates" in inputs
):
# Combined forward pass
all_covariates = components_raw["covariates"]
# Calculate the contribution of each covariate on each forecast
@@ -1040,11 +1034,11 @@
A simple, general purpose, fully connected network
"""

def __init__(self, d_inputs, d_outputs, lagged_reg_layers=[]):
def __init__(self, d_inputs, d_outputs, layers=[]):
# Perform initialization of the pytorch superclass
super(DeepNet, self).__init__()
layers = []
for d_hidden_i in lagged_reg_layers:
for d_hidden_i in layers:
layers.append(nn.Linear(d_inputs, d_hidden_i, bias=True))
layers.append(nn.ReLU())
d_inputs = d_hidden_i
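One caveat with the rename: the `layers` parameter is immediately shadowed by the local accumulator `layers = []` shown above, which would leave the loop iterating over an empty list. If the merged code indeed reads this way, a non-shadowing sketch would be:

    def __init__(self, d_inputs, d_outputs, layers=[]):
        super(DeepNet, self).__init__()
        modules = []  # renamed accumulator so the `layers` argument is not shadowed
        for d_hidden_i in layers:
            modules.append(nn.Linear(d_inputs, d_hidden_i, bias=True))
            modules.append(nn.ReLU())
            d_inputs = d_hidden_i
        # ... remaining construction unchanged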
2 changes: 1 addition & 1 deletion tests/test_regularization.py
@@ -169,7 +169,7 @@ def test_regularization_lagged_regressor():
lagged_regressors_config = dict(lagged_regressors)

weights = m.model.get_covar_weights()
for name in m.config_lagged_regressors.keys():
for name in m.config_lagged_regressors.regressors.keys():
weight_average = np.average(weights[name].detach().numpy())

lagged_regressor_weight = lagged_regressors_config[name]